Spaces:
Sleeping
Sleeping
Update app.py
Browse files
removed threading causing Render issues
app.py
CHANGED
|
@@ -1,12 +1,5 @@
|
|
| 1 |
import dash
|
| 2 |
import dash_bootstrap_components as dbc
|
| 3 |
-
|
| 4 |
-
# Initialize Dash app with Bootstrap theme and Font Awesome
|
| 5 |
-
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY, 'https://use.fontawesome.com/releases/v5.8.1/css/all.css'])
|
| 6 |
-
|
| 7 |
-
# Create server variable
|
| 8 |
-
server = app.server
|
| 9 |
-
|
| 10 |
import pandas as pd
|
| 11 |
from dash import dcc, html
|
| 12 |
from dash.dash_table import DataTable
|
|
@@ -17,11 +10,17 @@ from sentence_transformers import SentenceTransformer
|
|
| 17 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 18 |
from gliner_spacy.pipeline import GlinerSpacy
|
| 19 |
import warnings
|
| 20 |
-
import threading
|
| 21 |
-
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
|
| 22 |
import os
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 26 |
CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
|
| 27 |
|
|
@@ -37,47 +36,31 @@ custom_spacy_config = {
|
|
| 37 |
# Model variables
|
| 38 |
nlp = None
|
| 39 |
sentence_model = None
|
| 40 |
-
model_lock = threading.Lock()
|
| 41 |
-
models_loaded = threading.Event()
|
| 42 |
|
| 43 |
# Function to load models
|
| 44 |
def load_models():
|
| 45 |
global nlp, sentence_model
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
|
| 50 |
-
if sentence_model is None:
|
| 51 |
-
sentence_model = SentenceTransformer('all-roberta-large-v1')
|
| 52 |
-
models_loaded.set()
|
| 53 |
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
# Function to ensure models are loaded
|
| 58 |
-
def ensure_models_loaded():
|
| 59 |
-
models_loaded.wait()
|
| 60 |
|
| 61 |
# Function to perform NER using GLiNER with spaCy
|
| 62 |
def perform_ner(text):
|
| 63 |
-
ensure_models_loaded()
|
| 64 |
doc = nlp(text)
|
| 65 |
return [(ent.text, ent.label_) for ent in doc.ents]
|
| 66 |
|
| 67 |
# Function to extract entities using GLiNER with spaCy
|
| 68 |
def extract_entities(text):
|
| 69 |
-
ensure_models_loaded()
|
| 70 |
doc = nlp(text)
|
| 71 |
entities = [(ent.text, ent.label_) for ent in doc.ents]
|
| 72 |
return entities if entities else ["No specific entities found"]
|
| 73 |
|
| 74 |
-
# Load Google's content categories
|
| 75 |
-
with open(CATEGORIES_FILE, 'r') as f:
|
| 76 |
-
google_categories = [line.strip() for line in f]
|
| 77 |
-
|
| 78 |
# Function to precompute category embeddings
|
| 79 |
def compute_category_embeddings():
|
| 80 |
-
ensure_models_loaded()
|
| 81 |
return sentence_model.encode(google_categories)
|
| 82 |
|
| 83 |
# Function to perform topic modeling using sentence transformers
|
|
@@ -163,7 +146,6 @@ def sort_by_keyword_feature(f):
|
|
| 163 |
|
| 164 |
# Optimized batch processing of keywords
|
| 165 |
def batch_process_keywords(keywords, batch_size=32):
|
| 166 |
-
ensure_models_loaded()
|
| 167 |
processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
|
| 168 |
|
| 169 |
# Precompute keyword embeddings once
|
|
@@ -207,6 +189,7 @@ def batch_process_keywords(keywords, batch_size=32):
|
|
| 207 |
|
| 208 |
# Main layout of the dashboard
|
| 209 |
app.layout = dbc.Container([
|
|
|
|
| 210 |
dbc.NavbarSimple(
|
| 211 |
children=[
|
| 212 |
dbc.NavItem(dbc.NavLink("About", href="#about")),
|
|
@@ -221,16 +204,22 @@ app.layout = dbc.Container([
|
|
| 221 |
|
| 222 |
dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
|
| 223 |
|
| 224 |
-
|
| 225 |
dbc.Col([
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
|
| 227 |
dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
|
| 228 |
-
dbc.Button('Submit', id='submit-button', color='primary', className='mb-3'),
|
| 229 |
dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
|
| 230 |
dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
|
| 231 |
], width=6)
|
| 232 |
], justify='center'),
|
| 233 |
-
|
| 234 |
# Loading component
|
| 235 |
dbc.Row([
|
| 236 |
dbc.Col([
|
|
@@ -351,6 +340,19 @@ app.layout = dbc.Container([
|
|
| 351 |
|
| 352 |
], fluid=True)
|
| 353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
# Callback for smooth scrolling
|
| 355 |
app.clientside_callback(
|
| 356 |
"""
|
|
|
|
| 1 |
import dash
|
| 2 |
import dash_bootstrap_components as dbc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
from dash import dcc, html
|
| 5 |
from dash.dash_table import DataTable
|
|
|
|
| 10 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 11 |
from gliner_spacy.pipeline import GlinerSpacy
|
| 12 |
import warnings
|
|
|
|
|
|
|
| 13 |
import os
|
| 14 |
|
| 15 |
+
# Suppress the warning whose message starts with "The sentencepiece tokenizer".
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")

# Build the Dash application with the Bootstrap DARKLY theme and the
# Font Awesome stylesheet.
app = dash.Dash(
    __name__,
    external_stylesheets=[
        dbc.themes.DARKLY,
        'https://use.fontawesome.com/releases/v5.8.1/css/all.css',
    ],
)

# Module-level handle on the app's underlying server object.
server = app.server

# Resolve data files relative to this script rather than the working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CATEGORIES_FILE = os.path.join(BASE_DIR, 'google_categories(v2).txt')
|
| 26 |
|
|
|
|
| 36 |
# Model variables
|
| 37 |
nlp = None
|
| 38 |
sentence_model = None
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# Function to load models
|
| 41 |
def load_models():
    """Load the spaCy/GLiNER pipeline and the sentence-embedding model.

    Populates the module-level ``nlp`` and ``sentence_model`` globals. Each
    model is built only while its global is still ``None``, so repeated calls
    (e.g. when the model-loading callback fires again) reuse the models that
    are already in memory instead of rebuilding them.
    """
    global nlp, sentence_model
    if nlp is None:
        # Configure the pipeline on a local name and publish it only once the
        # GLiNER component has been added.
        pipeline = spacy.blank("en")
        pipeline.add_pipe("gliner_spacy", config=custom_spacy_config)
        nlp = pipeline
    if sentence_model is None:
        sentence_model = SentenceTransformer('all-roberta-large-v1')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
# Load Google's content categories
|
| 48 |
+
# Read one category per line, stripping surrounding whitespace. The encoding
# is given explicitly so decoding does not depend on the host's locale
# (assumes the categories file is UTF-8 -- TODO confirm).
with open(CATEGORIES_FILE, 'r', encoding='utf-8') as f:
    google_categories = [line.strip() for line in f]
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# Function to perform NER using GLiNER with spaCy
|
| 52 |
def perform_ner(text):
    """Run the shared ``nlp`` pipeline over ``text`` and list its entities.

    Returns a list of ``(entity_text, entity_label)`` tuples, one per item in
    the resulting document's ``ents``; the list is empty when there are none.
    """
    pairs = []
    for span in nlp(text).ents:
        pairs.append((span.text, span.label_))
    return pairs
|
| 55 |
|
| 56 |
# Function to extract entities using GLiNER with spaCy
|
| 57 |
def extract_entities(text):
    """Return the entities found in ``text``, or a placeholder when none exist.

    The result is a list of ``(entity_text, entity_label)`` tuples taken from
    the document's ``ents``. When that list is empty, the single-element list
    ``["No specific entities found"]`` is returned instead.
    """
    found = [(span.text, span.label_) for span in nlp(text).ents]
    if not found:
        return ["No specific entities found"]
    return found
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# Function to precompute category embeddings
|
| 63 |
def compute_category_embeddings():
    """Encode every entry of ``google_categories`` with ``sentence_model``.

    Returns whatever ``sentence_model.encode`` yields for the full list.
    """
    category_embeddings = sentence_model.encode(google_categories)
    return category_embeddings
|
| 65 |
|
| 66 |
# Function to perform topic modeling using sentence transformers
|
|
|
|
| 146 |
|
| 147 |
# Optimized batch processing of keywords
|
| 148 |
def batch_process_keywords(keywords, batch_size=32):
|
|
|
|
| 149 |
processed_data = {'Keywords': [], 'Intent': [], 'NER Entities': [], 'Google Content Topics': []}
|
| 150 |
|
| 151 |
# Precompute keyword embeddings once
|
|
|
|
| 189 |
|
| 190 |
# Main layout of the dashboard
|
| 191 |
app.layout = dbc.Container([
|
| 192 |
+
dcc.Store(id='models-loaded', data=False),
|
| 193 |
dbc.NavbarSimple(
|
| 194 |
children=[
|
| 195 |
dbc.NavItem(dbc.NavLink("About", href="#about")),
|
|
|
|
| 204 |
|
| 205 |
dbc.Row(dbc.Col(html.H1('Keyword Intent, Named Entity Recognition (NER), & Google Topic Modeling Dashboard', className='text-center text-light mb-4 mt-4'))),
|
| 206 |
|
| 207 |
+
dbc.Row([
|
| 208 |
dbc.Col([
|
| 209 |
+
dbc.Alert(
|
| 210 |
+
"Models are loading. This may take a few minutes. Please wait...",
|
| 211 |
+
id="loading-alert",
|
| 212 |
+
color="info",
|
| 213 |
+
is_open=True,
|
| 214 |
+
),
|
| 215 |
dbc.Label('Enter keywords (one per line, maximum of 100):', className='text-light'),
|
| 216 |
dcc.Textarea(id='keyword-input', value='', style={'width': '100%', 'height': 100}),
|
| 217 |
+
dbc.Button('Submit', id='submit-button', color='primary', className='mb-3', disabled=True),
|
| 218 |
dbc.Alert(id='alert', is_open=False, duration=4000, color='danger', className='my-2'),
|
| 219 |
dbc.Alert(id='processing-alert', is_open=False, color='info', className='my-2'),
|
| 220 |
], width=6)
|
| 221 |
], justify='center'),
|
| 222 |
+
|
| 223 |
# Loading component
|
| 224 |
dbc.Row([
|
| 225 |
dbc.Col([
|
|
|
|
| 340 |
|
| 341 |
], fluid=True)
|
| 342 |
|
| 343 |
+
# Callback to load models and update the loading alert
|
| 344 |
+
@app.callback(
    [
        Output('models-loaded', 'data'),
        Output('loading-alert', 'is_open'),
        Output('submit-button', 'disabled'),
    ],
    [Input('models-loaded', 'data')],
)
def load_models_callback(loaded):
    """Load the models if needed, then hide the loading alert and enable Submit.

    ``loaded`` is the current value of the 'models-loaded' store. The return
    value is a 3-tuple: the new store value, the loading alert's ``is_open``
    flag (always ``False``), and the submit button's ``disabled`` flag
    (always ``False``).
    """
    if loaded:
        # Already flagged as loaded: pass the store value through unchanged.
        return loaded, False, False
    load_models()
    return True, False, False
|
| 355 |
+
|
| 356 |
# Callback for smooth scrolling
|
| 357 |
app.clientside_callback(
|
| 358 |
"""
|