Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,10 @@ from gliner_spacy.pipeline import GlinerSpacy
|
|
| 12 |
import warnings
|
| 13 |
import os
|
| 14 |
import gc
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Suppress specific warnings
|
| 17 |
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
|
|
@@ -42,9 +46,12 @@ def get_nlp():
|
|
| 42 |
global nlp
|
| 43 |
if nlp is None:
|
| 44 |
try:
|
|
|
|
| 45 |
nlp = spacy.blank("en")
|
| 46 |
nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
|
|
|
|
| 47 |
except Exception as e:
|
|
|
|
| 48 |
raise
|
| 49 |
return nlp
|
| 50 |
|
|
@@ -185,7 +192,9 @@ def batch_process_keywords(keywords, batch_size=8):
|
|
| 185 |
category_embeddings = compute_category_embeddings()
|
| 186 |
|
| 187 |
for i in range(0, len(keywords), batch_size):
|
|
|
|
| 188 |
batch = keywords[i:i+batch_size]
|
|
|
|
| 189 |
batch_embeddings = sentence_model.encode(batch, batch_size=batch_size, show_progress_bar=False)
|
| 190 |
|
| 191 |
intents = [sort_by_keyword_feature(kw) for kw in batch]
|
|
@@ -212,9 +221,9 @@ def batch_process_keywords(keywords, batch_size=8):
|
|
| 212 |
|
| 213 |
# Force garbage collection
|
| 214 |
gc.collect()
|
| 215 |
-
|
| 216 |
except Exception as e:
|
| 217 |
-
|
| 218 |
|
| 219 |
return processed_data
|
| 220 |
|
|
@@ -379,7 +388,6 @@ app.layout = dbc.Container([
|
|
| 379 |
|
| 380 |
], fluid=True)
|
| 381 |
|
| 382 |
-
# Combined callback
|
| 383 |
@app.callback(
|
| 384 |
[Output('models-loaded', 'data'),
|
| 385 |
Output('submit-button', 'disabled'),
|
|
@@ -398,13 +406,16 @@ def combined_callback(loaded, n_clicks, keyword_input):
|
|
| 398 |
ctx = callback_context
|
| 399 |
triggered_id = ctx.triggered[0]['prop_id'].split('.')[0]
|
| 400 |
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
|
|
|
|
|
|
|
|
|
| 408 |
|
| 409 |
def handle_model_loading(loaded):
|
| 410 |
if not loaded:
|
|
@@ -430,7 +441,9 @@ def handle_keyword_processing(n_clicks, keyword_input):
|
|
| 430 |
[Input('processed-data', 'data')]
|
| 431 |
)
|
| 432 |
def update_bar_chart(processed_data):
|
|
|
|
| 433 |
if processed_data is None:
|
|
|
|
| 434 |
return {
|
| 435 |
'data': [],
|
| 436 |
'layout': {
|
|
@@ -446,6 +459,7 @@ def update_bar_chart(processed_data):
|
|
| 446 |
}
|
| 447 |
|
| 448 |
df = pd.DataFrame(processed_data)
|
|
|
|
| 449 |
intent_counts = df['Intent'].value_counts().reset_index()
|
| 450 |
intent_counts.columns = ['Intent', 'Count']
|
| 451 |
|
|
@@ -526,4 +540,4 @@ def download_csv(n_clicks, processed_data):
|
|
| 526 |
|
| 527 |
# Modified the server run command for HuggingFace Spaces
|
| 528 |
if __name__ == "__main__":
|
| 529 |
-
app.run_server(debug=False, host="0.0.0.0", port=7860)
|
|
|
|
| 12 |
import warnings
|
| 13 |
import os
|
| 14 |
import gc
|
| 15 |
+
import logging
|
| 16 |
+
|
| 17 |
+
logging.basicConfig(level=logging.INFO)
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
|
| 20 |
# Suppress specific warnings
|
| 21 |
warnings.filterwarnings("ignore", message="The sentencepiece tokenizer")
|
|
|
|
| 46 |
global nlp
|
| 47 |
if nlp is None:
|
| 48 |
try:
|
| 49 |
+
logger.info("Loading spaCy model")
|
| 50 |
nlp = spacy.blank("en")
|
| 51 |
nlp.add_pipe("gliner_spacy", config=custom_spacy_config)
|
| 52 |
+
logger.info("spaCy model loaded successfully")
|
| 53 |
except Exception as e:
|
| 54 |
+
logger.exception("Error loading spaCy model")
|
| 55 |
raise
|
| 56 |
return nlp
|
| 57 |
|
|
|
|
| 192 |
category_embeddings = compute_category_embeddings()
|
| 193 |
|
| 194 |
for i in range(0, len(keywords), batch_size):
|
| 195 |
+
logger.info(f"Processing {len(keywords)} keywords")
|
| 196 |
batch = keywords[i:i+batch_size]
|
| 197 |
+
logger.info(f"Processing batch {i//batch_size + 1}")
|
| 198 |
batch_embeddings = sentence_model.encode(batch, batch_size=batch_size, show_progress_bar=False)
|
| 199 |
|
| 200 |
intents = [sort_by_keyword_feature(kw) for kw in batch]
|
|
|
|
| 221 |
|
| 222 |
# Force garbage collection
|
| 223 |
gc.collect()
|
| 224 |
+
logger.info("Keyword processing completed successfully")
|
| 225 |
except Exception as e:
|
| 226 |
+
logger.exception("An error occurred in batch_process_keywords")
|
| 227 |
|
| 228 |
return processed_data
|
| 229 |
|
|
|
|
| 388 |
|
| 389 |
], fluid=True)
|
| 390 |
|
|
|
|
| 391 |
@app.callback(
|
| 392 |
[Output('models-loaded', 'data'),
|
| 393 |
Output('submit-button', 'disabled'),
|
|
|
|
| 406 |
ctx = callback_context
|
| 407 |
triggered_id = ctx.triggered[0]['prop_id'].split('.')[0]
|
| 408 |
|
| 409 |
+
try:
|
| 410 |
+
if triggered_id == 'models-loaded':
|
| 411 |
+
return handle_model_loading(loaded)
|
| 412 |
+
elif triggered_id == 'submit-button':
|
| 413 |
+
return handle_keyword_processing(n_clicks, keyword_input)
|
| 414 |
+
else:
|
| 415 |
+
return loaded, False, False, "", "success", None, '', False, ''
|
| 416 |
+
except Exception as e:
|
| 417 |
+
logger.exception("An error occurred in combined_callback")
|
| 418 |
+
return loaded, False, True, f"An error occurred: {str(e)}", "danger", None, '', False, ''
|
| 419 |
|
| 420 |
def handle_model_loading(loaded):
|
| 421 |
if not loaded:
|
|
|
|
| 441 |
[Input('processed-data', 'data')]
|
| 442 |
)
|
| 443 |
def update_bar_chart(processed_data):
|
| 444 |
+
logger.info("Updating bar chart")
|
| 445 |
if processed_data is None:
|
| 446 |
+
logger.info("No processed data available")
|
| 447 |
return {
|
| 448 |
'data': [],
|
| 449 |
'layout': {
|
|
|
|
| 459 |
}
|
| 460 |
|
| 461 |
df = pd.DataFrame(processed_data)
|
| 462 |
+
logger.info(f"Data shape: {df.shape}")
|
| 463 |
intent_counts = df['Intent'].value_counts().reset_index()
|
| 464 |
intent_counts.columns = ['Intent', 'Count']
|
| 465 |
|
|
|
|
| 540 |
|
| 541 |
# Modified the server run command for HuggingFace Spaces
|
| 542 |
if __name__ == "__main__":
|
| 543 |
+
app.run_server(debug=False, host="0.0.0.0", port=7860)
|