import gradio as gr
import pandas as pd
import re
from huggingface_hub import InferenceClient
import plotly.express as px
from collections import Counter

# Initialize Hugging Face client
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
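# Expected message shape (illustrative assumption, not taken from real chat logs):
#   [12/01/2024, 10:30] +33 6 12 34 56 78: Jane Doe • Affiliation: Example University • Field of interest: NLP, ML • Thesis: ...
# parse_message() below relies on this "[timestamp] phone: content" layout with
# bullet-separated fields.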
def parse_message(message):
    """Extract information from a chat message using regex."""
    info = {}

    # Extract timestamp and phone number
    timestamp_match = re.search(r'\[(.*?)\]', message)
    phone_match = re.search(r'\] (.*?):', message)
    if timestamp_match and phone_match:
        info['timestamp'] = timestamp_match.group(1)
        info['phone'] = phone_match.group(1)

    # Extract the rest of the message (everything after the sender prefix).
    # Use the end of the phone match when available so colons inside the
    # timestamp do not truncate the content.
    if phone_match:
        content = message[phone_match.end():].strip()
    elif ':' in message:
        content = message.split(':', 1)[1].strip()
    else:
        return info

    # Extract name
    name_match = re.match(r'^([^•\n-]+)', content)
    if name_match:
        info['name'] = name_match.group(1).strip()

    # Extract affiliation
    affiliation_match = re.search(r'[Aa]ffiliation:?\s*([^•\n]+)', content)
    if affiliation_match:
        info['affiliation'] = affiliation_match.group(1).strip()

    # Extract research field/interests
    field_match = re.search(
        r'([Ff]ield of [Ii]nterest|[Dd]omaine de recherche|[Rr]esearch area|[Aa]reas of interest):?\s*([^•\n]+)',
        content
    )
    if field_match:
        info['research_field'] = field_match.group(2).strip()

    # Extract thesis topic
    thesis_match = re.search(r'[Tt]hesis:?\s*([^•\n]+)', content)
    if thesis_match:
        info['thesis_topic'] = thesis_match.group(1).strip()

    return info
def create_researcher_df(chat_history):
    """Convert chat messages to a structured DataFrame."""
    researchers = []
    messages = chat_history.split('\n')
    for message in messages:
        if message.strip():
            info = parse_message(message)
            if info:
                researchers.append(info)
    df = pd.DataFrame(researchers)
    return df
def analyze_research_fields(df):
    """Analyze and categorize research fields."""
    if 'research_field' not in df.columns:
        return pd.Series(dtype='int64')
    fields = df['research_field'].dropna()
    # Split comma-separated fields and flatten into a single list
    all_fields = [field.strip().lower() for entry in fields for field in entry.split(',')]
    return pd.Series(Counter(all_fields))
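# Illustrative behaviour (assumed input, shown only to aid reading):
#   df['research_field'] = ["NLP, computer vision", "nlp"]
#   -> pd.Series({'nlp': 2, 'computer vision': 1})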
def create_visualizations(df):
    """Create visualizations from the researcher data."""
    figures = []

    # 1. Affiliation distribution
    if 'affiliation' in df.columns and not df['affiliation'].empty:
        affiliation_counts = df['affiliation'].value_counts()
        fig_affiliation = px.pie(
            values=affiliation_counts.values,
            names=affiliation_counts.index,
            title='Distribution of Researchers by Affiliation'
        )
        figures.append(fig_affiliation)

    # 2. Research fields analysis
    field_counts = analyze_research_fields(df)
    if not field_counts.empty:
        fig_fields = px.bar(
            x=field_counts.index,
            y=field_counts.values,
            title='Popular Research Fields',
            labels={'x': 'Field', 'y': 'Count'}
        )
        figures.append(fig_fields)

    # Only a single gr.Plot output exists, so return the first figure built
    return figures[0] if figures else None
def analyze_chat_history(chat_history_text):
    """Analyze chat history and return DataFrame, plot, and summary."""
    if not chat_history_text.strip():
        return None, None, "No chat history provided."

    df = create_researcher_df(chat_history_text)
    if df.empty:
        return None, None, "No data could be extracted from the chat history."

    # Generate analysis summary
    summary = f"Analysis of {len(df)} researchers:\n"
    if 'affiliation' in df.columns:
        summary += f"- Institutions represented: {df['affiliation'].nunique()}\n"

    field_counts = analyze_research_fields(df)
    if not field_counts.empty:
        top_fields = field_counts.nlargest(3)
        summary += "- Top research fields:\n"
        for field, count in top_fields.items():
            summary += f"  • {field}: {count} researchers\n"

    # Create visualization
    fig = create_visualizations(df)
    return df, fig, summary
def process_message(
    message,
    chat_history,
    system_message,
    max_tokens,
    temperature,
    top_p
):
    """Process a message, clear the input box, and return the updated chat history."""
    try:
        # Build the LLM conversation from the system prompt and prior turns
        messages = [{"role": "system", "content": system_message}]
        for user_msg, bot_msg in chat_history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})

        response = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        bot_message = response.choices[0].message.content
        chat_history.append((message, bot_message))
        return "", chat_history
    except Exception as e:
        error_message = f"Error: {str(e)}"
        chat_history.append((message, error_message))
        return "", chat_history
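# Note: iterating over chat_history as (user, assistant) tuples assumes the
# Chatbot component's default tuple-based history format; with
# gr.Chatbot(type="messages") the history is a list of role/content dicts
# and this loop would need adjusting.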
with gr.Blocks(title="CohortBot") as demo:
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat History")
            msg = gr.Textbox(label="Message", placeholder="Type your message here...")
            with gr.Row():
                system_msg = gr.Textbox(value="You are a friendly Research Community Chatbot.", label="System message")
            with gr.Row():
                max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")

        with gr.Column(scale=1):
            chat_history_text = gr.Textbox(label="Chat History for Analysis", lines=10)
            analyze_btn = gr.Button("Analyze Chat History", variant="primary")
            with gr.Row():
                analysis_text = gr.Textbox(label="Analysis Summary", lines=4)
            with gr.Row():
                researcher_table = gr.Dataframe(label="Extracted Researcher Data")
            with gr.Row():
                plot = gr.Plot(label="Community Analysis")
    msg.submit(
        process_message,
        [msg, chatbot, system_msg, max_tokens, temperature, top_p],
        [msg, chatbot]
    )

    analyze_btn.click(
        analyze_chat_history,
        inputs=[chat_history_text],
        outputs=[researcher_table, plot, analysis_text]
    )
if __name__ == "__main__":
    demo.launch()