| from pattern.web import Twitter | |
| import preprocessor as p | |
| from pattern.en import sentiment | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from wordcloud import WordCloud | |
| import gradio as gr | |
| import plotly.express as px | |
| from collections import Counter | |
| from gensim.parsing.preprocessing import remove_stopwords | |
| import re | |
def top_10_plots(df):
    """Build bar charts of the 10 most frequent non-stopword words per class.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Cleaned Tweet' text column and a binary 'sentiment'
        column (1 = positive, 0 = negative).

    Returns
    -------
    tuple
        (positive-words plotly Figure, negative-words plotly Figure).
    """
    pos_text = ' '.join(list(df[df['sentiment'] == 1]['Cleaned Tweet']))
    neg_text = ' '.join(list(df[df['sentiment'] == 0]['Cleaned Tweet']))
    # Second stopword pass over the concatenated text, mirroring cleaner().
    pos_counter = Counter(remove_stopwords(pos_text).split())
    neg_counter = Counter(remove_stopwords(neg_text).split())
    # Bug fix: most_common(11) produced 11 bars although both titles say "Top 10".
    pos_most_occur = dict(pos_counter.most_common(10))
    neg_most_occur = dict(neg_counter.most_common(10))
    pos_top10 = px.bar(x=list(pos_most_occur.keys()), y=list(pos_most_occur.values()),
                       title="Top 10 words used in positively classified tweets",
                       labels=dict(x="Word", y="Count"))
    neg_top10 = px.bar(x=list(neg_most_occur.keys()), y=list(neg_most_occur.values()),
                       title="Top 10 words used in negatively classified tweets",
                       labels=dict(x="Word", y="Count"))
    return pos_top10, neg_top10
def cleaner(row):
    """Normalize one raw tweet for sentiment analysis.

    Runs the tweet-preprocessor clean pass, strips colons, lowercases,
    removes stopwords, and deletes any remaining punctuation.
    """
    cleaned = p.clean(row)
    cleaned = cleaned.replace(":", "").lower()
    cleaned = remove_stopwords(cleaned)
    # Keep only word characters and whitespace.
    return re.sub(r'[^\w\s]', '', cleaned)
def twitter_viz(keyword, Count):
    """Fetch tweets for `keyword`, classify their sentiment, and build the
    dataframes and plots that the Gradio UI displays.

    Parameters
    ----------
    keyword : str
        Search term / hashtag.
    Count : int
        Number of rows to display in the returned dataframes (the search
        itself always fetches up to 100 tweets so the plots have data).

    Returns
    -------
    tuple
        (raw-tweets df, cleaned df, sentiment pie chart, word-cloud figure,
         positive top-10 bar chart, negative top-10 bar chart)
    """
    twitter = Twitter()
    search_results = []
    clean = []
    sentiments = []  # fixed misspelled local 'sentimnets'
    subjectivity = []
    for results in twitter.search(keyword, count=100):
        search_results.append(results.text)
        clean_tweet = cleaner(results.text)
        clean.append(clean_tweet)
        # Hoisted: sentiment() was called twice per tweet for the same input.
        # NOTE(review): the [1:] slice drops the first character before
        # scoring — presumably a cleaning artifact workaround; confirm intent.
        score = sentiment(clean_tweet[1:])
        sentiments.append(1 if score[0] > 0 else 0)
        subjectivity.append(round(score[1], 2))
    zipped = list(zip(search_results, clean, sentiments, subjectivity))
    df_raw = pd.DataFrame(zipped, columns=['Tweet', 'Cleaned Tweet', 'sentiment', 'Subjectivity'])
    df = df_raw[['Cleaned Tweet', 'sentiment', 'Subjectivity']]
    df_raw = df_raw[['Tweet']]
    # Robustness fix: the original dict.pop(1)/pop(0) raised KeyError when
    # every tweet fell into a single class; default the missing class to 0.
    counts = df['sentiment'].value_counts()
    t = {'Positive': int(counts.get(1, 0)), 'Negative': int(counts.get(0, 0))}
    sent_dist = px.pie(df, values=list(t.values()), names=list(t.keys()),
                       title='Sentiment Distribution')
    sent_dist.update_traces(textposition='inside', textinfo='percent+label')
    text = ' '.join(list(df['Cleaned Tweet']))
    word_cloud = WordCloud(collocations=False, background_color='white').generate(text)
    wc = plt.figure()
    plt.imshow(word_cloud, interpolation='bilinear')
    plt.axis("off")
    plt.title("Word Cloud", loc='left', fontdict={'fontsize': 18})
    pos_top10, neg_top10 = top_10_plots(df)
    return df_raw.head(Count), df.head(Count), sent_dist, wc, pos_top10, neg_top10
# --- Gradio UI: lay out inputs, result tabs, and plots, then wire them ----
with gr.Blocks() as demo:
    with gr.Row():
        # Search term and how many result rows to display in the tables.
        keyword = gr.Textbox(placeholder="Enter A Hashtag")
        count = gr.Slider(1, 10, step=1,label='how many tweets you want to extract',interactive=True)
    with gr.Row():
        btn = gr.Button("Magic!")
    with gr.Tab("Data"):
        with gr.Row():
            # Raw (uncleaned) tweet text.
            df_raw = gr.Dataframe(interactive=False,wrap=True)
    with gr.Tab("Analysis"):
        with gr.Row():
            # Cleaned tweets with sentiment / subjectivity columns.
            df_rep = gr.Dataframe(interactive=False,wrap=True)
        with gr.Row():
            # Two-column plot grid: positive bar + pie on the left,
            # negative bar + word cloud on the right.
            with gr.Column(scale=1):
                pos_top10 = gr.Plot()
                sent_dist = gr.Plot()
            with gr.Column(scale=1):
                neg_top10 = gr.Plot()
                wc = gr.Plot()
    # Output order must match twitter_viz's return tuple.
    btn.click(fn=twitter_viz, inputs=[keyword,count], outputs=[df_raw,df_rep,sent_dist,wc,pos_top10,neg_top10])
demo.launch()