Spaces:

student-abdullah
/

SARS

Build error

App Files Files Community

student-abdullah committed on Aug 2, 2024

Commit

ee275ef

0 Parent(s):

Initial commit

Browse files

Files changed (9) hide show

.idea/.gitignore +3 -0
.idea/inspectionProfiles/Project_Default.xml +14 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/senti-analy-repo.iml +8 -0
.idea/vcs.xml +6 -0
app.py +182 -0
requirements.txt +0 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,14 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="1">
+            <item index="0" class="java.lang.String" itemvalue="tf_keras" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (senti-analy-repo)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/senti-analy-repo.iml" filepath="$PROJECT_DIR$/.idea/senti-analy-repo.iml" />
+    </modules>
+  </component>
+</project>

.idea/senti-analy-repo.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.11 (senti-analy-repo)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

app.py ADDED Viewed

	@@ -0,0 +1,182 @@

+import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud, STOPWORDS
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+from reportlab.lib.units import inch
+from io import BytesIO
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import chardet
+import os
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")
+model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")
+# Function to analyze sentiment
+def analyze_sentiment(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    scores = outputs.logits.softmax(dim=1)
+    labels = ['NEGATIVE', 'POSITIVE']
+    score, label = torch.max(scores, dim=1)
+    return {"label": labels[label.item()], "score": score.item()}
+# Function to detect file encoding
+def detect_encoding(file):
+    rawdata = file.read()
+    result = chardet.detect(rawdata)
+    return result['encoding']
+def generate_pdf(pie_chart_path, pos_wordcloud_path, neg_wordcloud_path):
+    pdf_output = BytesIO()
+    pdf_height = 16.5 * inch  # Total vertical height calculated
+    pdf_width = 8.27 * inch  # A4 width
+    c = canvas.Canvas(pdf_output, pagesize=(pdf_width, pdf_height))
+    # Set starting vertical position
+    y_position = pdf_height - 1 * inch
+    # Add title
+    c.setFont("Helvetica-Bold", 20)
+    c.drawString(2.2 * inch, y_position, "Sentiment Analysis Report")
+    # Update vertical position after title
+    y_position -= 2 * inch
+    # Add pie chart with width 5 inches and height double the width
+    pie_chart_width = 5 * inch
+    pie_chart_height = 4 * inch
+    c.drawImage(pie_chart_path, 1.5 * inch, y_position - pie_chart_height, width=pie_chart_width, height=pie_chart_height)
+    # Update vertical position after pie chart
+    y_position -= (pie_chart_height + 1 * inch)  # Add some spacing
+    # Add Positive Keywords heading
+    c.setFont("Helvetica-Bold", 12)
+    c.drawString(3 * inch, y_position, "Positive Keywords")
+    # Add positive word cloud
+    c.drawImage(pos_wordcloud_path, 1 * inch, y_position - 3.3 * inch, width=6 * inch, height=3 * inch)  # 2:1 ratio
+    # Update vertical position after positive word cloud
+    y_position -= (3 * inch + 1 * inch)  # Add some spacing
+    # Add Negative Keywords heading
+    c.setFont("Helvetica-Bold", 12)
+    c.drawString(3 * inch, y_position, "Negative Keywords")
+    # Add negative word cloud
+    c.drawImage(neg_wordcloud_path, 1 * inch, y_position - 3.3 * inch, width=6 * inch, height=3 * inch)  # 2:1 ratio
+    c.save()
+    pdf_output.seek(0)
+    return pdf_output
+# Streamlit UI
+st.title("Sentiment Analysis and Reporting")
+# Initialize session state for button visibility
+if 'show_pdf_download' not in st.session_state:
+    st.session_state.show_pdf_download = False
+# Sidebar for encoding detection and reset button
+st.sidebar.header("File Encoding Checker")
+# File uploader in the sidebar
+uploaded_file = st.sidebar.file_uploader("Upload CSV file for Encoding Check", type=["csv"])
+if uploaded_file:
+    # Detect the encoding
+    encoding = detect_encoding(uploaded_file)
+    st.sidebar.write(f"Detected encoding: {encoding}")
+# Reset button in the sidebar
+if st.sidebar.button("Reset Analysis"):
+    if os.path.exists("sentiment_pie_chart.png"):
+        os.remove("sentiment_pie_chart.png")
+    if os.path.exists("pos_wordcloud.png"):
+        os.remove("pos_wordcloud.png")
+    if os.path.exists("neg_wordcloud.png"):
+        os.remove("neg_wordcloud.png")
+    st.sidebar.write("Files deleted. Please re-upload a file to start over.")
+# File uploader for sentiment analysis
+uploaded_file = st.file_uploader("Upload CSV file for Sentiment Analysis", type=["csv"])
+# Dropdown for encoding specification in the main panel
+encodings = ['utf-8', 'latin-1', 'ISO-8859-1', 'ASCII', 'UTF-16', 'UTF-32', 'ANSI', "Windows-1251", 'Windows-1252']
+user_encoding = st.selectbox("Select Encoding", options=encodings, index=0)
+# Button to start processing
+if st.button("Go"):
+    if uploaded_file:
+        try:
+            # Load the CSV file into DataFrame with specified encoding
+            uploaded_file.seek(0)  # Reset the file pointer to the beginning
+            df = pd.read_csv(uploaded_file, encoding=user_encoding)
+        except UnicodeDecodeError:
+            st.error("Error decoding the file. Please specify the correct encoding.")
+        else:
+            # Check if the DataFrame has exactly one column
+            if df.shape[1] != 1:
+                st.warning("The CSV file should only contain one column with review data.")
+            else:
+                # Rename the column to 'review'
+                df.columns = ['review']
+                # Clean up the DataFrame
+                df['review'] = df['review'].astype(str).str.strip()
+                df = df[df['review'].apply(len) <= 512]
+                # Apply sentiment analysis
+                df['sentiment'] = df['review'].apply(analyze_sentiment)
+                df['sentiment_label'] = df['sentiment'].apply(lambda x: x['label'])
+                df['sentiment_score'] = df['sentiment'].apply(lambda x: x['score'])
+                # Drop the original 'sentiment' column
+                df = df.drop(columns=['sentiment'])
+                # Pie chart data
+                sentiment_counts = df['sentiment_label'].value_counts()
+                # Create pie chart
+                fig, ax = plt.subplots()
+                ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=45)
+                ax.set_title('Distribution of Sentiment')
+                pie_chart_path = "sentiment_pie_chart.png"
+                plt.savefig(pie_chart_path)
+                # Create word clouds
+                stopwords = set(STOPWORDS)
+                pos_reviews = df[df['sentiment_label'] == 'POSITIVE']['review'].str.cat(sep=' ')
+                neg_reviews = df[df['sentiment_label'] == 'NEGATIVE']['review'].str.cat(sep=' ')
+                pos_wordcloud = WordCloud(max_font_size=80, max_words=10, background_color='white', stopwords=stopwords).generate(pos_reviews)
+                neg_wordcloud = WordCloud(max_font_size=80, max_words=10, background_color='white', stopwords=stopwords).generate(neg_reviews)
+                # Save word clouds to files
+                pos_wordcloud_path = "pos_wordcloud.png"
+                neg_wordcloud_path = "neg_wordcloud.png"
+                pos_wordcloud.to_file(pos_wordcloud_path)
+                neg_wordcloud.to_file(neg_wordcloud_path)
+                # Create PDF
+                pdf_output = generate_pdf(pie_chart_path, pos_wordcloud_path, neg_wordcloud_path)
+                # Display options
+                st.write("Processing complete!")
+                # Update session state to show the appropriate buttons
+                st.session_state.show_pdf_download = True
+                # Display buttons
+                download_pdf = st.download_button("Download PDF Report", pdf_output, file_name="sentiment_analysis_report.pdf", mime="application/pdf")
+    else:
+        st.info("Please upload a CSV file to get started.")

requirements.txt ADDED Viewed

Binary file (3.18 kB). View file