Spaces:

Alishbah
/

AIPlagiarism

Sleeping

App Files Files Community

Alishbah committed on Feb 17, 2025

Commit

4d2cd01

verified ·

1 Parent(s): d9ae89a

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -22

app.py CHANGED Viewed

@@ -1,31 +1,157 @@
 import streamlit as st
 from transformers import pipeline
 @st.cache_resource
-def load_model():
-    """
-    Loads a pre-trained sentiment analysis model from Hugging Face Transformers.
-    """
-    model = pipeline("sentiment-analysis")  # You can replace this with your desired model
-    return model
 def main():
-    """
-    Main function to run the Streamlit app.
-    """
-    st.title("Hugging Face Model Demo")
-    # Load the model
-    model = load_model()
-    # Create an input text box
-    input_text = st.text_input("Enter your text", "")
-    # Create a button to trigger model inference
-    if st.button("Analyze"):
-        # Perform inference using the loaded model
-        result = model(input_text)
-        st.write("Prediction:", result[0]['label'], "| Score:", result[0]['score'])
 if __name__ == "__main__":
     main()

 import streamlit as st
 from transformers import pipeline
+import io
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+from pdfminer.converter import TextConverter
+from pdfminer.layout import LAParams
+from pdfminer.pdfpage import PDFPage
+from docx import Document
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+# Functions for file processing
+def extract_text_from_pdf(pdf_file):
+    """Extract the text of every page of a PDF using pdfminer.
+
+    Args:
+        pdf_file: Binary file-like object containing the PDF; it is handed
+            directly to ``PDFPage.get_pages``.
+
+    Returns:
+        str: Everything the ``TextConverter`` wrote for all pages. May be
+        empty or whitespace-only when the pages carry no text layer.
+
+    Raises:
+        Exception: Any error raised by pdfminer while parsing is propagated
+            to the caller; the converter and buffer are still closed.
+    """
+    resource_manager = PDFResourceManager()
+    with io.StringIO() as output_string:
+        device = TextConverter(resource_manager, output_string, codec='utf-8', laparams=LAParams())
+        try:
+            interpreter = PDFPageInterpreter(resource_manager, device)
+            for page in PDFPage.get_pages(pdf_file, caching=True, check_extractable=True):
+                interpreter.process_page(page)
+            # Read the buffer before the cleanup below closes it.
+            return output_string.getvalue()
+        finally:
+            # Release the converter even when a page fails to parse.
+            device.close()
+def extract_text_from_docx(docx_file):
+    """Return the text of a .docx document, one line per paragraph.
+
+    Args:
+        docx_file: Path or file-like object accepted by ``docx.Document``.
+
+    Returns:
+        str: The ``text`` of each entry in ``doc.paragraphs`` joined with
+        newlines. Only ``doc.paragraphs`` is read.
+    """
+    paragraphs = Document(docx_file).paragraphs
+    return '\n'.join(paragraph.text for paragraph in paragraphs)
+# Functions for AI and Plagiarism detection
+@st.cache_resource
+def load_ai_detection_model():
+    """Build the text-classification pipeline used for AI-content detection.
+
+    Returns:
+        The ``transformers`` pipeline for ``roberta-base-openai-detector``,
+        or ``None`` if it could not be created (the error is reported on the
+        page with ``st.error``).
+    """
+    try:
+        classifier = pipeline("text-classification", model="roberta-base-openai-detector")
+    except Exception as e:
+        st.error(f"Error loading AI detection model: {e}")
+        return None
+    return classifier
+def detect_ai_content(text, ai_detection_model):
+    """Run the AI-content classifier on *text*.
+
+    Args:
+        text: The document text to classify.
+        ai_detection_model: Callable classifier (a ``transformers``
+            text-classification pipeline in this app). It must accept the
+            ``truncation`` and ``max_length`` keyword arguments.
+
+    Returns:
+        Whatever the classifier returns (``main`` reads ``result[0]['label']``
+        and ``result[0]['score']``), or ``None`` if classification failed;
+        the failure is reported on the page with ``st.error``.
+    """
+    try:
+        # Uploaded documents are routinely longer than a classifier's input
+        # window; without truncation the call raises and the report falls back
+        # to "Not available". 512 matches the limit used by plagiarism_check.
+        return ai_detection_model(text, truncation=True, max_length=512)
+    except Exception as e:
+        st.error(f"Error during AI content detection: {e}")
+        return None
 @st.cache_resource
+def load_plagiarism_model(model_name="jpwahle/longformer-base-plagiarism-detection"):
+    """Load the tokenizer and sequence-classification model for plagiarism checks.
+
+    Args:
+        model_name: Hugging Face model identifier passed to both
+            ``AutoTokenizer.from_pretrained`` and
+            ``AutoModelForSequenceClassification.from_pretrained``.
+
+    Returns:
+        tuple: ``(tokenizer, model)`` on success, ``(None, None)`` on failure
+        (the error is reported on the page with ``st.error``).
+    """
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForSequenceClassification.from_pretrained(model_name)
+        return tokenizer, model
+    except Exception as e:
+        st.error(f"Error loading plagiarism detection model: {e}")
+        # Always return a pair: callers unpack two values, and unpacking a
+        # bare None would raise TypeError instead of degrading gracefully.
+        return None, None
+def plagiarism_check(text, tokenizer, model):
+    """Classify *text* with the plagiarism model and return the winning class.
+
+    Args:
+        text: The document text to classify.
+        tokenizer: Callable that encodes *text* into keyword inputs for *model*.
+        model: Callable whose result exposes a ``logits`` tensor.
+
+    Returns:
+        int | None: Index of the highest logit, or ``None`` if anything
+        failed (the error is reported on the page with ``st.error``).
+    """
+    try:
+        encoded = tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=512,
+        )
+        # Inference only, so gradient tracking is switched off.
+        with torch.no_grad():
+            logits = model(**encoded).logits
+            best_class = logits.argmax(dim=-1).item()
+        return best_class
+    except Exception as e:
+        st.error(f"Error during plagiarism detection: {e}")
+        return None
+# Streamlit app
 def main():
+    """Render the Streamlit page: upload a PDF/DOCX, analyse it, show a report.
+
+    Flow: load both models, style and show the file uploader, extract text
+    from the uploaded file, run AI-content detection and the plagiarism
+    check on that text, then display the two results side by side. Every
+    failure is reported on the page and ends the run early; nothing is
+    returned.
+    """
+    st.title("AI & Plagiarism Detection")
+    # Load models
+    ai_detection_model = load_ai_detection_model()
+    # The loader can signal failure with a bare None; fall back to a pair so
+    # the unpacking cannot raise TypeError.
+    tokenizer, plagiarism_model = load_plagiarism_model() or (None, None)
+    # File uploader with custom styling
+    st.markdown(
+        """
+        <style>
+        .stFileUploader > div > div:nth-child(1) > div > button {
+            background-color: #4CAF50;
+            color: white;
+            padding: 10px 24px;
+            border: none;
+            border-radius: 4px;
+            cursor: pointer;
+        }
+        .stFileUploader > div > div:nth-child(1) > div > button:hover {
+            background-color: #367C39;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+    uploaded_file = st.file_uploader("Upload a file (PDF, DOCX)", type=["pdf", "docx"], help="Maximum file size: 1GB")
+    if uploaded_file is None:
+        return
+    max_bytes = 1_000_000_000  # 1 GB limit
+    # Use the reported size; getvalue() would copy the whole upload just to
+    # measure it.
+    if uploaded_file.size > max_bytes:
+        st.error("File size exceeds the 1GB limit.")
+        return
+    try:
+        if uploaded_file.type == "application/pdf":
+            raw_text = extract_text_from_pdf(uploaded_file)
+        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+            raw_text = extract_text_from_docx(uploaded_file)
+        else:
+            st.error("Unsupported file type")
+            return
+    except Exception as e:
+        st.error(f"Error processing file: {e}")
+        return
+    # Tell the user when there is nothing to analyse instead of rendering an
+    # empty page; whitespace-only output counts as empty too.
+    if not raw_text or not raw_text.strip():
+        st.warning("No extractable text was found in the uploaded file.")
+        return
+    # AI Detection
+    ai_result = None
+    if ai_detection_model is not None:
+        ai_result = detect_ai_content(raw_text, ai_detection_model)
+    # Plagiarism Check
+    plagiarism_result = None
+    if tokenizer is not None and plagiarism_model is not None:
+        plagiarism_result = plagiarism_check(raw_text, tokenizer, plagiarism_model)
+    # Report Generation
+    st.subheader("Analysis Report")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("AI Detection:")
+        if ai_result:
+            ai_label = ai_result[0]['label']
+            ai_score = ai_result[0]['score']
+            # NOTE(review): the score presumably belongs to the predicted
+            # label, not to "AI" specifically - confirm against the model's
+            # label set before presenting it as an AI-content percentage.
+            st.metric(label="AI Content", value=f"{ai_score:.2%}", delta=ai_label)
+        else:
+            st.write("Not available")
+    with col2:
+        st.markdown("Plagiarism Detection:")
+        # 0 is a valid class index, so test against None rather than truthiness.
+        if plagiarism_result is not None:
+            plagiarism_status = "Plagiarized" if plagiarism_result == 1 else "Original"
+            st.metric(label="Plagiarism", value=plagiarism_status)
+        else:
+            st.write("Not available")
+# Start the app only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
     main()