| import gradio as gr | |
| import re | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
| import numpy as np | |
# Checkpoint identifier shared by the classifier and its tokenizer; both are
# resolved through `from_pretrained`.
_MODEL_ID = "zionia/email-phishing-detector"

# Loaded once at import time so every request reuses the same objects.
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)

# Text-classification pipeline used by detect_phishing().
pipe = pipeline(
    "text-classification",
    tokenizer=tokenizer,
    model=model,
)
# Words highlighted by highlight_suspicious_text() and listed under
# "Suspicious Keywords" by extract_features().
PHISHY_KEYWORDS = [
    "verify", "urgent", "login", "click",
    "bank", "account", "update", "password",
    "security", "alert", "confirm", "immediately",
]

# Substrings whose presence makes extract_features() report an attachment.
ATTACHMENT_KEYWORDS = [
    ".xls", ".xlsx", ".pdf", ".doc", ".docx",
    "attachment", "attached", "file",
]

# Substrings whose presence makes extract_features() report operational terms.
OPERATIONAL_KEYWORDS = [
    "nom", "actual", "vols", "schedule",
    "attached", "report", "data", "summary",
]

# Lower-case month abbreviations and full month names, plus the years
# 2001 through 2025 as strings ("may" appears in both month groups; the set
# keeps a single copy).
DATE_RELATED = {
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
}.union(str(year) for year in range(2001, 2026))
def detect_phishing(email_text):
    """Classify an email and return a one-line, human-readable verdict.

    Args:
        email_text: Email content as a string. ``None`` and blank strings
            are accepted and treated as "nothing to analyse".

    Returns:
        For non-blank input, a message naming the predicted class together
        with the pipeline's score formatted as a percentage. For blank
        input, a short notice; the model is not invoked.
    """
    # A cleared textbox still fires the change event. Running the model on an
    # empty string would produce a verdict with a confidence figure for no
    # content at all, so answer without inference instead.
    if not email_text or not email_text.strip():
        return "No email text provided."

    # truncation=True is forwarded to the tokenizer so that emails longer than
    # the model's maximum sequence length are clipped rather than rejected.
    prediction = pipe(email_text, truncation=True)[0]
    score = prediction["score"]

    # "LABEL_1" is the class this app reports as phishing; any other label is
    # reported as legitimate.
    if prediction["label"] == "LABEL_1":
        return f"Phishing detected! (Confidence: {score:.2%})"
    return f"Legitimate email (Confidence: {score:.2%})"
def highlight_suspicious_text(email_text, keywords=None):
    """Return *email_text* as HTML with suspicious keywords wrapped in ``<mark>``.

    Keywords are matched case-insensitively as whole words (``\\b`` on both
    sides). Everything taken from the email, matched or not, is HTML-escaped,
    so markup inside the email is displayed as text instead of being
    interpreted by the component that renders the result.

    Args:
        email_text: Email content as a string; ``None`` or ``""`` yields ``""``.
        keywords: Optional iterable of words to highlight. Defaults to the
            module-level ``PHISHY_KEYWORDS``.

    Returns:
        An HTML fragment as a string.
    """
    import html  # function-scope import keeps the file's import block untouched

    if not email_text:
        return ""
    if keywords is None:
        keywords = PHISHY_KEYWORDS

    alternatives = [re.escape(word) for word in keywords if word]
    if not alternatives:
        return html.escape(email_text)

    # One combined pattern scanned once over the ORIGINAL text: markup that we
    # insert is never searched again, so a keyword such as "mark" or "style"
    # cannot corrupt tags produced for an earlier match.
    pattern = re.compile(r"\b(" + "|".join(alternatives) + r")\b", re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(email_text):
        # Text between the previous match and this one, escaped verbatim.
        pieces.append(html.escape(email_text[cursor:match.start()]))
        pieces.append(
            '<mark style="background-color: #ffcccc">'
            + html.escape(match.group(1))
            + "</mark>"
        )
        cursor = match.end()
    pieces.append(html.escape(email_text[cursor:]))
    return "".join(pieces)
def extract_features(email_text):
    """Summarise simple surface statistics of an email as ``key: value`` lines.

    Args:
        email_text: Email content as a string; ``None`` is treated as ``""``.

    Returns:
        A newline-separated string with one ``name: value`` line per feature,
        in this order: text length, whitespace-token count, average token
        length (two decimals), number of date references, whether an
        attachment marker occurs, whether an operational term occurs, and the
        suspicious keywords found (or ``None``).
    """
    email_text = email_text or ""
    # Lower-case once; every case-insensitive check below reuses it.
    lowered = email_text.lower()

    tokens = lowered.split()
    token_count = len(tokens)
    avg_token_len = sum(map(len, tokens)) / token_count if token_count else 0

    # Dates are matched on alphanumeric runs rather than whitespace tokens so
    # that "May," or "(2024)" still count despite the attached punctuation.
    date_tokens = sum(
        1 for word in re.findall(r"[a-z0-9]+", lowered) if word in DATE_RELATED
    )

    # These two are plain substring tests against the lower-cased text.
    attachment_present = any(marker in lowered for marker in ATTACHMENT_KEYWORDS)
    operational_terms = any(term in lowered for term in OPERATIONAL_KEYWORDS)

    # Whole-word, case-insensitive matching: the same rule
    # highlight_suspicious_text() applies, so the keywords listed here are
    # exactly the ones that get highlighted.
    phishy_terms = [
        word
        for word in PHISHY_KEYWORDS
        if re.search(rf"\b{re.escape(word)}\b", email_text, re.IGNORECASE)
    ]

    features = {
        "Text Length": len(email_text),
        "Token Count": token_count,
        "Avg Token Length": round(avg_token_len, 2),
        "Date References": date_tokens,
        "Contains Attachment": "Yes" if attachment_present else "No",
        "Operational Terms Present": "Yes" if operational_terms else "No",
        "Suspicious Keywords": ", ".join(phishy_terms) if phishy_terms else "None",
    }
    return "\n".join(f"{name}: {value}" for name, value in features.items())
# Build the UI: one input textbox, three result tabs, and a set of clickable
# example emails. Every change to the input re-runs all three analyses.
with gr.Blocks(title="Email Phishing Detector") as app:
    gr.Markdown("# Zion's Email Phishing Detector")
    gr.Markdown(
        "Use this tool to analyse suspicious emails. It will tell you if the email is legitimate or a phishing attempt!"
    )

    with gr.Row():
        email_input = gr.Textbox(
            label="Email Text",
            placeholder="Paste the email content here...",
            lines=10,
        )

    # NOTE(review): the source's indentation was lost; the tabs are assumed to
    # sit below the input row rather than inside it. Confirm against the
    # intended layout.
    with gr.Tabs():
        with gr.TabItem("Detection"):
            detection_output = gr.Textbox(label="Result")
        with gr.TabItem("Suspicious Highlights"):
            suspicious_output = gr.HTML(label="Suspicious Keywords Highlighted")
        with gr.TabItem("Feature Breakdown"):
            feature_output = gr.Textbox(label="Analysed Features", lines=8)

    # Sample emails offered through gr.Examples; each inner list holds the
    # value for the single input component.
    examples = [
        ["Dear customer, your account has been compromised. Click here to verify your identity: http://bit.ly/2XyZABC"],
        ["Hi team, please review the attached document for our quarterly meeting tomorrow."],
        ["URGENT: Your PayPal account will be suspended unless you confirm your details now!"],
        ["Hello John, just following up on our conversation yesterday about the project timeline."],
        ["You've won a $1000 Amazon gift card! Click to claim your prize within 24 hours!"],
    ]
    gr.Examples(examples=examples, inputs=email_input)

    def full_analysis(email_text):
        """Run the three analyses and return their results in output order."""
        verdict = detect_phishing(email_text)
        highlighted = highlight_suspicious_text(email_text)
        features = extract_features(email_text)
        return verdict, highlighted, features

    # The order of `outputs` must match the tuple returned by full_analysis.
    email_input.change(
        fn=full_analysis,
        inputs=email_input,
        outputs=[detection_output, suspicious_output, feature_output],
    )

app.launch()