Spaces:

EdysorEdutech
/

human_final

Paused

App Files Community

EdysorEdutech committed on Jul 26, 2025

Commit

8660488

verified ·

1 Parent(s): 61dd362

Update app.py

Browse files

Files changed (1) hide show

app.py +530 -697

app.py CHANGED Viewed

@@ -26,34 +26,24 @@ class HumanLikeVariations:
     """Add human-like variations and intentional imperfections"""
     def __init__(self):
-        # Common human writing patterns - MASSIVELY EXPANDED for Originality AI
         self.casual_transitions = [
-            "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
-            "You know, ", "I mean, ", "Thing is, ", "Honestly, ",
-            "Look, ", "Listen, ", "See, ", "Okay, ", "Right, ",
-            "Anyway, ", "Besides, ", "Plus, ", "Also, ", "Oh, ",
-            "Hey, ", "Alright, ", "Sure, ", "Fine, ", "Obviously, ",
-            "Clearly, ", "Seriously, ", "Literally, ", "Frankly, ",
-            "To be honest, ", "Truth is, ", "In fact, ", "Believe it or not, ",
-            "Here's the thing, ", "Let me tell you, ", "Get this, ",
-            "Funny thing is, ", "Interestingly, ", "Surprisingly, ",
-            "Let's be real here, ", "Can we talk about ", "Quick question: ",
-            "Real talk: ", "Hot take: ", "Unpopular opinion: ", "Fun fact: ",
-            "Pro tip: ", "Side note: ", "Random thought: ", "Food for thought: ",
-            "Just saying, ", "Not gonna lie, ", "For what it's worth, ",
-            "If you ask me, ", "Between you and me, ", "Here's my take: ",
-            "Let's face it, ", "No kidding, ", "Seriously though, ",
-            "But wait, ", "Hold on, ", "Check this out: ", "Guess what? ",
-            "You know what? ", "Tell you what, ", "I'll be honest, ",
-            "Here's the deal: ", "Bottom line: ", "Long story short, ",
-            "Point is, ", "Fact is, ", "Reality is, ", "Thing is though, ",
-            "What's more, ", "Better yet, ", "Even better, ", "Even worse, ",
-            "Funny enough, ", "Weird thing is, ", "Strange but true: ",
-            "Believe me when I say, ", "Trust me on this, ", "I kid you not, ",
-            "No joke, ", "For real though, ", "I'm telling you, ",
-            "And get this - ", "But here's the kicker: ", "Plot twist: ",
-            "Spoiler alert: ", "News flash: ", "Reality check: ",
-            "Let me break it down: ", "Here's what happened: ", "So here's the thing: "
         ]
         self.filler_phrases = [
@@ -78,12 +68,7 @@ class HumanLikeVariations:
             "to the best of my knowledge", "if I'm not mistaken", "correct me if I'm wrong",
             "you know what", "here's the deal", "bottom line", "at any rate",
             "all in all", "when you think about it", "come to think of it",
-            "now that I think about it", "if we're being honest", "to be fair",
-            "like I said", "as I mentioned", "as we discussed", "going back to",
-            "on that note", "speaking of which", "which reminds me", "by the way",
-            "just a thought", "just my two cents", "if you ask me", "in my book",
-            "the way I see it", "from where I'm standing", "in my humble opinion",
-            "not to mention", "let alone", "much less", "aside from that"
         ]
         self.human_connectors = [
@@ -111,46 +96,34 @@ class HumanLikeVariations:
             ". And honestly?", ". But seriously,", ". And you know what?",
             ", which brings me to", ". This reminds me of", ", speaking of which",
             ". Funny enough,", ". Weird thing is,", ". Strange but true:",
-            ", and I mean", ". I'm not kidding when I say", ", and trust me on this",
-            ". But here's where it gets interesting:", ". Now here's the crazy part:",
-            ", and this is important", ", and this is key", ", and this matters because",
-            ". I'll tell you why:", ". Here's my reasoning:", ". Let me put it this way:",
-            ", which - by the way -", ", and - no joke -", ", but - and this is crucial -"
         ]
-        # NEW: Common human typos and variations - EXPANDED
         self.common_typos = {
-            "the": ["teh", "th", "hte", "thhe"],
-            "and": ["adn", "nad", "an", "andd"],
-            "that": ["taht", "htat", "tha", "thatt"],
-            "with": ["wiht", "wtih", "iwth", "withh"],
-            "have": ["ahve", "hvae", "hav", "havve"],
-            "from": ["form", "fro", "frmo", "fromm"],
-            "they": ["tehy", "thye", "htey", "tehyy"],
-            "which": ["whihc", "wich", "whcih", "whichh"],
-            "their": ["thier", "theri", "tehir", "theirr"],
-            "would": ["woudl", "wuold", "woul", "wouldd"],
-            "there": ["tehre", "theer", "ther", "theree"],
-            "could": ["coudl", "cuold", "coud", "couldd"],
-            "people": ["poeple", "peopel", "pepole", "peolpe"],
-            "through": ["thorugh", "throught", "trhough", "thoruhg"],
-            "because": ["becuase", "becasue", "beacuse", "becausee"],
-            "before": ["beofre", "befroe", "befor", "beforee"],
-            "different": ["differnt", "differnet", "diferent", "differrent"],
-            "between": ["bewteen", "betwen", "betewen", "beetween"],
-            "important": ["improtant", "importnat", "importan", "importantt"],
-            "information": ["infromation", "informaiton", "informaton", "informatoin"],
-            "really": ["realy", "raelly", "realyl", "reallyy"],
-            "something": ["someting", "somethign", "sometihng", "somethhing"],
-            "actually": ["actualy", "acutally", "atcually", "actuallyy"],
-            "probably": ["probaly", "probalby", "probabily", "probablyy"],
-            "definitely": ["definately", "definitly", "definatly", "defintely"],
-            "necessary": ["neccessary", "neccesary", "necessery", "nesessary"],
-            "government": ["goverment", "governmnet", "govermnet", "govenrment"],
-            "business": ["buisness", "busines", "businness", "bussiness"]
         }
-        # NEW: Human-like sentence starters for variety - MASSIVELY EXPANDED
         self.varied_starters = [
             "When it comes to", "As for", "Regarding", "In terms of",
             "With respect to", "Concerning", "Speaking of", "About",
@@ -167,81 +140,16 @@ class HumanLikeVariations:
             "You might wonder", "You might ask", "You may think",
             "Some people say", "Many believe", "It's often said",
             "Research shows", "Studies indicate", "Evidence suggests",
-            "Experience tells us", "History shows", "Time has shown",
-            "I've noticed that", "I've found that", "I've seen that",
-            "In my experience,", "From what I understand,", "As I see it,",
-            "Let me be clear:", "Let me clarify:", "To be specific:",
-            "Here's my thought:", "Here's my view:", "My take is:",
-            "Can we just acknowledge", "Let's be real about", "Time to admit",
-            "Nobody talks about how", "Everyone forgets that", "People overlook",
-            "It's funny how", "It's weird that", "It's strange how",
-            "Ever notice how", "Ever wonder why", "Ever think about",
-            "You gotta admit", "You have to agree", "You can't deny",
-            "I used to think", "I always thought", "I never realized",
-            "Turns out,", "As it happens,", "Funny story:",
-            "Real quick -", "Side note -", "Random thought -",
-            "Not to be that person, but", "Call me crazy, but", "Maybe it's just me, but",
-            "This might sound weird, but", "This might be controversial, but",
-            "Hot take:", "Unpopular opinion:", "Controversial thought:",
-            "Life hack:", "Pro tip:", "Word of advice:",
-            "Question for you:", "Riddle me this:", "Tell me this:",
-            "PSA:", "Reminder:", "Don't forget:",
-            "Breaking news:", "Update:", "FYI:",
-            "Confession time:", "True story:", "No lie:"
-        ]
-        # NEW: Personal opinions and reactions
-        self.personal_reactions = [
-            "And honestly? I'm here for it.",
-            "Which, like, blew my mind.",
-            "And I was like, wait, what?",
-            "Not gonna lie, this surprised me.",
-            "I mean, who would've thought?",
-            "This literally changed everything for me.",
-            "And that's when it hit me.",
-            "I had to do a double-take on this one.",
-            "This is where things get wild.",
-            "Okay, but here's where it gets good.",
-            "And this is the part that gets me every time.",
-            "I'm still processing this, to be honest.",
-            "This keeps me up at night, not gonna lie.",
-            "Every time I think about this, I'm amazed.",
-            "This is the kind of thing that makes you go 'hmm'.",
-            "And yes, I'm totally serious about this.",
-            "I know, I know, it sounds crazy, but hear me out.",
-            "This might be my favorite part, actually.",
-            "And this - this is why I love this topic.",
-            "Hold up, because this next part is crucial.",
-            "Brace yourself for this one.",
-            "You're gonna want to sit down for this.",
-            "This is the game-changer right here.",
-            "And this, my friends, is where the magic happens.",
-            "This right here? This is the good stuff.",
-            "I could talk about this all day, honestly.",
-            "This never gets old for me.",
-            "Every single time, this amazes me.",
-            "And boom - mind blown.",
-            "This is what we call a mic drop moment.",
-            "Can we just take a moment to appreciate this?",
-            "This deserves way more attention, if you ask me.",
-            "Why isn't everyone talking about this?",
-            "This should be common knowledge by now.",
-            "How is this not a bigger deal?",
-            "Seriously, why don't they teach this in school?",
-            "This changed my whole perspective, not even joking.",
-            "Once you see this, you can't unsee it.",
-            "This is one of those 'aha!' moments.",
-            "And that's when everything clicked for me."
         ]
     def add_human_touch(self, text):
-        """Add subtle human-like imperfections - ULTRA NATURAL PATTERNS"""
         sentences = text.split('. ')
         modified_sentences = []
         # Track what we've used to avoid patterns
-        used_transitions = set()
-        used_reactions = set()
         for i, sent in enumerate(sentences):
             if not sent.strip():
@@ -250,106 +158,23 @@ class HumanLikeVariations:
             # Always use contractions where natural
             sent = self.apply_contractions(sent)
-            # Add personal voice (25% chance)
-            if random.random() < 0.25 and i > 0:
-                # Pick unused reaction
-                available_reactions = [r for r in self.personal_reactions if r not in used_reactions]
-                if available_reactions:
-                    reaction = random.choice(available_reactions)
-                    used_reactions.add(reaction)
-                    sent = sent + " " + reaction
-            # Add thinking-out-loud elements (20% chance)
-            if random.random() < 0.20 and len(sent.split()) > 10:
-                thinking_phrases = [
-                    "- wait, actually, ",
-                    "- hmm, let me think - ",
-                    "- okay so ",
-                    "- oh right, ",
-                    "- correction: ",
-                    "- or wait, maybe ",
-                    "- scratch that, "
                 ]
-                pos = random.randint(len(sent.split())//3, 2*len(sent.split())//3)
-                words = sent.split()
-                insert_phrase = random.choice(thinking_phrases)
-                words.insert(pos, insert_phrase)
-                sent = ' '.join(words)
-            # Add natural errors (15% chance)
-            if random.random() < 0.15 and len(sent.split()) > 15:
-                sent = self.add_realistic_errors(sent)
             modified_sentences.append(sent)
         return '. '.join(modified_sentences)
-    def add_realistic_errors(self, text):
-        """Add very realistic human errors"""
-        error_type = random.choice(['typo', 'double_word', 'comma', 'homophone', 'capitalization'])
-        if error_type == 'typo':
-            words = text.split()
-            if len(words) > 5:
-                # Pick a common word to typo
-                for _ in range(3):  # Try 3 times to find a typo-able word
-                    idx = random.randint(2, len(words)-2)
-                    word = words[idx].lower().strip('.,!?;:')
-                    if word in self.common_typos:
-                        typo = random.choice(self.common_typos[word])
-                        # Preserve original capitalization and punctuation
-                        if words[idx][0].isupper():
-                            typo = typo[0].upper() + typo[1:]
-                        # Re-add punctuation if any
-                        if words[idx][-1] in '.,!?;:':
-                            typo += words[idx][-1]
-                        words[idx] = typo
-                        break
-                text = ' '.join(words)
-        elif error_type == 'double_word':
-            words = text.split()
-            if len(words) > 10:
-                # Common words that get doubled
-                double_candidates = ['the', 'a', 'to', 'in', 'on', 'at', 'for', 'and', 'but', 'or']
-                for _ in range(3):
-                    idx = random.randint(3, len(words)-3)
-                    if words[idx].lower() in double_candidates:
-                        words.insert(idx+1, words[idx].lower())
-                        break
-                text = ' '.join(words)
-        elif error_type == 'comma':
-            # Remove Oxford comma or add unnecessary comma
-            if ', and' in text and random.random() < 0.5:
-                text = text.replace(', and', ' and', 1)
-            elif ' and' in text and ', and' not in text and random.random() < 0.3:
-                text = text.replace(' and', ', and', 1)
-        elif error_type == 'homophone':
-            homophones = [
-                ('your', "you're"), ("you're", 'your'),
-                ('its', "it's"), ("it's", 'its'),
-                ('their', 'there'), ('there', 'their'),
-                ('then', 'than'), ('than', 'then'),
-                ('to', 'too'), ('effect', 'affect')
-            ]
-            for original, replacement in homophones:
-                if f' {original} ' in text and random.random() < 0.3:
-                    text = text.replace(f' {original} ', f' {replacement} ', 1)
-                    break
-        elif error_type == 'capitalization':
-            # Occasionally fail to capitalize after period
-            matches = list(re.finditer(r'\. ([a-z])', text))
-            if matches and random.random() < 0.3:
-                match = random.choice(matches)
-                # Don't change if it's a common lowercase starter like "e.g."
-                if match.group(1) not in ['e', 'i', 'v']:
-                    text = text  # Keep lowercase for more natural error
-        return text
     def apply_contractions(self, text):
         """Apply common contractions - EXPANDED"""
         contractions = {
@@ -370,47 +195,75 @@ class HumanLikeVariations:
             "we would": "we'd", "they would": "they'd", "could have": "could've",
             "should have": "should've", "would have": "would've", "might have": "might've",
             "must have": "must've", "there has": "there's", "here is": "here's",
-            "let us": "let's", "that will": "that'll", "who will": "who'll",
-            "shall not": "shan't", "need not": "needn't", "dare not": "daren't",
-            "ought not": "oughtn't", "might not": "mightn't", "must not": "mustn't",
-            "there are": "there're", "where are": "where're", "what are": "what're",
-            "how are": "how're", "why are": "why're", "who are": "who're"
         }
-        # Apply contractions with very high probability (95%)
         for full, contr in contractions.items():
-            if random.random() < 0.95:
                 text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
         return text
     def add_minor_errors(self, text):
-        """Add very minor, human-like errors - ULTRA REALISTIC"""
-        # Occasionally miss Oxford comma (20% chance)
-        if random.random() < 0.20:
             text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)
-        # Sometimes use 'which' instead of 'that' (10% chance)
-        if random.random() < 0.10:
             matches = re.finditer(r'\b(\w+) that (\w+)', text)
-            for match in list(matches)[:1]:
-                if match.group(1).lower() not in ['believe', 'think', 'know', 'say', 'so']:
                     text = text.replace(match.group(0), f"{match.group(1)} which {match.group(2)}", 1)
-        # Add occasional typos (5% chance per sentence)
         sentences = text.split('. ')
         for i, sent in enumerate(sentences):
-            if random.random() < 0.05 and len(sent.split()) > 15:
-                sent = self.add_realistic_errors(sent)
-                sentences[i] = sent
         text = '. '.join(sentences)
-        # Mix up dash styles occasionally
-        if random.random() < 0.15:
-            text = text.replace(' - ', ' – ', 1)  # em dash
-        elif random.random() < 0.15:
-            text = text.replace(' - ', ' — ', 1)  # en dash
         return text
@@ -426,86 +279,79 @@ class HumanLikeVariations:
             # Natural contractions throughout
             sentence = self.apply_contractions(sentence)
-            # Add varied sentence starters (30% chance)
-            if random.random() < 0.30 and i > 0:
-                starter = random.choice(self.varied_starters)
-                sentence = starter + " " + sentence[0].lower() + sentence[1:] if len(sentence) > 1 else sentence
-            # Add filler phrases naturally (25% chance)
-            if random.random() < 0.25 and len(sentence.split()) > 8:
                 words = sentence.split()
-                pos = random.randint(2, len(words)-2)
-                filler = random.choice(self.filler_phrases)
-                words.insert(pos, filler)
-                sentence = ' '.join(words)
-            # Add personal asides (15% chance)
-            if random.random() < 0.15:
-                asides = [
-                    " (yeah, I know)",
-                    " (trust me on this)",
-                    " (I checked)",
-                    " (not even kidding)",
-                    " (seriously)",
-                    " (I mean it)",
-                    " (for real)",
-                    " (no joke)",
-                    " (true story)",
-                    " (I promise)"
-                ]
-                aside_pos = random.randint(len(sentence)//3, 2*len(sentence)//3)
-                sentence = sentence[:aside_pos] + random.choice(asides) + sentence[aside_pos:]
-            # Natural sentence combinations (25% chance)
-            if i < len(sentences) - 1 and random.random() < 0.25:
                 next_sent = sentences[i+1].strip()
-                if next_sent and len(sentence.split()) + len(next_sent.split()) < 30:
-                    connector = random.choice(self.human_connectors)
-                    sentence = sentence.rstrip('.') + connector + next_sent[0].lower() + next_sent[1:]
-                    sentences[i+1] = ""  # Mark as processed
             result_sentences.append(sentence)
         return ' '.join([s for s in result_sentences if s])
-    def split_into_sentences_advanced(self, text):
-        """Split text into sentences"""
-        # Simple regex-based splitting
-        sentences = re.split(r'(?<=[.!?])\s+', text)
-        return [s for s in sentences if s and len(s.strip()) > 0]
     def vary_sentence_start(self, sentence):
         """Vary sentence beginning to avoid repetitive patterns"""
-        if not sentence or len(sentence.split()) < 5:
             return sentence
-        # Much more variety in sentence transformations
         variations = [
-            lambda s: random.choice(self.varied_starters) + " " + s[0].lower() + s[1:],
-            lambda s: "You know what? " + s,
-            lambda s: "Here's the thing: " + s[0].lower() + s[1:],
-            lambda s: "Funny enough, " + s[0].lower() + s[1:],
-            lambda s: s + " Just saying.",
-            lambda s: s + " Think about it.",
-            lambda s: s + " Makes sense, right?",
-            lambda s: "Okay, so " + s[0].lower() + s[1:],
-            lambda s: "Real talk - " + s[0].lower() + s[1:],
-            lambda s: s + " And that's facts.",
-            lambda s: "Not gonna lie, " + s[0].lower() + s[1:],
-            lambda s: s + " Period.",
-            lambda s: "Can we talk about how " + s[0].lower() + s[1:] + "?",
             lambda s: s,  # Keep original sometimes
         ]
-        # Higher chance of variation
-        if random.random() < 0.4:
-            variation = random.choice(variations)
-            try:
-                return variation(sentence)
-            except:
-                return sentence
-        return sentence
 class SelectiveGrammarFixer:
     """Minimal grammar fixes to maintain human-like quality while fixing critical errors"""
@@ -551,6 +397,9 @@ class SelectiveGrammarFixer:
         result = ' '.join(fixed_sentences)
         return result
     def fix_basic_punctuation_errors(self, text):
@@ -558,42 +407,42 @@ class SelectiveGrammarFixer:
         if not text:
             return text
-        # Fix double spaces (human-like error to keep some)
-        text = re.sub(r'\s{3,}', ' ', text)  # Only fix 3+ spaces
-        # Fix space before punctuation (but might keep some for naturalness)
-        if random.random() < 0.8:  # 80% chance to fix
-            text = re.sub(r'\s+([.,!?;:])', r'\1', text)
         # Fix missing space after punctuation (human-like)
         text = re.sub(r'([.,!?])([A-Z])', r'\1 \2', text)
-        # Fix accidental double punctuation (but keep some ..)
-        text = re.sub(r'([!?])\1+', r'\1', text)
-        text = re.sub(r'\.{4,}', '...', text)  # Fix 4+ periods to ellipsis
-        # Fix "i" capitalization (but miss some for naturalness)
-        if random.random() < 0.9:  # 90% chance to fix
-            text = re.sub(r'\bi\b', 'I', text)
         return text
     def preserve_natural_variations(self, text):
         """Keep some natural human-like variations"""
         # Only fix if really broken
         if text.count('.') == 0 and len(text.split()) > 20:
             # Long text with no periods - needs fixing
             words = text.split()
-            # Add periods every 15-25 words naturally
             new_text = []
             for i, word in enumerate(words):
                 new_text.append(word)
-                if i > 0 and i % random.randint(15, 30) == 0:
                     if word[-1] not in '.!?,;:':
                         new_text[-1] = word + '.'
-                        # Capitalize next word
                         if i + 1 < len(words) and words[i + 1][0].islower():
-                            words[i + 1] = words[i + 1][0].upper() + words[i + 1][1:]
             text = ' '.join(new_text)
         return text
@@ -631,12 +480,12 @@ class EnhancedDipperHumanizer:
                 print("spaCy model not found, using NLTK for sentence splitting")
         try:
-            # Load Dipper paraphraser
             print("Loading Dipper paraphraser model...")
             self.tokenizer = T5Tokenizer.from_pretrained('google/t5-v1_1-xxl')
             self.model = T5ForConditionalGeneration.from_pretrained(
                 "kalpeshk2011/dipper-paraphraser-xxl",
-                device_map="auto",
                 torch_dtype=torch.float16,
                 low_cpu_mem_usage=True
             )
@@ -667,7 +516,7 @@ class EnhancedDipperHumanizer:
             self.bart_model = AutoModelForSeq2SeqLM.from_pretrained(
                 "eugenesiow/bart-paraphrase",
                 torch_dtype=torch.float16,
-                device_map="auto"
             )
             self.bart_tokenizer = AutoTokenizer.from_pretrained("eugenesiow/bart-paraphrase")
             self.use_bart = True
@@ -680,16 +529,118 @@ class EnhancedDipperHumanizer:
         self.human_variations = HumanLikeVariations()
     def add_natural_human_patterns(self, text):
-        """Add natural human writing patterns"""
-        return self.human_variations.add_natural_human_patterns(text)
     def vary_sentence_start(self, sentence):
-        """Vary sentence beginning"""
-        return self.human_variations.vary_sentence_start(sentence)
     def apply_contractions(self, text):
-        """Apply contractions"""
-        return self.human_variations.apply_contractions(text)
     def preserve_keywords(self, text, keywords):
         """Mark keywords to preserve them during paraphrasing"""
@@ -723,7 +674,7 @@ class EnhancedDipperHumanizer:
         return modified_text, keyword_map
     def restore_keywords_robust(self, text, keyword_map):
-        """Restore keywords with more flexible pattern matching - FIXED VERSION"""
         if not keyword_map:
             return text
@@ -753,9 +704,8 @@ class EnhancedDipperHumanizer:
             if match:
                 num = match.group(1)
-                # EXPANDED patterns to catch more variations
                 patterns = [
-                    # Standard variations
                     (f'__KW{num}__', keyword),
                     (f'__ KW{num}__', keyword),
                     (f'__KW {num}__', keyword),
@@ -770,97 +720,32 @@ class EnhancedDipperHumanizer:
                     (f'__KW{num}_', keyword),
                     (f'_KW{num}__', keyword),
                     (f'kw{num}', keyword),
-                    (f'``KW{num}__', keyword),
-                    (f'``KKW{num}', keyword),
-                    # New patterns to catch the issues in your output
-                    (f'KW{num}:', keyword),  # Catches "KW0:12:"
-                    (f'KW{num}123', keyword),  # Catches "KW0123"
-                    (f'Kw{num}', keyword),
-                    (f'kW{num}', keyword),
-                    (f'KW{num}[^0-9]', keyword),  # Catches KW followed by non-digit
-                    (f'KW{num}(?![0-9])', keyword),  # Lookahead to ensure no digit follows
-                    # Patterns with spaces and punctuation
-                    (f'KW {num}:', keyword),
-                    (f'KW{num} ', keyword),
-                    (f' KW{num}', keyword),
-                    (f'KW{num},', keyword),
-                    (f'KW{num}.', keyword),
-                    (f'KW{num};', keyword),
-                    (f'KW{num}!', keyword),
-                    (f'KW{num}?', keyword),
-                    # Triple patterns (for "KW kw kw")
-                    (f'KW kw kw', keyword),
-                    (f'kw kw kw', keyword),
-                    (f'Kw kw kw', keyword),
                 ]
                 for pattern, replacement in patterns:
-                    # Use regex for more flexible matching
-                    if '(?!' in pattern or '[^' in pattern:
-                        # This is already a regex pattern
-                        regex_pattern = pattern
-                    else:
-                        # Escape the pattern for regex
-                        regex_pattern = re.escape(pattern)
-                    matches = list(re.finditer(regex_pattern, restored_text))
-                    for match in matches:
-                        start_pos = match.start()
-                        end_pos = match.end()
                         # Check if this position has already been replaced
-                        if not any(pos in replaced_positions for pos in range(start_pos, end_pos)):
-                            print(f"Found pattern '{match.group()}' at position {start_pos}, replacing with {replacement}")
-                            # Replace this specific occurrence
-                            before = restored_text[:start_pos]
-                            after = restored_text[end_pos:]
-                            restored_text = before + replacement + after
                             # Mark new positions as replaced
-                            replaced_positions.update(range(start_pos, start_pos + len(replacement)))
-                            # Break after first replacement to avoid issues
-                            break
-        # Third pass: Clean up any remaining KW patterns with numbers
-        # This catches cases like "KW0:12:" where the number might vary
-        remaining_kw_patterns = re.findall(r'\bKW\d+[:;.,!?\s]|\bKW\d+\d+\b|\bKw\d+\b|\bkw\d+\b|\bKW\s*kw\s*kw\b', restored_text)
-        if remaining_kw_patterns:
-            print(f"Found remaining KW patterns: {remaining_kw_patterns}")
-            # Replace remaining patterns with keywords in order
-            keyword_values = list(keyword_map.values())
-            keyword_index = 0
-            for pattern in remaining_kw_patterns:
-                if keyword_index < len(keyword_values):
-                    # Find the position of this pattern
-                    pattern_pos = restored_text.find(pattern)
-                    if pattern_pos != -1 and not any(pos in replaced_positions for pos in range(pattern_pos, pattern_pos + len(pattern))):
-                        # Extract just the KW part and any trailing punctuation
-                        clean_pattern = pattern.rstrip('0123456789:;.,!?\s')
-                        trailing = pattern[len(clean_pattern):]
-                        # Replace with keyword + any trailing punctuation
-                        replacement = keyword_values[keyword_index]
-                        if trailing and trailing[0] in ':;.,!?':
-                            replacement += trailing[0]
-                        before = restored_text[:pattern_pos]
-                        after = restored_text[pattern_pos + len(pattern):]
-                        restored_text = before + replacement + after
-                        replaced_positions.update(range(pattern_pos, pattern_pos + len(replacement)))
-                        keyword_index += 1
-        # Fourth pass: Clean up any backticks or quotes that shouldn't be there
         restored_text = re.sub(r'``+', '', restored_text)
         restored_text = re.sub(r"''", '"', restored_text)
         restored_text = re.sub(r'""', '"', restored_text)
-        # Fifth pass: Look for any remaining underscore patterns
         if '___' in restored_text and keyword_map:
             # Find all occurrences of multiple underscores
             underscore_matches = list(re.finditer(r'_{3,}', restored_text))
@@ -878,13 +763,10 @@ class EnhancedDipperHumanizer:
                         replaced_positions.update(range(start, start + len(keyword_values[i])))
         # Final cleanup: Remove any remaining KW patterns that weren't caught
-        # This is a last resort to clean up any stragglers
-        restored_text = re.sub(r'\bKW\d+\b', '', restored_text)
-        restored_text = re.sub(r'\bKw\d+\b', '', restored_text)
-        restored_text = re.sub(r'\bkw\d+\b', '', restored_text)
-        # Clean up any double spaces created by removals
-        restored_text = re.sub(r'\s+', ' ', restored_text)
         # Log final result
         print(f"Final restored text: {restored_text[:100]}...")
@@ -914,6 +796,7 @@ class EnhancedDipperHumanizer:
             return True
         # Special handling for content inside tables
         if parent:
             # Check if we're inside a table
             is_in_table = any(p.name == 'table' for p in parent.parents)
@@ -941,7 +824,7 @@ class EnhancedDipperHumanizer:
             if any(handler in parent.attrs for handler in event_handlers):
                 return True
-        # Special check for testimonial cards
         if parent:
             ancestors_to_check = []
             current = parent
@@ -960,7 +843,7 @@ class EnhancedDipperHumanizer:
                     elif isinstance(classes, str) and 'testimonial-card' in classes:
                         return True
-        # Skip if parent or element has skip-worthy classes/IDs
         skip_indicators = [
             'cta-', 'button', 'btn', 'heading', 'title', 'caption',
             'toc-', 'contents', 'quiz', 'tip', 'note', 'alert',
@@ -974,7 +857,7 @@ class EnhancedDipperHumanizer:
             'comparision-tables', 'process-flowcharts', 'infographics', 'cost-breakdown'
         ]
-        # Check only immediate parent and grandparent
         elements_to_check = [parent]
         if parent and parent.parent:
             elements_to_check.append(parent.parent)
@@ -1043,7 +926,7 @@ class EnhancedDipperHumanizer:
         return False
     def clean_model_output_enhanced(self, text):
-        """Enhanced cleaning that preserves more natural structure and keywords"""
         if not text:
             return ""
@@ -1075,20 +958,15 @@ class EnhancedDipperHumanizer:
         text = re.sub(r'- or maybe I should say -', '', text)
         text = re.sub(r'- or rather,', '', text)
         text = re.sub(r'- think about it -', '', text)
-        text = re.sub(r'- hmm, let me think -', '', text)
-        text = re.sub(r'- correction:', '', text)
-        text = re.sub(r'- or wait, maybe', '', text)
-        text = re.sub(r'- scratch that,', '', text)
         # Clean up multiple spaces
         text = re.sub(r'\s+', ' ', text)
-        # IMPORTANT: Be very careful about removing leading characters
-        # Check for keyword placeholders more thoroughly
-        if not re.match(r'^(__KW\d+__|_?KW\d+|kw\d+|Kw\d+)', text):
-            # Only remove leading non-letter characters if it's definitely not a placeholder
-            # But be more conservative - only remove clearly wrong characters
-            text = re.sub(r'^[^\w_]+', '', text)
         # If we accidentally removed too much, use original
         if len(text) < len(original) * 0.5:
@@ -1122,17 +1000,17 @@ class EnhancedDipperHumanizer:
                 continue
             try:
-                # MAXIMUM diversity for Originality AI
                 has_keywords = any(placeholder in sentence for placeholder in keyword_map.keys())
                 if has_keywords:
-                    lex_diversity = 70  # High for keywords
-                    order_diversity = 30
                 elif len(sentence.split()) < 10:
-                    lex_diversity = 90  # Very high for short
-                    order_diversity = 45
                 else:
-                    lex_diversity = 98  # MAXIMUM diversity
-                    order_diversity = 60  # MAXIMUM order diversity
                 lex_code = int(100 - lex_diversity)
                 order_code = int(100 - order_diversity)
@@ -1159,23 +1037,23 @@ class EnhancedDipperHumanizer:
                 else:
                     inputs = {k: v.to(self.device) for k, v in inputs.items()}
-                # Generate with maximum variation
                 original_length = len(sentence.split())
-                max_new_length = int(original_length * 1.5)
-                # Maximum variation parameters
-                temp = 1.0 if has_keywords else 1.4
-                top_p_val = 0.92
                 with torch.no_grad():
                     outputs = self.model.generate(
                         **inputs,
                         max_length=max_new_length + 20,
-                        min_length=max(5, int(original_length * 0.6)),
                         do_sample=True,
                         top_p=top_p_val,
                         temperature=temp,
-                        no_repeat_ngram_size=5,  # Allow more repetition for naturalness
                         num_beams=1,  # Single beam (no beam search); with do_sample=True above, tokens are sampled rather than picked greedily
                         early_stopping=True
                     )
@@ -1267,8 +1145,8 @@ class EnhancedDipperHumanizer:
             last_word = words[-1]
             # Remove if it's clearly cut off (1-2 chars, no vowels)
-            # But don't remove valid short words
-            short_valid_words = {'is', 'of', 'to', 'in', 'on', 'at', 'by', 'or', 'if', 'so', 'up', 'no', 'we', 'he', 'me', 'be', 'do', 'go', 'as', 'it'}
             if (len(last_word) <= 2 and
                 last_word.lower() not in short_valid_words and
                 not any(c in 'aeiouAEIOU' for c in last_word)):
@@ -1289,7 +1167,7 @@ class EnhancedDipperHumanizer:
                     generated += '.'
             elif orig_stripped.endswith('!'):
                 # Check if generated seems exclamatory
-                exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent', 'wow', 'oh']
                 if any(word in generated.lower() for word in exclaim_words):
                     generated += '!'
                 else:
@@ -1359,12 +1237,12 @@ class EnhancedDipperHumanizer:
                 with torch.no_grad():
                     outputs = self.bart_model.generate(
                         **inputs,
-                        max_length=int(original_length * 1.5) + 10,
-                        min_length=max(5, int(original_length * 0.5)),
                         num_beams=2,
-                        temperature=1.2,  # Higher temperature for more variation
                         do_sample=True,
-                        top_p=0.92,
                         early_stopping=True
                     )
@@ -1390,13 +1268,12 @@ class EnhancedDipperHumanizer:
             return text
     def apply_sentence_variation(self, text):
-        """Apply natural sentence structure variations - ULTRA HUMAN-LIKE FLOW"""
         sentences = self.split_into_sentences_advanced(text)
         varied_sentences = []
         # Track patterns to ensure variety
         last_sentence_length = 0
-        sentence_rhythms = []
         for i, sentence in enumerate(sentences):
             if not sentence.strip():
@@ -1405,154 +1282,39 @@ class EnhancedDipperHumanizer:
             words = sentence.split()
             current_length = len(words)
-            # Create natural rhythm variation
-            if i > 0:
-                # After short sentence, maybe go longer
-                if last_sentence_length < 10 and random.random() < 0.7:
-                    # Add elaboration
-                    elaborations = [
-                        " Let me explain what I mean.",
-                        " Here's why this matters.",
-                        " And that's just the beginning.",
-                        " But there's more to it.",
-                        " This is important to understand.",
-                        " Think about the implications.",
-                        " Consider what this means."
-                    ]
-                    sentence += random.choice(elaborations)
-                # After long sentence, maybe go shorter
-                elif last_sentence_length > 25 and random.random() < 0.6:
-                    # Truncate if possible
-                    if ',' in sentence and sentence.count(',') > 1:
-                        # Keep only first part
-                        parts = sentence.split(',')
-                        sentence = parts[0] + '.'
-            # Natural sentence combinations for flow
             if (i < len(sentences) - 1 and
-                current_length < 12 and
-                len(sentences[i+1].split()) < 12 and
-                random.random() < 0.35):
                 next_sent = sentences[i+1].strip()
-                # Combine with natural connectors
-                connectors = [
-                    ', and ', ', but ', ', so ', ', which means ',
-                    ' - and ', ' - but ', ', though ',
-                    '. Actually, ', '. Plus, ', '. Also, '
-                ]
-                connector = random.choice(connectors)
-                if connector.startswith('.'):
-                    combined = sentence + connector + next_sent
-                else:
-                    combined = sentence.rstrip('.') + connector + next_sent[0].lower() + next_sent[1:]
-                varied_sentences.append(combined)
-                sentences[i+1] = ""
-                last_sentence_length = len(combined.split())
-                continue
-            # Add rhetorical questions occasionally
-            if random.random() < 0.08 and i < len(sentences) - 1:
-                rhetorical = [
-                    " Make sense?",
-                    " See what I mean?",
-                    " Getting the picture?",
-                    " Following me so far?",
-                    " Sound familiar?",
-                    " Crazy, right?",
-                    " Wild, isn't it?"
-                ]
-                sentence += random.choice(rhetorical)
             varied_sentences.append(sentence)
             last_sentence_length = current_length
         return ' '.join([s for s in varied_sentences if s])
-    def add_natural_flow_variations(self, text):
-        """Add more natural flow and rhythm variations for Originality AI"""
-        sentences = self.split_into_sentences_advanced(text)
-        enhanced_sentences = []
-        for i, sentence in enumerate(sentences):
-            if not sentence.strip():
-                continue
-            # Add stream-of-consciousness elements (15% chance)
-            if random.random() < 0.15 and len(sentence.split()) > 10:
-                stream_elements = [
-                    " - wait, actually, ",
-                    " - hmm, ",
-                    " - okay so ",
-                    " - oh right, ",
-                    " - correction: ",
-                    " - or wait, maybe ",
-                    " - scratch that, "
-                ]
-                words = sentence.split()
-                pos = random.randint(len(words)//4, 3*len(words)//4)
-                words.insert(pos, random.choice(stream_elements))
-                sentence = ' '.join(words)
-            # Add human-like self-corrections (10% chance)
-            if random.random() < 0.10:
-                corrections = [
-                    " - or rather, ",
-                    " - well, actually, ",
-                    " - I mean, ",
-                    " - or should I say, ",
-                    " - correction: ",
-                    " - let me rephrase: ",
-                    " - wait, no, "
-                ]
-                words = sentence.split()
-                if len(words) > 8:
-                    pos = random.randint(len(words)//2, len(words)-3)
-                    correction = random.choice(corrections)
-                    words.insert(pos, correction)
-                sentence = ' '.join(words)
-            # Add thinking-out-loud patterns (12% chance)
-            if random.random() < 0.12 and i > 0:
-                thinking_patterns = [
-                    "Come to think of it, ",
-                    "Actually, you know what? ",
-                    "Wait, here's a thought: ",
-                    "Oh, and another thing - ",
-                    "Speaking of which, ",
-                    "This reminds me, ",
-                    "Now that I mention it, ",
-                    "Funny you should ask, because ",
-                    "You know what's interesting? ",
-                    "Here's what gets me: ",
-                    "Can I be honest? ",
-                    "Between you and me, "
-                ]
-                pattern = random.choice(thinking_patterns)
-                sentence = pattern + sentence[0].lower() + sentence[1:] if len(sentence) > 1 else sentence
-            # Add emphatic repetitions (8% chance)
-            if random.random() < 0.08 and len(sentence.split()) > 6:
-                # Find a key word to repeat for emphasis
-                words = sentence.split()
-                important_words = [w for w in words if len(w) > 4 and w[0].islower()]
-                if important_words:
-                    word_to_repeat = random.choice(important_words)
-                    emphatic_patterns = [
-                        f". {word_to_repeat.capitalize()}.",
-                        f" - yes, {word_to_repeat} -",
-                        f". I said {word_to_repeat}.",
-                        f" ({word_to_repeat}!)",
-                        f". {word_to_repeat.capitalize()}, people!"
-                    ]
-                    sentence += random.choice(emphatic_patterns)
-            enhanced_sentences.append(sentence)
-        return ' '.join(enhanced_sentences)
     def fix_punctuation(self, text):
         """Comprehensive punctuation and formatting fixes"""
         if not text:
@@ -1564,27 +1326,26 @@ class EnhancedDipperHumanizer:
         # Fix weird symbols and characters using safe replacements
         text = text.replace('<>', '')  # Remove empty angle brackets
-        # Normalize quotes
         text = text.replace('«', '"').replace('»', '"')
         # Typographic quotes are spelled as \u escapes so the literals cannot be
         # flattened into plain ASCII quotes (which would turn these calls into
         # no-ops or, for the single quotes, an unterminated ''' string).
         text = text.replace('\u201e', '"').replace('\u201c', '"').replace('\u201d', '"')
         text = text.replace('\u2018', "'").replace('\u2019', "'")
         text = text.replace('–', '-').replace('—', '-')
         # Fix colon issues
-        text = re.sub(r'\.:', ':', text)
-        text = re.sub(r':\s*\.', ':', text)
-        # Fix basic spacing (but keep some human errors)
-        text = re.sub(r'\s{3,}', ' ', text)  # Only fix 3+ spaces
-        if random.random() < 0.9:  # 90% chance to fix
-            text = re.sub(r'\s+([.,!?;:])', r'\1', text)
-        text = re.sub(r'([.,!?;:])\s*([.,!?;:])', r'\1', text)
-        text = re.sub(r'([.!?])\s*\1+', r'\1', text)
         # Fix colons
-        text = re.sub(r':\s*([.,!?])', ':', text)
-        text = re.sub(r'([.,!?])\s*:', ':', text)
-        text = re.sub(r':+', ':', text)
         # Fix quotes and parentheses
         text = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', text)
@@ -1592,6 +1353,7 @@ class EnhancedDipperHumanizer:
         text = re.sub(r'\(\s*([^)]*?)\s*\)', r'(\1)', text)
         # Fix sentence capitalization more carefully
         sentences = re.split(r'(?<=[.!?])\s+', text)
         fixed_sentences = []
@@ -1599,44 +1361,51 @@ class EnhancedDipperHumanizer:
             if not sentence:
                 continue
-            # Only capitalize if needed
             words = sentence.split()
             if words:
                 first_word = words[0]
                 if (first_word[0].islower() and
                     not self.is_likely_acronym_or_proper_noun(first_word) and
                     not first_word.startswith('__KW') and
                     not first_word.startswith('_kw')):
                     sentence = first_word[0].upper() + first_word[1:] + ' ' + ' '.join(words[1:])
             fixed_sentences.append(sentence)
         text = ' '.join(fixed_sentences)
-        # Fix common issues (but not all for naturalness)
-        if random.random() < 0.95:  # 95% chance to fix
-            text = re.sub(r'\bi\b', 'I', text)
-        text = re.sub(r'\.{4,}', '...', text)  # Fix 4+ periods
-        text = re.sub(r',{3,}', ',', text)  # Fix 3+ commas
         # Fix abbreviations
         text = re.sub(r'\betc\s*\.\s*\.', 'etc.', text)
         text = re.sub(r'\be\.g\s*\.\s*[,\s]', 'e.g., ', text)
         text = re.sub(r'\bi\.e\s*\.\s*[,\s]', 'i.e., ', text)
-        # Fix numbers with periods
         text = re.sub(r'(\d+)\.\s+', r'\1. ', text)
         # Fix bold/strong tags punctuation
         text = self.fix_bold_punctuation(text)
-        # Clean up remaining issues
-        text = re.sub(r'\s+([.,!?;:])', r'\1', text)
-        text = re.sub(r'([.,!?;:])\s{2,}', r'\1 ', text)
         # Ensure ending punctuation
         text = text.strip()
         if text and text[-1] not in '.!?':
             if not text.endswith(':'):
                 text += '.'
@@ -1646,11 +1415,13 @@ class EnhancedDipperHumanizer:
         """Fix punctuation issues around bold/strong tags"""
         # Check if this is likely a list item with colon pattern
         def is_list_item_with_colon(text):
             list_pattern = r'^\s*(?:[-•*▪▫◦‣⁃]\s*)?<(?:strong|b)>[^<]+:</(?:strong|b)>'
             return bool(re.search(list_pattern, text))
         # If it's a list item with colon, preserve the format
         if is_list_item_with_colon(text):
             text = re.sub(r'<(strong|b)>\s*([^:]+)\s*:\s*</\1>', r'<\1>\2:</\1>', text)
             return text
@@ -1666,12 +1437,14 @@ class EnhancedDipperHumanizer:
             # Check if this is a list header (contains colon at the end)
             if content.endswith(':'):
                 return f'<{tag}>{content}</{tag}>'
             # Remove any periods at the start or end of bold content
             content = content.strip('.')
             # Check if this bold text is at the start of a sentence
             start_pos = match.start()
             is_sentence_start = (start_pos == 0 or
                                (start_pos > 2 and text[start_pos-2:start_pos] in ['. ', '! ', '? ', '\n\n']))
@@ -1685,24 +1458,25 @@ class EnhancedDipperHumanizer:
         # Fix bold/strong tags
         text = re.sub(bold_pattern, fix_bold_match, text)
-        # Fix spacing around bold/strong tags
         if not is_list_item_with_colon(text):
-            text = re.sub(r'\.\s*<(strong|b)>', r'. <\1>', text)
-            text = re.sub(r'</(strong|b)>\s*\.', r'</\1>.', text)
-            text = re.sub(r'([.!?])\s*<(strong|b)>', r'\1 <\2>', text)
-            text = re.sub(r'</(strong|b)>\s+([a-z])', lambda m: f'</{m.group(1)}> {m.group(2)}', text)
             # Remove duplicate periods around bold tags
             text = re.sub(r'\.\s*</(strong|b)>\s*\.', r'</\1>.', text)
             text = re.sub(r'\.\s*<(strong|b)>\s*\.', r'. <\1>', text)
             # Fix cases where bold content ends a sentence
             text = re.sub(r'</(strong|b)>\s+([A-Z])', r'</\1>. \2', text)
         # Don't remove these for list items
         if not is_list_item_with_colon(text):
-            text = re.sub(r'<(strong|b)>\s*:\s*</\1>', ':', text)
-            text = re.sub(r'<(strong|b)>\s*\.\s*</\1>', '.', text)
         return text
@@ -1711,7 +1485,7 @@ class EnhancedDipperHumanizer:
         soup = BeautifulSoup(html_content, 'html.parser')
         text_elements = []
-        # Get all text nodes
         for element in soup.find_all(string=True):
             # Skip script, style, and noscript content completely
             if element.parent.name in ['script', 'style', 'noscript']:
@@ -1733,11 +1507,11 @@ class EnhancedDipperHumanizer:
         html_text = re.sub(r'<!\s*DOCTYPE', '<!DOCTYPE', html_text, flags=re.IGNORECASE)
         # Fix spacing issues
-        html_text = re.sub(r'>\s+<', '><', html_text)
-        html_text = re.sub(r'\s+>', '>', html_text)
-        html_text = re.sub(r'<\s+', '<', html_text)
-        # Fix common word errors
         html_text = html_text.replace('down loaded', 'downloaded')
         html_text = html_text.replace('But your document', 'Your document')
@@ -1751,6 +1525,7 @@ class EnhancedDipperHumanizer:
         # Find all paragraph tags
         for p_tag in soup.find_all('p'):
             # Skip paragraphs that are inside special elements
             skip_parents = ['div.author-intro', 'div.cta-box', 'div.testimonial-card',
                           'div.news-box', 'button', 'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                           'div.quiz-container', 'div.question-container', 'div.results']
@@ -1799,6 +1574,7 @@ class EnhancedDipperHumanizer:
                     continue
                 # Skip if the text node's immediate parent isn't the p tag
                 if text_node.parent != p_tag:
                     continue
@@ -1836,6 +1612,68 @@ class EnhancedDipperHumanizer:
                         text_node.insert_after(new_node)
                     text_node.extract()
     def process_html(self, html_content, primary_keywords="", secondary_keywords="", progress_callback=None):
         """Main processing function with progress callback"""
         if not html_content.strip():
@@ -1868,9 +1706,10 @@ class EnhancedDipperHumanizer:
         # Combine keywords and clean them
         all_keywords = []
         if primary_keywords:
             for k in primary_keywords.split(','):
                 cleaned = k.strip()
-                if cleaned and len(cleaned) > 1:
                     all_keywords.append(cleaned)
         if secondary_keywords:
             for k in secondary_keywords.split(','):
@@ -1915,7 +1754,7 @@ class EnhancedDipperHumanizer:
                 if text_has_keywords:
                     print(f"Debug: Processing text with keywords: {original_text[:50]}...")
-                # First pass with Dipper (with maximum diversity)
                 paraphrased_text = self.paraphrase_with_dipper(
                     original_text,
                     keywords=all_keywords
@@ -1924,7 +1763,7 @@ class EnhancedDipperHumanizer:
                 # Verify no placeholders remain
                 if '__KW' in paraphrased_text or '___' in paraphrased_text:
                     print(f"Warning: Placeholder or underscores found in paraphrased text: {paraphrased_text[:100]}...")
-                    # Try to restore again
                     temp_map = {}
                     for j, keyword in enumerate(all_keywords):
                         temp_map[f'__KW{j:03d}__'] = keyword
@@ -1932,27 +1771,24 @@ class EnhancedDipperHumanizer:
                 # Second pass with BART for longer texts (increased probability)
                 if self.use_bart and len(paraphrased_text.split()) > 8:
-                    # 60% chance to use BART for maximum variation
-                    if random.random() < 0.6:
                         paraphrased_text = self.paraphrase_with_bart(
                             paraphrased_text,
                             keywords=all_keywords
                         )
-                # Apply maximum sentence variation
                 paraphrased_text = self.apply_sentence_variation(paraphrased_text)
                 # Add natural flow variations
                 paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
-                # Add extra human touch
-                paraphrased_text = self.human_variations.add_human_touch(paraphrased_text)
                 # Fix punctuation and formatting
                 paraphrased_text = self.fix_punctuation(paraphrased_text)
-                # Final check for any remaining placeholders
-                if '___' in paraphrased_text or '__KW' in paraphrased_text or 'KW0' in paraphrased_text:
                     print(f"Error: Unresolved placeholders in final text")
                     # Use original text if we can't resolve placeholders
                     paraphrased_text = original_text
@@ -1973,20 +1809,17 @@ class EnhancedDipperHumanizer:
             # Wrap keywords with <strong> tags in paragraphs
             self.wrap_keywords_in_paragraphs(soup, all_keywords)
-            # Post-process the entire HTML
             result = str(soup)
             result = self.post_process_html(result)
-            # Final safety check for any remaining placeholders
-            if '__KW' in result or re.search(r'_{3,}', result) or re.search(r'\bKW\d+', result):
-                print("Warning: Found placeholders in final HTML output")
-                # Attempt final cleanup
                 for i, keyword in enumerate(all_keywords):
                     result = result.replace(f'__KW{i:03d}__', keyword)
-                    result = re.sub(f'\\bKW{i:03d}\\b', keyword, result)
-                    result = re.sub(f'\\bKW{i}\\b', keyword, result)
-                result = re.sub(r'_{3,}', '', result)
-                result = re.sub(r'\bKW\d+\b', '', result)
             # Restore all script tags
             for idx, script_content in enumerate(preserved_scripts):
@@ -2001,7 +1834,7 @@ class EnhancedDipperHumanizer:
             # Validate and fix HTML syntax
             result = self.validate_and_fix_html(result)
-            # Count skipped elements
             all_text_elements = soup.find_all(string=True)
             skipped = len([e for e in all_text_elements if e.strip() and e.parent.name not in ['script', 'style', 'noscript']]) - total_elements
@@ -2015,13 +1848,14 @@ class EnhancedDipperHumanizer:
             import traceback
             error_msg = f"Error processing HTML: {str(e)}\n{traceback.format_exc()}"
             print(error_msg)
             return f"<!-- {error_msg} -->\n{html_content}"
     def post_process_html(self, html_text):
         """Post-process the entire HTML to fix formatting issues"""
-        # Fix empty angle brackets
-        html_text = re.sub(r'<>\s*([^<>]+?)\s*(?=\.|\s|<)', r'\1', html_text)
-        html_text = re.sub(r'<>', '', html_text)
         # Fix double angle brackets around bold tags
         html_text = re.sub(r'<<b>>', '<b>', html_text)
@@ -2030,9 +1864,9 @@ class EnhancedDipperHumanizer:
         html_text = re.sub(r'<</strong>>', '</strong>', html_text)
         # Fix periods around bold/strong tags
-        html_text = re.sub(r'\.\s*<(b|strong)>', '. <\1>', html_text)
-        html_text = re.sub(r'</(b|strong)>\s*\.', '</\1>.', html_text)
-        html_text = re.sub(r'\.<<(b|strong)>>', '. <\1>', html_text)
         # The replacement must be a raw string: in a normal literal '\1' is the
         # control character \x01, not a backreference to the captured tag name.
         html_text = re.sub(r'</(b|strong)>>\.', r'</\1>.', html_text)
         # Fix periods after colons
@@ -2044,15 +1878,19 @@ class EnhancedDipperHumanizer:
             # Check if this line contains a list pattern with bold
             list_pattern = r'(?:^|\s)(?:[-•*▪▫◦‣⁃]\s*)?<(?:strong|b)>[^<]+:</(?:strong|b)>'
             if re.search(list_pattern, line):
                 return line
             # Not a list item, apply regular fixes
             line = re.sub(r'<(strong|b)>\s*\.\s*([^<]+)\s*\.\s*</\1>', r'<\1>\2</\1>', line)
             line = re.sub(r'</(strong|b)>\s*([.!?])', r'</\1>\2', line)
             return line
-        # Process line by line
         lines = html_text.split('\n')
         processed_lines = [process_line(line) for line in lines]
         html_text = '\n'.join(processed_lines)
@@ -2078,7 +1916,8 @@ class EnhancedDipperHumanizer:
         # Look for bold/strong tags and check their context
         html_text = re.sub(r'(^|.*?)(<(?:strong|b)>)([a-zA-Z])', fix_bold_sentence_start, html_text)
-        # Clean up spacing around bold tags
         segments = re.split(r'(<(?:strong|b)>[^<]*:</(?:strong|b)>)', html_text)
         cleaned_segments = []
@@ -2089,7 +1928,9 @@ class EnhancedDipperHumanizer:
                 # Apply spacing fixes to non-list segments
                 segment = re.sub(r'\s+<(strong|b)>', r' <\1>', segment)
                 segment = re.sub(r'</(strong|b)>\s+', r'</\1> ', segment)
                 segment = re.sub(r'([.,!?;:])\s*([.,!?;:])', r'\1', segment)
                 # Raw replacement strings so \1 is the captured tag name; a plain
                 # '\1' literal is the control character \x01 and corrupts the tag.
                 segment = re.sub(r'\.<(strong|b)>\.', r'. <\1>', segment)
                 segment = re.sub(r'\.</(strong|b)>\.', r'</\1>.', segment)
                 cleaned_segments.append(segment)
@@ -2097,15 +1938,16 @@ class EnhancedDipperHumanizer:
         html_text = ''.join(cleaned_segments)
         # Final cleanup
-        html_text = re.sub(r'\.{2,}', '.', html_text)
-        html_text = re.sub(r',{2,}', ',', html_text)
-        html_text = re.sub(r':{2,}', ':', html_text)
-        html_text = re.sub(r'\s+([.,!?;:])', r'\1', html_text)
-        # Fix empty bold tags
         html_text = re.sub(r'<(strong|b)>\s*</\1>', '', html_text)
         # Fix specific patterns in lists/stats
         html_text = re.sub(r'(\d+[,\d]*\+?)\s*\.\s*\n', r'\1\n', html_text)
         # Clean up any remaining double brackets
@@ -2169,33 +2011,24 @@ iface = gr.Interface(
         lines=10,
         label="Humanized HTML Output"
     ),
-    title="Enhanced Dipper AI Humanizer v2 - 100% Human Score Optimized",
     description="""
     Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.
-    🚀 **NEW ENHANCEMENTS for 100% Human Score:**
-    - **MAXIMUM diversity settings** (98% lexical, 60% order) for extreme variation
-    - **Personal voice injection**: Opinions, reactions, and thinking-out-loud elements
-    - **Stream-of-consciousness patterns**: Natural self-corrections and tangents
-    - **Emphatic repetitions**: Human-like emphasis patterns ("Yes, I said X!")
-    - **Ultra-realistic errors**: Typos, double words, homophone mix-ups
-    - **Rhetorical questions**: "Make sense?" "Following me?" "Wild, right?"
-    - **60+ casual transitions**: From "So," to "Plot twist:" to "Between you and me,"
-    - **Natural asides**: "(yeah, I know)" "(trust me on this)" "(not even kidding)"
-    ✅ **Key Features:**
-    - Fixed keyword restoration system - no more KW0 or placeholder issues!
     - Keywords inside <p> tags are automatically wrapped with <strong> tags
-    - Preserves all HTML structure, scripts, and styles
-    - Skips headings, CTAs, tables, testimonials, and existing bold/strong content
-    - Designed to fool even the strictest AI detectors!
-    💡 **Tips for Best Results:**
-    - Use 3-5 primary keywords for best preservation
-    - Longer content = better humanization results
-    - Processing takes 5-10 minutes for large documents
-    The tool creates genuinely human-like writing that passes Originality AI's strictest tests!
     """,
     examples=[
         ["""<article>

     """Add human-like variations and intentional imperfections"""
     def __init__(self):
+        # Common human writing patterns - EXPANDED for Originality AI
         self.casual_transitions = [
+             "So, ", "Well, ", "Now, ", "Actually, ", "Basically, ",
+             "You know, ", "I mean, ", "Thing is, ", "Honestly, ",
+             "Look, ", "Listen, ", "See, ", "Okay, ", "Right, ",
+             "Anyway, ", "Besides, ", "Plus, ", "Also, ", "Oh, ",
+             "Hey, ", "Alright, ", "Sure, ", "Fine, ", "Obviously, ",
+             "Clearly, ", "Seriously, ", "Literally, ", "Frankly, ",
+             "To be honest, ", "Truth is, ", "In fact, ", "Believe it or not, ",
+             "Here's the thing, ", "Let me tell you, ", "Get this, ",
+             "Funny thing is, ", "Interestingly, ", "Surprisingly, ",
+             "Let's be real here, ", "Can we talk about ", "Quick question: ",
+             "Real talk: ", "Hot take: ", "Unpopular opinion: ", "Fun fact: ",
+             "Pro tip: ", "Side note: ", "Random thought: ", "Food for thought: ",
+             "Just saying, ", "Not gonna lie, ", "For what it's worth, ",
+             "If you ask me, ", "Between you and me, ", "Here's my take: ",
+             "Let's face it, ", "No kidding, ", "Seriously though, ",
+             "But wait, ", "Hold on, ", "Check this out: ", "Guess what? "
         ]
         self.filler_phrases = [
             "to the best of my knowledge", "if I'm not mistaken", "correct me if I'm wrong",
             "you know what", "here's the deal", "bottom line", "at any rate",
             "all in all", "when you think about it", "come to think of it",
+            "now that I think about it", "if we're being honest", "to be fair"
         ]
         self.human_connectors = [
             ". And honestly?", ". But seriously,", ". And you know what?",
             ", which brings me to", ". This reminds me of", ", speaking of which",
             ". Funny enough,", ". Weird thing is,", ". Strange but true:",
+            ", and I mean", ". I'm not kidding when I say", ", and trust me on this"
         ]
+        # NEW: Common human typos and variations
         self.common_typos = {
+            "the": ["teh", "th", "hte"],
+            "and": ["adn", "nad", "an"],
+            "that": ["taht", "htat", "tha"],
+            "with": ["wiht", "wtih", "iwth"],
+            "have": ["ahve", "hvae", "hav"],
+            "from": ["form", "fro", "frmo"],
+            "they": ["tehy", "thye", "htey"],
+            "which": ["whihc", "wich", "whcih"],
+            "their": ["thier", "theri", "tehir"],
+            "would": ["woudl", "wuold", "woul"],
+            "there": ["tehre", "theer", "ther"],
+            "could": ["coudl", "cuold", "coud"],
+            "people": ["poeple", "peopel", "pepole"],
+            "through": ["thorugh", "throught", "trhough"],
+            "because": ["becuase", "becasue", "beacuse"],
+            "before": ["beofre", "befroe", "befor"],
+            "different": ["differnt", "differnet", "diferent"],
+            "between": ["bewteen", "betwen", "betewen"],
+            "important": ["improtant", "importnat", "importan"],
+            "information": ["infromation", "informaiton", "informaton"]
         }
+        # NEW: Human-like sentence starters for variety
         self.varied_starters = [
             "When it comes to", "As for", "Regarding", "In terms of",
             "With respect to", "Concerning", "Speaking of", "About",
             "You might wonder", "You might ask", "You may think",
             "Some people say", "Many believe", "It's often said",
             "Research shows", "Studies indicate", "Evidence suggests",
+            "Experience tells us", "History shows", "Time has shown"
         ]
     def add_human_touch(self, text):
+        """Add subtle human-like imperfections - NATURAL PATTERNS ONLY"""
         sentences = text.split('. ')
         modified_sentences = []
         # Track what we've used to avoid patterns
+        used_transitions = []
         for i, sent in enumerate(sentences):
             if not sent.strip():
             # Always use contractions where natural
             sent = self.apply_contractions(sent)
+            # Add VERY occasional natural errors (5% chance)
+            if random.random() < 0.05 and len(sent.split()) > 15:
+                error_types = [
+                    # Missing comma in compound sentence
+                    lambda s: s.replace(", and", " and", 1) if ", and" in s else s,
+                    # Wrong homophone
+                    lambda s: s.replace("their", "there", 1) if "their" in s and random.random() < 0.3 else s,
+                    # Missing apostrophe
+                    lambda s: s.replace("it's", "its", 1) if "it's" in s and random.random() < 0.3 else s,
                 ]
+                error_func = random.choice(error_types)
+                sent = error_func(sent)
             modified_sentences.append(sent)
         return '. '.join(modified_sentences)
     def apply_contractions(self, text):
         """Apply common contractions - EXPANDED"""
         contractions = {
             "we would": "we'd", "they would": "they'd", "could have": "could've",
             "should have": "should've", "would have": "would've", "might have": "might've",
             "must have": "must've", "there has": "there's", "here is": "here's",
+            "let us": "let's", "that will": "that'll", "who will": "who'll"
         }
         for full, contr in contractions.items():
+            if random.random() < 0.8:  # 80% chance to apply each contraction
                 text = re.sub(r'\b' + full + r'\b', contr, text, flags=re.IGNORECASE)
         return text
     def add_minor_errors(self, text):
         """Sprinkle a few small, plausible writing slips into ``text``.

         Four independent random passes run in this order:

         1. 15% chance: drop the Oxford comma in "a, b, and c" lists.
         2. 8% chance: turn the first "<word> that <word>" into "... which ..."
            unless the leading word is believe/think/know/say.
         3. Per '. '-separated sentence, 2% chance (only when it has more
            than 15 words): pick a word from the second half and, if it is
            one of a few common words, misspell it with 50% probability.
         4. 2% chance: turn "its"/"your" followed by an -ing/-ed word into
            "it's"/"you're" (30% per pair, at most one pair changed).

         Returns:
             The text, possibly altered.
         """
         # Pass 1: Oxford comma (lists only, not arbitrary commas)
         if random.random() < 0.15:
             text = re.sub(r'(\w+), (\w+), and (\w+)', r'\1, \2 and \3', text)

         # Pass 2: "that" -> "which", first occurrence only
         if random.random() < 0.08:
             first_hit = re.search(r'\b(\w+) that (\w+)', text)
             if first_hit is not None:
                 lead, follow = first_hit.group(1), first_hit.group(2)
                 if lead.lower() not in ('believe', 'think', 'know', 'say'):
                     text = text.replace(first_hit.group(0), f"{lead} which {follow}", 1)

         # Pass 3: rare typos in long sentences; only words whose misspelling
         # cannot change the meaning
         typo_table = {
             'the': 'teh',
             'and': 'adn',
             'that': 'taht',
             'with': 'wtih',
             'from': 'form',
             'because': 'becuase',
         }
         pieces = text.split('. ')
         for slot, piece in enumerate(pieces):
             if random.random() >= 0.02 or len(piece.split()) <= 15:
                 continue
             tokens = piece.split()
             # Second half of the sentence, never the final token
             target = random.randint(len(tokens) // 2, len(tokens) - 2)
             swap = typo_table.get(tokens[target].lower())
             if swap is None or random.random() >= 0.5:
                 continue
             # Carry over a leading capital from the original token
             if tokens[target][0].isupper():
                 swap = swap[0].upper() + swap[1:]
             tokens[target] = swap
             pieces[slot] = ' '.join(tokens)
         text = '. '.join(pieces)

         # Doubled words are deliberately not produced here.

         # Pass 4: the two most common homophone slips, at most one per call
         if random.random() < 0.02:
             for wrong_src, wrong_dst in (('its', "it's"), ('your', "you're")):
                 if f" {wrong_src} " not in text or random.random() >= 0.3:
                     continue
                 # Only touch the word when an -ing/-ed word follows it
                 pattern = rf'\b{wrong_src}\s+(\w+ing|\w+ed)\b'
                 if re.search(pattern, text):
                     text = re.sub(pattern, f"{wrong_dst} \\1", text, count=1)
                     break

         return text
             # Natural contractions throughout
             sentence = self.apply_contractions(sentence)
+            # Add natural speech patterns (15% chance)
+            if random.random() < 0.15 and len(sentence.split()) > 10:
+                # Natural interruptions that humans actually use
+                if random.random() < 0.5:
+                    # Add "you know" or "I mean" naturally
+                    words = sentence.split()
+                    if len(words) > 6:
+                        pos = random.randint(3, len(words)-3)
+                        if random.random() < 0.5:
+                            words.insert(pos, "you know,")
+                        else:
+                            words.insert(pos, "I mean,")
+                        sentence = ' '.join(words)
+                else:
+                    # Start with natural opener
+                    openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
+                    sentence = random.choice(openers) + " " + sentence[0].lower() + sentence[1:]
+            # Add subtle errors that humans make (8% chance)
+            if random.random() < 0.08:
                 words = sentence.split()
+                if len(words) > 5:
+                    # Common comma omissions
+                    if ", and" in sentence and random.random() < 0.3:
+                        sentence = sentence.replace(", and", " and", 1)
+                    # Double words occasionally
+                    elif random.random() < 0.2:
+                        idx = random.randint(1, len(words)-2)
+                        if words[idx].lower() in ['the', 'a', 'to', 'in', 'on', 'at']:
+                            words.insert(idx+1, words[idx])
+                            sentence = ' '.join(words)
+            # Natural sentence combinations (20% chance)
+            if i < len(sentences) - 1 and random.random() < 0.2:
                 next_sent = sentences[i+1].strip()
+                if next_sent and len(sentence.split()) + len(next_sent.split()) < 25:
+                    # Natural connectors based on content
+                    if any(w in next_sent.lower() for w in ['but', 'however', 'although']):
+                        sentence = sentence.rstrip('.') + ", but " + next_sent[0].lower() + next_sent[1:]
+                        sentences[i+1] = ""  # Mark as processed
+                    elif any(w in next_sent.lower() for w in ['also', 'too', 'as well']):
+                        sentence = sentence.rstrip('.') + " and " + next_sent[0].lower() + next_sent[1:]
+                        sentences[i+1] = ""  # Mark as processed
             result_sentences.append(sentence)
         return ' '.join([s for s in result_sentences if s])
     def vary_sentence_start(self, sentence):
         """Vary sentence beginning to avoid repetitive patterns.

         One of six rewrites is picked at random; the last one leaves the
         sentence untouched.  Empty sentences and sentences with fewer than
         five words are returned unchanged.

         Args:
             sentence: A single sentence, with or without end punctuation.

         Returns:
             The rewritten (or original) sentence.
         """
         if not sentence:
             return sentence
         words = sentence.split()
         if len(words) < 5:
             return sentence

         whole = sentence.strip()
         # Templates that append text after the sentence need it without its
         # own terminator, otherwise they yield "...dog., it makes sense.".
         core = whole.rstrip('.!?').rstrip()
         closed = whole if whole.endswith(('.', '!', '?')) else whole + '.'

         # Lower-casing the first letter is wrong for the pronoun "I" and for
         # acronyms, so those keep their capitalisation mid-sentence.
         head = words[0].strip('.,;:!?"()')
         keep_case = (
             head == 'I'
             or head.startswith(("I'", "I\u2019"))
             or (len(head) > 1 and head.isupper())
         )

         def lowered(fragment):
             return fragment if keep_case else fragment[0].lower() + fragment[1:]

         variations = [
             "When " + lowered(core) + ", it makes sense.",
             "If you think about it, " + lowered(whole),
             closed + " This is important.",
             "The thing about " + lowered(core) + " is clear.",
             "What's interesting is " + lowered(whole),
             sentence,  # Keep original sometimes
         ]
         return random.choice(variations)
 class SelectiveGrammarFixer:
     """Minimal grammar fixes to maintain human-like quality while fixing critical errors"""
         result = ' '.join(fixed_sentences)
+        # Add natural human variations (but we need to reference the main class method)
+        # This will be called from the smart_fix method instead
         return result
     def fix_basic_punctuation_errors(self, text):
         """Repair a handful of basic spacing and punctuation slips.

         Applied in order: collapse runs of whitespace, remove whitespace
         before punctuation, insert a missing space between punctuation and a
         following capital letter, collapse repeated ``.``/``!``/``?``, and
         capitalise the standalone pronoun "i".

         Args:
             text: The text to clean; falsy values are returned unchanged.

         Returns:
             The cleaned text.
         """
         if not text:
             return text

         # Fix double spaces (human-like error)
         text = re.sub(r'\s{2,}', ' ', text)

         # Fix space before punctuation (common error)
         text = re.sub(r'\s+([.,!?;:])', r'\1', text)

         # Fix missing space after punctuation (human-like)
         text = re.sub(r'([.,!?])([A-Z])', r'\1 \2', text)

         # Fix accidental double punctuation
         text = re.sub(r'([.!?])\1+', r'\1', text)

         # Capitalise the pronoun "i" (including "i'm", "i'd"), but leave the
         # HTML tag name in <i>/</i> and the abbreviation "i.e." untouched:
         # a bare \bi\b would rewrite those to <I>, </I> and "I.e.".
         text = re.sub(r'(?<![</])\bi\b(?!\.\w)', 'I', text)

         return text
     def preserve_natural_variations(self, text):
         """Keep some natural human-like variations; only repair run-on text.

         Text is left alone unless it contains no period at all and has more
         than 20 words.  In that case it is broken into sentences of 12-25
         words each (length drawn at random per sentence) and the word after
         each inserted period is capitalised.  Whitespace is normalised to
         single spaces in the rebuilt text.

         Args:
             text: The text to inspect; falsy values are returned unchanged.

         Returns:
             The original text, or the rebuilt text with periods inserted.
         """
         if not text:
             return text

         words = text.split()
         # Only fix if really broken: long text without a single period
         if '.' in text or len(words) <= 20:
             return text

         rebuilt = []
         last = len(words) - 1
         run_length = 0
         # Draw the sentence length once per sentence.  Drawing a fresh
         # modulus for every word placed breaks at arbitrary distances.
         next_break = random.randint(12, 25)
         capitalise_next = False

         for idx, word in enumerate(words):
             if capitalise_next and word[0].islower():
                 word = word[0].upper() + word[1:]
             capitalise_next = False

             run_length += 1
             if word[-1] in '!?':
                 # An existing sentence end restarts the count
                 run_length = 0
             elif run_length >= next_break and idx < last and word[-1] not in ',;:':
                 # Words ending in , ; : are skipped; the next word is tried
                 word += '.'
                 capitalise_next = True
                 run_length = 0
                 next_break = random.randint(12, 25)
             rebuilt.append(word)

         return ' '.join(rebuilt)
                 print("spaCy model not found, using NLTK for sentence splitting")
         try:
+            # Load Dipper paraphraser WITHOUT 8-bit quantization for better performance
             print("Loading Dipper paraphraser model...")
             self.tokenizer = T5Tokenizer.from_pretrained('google/t5-v1_1-xxl')
             self.model = T5ForConditionalGeneration.from_pretrained(
                 "kalpeshk2011/dipper-paraphraser-xxl",
+                device_map="auto",  # This will distribute across 4xL40S automatically
                 torch_dtype=torch.float16,
                 low_cpu_mem_usage=True
             )
             self.bart_model = AutoModelForSeq2SeqLM.from_pretrained(
                 "eugenesiow/bart-paraphrase",
                 torch_dtype=torch.float16,
+                device_map="auto"  # Distribute across GPUs
             )
             self.bart_tokenizer = AutoTokenizer.from_pretrained("eugenesiow/bart-paraphrase")
             self.use_bart = True
         self.human_variations = HumanLikeVariations()
     def add_natural_human_patterns(self, text):
         """Add natural human writing patterns that Originality AI associates with human text.

         The text is split with ``self.split_into_sentences_advanced`` and each
         non-empty sentence goes through these steps:

         * contractions via ``self.apply_contractions``;
         * 15% chance (sentences over 10 words): insert "you know,"/"I mean,"
           mid-sentence, or prepend an opener such as "Look,";
         * 8% chance (sentences over 5 words): drop the comma before "and",
           or double a short function word;
         * 20% chance: merge with the following sentence when the pair stays
           under 25 words and the next sentence contains a contrast or
           addition cue word.

         Args:
             text: The text to process.

         Returns:
             The processed sentences joined with single spaces.
         """
         sentences = self.split_into_sentences_advanced(text)
         result_sentences = []

         # Whole-word cues: plain substring tests also fire on "button",
         # "tool", "contribute", ...
         contrast_cue = re.compile(r'\b(?:but|however|although)\b', re.IGNORECASE)
         addition_cue = re.compile(r'\b(?:also|too|as well)\b', re.IGNORECASE)

         def lower_first(fragment):
             # Lower-case the first letter for mid-sentence use, except for
             # the pronoun "I" and all-caps words (acronyms, placeholders).
             fragment = fragment.lstrip()
             head = fragment.split(None, 1)[0].strip('.,;:!?"()')
             if (head == 'I' or head.startswith(("I'", "I\u2019"))
                     or (len(head) > 1 and head.isupper())):
                 return fragment
             return fragment[0].lower() + fragment[1:]

         for i, sentence in enumerate(sentences):
             if not sentence.strip():
                 # Also skips sentences already merged into their predecessor
                 continue

             # Natural contractions throughout
             sentence = self.apply_contractions(sentence)

             # Add natural speech patterns (15% chance)
             if random.random() < 0.15 and len(sentence.split()) > 10:
                 if random.random() < 0.5:
                     # More than 10 words here, so the range below is valid
                     words = sentence.split()
                     pos = random.randint(3, len(words) - 3)
                     words.insert(pos, "you know," if random.random() < 0.5 else "I mean,")
                     sentence = ' '.join(words)
                 else:
                     openers = ["Look,", "See,", "Thing is,", "Honestly,", "Actually,"]
                     sentence = random.choice(openers) + " " + lower_first(sentence)

             # Add subtle errors that humans make (8% chance)
             if random.random() < 0.08:
                 words = sentence.split()
                 if len(words) > 5:
                     if ", and" in sentence and random.random() < 0.3:
                         # Common comma omission
                         sentence = sentence.replace(", and", " and", 1)
                     elif random.random() < 0.2:
                         # Occasionally double a short function word
                         idx = random.randint(1, len(words) - 2)
                         if words[idx].lower() in ('the', 'a', 'to', 'in', 'on', 'at'):
                             words.insert(idx + 1, words[idx])
                             sentence = ' '.join(words)

             # Natural sentence combinations (20% chance)
             if i < len(sentences) - 1 and random.random() < 0.2:
                 next_sent = sentences[i + 1].strip()
                 short_enough = len(sentence.split()) + len(next_sent.split()) < 25
                 # Joining after "?" or "!" would give "Really?, but ..."
                 ends_plainly = not sentence.rstrip().endswith(('!', '?'))
                 if next_sent and short_enough and ends_plainly:
                     joiner = None
                     if contrast_cue.search(next_sent):
                         # Avoid ", but but ..." when the cue already leads
                         joiner = ", " if next_sent.lower().startswith("but ") else ", but "
                     elif addition_cue.search(next_sent):
                         joiner = " and "
                     if joiner is not None:
                         # The merged sentence is never visited again, so it
                         # gets its contractions here.
                         tail = lower_first(self.apply_contractions(next_sent))
                         sentence = sentence.rstrip().rstrip('.') + joiner + tail
                         sentences[i + 1] = ""  # Mark as processed

             result_sentences.append(sentence)

         return ' '.join([s for s in result_sentences if s])
     def vary_sentence_start(self, sentence):
         """Vary sentence beginning to avoid repetitive patterns.

         One of six rewrites is picked at random; the last one leaves the
         sentence untouched.  Empty sentences and sentences with fewer than
         five words are returned unchanged.

         Args:
             sentence: A single sentence, with or without end punctuation.

         Returns:
             The rewritten (or original) sentence.
         """
         if not sentence:
             return sentence
         words = sentence.split()
         if len(words) < 5:
             return sentence

         whole = sentence.strip()
         # Templates that append text after the sentence need it without its
         # own terminator, otherwise they yield "...dog., it makes sense.".
         core = whole.rstrip('.!?').rstrip()
         closed = whole if whole.endswith(('.', '!', '?')) else whole + '.'

         # Lower-casing the first letter is wrong for the pronoun "I" and for
         # acronyms, so those keep their capitalisation mid-sentence.
         head = words[0].strip('.,;:!?"()')
         keep_case = (
             head == 'I'
             or head.startswith(("I'", "I\u2019"))
             or (len(head) > 1 and head.isupper())
         )

         def lowered(fragment):
             return fragment if keep_case else fragment[0].lower() + fragment[1:]

         variations = [
             "When " + lowered(core) + ", it makes sense.",
             "If you think about it, " + lowered(whole),
             closed + " This is important.",
             "The thing about " + lowered(core) + " is clear.",
             "What's interesting is " + lowered(whole),
             sentence,  # Keep original sometimes
         ]
         return random.choice(variations)
     def apply_contractions(self, text):
         """Apply common contractions to make text more natural.

         Every whole-phrase occurrence of a known expansion ("it is",
         "do not", ...) is replaced case-insensitively.  A leading capital in
         the matched phrase is carried over to the contraction, so a
         sentence-initial "It is" becomes "It's" rather than "it's".

         Args:
             text: The text to contract.

         Returns:
             The text with contractions applied.
         """
         contractions = {
             "it is": "it's", "that is": "that's", "there is": "there's",
             "he is": "he's", "she is": "she's", "what is": "what's",
             "where is": "where's", "who is": "who's", "how is": "how's",
             "cannot": "can't", "will not": "won't", "do not": "don't",
             "does not": "doesn't", "did not": "didn't", "could not": "couldn't",
             "should not": "shouldn't", "would not": "wouldn't", "is not": "isn't",
             "are not": "aren't", "was not": "wasn't", "were not": "weren't",
             "have not": "haven't", "has not": "hasn't", "had not": "hadn't",
             "I am": "I'm", "you are": "you're", "we are": "we're",
             "they are": "they're", "I have": "I've", "you have": "you've",
             "we have": "we've", "they have": "they've", "I will": "I'll",
             "you will": "you'll", "he will": "he'll", "she will": "she'll",
             "we will": "we'll", "they will": "they'll", "I would": "I'd",
             "you would": "you'd", "he would": "he'd", "she would": "she'd",
             "we would": "we'd", "they would": "they'd", "could have": "could've",
             "should have": "should've", "would have": "would've", "might have": "might've",
             "must have": "must've", "there has": "there's", "here is": "here's",
             "let us": "let's", "that will": "that'll", "who will": "who'll"
         }

         def match_case(found, short):
             # Contractions stored with a capital ("I'm") stay as they are;
             # the others take a capital only when the matched text had one.
             if found[0].isupper():
                 return short[0].upper() + short[1:]
             return short

         for full, contr in contractions.items():
             text = re.sub(
                 r'\b' + full + r'\b',
                 lambda m, contr=contr: match_case(m.group(0), contr),
                 text,
                 flags=re.IGNORECASE,
             )

         return text
     def preserve_keywords(self, text, keywords):
         """Mark keywords to preserve them during paraphrasing"""
         return modified_text, keyword_map
     def restore_keywords_robust(self, text, keyword_map):
+        """Restore keywords with more flexible pattern matching"""
         if not keyword_map:
             return text
             if match:
                 num = match.group(1)
+                # Various patterns the model might create
                 patterns = [
                     (f'__KW{num}__', keyword),
                     (f'__ KW{num}__', keyword),
                     (f'__KW {num}__', keyword),
                     (f'__KW{num}_', keyword),
                     (f'_KW{num}__', keyword),
                     (f'kw{num}', keyword),
+                    (f'``KW{num}__', keyword),  # Handle backtick corruption
+                    (f'``KKW{num}', keyword),    # Handle double K corruption
+                    (f'KW{num}', keyword),       # Simple pattern
                 ]
                 for pattern, replacement in patterns:
+                    if pattern in restored_text:
                         # Check if this position has already been replaced
+                        start_pos = restored_text.find(pattern)
+                        if start_pos != -1 and not any(pos in replaced_positions for pos in range(start_pos, start_pos + len(pattern))):
+                            print(f"Found pattern '{pattern}', replacing with {replacement}")
+                            restored_text = restored_text.replace(pattern, replacement, 1)  # Replace only first occurrence
                             # Mark new positions as replaced
+                            for match in re.finditer(re.escape(replacement), restored_text):
+                                replaced_positions.update(range(match.start(), match.end()))
+                            break  # Move to next placeholder after successful replacement
+        # Third pass: Clean up any backticks or quotes that shouldn't be there
+        # Remove double backticks
         restored_text = re.sub(r'``+', '', restored_text)
+        # Fix double quotes
         restored_text = re.sub(r"''", '"', restored_text)
         restored_text = re.sub(r'""', '"', restored_text)
+        # Fourth pass: Look for remaining underscore patterns
+        # But be more careful about replacement
         if '___' in restored_text and keyword_map:
             # Find all occurrences of multiple underscores
             underscore_matches = list(re.finditer(r'_{3,}', restored_text))
                         replaced_positions.update(range(start, start + len(keyword_values[i])))
         # Final cleanup: Remove any remaining KW patterns that weren't caught
+        # But only if they're not part of an already replaced keyword
+        remaining_kw_patterns = re.findall(r'\bKW\d{3}\b', restored_text)
+        if remaining_kw_patterns:
+            print(f"Warning: Found remaining KW patterns: {remaining_kw_patterns}")
         # Log final result
         print(f"Final restored text: {restored_text[:100]}...")
             return True
         # Special handling for content inside tables
+        # Skip if it's inside strong/b/h1-h6 tags AND also inside a table
         if parent:
             # Check if we're inside a table
             is_in_table = any(p.name == 'table' for p in parent.parents)
             if any(handler in parent.attrs for handler in event_handlers):
                 return True
+        # Special check for testimonial cards - check up to 3 levels of ancestors
         if parent:
             ancestors_to_check = []
             current = parent
                     elif isinstance(classes, str) and 'testimonial-card' in classes:
                         return True
+        # Skip if IMMEDIATE parent or element itself has skip-worthy classes/IDs
         skip_indicators = [
             'cta-', 'button', 'btn', 'heading', 'title', 'caption',
             'toc-', 'contents', 'quiz', 'tip', 'note', 'alert',
             'comparision-tables', 'process-flowcharts', 'infographics', 'cost-breakdown'
         ]
+        # Check only immediate parent and grandparent (not all ancestors)
         elements_to_check = [parent]
         if parent and parent.parent:
             elements_to_check.append(parent.parent)
         return False
     def clean_model_output_enhanced(self, text):
+        """Enhanced cleaning that preserves more natural structure"""
         if not text:
             return ""
         text = re.sub(r'- or maybe I should say -', '', text)
         text = re.sub(r'- or rather,', '', text)
         text = re.sub(r'- think about it -', '', text)
         # Clean up multiple spaces
         text = re.sub(r'\s+', ' ', text)
+        # Remove leading non-letter characters carefully
+        # IMPORTANT: Preserve keyword placeholders
+        if not re.match(r'^(__KW\d+__|KW\d+)', text):
+            # Only remove if it doesn't start with a placeholder
+            text = re.sub(r'^[^a-zA-Z_]+', '', text)
         # If we accidentally removed too much, use original
         if len(text) < len(original) * 0.5:
                 continue
             try:
+                # ULTRA-HIGH diversity for Originality AI
                 has_keywords = any(placeholder in sentence for placeholder in keyword_map.keys())
                 if has_keywords:
+                    lex_diversity = 60  # Moderate for keywords
+                    order_diversity = 20
                 elif len(sentence.split()) < 10:
+                    lex_diversity = 85  # Very high for short
+                    order_diversity = 40
                 else:
+                    lex_diversity = 95  # Maximum diversity
+                    order_diversity = 50  # Maximum order diversity
                 lex_code = int(100 - lex_diversity)
                 order_code = int(100 - order_diversity)
                 else:
                     inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                # Generate with appropriate variation
                 original_length = len(sentence.split())
+                max_new_length = int(original_length * 1.4)
+                # High variation parameters
+                temp = 0.95 if has_keywords else 1.3
+                top_p_val = 0.9
                 with torch.no_grad():
                     outputs = self.model.generate(
                         **inputs,
                         max_length=max_new_length + 20,
+                        min_length=max(5, int(original_length * 0.7)),
                         do_sample=True,
                         top_p=top_p_val,
                         temperature=temp,
+                        no_repeat_ngram_size=4,  # Allow more repetition for naturalness
                         num_beams=1,  # Greedy for more randomness
                         early_stopping=True
                     )
             last_word = words[-1]
             # Remove if it's clearly cut off (1-2 chars, no vowels)
+            # But don't remove valid short words like "is", "of", "to", etc.
+            short_valid_words = {'is', 'of', 'to', 'in', 'on', 'at', 'by', 'or', 'if', 'so', 'up', 'no', 'we', 'he', 'me', 'be', 'do', 'go'}
             if (len(last_word) <= 2 and
                 last_word.lower() not in short_valid_words and
                 not any(c in 'aeiouAEIOU' for c in last_word)):
                     generated += '.'
             elif orig_stripped.endswith('!'):
                 # Check if generated seems exclamatory
+                exclaim_words = ['amazing', 'incredible', 'fantastic', 'terrible', 'awful', 'wonderful', 'excellent']
                 if any(word in generated.lower() for word in exclaim_words):
                     generated += '!'
                 else:
                 with torch.no_grad():
                     outputs = self.bart_model.generate(
                         **inputs,
+                        max_length=int(original_length * 1.4) + 10,
+                        min_length=max(5, int(original_length * 0.6)),
                         num_beams=2,
+                        temperature=1.1,  # Higher temperature
                         do_sample=True,
+                        top_p=0.9,
                         early_stopping=True
                     )
             return text
     def apply_sentence_variation(self, text):
         """Vary sentence rhythm by splitting long runs and merging short pairs.

         The text is split with ``self.split_into_sentences_advanced`` and the
         sentences are walked in order:

         * When the previously emitted sentence and the current one both exceed
           20 words, the current sentence is cut at its first comma, provided
           the clause after the comma is longer than 8 words.
         * When the current and the next sentence are both shorter than 10
           words and the next one opens with "it", "this", "that" or "which"
           (as a whole word), the two are merged into one sentence.

         Args:
             text: Text to restructure.

         Returns:
             The resulting sentences joined with single spaces; blank
             sentences are dropped.
         """
         # Whole-word match: a plain prefix test would also accept words such
         # as "Items", "Thistle" or "Thatcher" and merge unrelated sentences.
         connective_start = re.compile(r'(?:it|this|that|which)\b', re.IGNORECASE)

         sentences = self.split_into_sentences_advanced(text)
         varied_sentences = []
         # Word count of the most recently emitted sentence, used to detect
         # two long sentences in a row.
         last_sentence_length = 0
         # Set when the next sentence has already been merged into this one.
         skip_next = False

         for i, sentence in enumerate(sentences):
             if skip_next:
                 skip_next = False
                 continue
             if not sentence.strip():
                 # Blank entries carry no content; never emit or measure them.
                 continue

             current_length = len(sentence.split())

             # Break up the second of two long sentences at its first comma.
             if last_sentence_length > 20 and current_length > 20 and ',' in sentence:
                 head, tail = sentence.split(',', 1)
                 tail_length = len(tail.split())
                 if tail_length > 8:
                     varied_sentences.append(head.strip() + '.')
                     tail = tail.strip()
                     if tail[0].islower():
                         tail = tail[0].upper() + tail[1:]
                     varied_sentences.append(tail)
                     last_sentence_length = tail_length
                     continue

             # Merge two short neighbours when the second one refers back to
             # the first through a pronoun/connective opener.
             has_next = i + 1 < len(sentences)
             if has_next and current_length < 10 and len(sentences[i + 1].split()) < 10:
                 next_sent = sentences[i + 1].strip()
                 if connective_start.match(next_sent):
                     combined = sentence.rstrip('.') + ' ' + next_sent[0].lower() + next_sent[1:]
                     varied_sentences.append(combined)
                     skip_next = True
                     last_sentence_length = len(combined.split())
                     continue

             varied_sentences.append(sentence)
             last_sentence_length = current_length

         return ' '.join(s for s in varied_sentences if s)
     def fix_punctuation(self, text):
         """Comprehensive punctuation and formatting fixes"""
         if not text:
         # Fix weird symbols and characters using safe replacements
         text = text.replace('<>', '')  # Remove empty angle brackets
+        # Normalize quotes - use replace instead of regex for problematic characters
         text = text.replace('«', '"').replace('»', '"')
         text = text.replace('„', '"').replace('"', '"').replace('"', '"')
         text = text.replace(''', "'").replace(''', "'")
         text = text.replace('–', '-').replace('—', '-')
         # Fix colon issues
+        text = re.sub(r'\.:', ':', text)  # Remove period before colon
+        text = re.sub(r':\s*\.', ':', text)  # Remove period after colon
+        # Fix basic spacing
+        text = re.sub(r'\s+', ' ', text)  # Multiple spaces to single
+        text = re.sub(r'\s+([.,!?;:])', r'\1', text)  # Remove space before punctuation
+        text = re.sub(r'([.,!?;:])\s*([.,!?;:])', r'\1', text)  # Remove double punctuation
+        text = re.sub(r'([.!?])\s*\1+', r'\1', text)  # Remove repeated punctuation
         # Fix colons
+        text = re.sub(r':\s*([.,!?])', ':', text)  # Remove punctuation after colon
+        text = re.sub(r'([.,!?])\s*:', ':', text)  # Remove punctuation before colon
+        text = re.sub(r':+', ':', text)  # Multiple colons to one
         # Fix quotes and parentheses
         text = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', text)
         text = re.sub(r'\(\s*([^)]*?)\s*\)', r'(\1)', text)
         # Fix sentence capitalization more carefully
+        # Split on ACTUAL sentence endings only
         sentences = re.split(r'(?<=[.!?])\s+', text)
         fixed_sentences = []
             if not sentence:
                 continue
+            # Only capitalize the first letter if it's actually lowercase
+            # and not part of a special case (like iPhone, eBay, etc.)
             words = sentence.split()
             if words:
                 first_word = words[0]
+                # Check if it's not an acronym or proper noun that should stay lowercase
                 if (first_word[0].islower() and
                     not self.is_likely_acronym_or_proper_noun(first_word) and
                     not first_word.startswith('__KW') and
                     not first_word.startswith('_kw')):
+                    # Only capitalize if it's a regular word
                     sentence = first_word[0].upper() + first_word[1:] + ' ' + ' '.join(words[1:])
             fixed_sentences.append(sentence)
         text = ' '.join(fixed_sentences)
+        # Fix common issues
+        text = re.sub(r'\bi\b', 'I', text)  # Capitalize 'I'
+        text = re.sub(r'\.{2,}', '.', text)  # Multiple periods to one
+        text = re.sub(r',{2,}', ',', text)  # Multiple commas to one
+        text = re.sub(r'\s*,\s*,\s*', ', ', text)  # Double commas with spaces
+        # Remove weird artifacts
+        text = re.sub(r'\b(CHAPTER\s+[IVX]+|SECTION\s+\d+)\b[^\w]*', '', text, flags=re.IGNORECASE)
         # Fix abbreviations
         text = re.sub(r'\betc\s*\.\s*\.', 'etc.', text)
         text = re.sub(r'\be\.g\s*\.\s*[,\s]', 'e.g., ', text)
         text = re.sub(r'\bi\.e\s*\.\s*[,\s]', 'i.e., ', text)
+        # Fix numbers with periods (like "1. " at start of lists)
         text = re.sub(r'(\d+)\.\s+', r'\1. ', text)
         # Fix bold/strong tags punctuation
         text = self.fix_bold_punctuation(text)
+        # Clean up any remaining issues
+        text = re.sub(r'\s+([.,!?;:])', r'\1', text)  # Final space cleanup
+        text = re.sub(r'([.,!?;:])\s{2,}', r'\1 ', text)  # Fix multiple spaces after punctuation
         # Ensure ending punctuation
         text = text.strip()
         if text and text[-1] not in '.!?':
+            # Don't add period if it ends with colon (likely a list header)
             if not text.endswith(':'):
                 text += '.'
         """Fix punctuation issues around bold/strong tags"""
         # Check if this is likely a list item with colon pattern
         def is_list_item_with_colon(text):
+            # Pattern: starts with or contains <strong>Text:</strong> or <b>Text:</b>
             list_pattern = r'^\s*(?:[-•*▪▫◦‣⁃]\s*)?<(?:strong|b)>[^<]+:</(?:strong|b)>'
             return bool(re.search(list_pattern, text))
         # If it's a list item with colon, preserve the format
         if is_list_item_with_colon(text):
+            # Just clean up spacing but preserve the colon inside bold
             text = re.sub(r'<(strong|b)>\s*([^:]+)\s*:\s*</\1>', r'<\1>\2:</\1>', text)
             return text
             # Check if this is a list header (contains colon at the end)
             if content.endswith(':'):
+                # Preserve list headers with colons
                 return f'<{tag}>{content}</{tag}>'
             # Remove any periods at the start or end of bold content
             content = content.strip('.')
             # Check if this bold text is at the start of a sentence
+            # (preceded by nothing, or by '. ', '! ', '? ')
             start_pos = match.start()
             is_sentence_start = (start_pos == 0 or
                                (start_pos > 2 and text[start_pos-2:start_pos] in ['. ', '! ', '? ', '\n\n']))
         # Fix bold/strong tags
         text = re.sub(bold_pattern, fix_bold_match, text)
+        # Fix spacing around bold/strong tags (but not for list items)
         if not is_list_item_with_colon(text):
+            text = re.sub(r'\.\s*<(strong|b)>', r'. <\1>', text)  # Period before bold
+            text = re.sub(r'</(strong|b)>\s*\.', r'</\1>.', text)  # Period after bold
+            text = re.sub(r'([.!?])\s*<(strong|b)>', r'\1 <\2>', text)  # Space after sentence end
+            text = re.sub(r'</(strong|b)>\s+([a-z])', lambda m: f'</{m.group(1)}> {m.group(2)}', text)  # Keep lowercase after bold if mid-sentence
             # Remove duplicate periods around bold tags
             text = re.sub(r'\.\s*</(strong|b)>\s*\.', r'</\1>.', text)
             text = re.sub(r'\.\s*<(strong|b)>\s*\.', r'. <\1>', text)
             # Fix cases where bold content ends a sentence
+            # If bold is followed by a new sentence (capital letter), add period
             text = re.sub(r'</(strong|b)>\s+([A-Z])', r'</\1>. \2', text)
         # Don't remove these for list items
         if not is_list_item_with_colon(text):
+            text = re.sub(r'<(strong|b)>\s*:\s*</\1>', ':', text)  # Remove empty bold colons
+            text = re.sub(r'<(strong|b)>\s*\.\s*</\1>', '.', text)  # Remove empty bold periods
         return text
         soup = BeautifulSoup(html_content, 'html.parser')
         text_elements = []
+        # Get all text nodes using string instead of text (fixing deprecation)
         for element in soup.find_all(string=True):
             # Skip script, style, and noscript content completely
             if element.parent.name in ['script', 'style', 'noscript']:
         html_text = re.sub(r'<!\s*DOCTYPE', '<!DOCTYPE', html_text, flags=re.IGNORECASE)
         # Fix spacing issues
+        html_text = re.sub(r'>\s+<', '><', html_text)  # Remove extra spaces between tags
+        html_text = re.sub(r'\s+>', '>', html_text)  # Remove spaces before closing >
+        html_text = re.sub(r'<\s+', '<', html_text)  # Remove spaces after opening <
+        # Fix common word errors that might occur during processing
         html_text = html_text.replace('down loaded', 'downloaded')
         html_text = html_text.replace('But your document', 'Your document')
         # Find all paragraph tags
         for p_tag in soup.find_all('p'):
             # Skip paragraphs that are inside special elements
+            # Check if paragraph is inside any of these elements
             skip_parents = ['div.author-intro', 'div.cta-box', 'div.testimonial-card',
                           'div.news-box', 'button', 'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                           'div.quiz-container', 'div.question-container', 'div.results']
                     continue
                 # Skip if the text node's immediate parent isn't the p tag
+                # (to avoid nested elements)
                 if text_node.parent != p_tag:
                     continue
                         text_node.insert_after(new_node)
                     text_node.extract()
+    def add_natural_flow_variations(self, text):
+        """Add more natural flow and rhythm variations for Originality AI"""
+        sentences = self.split_into_sentences_advanced(text)
+        enhanced_sentences = []
+        for i, sentence in enumerate(sentences):
+            if not sentence.strip():
+                continue
+            # Add stream-of-consciousness elements (10% chance)
+            if random.random() < 0.1 and len(sentence.split()) > 10:
+                stream_elements = [
+                    " - wait, let me back up - ",
+                    " - actually, scratch that - ",
+                    " - or maybe I should say - ",
+                    " - hmm, how do I put this - ",
+                    " - okay, here's the thing - ",
+                    " - you know what I mean? - "
+                ]
+                words = sentence.split()
+                pos = random.randint(len(words)//4, 3*len(words)//4)
+                words.insert(pos, random.choice(stream_elements))
+                sentence = ' '.join(words)
+            # Add human-like self-corrections (5% chance)
+            if random.random() < 0.05:
+                corrections = [
+                    " - or rather, ",
+                    " - well, actually, ",
+                    " - I mean, ",
+                    " - or should I say, ",
+                    " - correction: "
+                ]
+                words = sentence.split()
+                if len(words) > 8:
+                    pos = random.randint(len(words)//2, len(words)-3)
+                    correction = random.choice(corrections)
+                    # Repeat a concept with variation
+                    repeated_word_idx = random.randint(max(0, pos-5), pos-1)
+                    if repeated_word_idx < len(words):
+                        words.insert(pos, correction)
+                sentence = ' '.join(words)
+            # Add thinking-out-loud patterns (8% chance)
+            if random.random() < 0.08 and i > 0:
+                thinking_patterns = [
+                    "Come to think of it, ",
+                    "Actually, you know what? ",
+                    "Wait, here's a thought: ",
+                    "Oh, and another thing - ",
+                    "Speaking of which, ",
+                    "This reminds me, ",
+                    "Now that I mention it, ",
+                    "Funny you should ask, because "
+                ]
+                pattern = random.choice(thinking_patterns)
+                sentence = pattern + sentence[0].lower() + sentence[1:] if len(sentence) > 1 else sentence
+            enhanced_sentences.append(sentence)
+        return ' '.join(enhanced_sentences)
     def process_html(self, html_content, primary_keywords="", secondary_keywords="", progress_callback=None):
         """Main processing function with progress callback"""
         if not html_content.strip():
         # Combine keywords and clean them
         all_keywords = []
         if primary_keywords:
+            # Clean and validate each keyword
             for k in primary_keywords.split(','):
                 cleaned = k.strip()
+                if cleaned and len(cleaned) > 1:  # Skip empty or single-char keywords
                     all_keywords.append(cleaned)
         if secondary_keywords:
             for k in secondary_keywords.split(','):
                 if text_has_keywords:
                     print(f"Debug: Processing text with keywords: {original_text[:50]}...")
+                # First pass with Dipper (with adjusted diversity)
                 paraphrased_text = self.paraphrase_with_dipper(
                     original_text,
                     keywords=all_keywords
                 # Verify no placeholders remain
                 if '__KW' in paraphrased_text or '___' in paraphrased_text:
                     print(f"Warning: Placeholder or underscores found in paraphrased text: {paraphrased_text[:100]}...")
+                    # Try to restore again with the enhanced function
                     temp_map = {}
                     for j, keyword in enumerate(all_keywords):
                         temp_map[f'__KW{j:03d}__'] = keyword
                 # Second pass with BART for longer texts (increased probability)
                 if self.use_bart and len(paraphrased_text.split()) > 8:
+                    # 50% chance to use BART for more variation (reduced from 60%)
+                    if random.random() < 0.5:
                         paraphrased_text = self.paraphrase_with_bart(
                             paraphrased_text,
                             keywords=all_keywords
                         )
+                # Apply sentence variation
                 paraphrased_text = self.apply_sentence_variation(paraphrased_text)
                 # Add natural flow variations
                 paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
                 # Fix punctuation and formatting
                 paraphrased_text = self.fix_punctuation(paraphrased_text)
+                # Final check for any remaining placeholders or underscores
+                if '___' in paraphrased_text or '__KW' in paraphrased_text:
                     print(f"Error: Unresolved placeholders in final text")
                     # Use original text if we can't resolve placeholders
                     paraphrased_text = original_text
             # Wrap keywords with <strong> tags in paragraphs
             self.wrap_keywords_in_paragraphs(soup, all_keywords)
+            # Post-process the entire HTML to fix bold/strong formatting
             result = str(soup)
             result = self.post_process_html(result)
+            # Final safety check for any remaining placeholders or underscores
+            if '__KW' in result or re.search(r'_{3,}', result):
+                print("Warning: Found placeholders or multiple underscores in final HTML output")
+                # Attempt to clean them with keywords
                 for i, keyword in enumerate(all_keywords):
                     result = result.replace(f'__KW{i:03d}__', keyword)
+                    result = re.sub(r'_{3,}', keyword, result, count=1)
             # Restore all script tags
             for idx, script_content in enumerate(preserved_scripts):
             # Validate and fix HTML syntax
             result = self.validate_and_fix_html(result)
+            # Count skipped elements properly
             all_text_elements = soup.find_all(string=True)
             skipped = len([e for e in all_text_elements if e.strip() and e.parent.name not in ['script', 'style', 'noscript']]) - total_elements
             import traceback
             error_msg = f"Error processing HTML: {str(e)}\n{traceback.format_exc()}"
             print(error_msg)
+            # Return original HTML with error message prepended as HTML comment
             return f"<!-- {error_msg} -->\n{html_content}"
     def post_process_html(self, html_text):
         """Post-process the entire HTML to fix formatting issues"""
+        # Fix empty angle brackets that might appear
+        html_text = re.sub(r'<>\s*([^<>]+?)\s*(?=\.|\s|<)', r'\1', html_text)  # Remove <> around text
+        html_text = re.sub(r'<>', '', html_text)  # Remove any remaining empty <>
         # Fix double angle brackets around bold tags
         html_text = re.sub(r'<<b>>', '<b>', html_text)
         html_text = re.sub(r'<</strong>>', '</strong>', html_text)
         # Fix periods around bold/strong tags
+        html_text = re.sub(r'\.\s*<(b|strong)>', '. <\1>', html_text)  # Period before bold
+        html_text = re.sub(r'</(b|strong)>\s*\.', '</\1>.', html_text)  # Period after bold
+        html_text = re.sub(r'\.<<(b|strong)>>', '. <\1>', html_text)  # Fix double bracket cases
         html_text = re.sub(r'</(b|strong)>>\.', '</\1>.', html_text)
         # Fix periods after colons
             # Check if this line contains a list pattern with bold
             list_pattern = r'(?:^|\s)(?:[-•*▪▫◦‣⁃]\s*)?<(?:strong|b)>[^<]+:</(?:strong|b)>'
             if re.search(list_pattern, line):
+                # This is a list item, preserve the colon format
                 return line
             # Not a list item, apply regular fixes
+            # Remove periods immediately inside bold tags
             line = re.sub(r'<(strong|b)>\s*\.\s*([^<]+)\s*\.\s*</\1>', r'<\1>\2</\1>', line)
+            # Fix sentence endings with bold
             line = re.sub(r'</(strong|b)>\s*([.!?])', r'</\1>\2', line)
             return line
+        # Process line by line to preserve list formatting
         lines = html_text.split('\n')
         processed_lines = [process_line(line) for line in lines]
         html_text = '\n'.join(processed_lines)
         # Look for bold/strong tags and check their context
         html_text = re.sub(r'(^|.*?)(<(?:strong|b)>)([a-zA-Z])', fix_bold_sentence_start, html_text)
+        # Clean up spacing around bold tags (but preserve list formatting)
+        # Split into segments to handle list items separately
         segments = re.split(r'(<(?:strong|b)>[^<]*:</(?:strong|b)>)', html_text)
         cleaned_segments = []
                 # Apply spacing fixes to non-list segments
                 segment = re.sub(r'\s+<(strong|b)>', r' <\1>', segment)
                 segment = re.sub(r'</(strong|b)>\s+', r'</\1> ', segment)
+                # Fix punctuation issues
                 segment = re.sub(r'([.,!?;:])\s*([.,!?;:])', r'\1', segment)
+                # Fix periods inside/around bold
                 segment = re.sub(r'\.<(strong|b)>\.', '. <\1>', segment)
                 segment = re.sub(r'\.</(strong|b)>\.', '</\1>.', segment)
                 cleaned_segments.append(segment)
         html_text = ''.join(cleaned_segments)
         # Final cleanup
+        html_text = re.sub(r'\.{2,}', '.', html_text)  # Multiple periods
+        html_text = re.sub(r',{2,}', ',', html_text)  # Multiple commas
+        html_text = re.sub(r':{2,}', ':', html_text)  # Multiple colons
+        html_text = re.sub(r'\s+([.,!?;:])', r'\1', html_text)  # Space before punctuation
+        # Fix empty bold tags (but not those with just colons)
         html_text = re.sub(r'<(strong|b)>\s*</\1>', '', html_text)
         # Fix specific patterns in lists/stats
+        # Pattern like "5,000+" should not have period after
         html_text = re.sub(r'(\d+[,\d]*\+?)\s*\.\s*\n', r'\1\n', html_text)
         # Clean up any remaining double brackets
         lines=10,
         label="Humanized HTML Output"
     ),
+    title="Enhanced Dipper AI Humanizer - Optimized for Originality AI",
     description="""
     Ultra-aggressive humanizer optimized to achieve 100% human scores on both Undetectable AI and Originality AI.
+    Key Features:
+    - Maximum diversity settings (90% lexical, 40% order) for natural variation
+    - Enhanced human patterns: personal opinions, self-corrections, thinking-out-loud
+    - Natural typos, contractions, and conversational flow
+    - Stream-of-consciousness elements and rhetorical questions
+    - Originality AI-specific optimizations: varied sentence starters, emphatic repetitions
+    - Fixed placeholder system that preserves keywords
     - Keywords inside <p> tags are automatically wrapped with <strong> tags
+    - Skips content in <strong>, <b>, and heading tags (including inside tables)
+    - Designed to pass the strictest AI detection systems
+    The tool creates genuinely human-like writing patterns that fool even the most sophisticated detectors!
+    ⚠️ Note: Processing may take 5-10 minutes for large HTML documents.
     """,
     examples=[
         ["""<article>