EdysorEdutech committed on
Commit
dcfc371
·
verified ·
1 Parent(s): 942fc21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +253 -279
app.py CHANGED
@@ -663,122 +663,237 @@ class EnhancedDipperHumanizer:
663
  return text
664
 
665
  def should_skip_element(self, element, text):
666
- """Determine if an element should be skipped from paraphrasing"""
667
- if not text or len(text.strip()) < 3:
668
- return True
669
-
670
- # Skip JavaScript code inside script tags - CRITICAL FIX
671
- parent = element.parent
672
- if parent and parent.name in ['script', 'style', 'noscript']:
673
- return True
674
-
675
- # Also check if we're inside a script tag at any level
676
- for ancestor in element.parents:
677
- if ancestor.name in ['script', 'style', 'noscript']:
678
  return True
679
-
680
- # Rest of your existing skip logic...
681
- return False
682
-
683
- def extract_text_from_html(self, html_content):
684
- """Extract text elements from HTML with skip logic"""
685
- soup = BeautifulSoup(html_content, 'html.parser')
686
- text_elements = []
687
-
688
- # CRITICAL: Preserve all script tags completely
689
- script_tags = soup.find_all('script')
690
- script_placeholders = {}
691
-
692
- for i, script in enumerate(script_tags):
693
- placeholder = f"###SCRIPT_CONTENT_{i}###"
694
- script_placeholders[placeholder] = str(script)
695
- script.string = placeholder
696
-
697
- # Get all text nodes
698
- for element in soup.find_all(string=True):
699
- # Skip script, style, and noscript content completely
700
- if element.parent.name in ['script', 'style', 'noscript']:
701
- continue
702
-
703
- # Skip if it's a script placeholder
704
- text = element.strip()
705
- if text.startswith("###SCRIPT_CONTENT_") and text.endswith("###"):
706
- continue
707
-
708
- if text and not self.should_skip_element(element, text):
709
- text_elements.append({
710
- 'text': text,
711
- 'element': element
712
- })
713
-
714
- return soup, text_elements, script_placeholders
715
-
716
- def process_html(self, html_content, progress_callback=None):
717
- """Main processing function with progress callback"""
718
- if not html_content.strip():
719
- return "Please provide HTML content."
720
-
721
- try:
722
- # Extract text elements with script preservation
723
- soup, text_elements, script_placeholders = self.extract_text_from_html(html_content)
724
 
725
- total_elements = len(text_elements)
726
- print(f"Found {total_elements} text elements to process (after filtering)")
727
-
728
- # Process each text element
729
- processed_count = 0
730
 
731
- for i, element_info in enumerate(text_elements):
732
- original_text = element_info['text']
733
-
734
- # Skip placeholders
735
- if "###SCRIPT_" in original_text:
736
- continue
 
 
737
 
738
- # Skip very short texts
739
- if len(original_text.split()) < 3:
740
- continue
741
 
742
- # Process the text with your existing logic
743
- paraphrased_text = self.paraphrase_with_dipper(
744
- original_text,
745
- lex_diversity=60,
746
- order_diversity=20
747
- )
748
 
749
- # Apply other transformations...
750
- paraphrased_text = self.apply_sentence_variation(paraphrased_text)
751
- paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
752
- paraphrased_text = self.fix_punctuation(paraphrased_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
753
 
754
- # Final quality check
755
- if paraphrased_text and len(paraphrased_text.split()) >= 3:
756
- element_info['element'].replace_with(NavigableString(paraphrased_text))
757
- processed_count += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
 
759
- # Progress update
760
- if progress_callback:
761
- progress_callback(i + 1, total_elements)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
 
763
- # Get the processed HTML
764
- result_html = str(soup)
 
765
 
766
- # CRITICAL: Restore all script content exactly as it was
767
- for placeholder, original_script in script_placeholders.items():
768
- result_html = result_html.replace(f"<script>{placeholder}</script>", original_script)
 
769
 
770
- # Post-process the entire HTML
771
- result_html = self.post_process_html(result_html)
772
- result_html = self.validate_and_fix_html(result_html)
 
 
 
 
 
 
 
 
 
 
 
 
 
773
 
774
- print(f"Successfully processed {processed_count} text elements")
775
- return result_html
 
 
 
 
776
 
777
- except Exception as e:
778
- import traceback
779
- error_msg = f"Error processing HTML: {str(e)}\n{traceback.format_exc()}"
780
- print(error_msg)
781
- return f"<!-- {error_msg} -->\n{html_content}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
 
783
  def is_likely_acronym_or_proper_noun(self, word):
784
  """Check if a word is likely an acronym or part of a proper noun"""
@@ -1317,63 +1432,43 @@ def process_html(self, html_content, progress_callback=None):
1317
 
1318
  return text
1319
 
1320
- def extract_text_from_html(self, html_content):
1321
- """Extract text elements from HTML with skip logic"""
1322
- soup = BeautifulSoup(html_content, 'html.parser')
1323
- text_elements = []
 
 
 
 
 
 
 
 
 
 
 
 
1324
 
1325
- # Get all text nodes using string instead of text (fixing deprecation)
1326
- for element in soup.find_all(string=True):
1327
- # Skip script, style, and noscript content completely
1328
- if element.parent.name in ['script', 'style', 'noscript']:
1329
- continue
1330
-
1331
- text = element.strip()
1332
- if text and not self.should_skip_element(element, text):
1333
- text_elements.append({
1334
- 'text': text,
1335
- 'element': element
1336
- })
1337
 
1338
- return soup, text_elements
1339
-
1340
- def validate_and_fix_html(self, html_text):
1341
- """Fix common HTML syntax errors after processing"""
1342
-
1343
- # First, protect script content
1344
- script_pattern = r'<script[^>]*>(.*?)</script>'
1345
- scripts = re.findall(script_pattern, html_text, re.DOTALL | re.IGNORECASE)
1346
- script_placeholders = {}
1347
-
1348
- for i, script_content in enumerate(scripts):
1349
- placeholder = f"<!--SCRIPT_PLACEHOLDER_{i}-->"
1350
- script_placeholders[placeholder] = script_content
1351
- html_text = html_text.replace(
1352
- f'<script>{script_content}</script>',
1353
- f'<script>{placeholder}</script>',
1354
- 1
1355
- )
1356
-
1357
- # Fix DOCTYPE
1358
- html_text = re.sub(r'<!\s*DOCTYPE', '<!DOCTYPE', html_text, flags=re.IGNORECASE)
1359
-
1360
- # Fix spacing issues (but not inside scripts)
1361
- html_text = re.sub(r'>\s+<', '><', html_text)
1362
- html_text = re.sub(r'\s+>', '>', html_text)
1363
- html_text = re.sub(r'<\s+', '<', html_text)
1364
-
1365
- # Fix common word errors that might occur during processing
1366
- html_text = html_text.replace('down loaded', 'downloaded')
1367
- html_text = html_text.replace('But your document', 'Your document')
1368
-
1369
- # Restore script content
1370
- for placeholder, script_content in script_placeholders.items():
1371
- html_text = html_text.replace(
1372
- f'<script>{placeholder}</script>',
1373
- f'<script>{script_content}</script>'
1374
- )
1375
-
1376
- return html_text
1377
 
1378
  def add_natural_flow_variations(self, text):
1379
  """Add more natural flow and rhythm variations for Originality AI"""
@@ -1437,127 +1532,6 @@ def process_html(self, html_content, progress_callback=None):
1437
 
1438
  return ' '.join(enhanced_sentences)
1439
 
1440
- def process_html(self, html_content, progress_callback=None):
1441
- """Main processing function with progress callback"""
1442
- if not html_content.strip():
1443
- return "Please provide HTML content."
1444
-
1445
- # Store all script and style content to preserve it
1446
- script_placeholder = "###SCRIPT_PLACEHOLDER_{}###"
1447
- style_placeholder = "###STYLE_PLACEHOLDER_{}###"
1448
- preserved_scripts = []
1449
- preserved_styles = []
1450
-
1451
- # Temporarily replace script and style tags with placeholders
1452
- soup_temp = BeautifulSoup(html_content, 'html.parser')
1453
-
1454
- # Preserve all script tags
1455
- for idx, script in enumerate(soup_temp.find_all('script')):
1456
- placeholder = script_placeholder.format(idx)
1457
- preserved_scripts.append(str(script))
1458
- script.replace_with(placeholder)
1459
-
1460
- # Preserve all style tags
1461
- for idx, style in enumerate(soup_temp.find_all('style')):
1462
- placeholder = style_placeholder.format(idx)
1463
- preserved_styles.append(str(style))
1464
- style.replace_with(placeholder)
1465
-
1466
- # Get the modified HTML
1467
- html_content = str(soup_temp)
1468
-
1469
- try:
1470
- # Extract text elements
1471
- soup, text_elements = self.extract_text_from_html(html_content)
1472
-
1473
- total_elements = len(text_elements)
1474
- print(f"Found {total_elements} text elements to process (after filtering)")
1475
-
1476
- # Process each text element
1477
- processed_count = 0
1478
-
1479
- for i, element_info in enumerate(text_elements):
1480
- original_text = element_info['text']
1481
-
1482
- # Skip placeholders
1483
- if "###SCRIPT_PLACEHOLDER_" in original_text or "###STYLE_PLACEHOLDER_" in original_text:
1484
- continue
1485
-
1486
- # Skip very short texts
1487
- if len(original_text.split()) < 3:
1488
- continue
1489
-
1490
- # First pass with Dipper
1491
- paraphrased_text = self.paraphrase_with_dipper(
1492
- original_text,
1493
- lex_diversity=60,
1494
- order_diversity=20
1495
- )
1496
-
1497
- # Second pass with BART for longer texts (balanced probability)
1498
- if self.use_bart and len(paraphrased_text.split()) > 8:
1499
- # 30% chance to use BART for more variation (balanced)
1500
- if random.random() < 0.3:
1501
- paraphrased_text = self.paraphrase_with_bart(paraphrased_text)
1502
-
1503
- # Apply sentence variation
1504
- paraphrased_text = self.apply_sentence_variation(paraphrased_text)
1505
-
1506
- # Add natural flow variations
1507
- paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
1508
-
1509
- # Fix punctuation and formatting
1510
- paraphrased_text = self.fix_punctuation(paraphrased_text)
1511
-
1512
- # Final quality check
1513
- if paraphrased_text and len(paraphrased_text.split()) >= 3:
1514
- element_info['element'].replace_with(NavigableString(paraphrased_text))
1515
- processed_count += 1
1516
-
1517
- # Progress update
1518
- if progress_callback:
1519
- progress_callback(i + 1, total_elements)
1520
-
1521
- if i % 10 == 0 or i == total_elements - 1:
1522
- progress = (i + 1) / total_elements * 100
1523
- print(f"Progress: {progress:.1f}%")
1524
-
1525
- # Get the processed HTML
1526
- result = str(soup)
1527
-
1528
- # Restore all script tags
1529
- for idx, script_content in enumerate(preserved_scripts):
1530
- placeholder = script_placeholder.format(idx)
1531
- result = result.replace(placeholder, script_content)
1532
-
1533
- # Restore all style tags
1534
- for idx, style_content in enumerate(preserved_styles):
1535
- placeholder = style_placeholder.format(idx)
1536
- result = result.replace(placeholder, style_content)
1537
-
1538
- # Post-process the entire HTML to fix bold/strong formatting
1539
- result = self.post_process_html(result)
1540
-
1541
- # Validate and fix HTML syntax
1542
- result = self.validate_and_fix_html(result)
1543
-
1544
- # Count skipped elements properly
1545
- all_text_elements = soup.find_all(string=True)
1546
- skipped = len([e for e in all_text_elements if e.strip() and e.parent.name not in ['script', 'style', 'noscript']]) - total_elements
1547
-
1548
- print(f"Successfully processed {processed_count} text elements")
1549
- print(f"Skipped {skipped} elements (headings, CTAs, tables, testimonials, strong/bold tags, etc.)")
1550
- print(f"Preserved {len(preserved_scripts)} script tags and {len(preserved_styles)} style tags")
1551
-
1552
- return result
1553
-
1554
- except Exception as e:
1555
- import traceback
1556
- error_msg = f"Error processing HTML: {str(e)}\n{traceback.format_exc()}"
1557
- print(error_msg)
1558
- # Return original HTML with error message prepended as HTML comment
1559
- return f"<!-- {error_msg} -->\n{html_content}"
1560
-
1561
  def post_process_html(self, html_text):
1562
  """Post-process the entire HTML to fix formatting issues"""
1563
  # Fix empty angle brackets that might appear
 
663
  return text
664
 
665
  def should_skip_element(self, element, text):
666
+ """Determine if an element should be skipped from paraphrasing"""
667
+ if not text or len(text.strip()) < 3:
 
 
 
 
 
 
 
 
 
 
668
  return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
669
 
670
+ # Skip JavaScript code inside script tags - CRITICAL FIX
671
+ parent = element.parent
672
+ if parent and parent.name in ['script', 'style', 'noscript']:
673
+ return True
 
674
 
675
+ # Also check if we're inside a script tag at any level
676
+ for ancestor in element.parents:
677
+ if ancestor.name in ['script', 'style', 'noscript']:
678
+ return True
679
+
680
+ # Skip headings (h1-h6)
681
+ if parent and parent.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title']:
682
+ return True
683
 
684
+ # Skip content inside <strong> and <b> tags
685
+ if parent and parent.name in ['strong', 'b']:
686
+ return True
687
 
688
+ # Skip table content
689
+ if parent and (parent.name in ['td', 'th'] or any(p.name == 'table' for p in parent.parents)):
690
+ return True
 
 
 
691
 
692
+ # Special handling for content inside tables
693
+ # Skip if it's inside strong/b/h1-h6 tags AND also inside a table
694
+ if parent:
695
+ # Check if we're inside a table
696
+ is_in_table = any(p.name == 'table' for p in parent.parents)
697
+ if is_in_table:
698
+ # If we're in a table, skip any text that's inside formatting tags
699
+ if parent.name in ['strong', 'b', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'em', 'i']:
700
+ return True
701
+ # Also check if parent's parent is a formatting tag
702
+ if parent.parent and parent.parent.name in ['strong', 'b', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
703
+ return True
704
+
705
+ # Skip table of contents
706
+ if parent:
707
+ parent_text = str(parent).lower()
708
+ if any(toc in parent_text for toc in ['table of contents', 'toc-', 'contents']):
709
+ return True
710
+
711
+ # Skip CTAs and buttons
712
+ if parent and parent.name in ['button', 'a']:
713
+ return True
714
 
715
+ # Skip if parent has onclick or other event handlers
716
+ if parent and parent.attrs:
717
+ event_handlers = ['onclick', 'onchange', 'onsubmit', 'onload', 'onmouseover', 'onmouseout']
718
+ if any(handler in parent.attrs for handler in event_handlers):
719
+ return True
720
+
721
+ # Special check for testimonial cards - check up to 3 levels of ancestors
722
+ if parent:
723
+ ancestors_to_check = []
724
+ current = parent
725
+ for _ in range(3): # Check up to 3 levels up
726
+ if current:
727
+ ancestors_to_check.append(current)
728
+ current = current.parent
729
+
730
+ # Check if any ancestor has testimonial-card class
731
+ for ancestor in ancestors_to_check:
732
+ if ancestor and ancestor.get('class'):
733
+ classes = ancestor.get('class', [])
734
+ if isinstance(classes, list):
735
+ if any('testimonial-card' in str(cls) for cls in classes):
736
+ return True
737
+ elif isinstance(classes, str) and 'testimonial-card' in classes:
738
+ return True
739
+
740
+ # Skip if IMMEDIATE parent or element itself has skip-worthy classes/IDs
741
+ skip_indicators = [
742
+ 'button', 'btn', 'heading', 'title', 'caption',
743
+ 'toc-', 'contents', 'quiz', 'tip', 'note', 'alert',
744
+ 'warning', 'info', 'success', 'error', 'code', 'pre',
745
+ 'stats-grid', 'testimonial-card',
746
+ 'cta-box', 'quiz-container', 'contact-form',
747
+ 'faq-question', 'sidebar', 'widget', 'banner',
748
+ 'author-intro', 'testimonial', 'review', 'feedback',
749
+ 'floating-', 'stat-', 'progress-', 'option', 'results',
750
+ 'question-container', 'quiz-',
751
+ 'comparision-tables', 'process-flowcharts', 'infographics', 'cost-breakdown'
752
+ ]
753
+
754
+ # Check only immediate parent and grandparent (not all ancestors)
755
+ elements_to_check = [parent]
756
+ if parent and parent.parent:
757
+ elements_to_check.append(parent.parent)
758
 
759
+ for elem in elements_to_check:
760
+ if not elem:
761
+ continue
762
+
763
+ # Check element's class
764
+ elem_class = elem.get('class', [])
765
+ if isinstance(elem_class, list):
766
+ class_str = ' '.join(str(cls).lower() for cls in elem_class)
767
+ if any(indicator in class_str for indicator in skip_indicators):
768
+ return True
769
+
770
+ # Check element's ID
771
+ elem_id = elem.get('id', '')
772
+ if any(indicator in str(elem_id).lower() for indicator in skip_indicators):
773
+ return True
774
+
775
+ # Skip short phrases that might be UI elements
776
+ word_count = len(text.split())
777
+ if word_count <= 5:
778
+ ui_patterns = [
779
+ 'click', 'download', 'learn more', 'read more', 'sign up',
780
+ 'get started', 'try now', 'buy now', 'next', 'previous',
781
+ 'back', 'continue', 'submit', 'cancel', 'get now', 'book your',
782
+ 'check out:', 'see also:', 'related:', 'question', 'of'
783
+ ]
784
+ if any(pattern in text.lower() for pattern in ui_patterns):
785
+ return True
786
+
787
+ # Skip very short content in styled containers
788
+ if parent and parent.name in ['div', 'section', 'aside', 'blockquote']:
789
+ style = parent.get('style', '')
790
+ if 'border' in style or 'background' in style:
791
+ if word_count <= 20:
792
+ # But don't skip if it's inside a paragraph
793
+ if not any(p.name == 'p' for p in parent.parents):
794
+ return True
795
+
796
+ return False
797
+
798
+ def extract_text_from_html(self, html_content):
799
+ """Extract text elements from HTML with skip logic"""
800
+ soup = BeautifulSoup(html_content, 'html.parser')
801
+ text_elements = []
802
 
803
+ # CRITICAL: Preserve all script tags completely
804
+ script_tags = soup.find_all('script')
805
+ script_placeholders = {}
806
 
807
+ for i, script in enumerate(script_tags):
808
+ placeholder = f"###SCRIPT_CONTENT_{i}###"
809
+ script_placeholders[placeholder] = str(script)
810
+ script.string = placeholder
811
 
812
+ # Get all text nodes
813
+ for element in soup.find_all(string=True):
814
+ # Skip script, style, and noscript content completely
815
+ if element.parent.name in ['script', 'style', 'noscript']:
816
+ continue
817
+
818
+ # Skip if it's a script placeholder
819
+ text = element.strip()
820
+ if text.startswith("###SCRIPT_CONTENT_") and text.endswith("###"):
821
+ continue
822
+
823
+ if text and not self.should_skip_element(element, text):
824
+ text_elements.append({
825
+ 'text': text,
826
+ 'element': element
827
+ })
828
 
829
+ return soup, text_elements, script_placeholders
830
+
831
+ def process_html(self, html_content, progress_callback=None):
832
+ """Main processing function with progress callback"""
833
+ if not html_content.strip():
834
+ return "Please provide HTML content."
835
 
836
+ try:
837
+ # Extract text elements with script preservation
838
+ soup, text_elements, script_placeholders = self.extract_text_from_html(html_content)
839
+
840
+ total_elements = len(text_elements)
841
+ print(f"Found {total_elements} text elements to process (after filtering)")
842
+
843
+ # Process each text element
844
+ processed_count = 0
845
+
846
+ for i, element_info in enumerate(text_elements):
847
+ original_text = element_info['text']
848
+
849
+ # Skip placeholders
850
+ if "###SCRIPT_" in original_text:
851
+ continue
852
+
853
+ # Skip very short texts
854
+ if len(original_text.split()) < 3:
855
+ continue
856
+
857
+ # Process the text with your existing logic
858
+ paraphrased_text = self.paraphrase_with_dipper(
859
+ original_text,
860
+ lex_diversity=60,
861
+ order_diversity=20
862
+ )
863
+
864
+ # Apply other transformations...
865
+ paraphrased_text = self.apply_sentence_variation(paraphrased_text)
866
+ paraphrased_text = self.add_natural_flow_variations(paraphrased_text)
867
+ paraphrased_text = self.fix_punctuation(paraphrased_text)
868
+
869
+ # Final quality check
870
+ if paraphrased_text and len(paraphrased_text.split()) >= 3:
871
+ element_info['element'].replace_with(NavigableString(paraphrased_text))
872
+ processed_count += 1
873
+
874
+ # Progress update
875
+ if progress_callback:
876
+ progress_callback(i + 1, total_elements)
877
+
878
+ # Get the processed HTML
879
+ result_html = str(soup)
880
+
881
+ # CRITICAL: Restore all script content exactly as it was
882
+ for placeholder, original_script in script_placeholders.items():
883
+ result_html = result_html.replace(f"<script>{placeholder}</script>", original_script)
884
+
885
+ # Post-process the entire HTML
886
+ result_html = self.post_process_html(result_html)
887
+ result_html = self.validate_and_fix_html(result_html)
888
+
889
+ print(f"Successfully processed {processed_count} text elements")
890
+ return result_html
891
+
892
+ except Exception as e:
893
+ import traceback
894
+ error_msg = f"Error processing HTML: {str(e)}\n{traceback.format_exc()}"
895
+ print(error_msg)
896
+ return f"<!-- {error_msg} -->\n{html_content}"
897
 
898
  def is_likely_acronym_or_proper_noun(self, word):
899
  """Check if a word is likely an acronym or part of a proper noun"""
 
1432
 
1433
  return text
1434
 
1435
+ def validate_and_fix_html(self, html_text):
1436
+ """Fix common HTML syntax errors after processing"""
1437
+
1438
+ # First, protect script content
1439
+ script_pattern = r'<script[^>]*>(.*?)</script>'
1440
+ scripts = re.findall(script_pattern, html_text, re.DOTALL | re.IGNORECASE)
1441
+ script_placeholders = {}
1442
+
1443
+ for i, script_content in enumerate(scripts):
1444
+ placeholder = f"<!--SCRIPT_PLACEHOLDER_{i}-->"
1445
+ script_placeholders[placeholder] = script_content
1446
+ html_text = html_text.replace(
1447
+ f'<script>{script_content}</script>',
1448
+ f'<script>{placeholder}</script>',
1449
+ 1
1450
+ )
1451
 
1452
+ # Fix DOCTYPE
1453
+ html_text = re.sub(r'<!\s*DOCTYPE', '<!DOCTYPE', html_text, flags=re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
1454
 
1455
+ # Fix spacing issues (but not inside scripts)
1456
+ html_text = re.sub(r'>\s+<', '><', html_text)
1457
+ html_text = re.sub(r'\s+>', '>', html_text)
1458
+ html_text = re.sub(r'<\s+', '<', html_text)
1459
+
1460
+ # Fix common word errors that might occur during processing
1461
+ html_text = html_text.replace('down loaded', 'downloaded')
1462
+ html_text = html_text.replace('But your document', 'Your document')
1463
+
1464
+ # Restore script content
1465
+ for placeholder, script_content in script_placeholders.items():
1466
+ html_text = html_text.replace(
1467
+ f'<script>{placeholder}</script>',
1468
+ f'<script>{script_content}</script>'
1469
+ )
1470
+
1471
+ return html_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1472
 
1473
  def add_natural_flow_variations(self, text):
1474
  """Add more natural flow and rhythm variations for Originality AI"""
 
1532
 
1533
  return ' '.join(enhanced_sentences)
1534
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1535
  def post_process_html(self, html_text):
1536
  """Post-process the entire HTML to fix formatting issues"""
1537
  # Fix empty angle brackets that might appear