Spaces:

Corin1998
/

Auto_PPT_Generator

Sleeping

App Files Files Community

Corin1998 committed on Sep 16

Commit

d229d04

verified ·

1 Parent(s): 5621a82

Update app.py

Browse files

Files changed (1) hide show

app.py +413 -20

app.py CHANGED Viewed

@@ -2,28 +2,427 @@ import os
 import io
 import time
 import sys
 import gradio as gr
-# --- Ensure we can import modules whether it's a package or a plain folder ---
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-MODULES_DIR = os.path.join(BASE_DIR, "modules")
-if os.path.isdir(MODULES_DIR) and MODULES_DIR not in sys.path:
-    sys.path.insert(0, MODULES_DIR)
 try:
-    # Prefer package-style if modules/__init__.py exists
-    from modules.text_processing import process_text
-    from modules.pptx_builder import build_presentation
-    from modules.utils import safe_hex_to_rgb, ensure_tmpdir
-except ModuleNotFoundError:
-    # Fallback: flat imports from ./modules added to sys.path
-    from text_processing import process_text
-    from pptx_builder import build_presentation
-    from utils import safe_hex_to_rgb, ensure_tmpdir
 APP_NAME = "Auto-PPT Generator"
 def generate_pptx(long_text: str,
                   title: str,
                   theme_hex: str,
@@ -52,7 +451,6 @@ def generate_pptx(long_text: str,
         except Exception:
             logo_bytes = None
-    # Step 1–3: NLP pipeline (summary, sections, bullets, tables, chart data)
     result = process_text(
         text=long_text,
         use_inference_api=use_inference_api,
@@ -64,7 +462,6 @@ def generate_pptx(long_text: str,
         max_summary_words=max_summary_words,
     )
-    # Step 4: Build PPTX
     ensure_tmpdir()
     timestamp = time.strftime('%Y%m%d-%H%M%S')
     out_path = f"/tmp/auto_ppt_{timestamp}.pptx"
@@ -80,11 +477,8 @@ def generate_pptx(long_text: str,
         tables=result.get("tables", []),
         charts=result.get("charts", []),
     )
-    # Return file path for download
     return out_path
 def ui():
     with gr.Blocks(title=APP_NAME) as demo:
         gr.Markdown(f"# {APP_NAME}\n長文→要約→セクション分割→箇条書き/表/図→**PPTX出力** まで自動化")
@@ -122,7 +516,6 @@ def ui():
         """)
     return demo
 if __name__ == "__main__":
     demo = ui()
     # Spaces は自動でバインドされますが、ローカル互換のため指定可能

 import io
 import time
 import sys
+import re
+from typing import Optional, List, Tuple, Dict, Any
 import gradio as gr
+# 安全のため、GUI不要の描画バックエンドを指定
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from pptx import Presentation
+from pptx.util import Inches, Pt
+from pptx.enum.text import PP_ALIGN
+from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE
+from pptx.dml.color import RGBColor
+from PIL import Image
+# transformers は任意（未インストールでも動作させる）
 try:
+    from transformers import pipeline
+except Exception:
+    pipeline = None
+import requests  # Inference API を使う場合にのみ利用
 APP_NAME = "Auto-PPT Generator"
+# =========================
+# utils
+# =========================
def safe_hex_to_rgb(hex_color: str) -> Tuple[int, int, int]:
    """Convert a hex colour string to an ``(r, g, b)`` tuple of ints.

    Accepts ``RRGGBB`` and the three-digit ``RGB`` shorthand, each with or
    without a single leading ``#`` and with surrounding whitespace ignored.

    Args:
        hex_color: Colour string such as ``"#3B82F6"``, ``"3b82f6"`` or
            ``"#abc"``.

    Returns:
        The parsed ``(r, g, b)`` tuple, or the default blue
        ``(59, 130, 246)`` when the input is empty, not a string, or not a
        valid hex colour.
    """
    default = (59, 130, 246)  # default blue
    if not hex_color or not isinstance(hex_color, str):
        return default

    digits = hex_color.strip()
    # Drop at most one leading "#", so "##ffffff" is still rejected.
    if digits.startswith("#"):
        digits = digits[1:]

    # Expand the "abc" shorthand to "aabbcc" before validating.
    if re.fullmatch(r"[0-9A-Fa-f]{3}", digits):
        digits = "".join(ch * 2 for ch in digits)

    if not re.fullmatch(r"[0-9A-Fa-f]{6}", digits):
        return default

    return (int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16))
def ensure_tmpdir():
    """Create the ``/tmp`` directory if it does not already exist."""
    tmp_root = "/tmp"
    os.makedirs(name=tmp_root, exist_ok=True)
+# =========================
+# LLM client (local / HF API)
+# =========================
class LLMClient:
    """Summarisation / generation client with best-effort fallbacks.

    ``summarize`` tries, in order: the Hugging Face Inference API (when
    ``use_inference_api`` is set and a model is given), a local
    ``transformers`` pipeline (when the library imported and a model is
    given), and finally a rule-based extract of the leading sentences.
    ``generate`` only uses the Inference API and otherwise returns ``""``.
    """

    # Maximum number of input characters passed to a summarisation model.
    _MAX_INPUT_CHARS = 6000

    # Sentence terminators for the rule-based fallback. A "." that is
    # directly followed by a digit is treated as a decimal point and is not
    # a split position, so "3.5%" stays intact.
    _SENTENCE_END = re.compile(r"(?:[。！？!?]|\.(?!\d))\s*")

    def __init__(self, use_inference_api: bool = False):
        self.use_inference_api = use_inference_api
        self.hf_token = os.getenv("HF_TOKEN", None)
        # Cache of local pipelines keyed by (task, model).
        self._local_pipes = {}

    @staticmethod
    def _log_failure(stage: str, exc: Exception) -> None:
        """Log a swallowed backend failure so fallbacks are diagnosable."""
        # Imported here because the file's import header has no ``logging``.
        import logging

        logging.getLogger(__name__).warning(
            "%s failed; falling back: %s", stage, exc
        )

    # ---------- Inference API helpers ----------
    def _hf_headers(self):
        """Return the Authorization header; raise if ``HF_TOKEN`` is unset."""
        if not self.hf_token:
            raise RuntimeError("HF_TOKEN is not set for Inference API usage.")
        return {"Authorization": f"Bearer {self.hf_token}"}

    def _hf_textgen(self, model: str, prompt: str, max_new_tokens: int = 512, temperature: float = 0.3) -> str:
        """POST ``prompt`` to the Inference API and return the produced text.

        Raises whatever ``requests`` raises on transport/HTTP errors, and
        ``RuntimeError`` when no token is configured. When the JSON payload
        has none of the recognised shapes, its ``str()`` form is returned.
        """
        url = f"https://api-inference.huggingface.co/models/{model}"
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_new_tokens,
                "temperature": temperature,
                "return_full_text": False,
            },
        }
        r = requests.post(url, headers=self._hf_headers(), json=payload, timeout=120)
        r.raise_for_status()
        data = r.json()
        # Responses arrive either as a dict or as a list whose first item is
        # the dict; summarisation models use "summary_text" instead.
        first = data[0] if isinstance(data, list) and data else data
        if isinstance(first, dict):
            for key in ("generated_text", "summary_text"):
                if key in first:
                    return first[key]
        return str(data)

    def _get_local_pipe(self, task: str, model: str):
        """Return a cached ``transformers`` pipeline, creating it on first use."""
        key = (task, model)
        if key in self._local_pipes:
            return self._local_pipes[key]
        if pipeline is None:
            raise RuntimeError("transformers is not available")
        pipe = pipeline(task=task, model=model)
        self._local_pipes[key] = pipe
        return pipe

    @staticmethod
    def _fallback_summary(text: str, max_words: int) -> str:
        """Join leading sentences of ``text`` with "。" up to a size budget.

        Sentences are collected until their space-joined length exceeds
        ``max_words * 6`` characters; the sentence that crosses the budget
        is still included.
        """
        budget = max_words * 6
        picked = []
        used = 0  # equals len(" ".join(picked))
        for piece in LLMClient._SENTENCE_END.split(text or ""):
            piece = piece.strip()
            if piece:
                used += len(piece) + (1 if picked else 0)
                picked.append(piece)
            if used > budget:
                break
        return "。".join(picked)

    # ---------- Public ----------
    def summarize(self, text: str, model: str, max_words: int = 200) -> str:
        """Summarise ``text``, degrading to a rule-based extract on failure.

        Args:
            text: Source text; only the first 6000 characters are sent to a
                model.
            model: Model id for the Inference API / local pipeline. When
                empty, both model-backed paths are skipped.
            max_words: Size hint; model length limits are derived from it.

        Returns:
            The summary string (possibly empty for empty input).
        """
        clipped = text[: self._MAX_INPUT_CHARS]

        # Inference API first. Failures are deliberately non-fatal.
        if self.use_inference_api and model:
            try:
                return self._hf_textgen(model, clipped, max_new_tokens=max_words * 2).strip()
            except Exception as exc:
                self._log_failure("Inference API summarization", exc)

        # Local transformers pipeline, only when a model id was supplied.
        if pipeline is not None and model:
            try:
                if "t5" in model.lower():
                    pipe = self._get_local_pipe("text2text-generation", model)
                    prompt = f"要約: {clipped}"
                    res = pipe(prompt, max_length=max_words * 2, do_sample=False)
                    return res[0]["generated_text"].strip()
                pipe = self._get_local_pipe("summarization", model)
                res = pipe(clipped, max_length=max_words * 2, min_length=max_words // 2, do_sample=False)
                return res[0]["summary_text"].strip()
            except Exception as exc:
                self._log_failure("Local pipeline summarization", exc)

        # Last resort: concatenate the leading sentences.
        return self._fallback_summary(text, max_words)

    def generate(self, prompt: str, model: Optional[str] = None, max_new_tokens: int = 512) -> str:
        """Generate text via the Inference API; return ``""`` otherwise.

        Returns an empty string when the API is disabled, no model is given,
        or the request fails.
        """
        if self.use_inference_api and model:
            try:
                return self._hf_textgen(model, prompt, max_new_tokens=max_new_tokens)
            except Exception as exc:
                self._log_failure("Inference API generation", exc)
                return ""
        return ""  # no local generation path; callers rely on rule-based output
+# =========================
+# text processing
+# =========================
# List item: "-", "*", "•", "・", "1." or "1)" followed by whitespace; group 1
# is the item text.
LIST_BULLET = re.compile(r"^(?:[-*•・]|\d+\.|\d+\))\s+(.*)")
# "key: value" line (ASCII or full-width colon); groups are key and value.
KEYVAL_LINE = re.compile(r"^\s*([^:：]+?)\s*[:：]\s*([^\n]+?)\s*$")
# "label: number" line; group 2 is a signed integer or decimal literal.
LABEL_NUM = re.compile(r"^\s*([^:：]+?)\s*[:：]\s*([+-]?\d+(?:\.\d+)?)\s*$")
# Heading: "#"-style or numbered ("1." / "1)"); group 2 is the title. The
# lookahead keeps lines that merely start with a decimal or dotted date
# ("3.14 ...", "2023.5.1 ...") from being read as numbered headings.
HEADER = re.compile(r"^(#+|\d+\.(?!\d)|\d+\))\s*(.+)$")
def naive_section_split(text: str, target_chars: int = 1200) -> List[Tuple[str, str]]:
    """Split ``text`` into ``(title, content)`` sections.

    A line matching ``HEADER`` starts a new section titled with the heading
    text. Independently, once the buffered lines of the current section
    exceed ``target_chars`` characters (line breaks not counted), the section
    is closed and the next one gets an auto-numbered title.

    Sections whose content is empty after stripping are dropped, so a
    heading followed only by blank lines does not yield an empty section.

    Returns:
        The list of sections; ``[("本文", text)]`` when nothing was produced.
    """
    sections: List[Tuple[str, str]] = []
    cur_title = "セクション"
    cur_buf: List[str] = []
    cur_len = 0  # running character count of cur_buf, avoids re-summing

    def flush():
        nonlocal cur_buf, cur_len
        content = "\n".join(cur_buf).strip()
        if content:
            sections.append((cur_title, content))
        cur_buf = []
        cur_len = 0

    for ln in text.splitlines():
        m = HEADER.match(ln.strip())
        if m:
            flush()
            cur_title = m.group(2).strip()
            continue
        cur_buf.append(ln)
        cur_len += len(ln)
        if cur_len > target_chars:
            flush()
            cur_title = f"セクション{len(sections)+1}"
    flush()

    if not sections:
        sections = [("本文", text)]
    return sections
def extract_bullets(section_text: str, max_items: int = 8) -> List[str]:
    """Return up to ``max_items`` bullet strings for a section.

    Lines matching ``LIST_BULLET`` are used when any exist. Otherwise the
    text is split into sentences and those between 8 and 120 characters long
    are used. A "." directly followed by a digit is not a sentence boundary,
    so figures such as "3.5%" are kept whole; full-width "！" and "？" are
    recognised alongside their ASCII forms.
    """
    bullets: List[str] = []
    for line in section_text.splitlines():
        m = LIST_BULLET.match(line.strip())
        if m:
            bullets.append(m.group(1).strip())

    if not bullets:
        for sentence in re.split(r"(?:[。！？!?]|\.(?!\d))\s*", section_text):
            sentence = sentence.strip()
            if 8 <= len(sentence) <= 120:
                bullets.append(sentence)
            if len(bullets) >= max_items:
                break

    return bullets[:max_items]
def extract_keyval_table(section_text: str) -> List[Tuple[str, str]]:
    """Collect ``(key, value)`` pairs from lines matching ``KEYVAL_LINE``.

    Both parts are stripped; a pair is kept only when neither is empty.
    Pairs are returned in line order.
    """
    hits = (KEYVAL_LINE.match(row) for row in section_text.splitlines())
    stripped = (
        (hit.group(1).strip(), hit.group(2).strip()) for hit in hits if hit
    )
    return [(key, value) for key, value in stripped if key and value]
def extract_chart_data(section_text: str, top_k: int = 10) -> List[Tuple[str, float]]:
    """Extract ``(label, value)`` chart points from ``label: number`` lines.

    When a label occurs more than once the last value wins, while the label
    keeps its first-seen position. The result is ordered by descending
    absolute value and truncated to ``top_k`` entries.
    """
    latest = {}
    for row in section_text.splitlines():
        hit = LABEL_NUM.match(row)
        if hit is None:
            continue
        name = hit.group(1).strip()
        try:
            number = float(hit.group(2))
        except ValueError:
            continue
        latest[name] = number

    ranked = sorted(latest.items(), key=lambda pair: abs(pair[1]), reverse=True)
    return ranked[:top_k]
def process_text(text: str,
                 use_inference_api: bool,
                 summarizer_model: str,
                 generator_model: str,
                 want_summary: bool,
                 want_tables: bool,
                 want_charts: bool,
                 max_summary_words: int = 200) -> Dict[str, Any]:
    """Run the text pipeline and return the data needed to build slides.

    Returns a dict with keys ``summary`` (``None`` unless ``want_summary``),
    ``sections``, ``bullets`` (section index -> bullet list), ``tables`` and
    ``charts``. ``generator_model`` is accepted but not used here.
    """
    llm = LLMClient(use_inference_api=use_inference_api)
    if want_summary:
        summary = llm.summarize(text, model=summarizer_model, max_words=max_summary_words)
    else:
        summary = None

    sections = naive_section_split(text)
    bullets_by_section: Dict[int, List[str]] = {}
    tables: List[Dict[str, Any]] = []
    charts: List[Dict[str, Any]] = []

    for position, (heading, body) in enumerate(sections):
        bullets_by_section[position] = extract_bullets(body)

        pairs = extract_keyval_table(body) if want_tables else None
        if pairs:
            tables.append({"title": f"{heading} — 表", "pairs": pairs})

        points = extract_chart_data(body) if want_charts else None
        if points:
            charts.append({"title": f"{heading} — チャート", "series": points})

    return dict(
        summary=summary,
        sections=sections,
        bullets=bullets_by_section,
        tables=tables,
        charts=charts,
    )
+# =========================
+# pptx builder
+# =========================
+def _add_logo(prs: Presentation, slide, logo_bytes: Optional[bytes]):
+    if not logo_bytes:
+        return
+    img = Image.open(io.BytesIO(logo_bytes)).convert("RGBA")
+    max_w, max_h = Inches(2.0), Inches(1.0)
+    w, h = img.size
+    ratio = min(max_w / max(w, 1), max_h / max(h, 1))
+    new_size = (max(1, int(w * ratio)), max(1, int(h * ratio)))
+    resized = img.resize(new_size)
+    b = io.BytesIO()
+    resized.save(b, format="PNG")
+    b.seek(0)
+    left = prs.slide_width - max_w - Inches(0.5)
+    top = Inches(0.2)
+    slide.shapes.add_picture(b, left, top)
def _apply_theme_bg(slide, rgb):
    """Fill the slide background with the solid colour ``rgb`` (r, g, b)."""
    background_fill = slide.background.fill
    background_fill.solid()
    background_fill.fore_color.rgb = RGBColor(*rgb)
def _title_slide(prs, title_text: str, theme_rgb, logo_bytes):
    """Add the title slide: themed background, white panel, title, subtitle, logo.

    The white rounded panel is created after the layout placeholders, which
    would put it above them in z-order and hide the text. It is therefore
    moved to the back of the slide's shape tree once created.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    title = slide.shapes.title
    subtitle = slide.placeholders[1]
    title.text = title_text
    subtitle.text = "自動生成プレゼンテーション"
    _apply_theme_bg(slide, theme_rgb)

    # Panel geometry; title and subtitle are positioned relative to it.
    left = Inches(0.6)
    top = Inches(1.8)
    width = prs.slide_width - Inches(1.2)
    height = Inches(2.2)

    box = slide.shapes.add_shape(MSO_AUTO_SHAPE_TYPE.ROUNDED_RECTANGLE, left, top, width, height)
    box.fill.solid()
    box.fill.fore_color.rgb = RGBColor(255, 255, 255)
    box.line.color.rgb = RGBColor(0, 0, 0)
    # NOTE(review): python-pptx's documented LineFormat has no `transparency`
    # property, so this assignment presumably has no visible effect — confirm.
    box.line.transparency = 0.8

    # Send the panel behind the placeholders. Index 2 is the first shape
    # slot: the two leading children of the shape tree are its own
    # non-visual and group property elements.
    sp_tree = slide.shapes._spTree
    sp_tree.remove(box._element)
    sp_tree.insert(2, box._element)

    title.left = left + Inches(0.3)
    title.top = top + Inches(0.3)
    title.width = width - Inches(0.6)
    title.height = Inches(1.4)
    for p in title.text_frame.paragraphs:
        p.font.size = Pt(40)
        p.font.bold = True

    subtitle.left = left + Inches(0.3)
    subtitle.top = top + Inches(1.6)
    subtitle.width = width - Inches(0.6)
    subtitle.height = Inches(0.8)
    for p in subtitle.text_frame.paragraphs:
        p.font.size = Pt(16)
        p.font.bold = False

    _add_logo(prs, slide, logo_bytes)
+def _summary_slide(prs, summary: str):
+    if not summary:
+        return
+    slide = prs.slides.add_slide(prs.slide_layouts[1])
+    slide.shapes.title.text = "エグゼクティブサマリー"
+    tf = slide.placeholders[1].text_frame
+    tf.clear()
+    lines = [ln.strip() for ln in summary.splitlines() if ln.strip()]
+    if not lines:
+        lines = [summary]
+    for i, ln in enumerate(lines):
+        p = tf.add_paragraph() if i > 0 else tf.paragraphs[0]
+        p.text = ln
+        p.level = 0
def _section_slide(prs, title: str, bullets: List[str]):
    """Add a section slide with a title (max 90 chars) and up to 12 bullets.

    A single placeholder bullet is shown when ``bullets`` is empty.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    slide.shapes.title.text = title[:90]
    frame = slide.placeholders[1].text_frame
    frame.clear()

    items = bullets if bullets else ["(要点なし)"]
    for pos, item in enumerate(items[:12]):
        # The cleared frame still holds one paragraph; reuse it first.
        para = frame.paragraphs[0] if pos == 0 else frame.add_paragraph()
        para.text = item
        para.level = 0
def _table_slide(prs, title: str, pairs: List[tuple]):
    """Add a slide with a two-column table: a header row plus one row per pair."""
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    slide.shapes.title.text = title

    row_count = 1 + len(pairs)
    col_count = 2
    x = Inches(0.5)
    y = Inches(1.8)
    table_w = prs.slide_width - Inches(1.0)
    table_h = prs.slide_height - Inches(2.6)
    grid = slide.shapes.add_table(row_count, col_count, x, y, table_w, table_h).table

    # Header row.
    grid.cell(0, 0).text = "項目"
    grid.cell(0, 1).text = "値"

    # Data rows start at index 1, directly below the header.
    for offset, pair in enumerate(pairs):
        key, value = pair
        row = offset + 1
        grid.cell(row, 0).text = str(key)
        grid.cell(row, 1).text = str(value)
def _chart_slide(prs, title: str, series: List[tuple]):
    """Add a slide showing ``series`` as a bar chart rendered to PNG.

    Args:
        prs: Presentation that receives the slide.
        title: Slide title.
        series: Sequence of ``(label, value)`` items.

    The figure is drawn through its own axes rather than pyplot's implicit
    "current figure", and is always closed, even if plotting or saving
    raises. The picture is scaled to fit the area below the title while
    keeping the figure's aspect ratio, and is centred horizontally.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    slide.shapes.title.text = title

    labels = [item[0] for item in series]
    values = [item[1] for item in series]

    fig_w_in, fig_h_in = 8, 4.5
    fig = plt.figure(figsize=(fig_w_in, fig_h_in))
    buf = io.BytesIO()
    try:
        ax = fig.add_subplot(111)
        positions = list(range(len(values)))
        ax.bar(positions, values)
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=20, ha='right')
        fig.tight_layout()
        fig.savefig(buf, format='png', dpi=200)
    finally:
        # Release the figure regardless of outcome to avoid accumulating
        # open figures in a long-running process.
        plt.close(fig)
    buf.seek(0)

    # Area available for the chart, in EMU.
    avail_w = prs.slide_width - Inches(1.0)
    avail_h = prs.slide_height - Inches(2.2)
    # Largest uniform scale (EMU per figure inch) that fits the area.
    emu_per_inch = min(avail_w / fig_w_in, avail_h / fig_h_in)
    width = int(fig_w_in * emu_per_inch)
    height = int(fig_h_in * emu_per_inch)

    left = Inches(0.5) + (avail_w - width) // 2
    top = Inches(1.6)
    slide.shapes.add_picture(buf, left, top, width=width, height=height)
def _add_footer(prs, theme_rgb):
    """Add a theme-coloured footer bar and a right-aligned page number to every slide.

    Page numbers start at 1 and follow slide order.
    """
    # Geometry is the same for every slide, so compute it once.
    bar_left = Inches(0.3)
    bar_top = prs.slide_height - Inches(0.4)
    bar_width = prs.slide_width - Inches(0.6)
    bar_height = Inches(0.3)

    for idx, slide in enumerate(prs.slides, start=1):
        bar = slide.shapes.add_shape(
            MSO_AUTO_SHAPE_TYPE.RECTANGLE, bar_left, bar_top, bar_width, bar_height
        )
        bar.fill.solid()
        bar.fill.fore_color.rgb = RGBColor(*theme_rgb)
        bar.line.fill.background()

        number_box = slide.shapes.add_textbox(
            prs.slide_width - Inches(1.0),
            bar_top - Inches(0.05),
            Inches(0.8),
            Inches(0.3),
        )
        para = number_box.text_frame.paragraphs[0]
        para.text = f"{idx}"
        para.font.size = Pt(10)
        para.alignment = PP_ALIGN.RIGHT
def build_presentation(output_path: str,
                        title: str,
                        theme_rgb: tuple,
                        logo_bytes: Optional[bytes],
                        executive_summary: Optional[str],
                        sections: List[Tuple[str, str]],
                        bullets_by_section: Dict[int, List[str]],
                        tables: List[Dict[str, Any]],
                        charts: List[Dict[str, Any]]):
    """Assemble the deck and save it to ``output_path``.

    Slide order: title, executive summary, one slide per section, one per
    table, one per chart. Footers are added last so they cover every slide.
    """
    deck = Presentation()

    _title_slide(deck, title, theme_rgb, logo_bytes)
    _summary_slide(deck, executive_summary)

    for position, section in enumerate(sections):
        heading, _ = section
        _section_slide(deck, heading, bullets_by_section.get(position, []))

    for table_spec in tables:
        _table_slide(deck, table_spec.get("title", "表"), table_spec.get("pairs", []))

    for chart_spec in charts:
        _chart_slide(deck, chart_spec.get("title", "チャート"), chart_spec.get("series", []))

    _add_footer(deck, theme_rgb)
    deck.save(output_path)
+# =========================
+# Gradio App
+# =========================
 def generate_pptx(long_text: str,
                   title: str,
                   theme_hex: str,
         except Exception:
             logo_bytes = None
     result = process_text(
         text=long_text,
         use_inference_api=use_inference_api,
         max_summary_words=max_summary_words,
     )
     ensure_tmpdir()
     timestamp = time.strftime('%Y%m%d-%H%M%S')
     out_path = f"/tmp/auto_ppt_{timestamp}.pptx"
         tables=result.get("tables", []),
         charts=result.get("charts", []),
     )
     return out_path
 def ui():
     with gr.Blocks(title=APP_NAME) as demo:
         gr.Markdown(f"# {APP_NAME}\n長文→要約→セクション分割→箇条書き/表/図→**PPTX出力** まで自動化")
         """)
     return demo
 if __name__ == "__main__":
     demo = ui()
     # Spaces は自動でバインドされますが、ローカル互換のため指定可能