Spaces:

Corin1998
/

Auto_PPT_Generator

Sleeping

App Files Files Community

Corin1998 committed on Sep 16

Commit

e2a2f35

verified ·

1 Parent(s): d229d04

Update app.py

Browse files

Files changed (1) hide show

app.py +161 -53

app.py CHANGED Viewed

@@ -7,10 +7,11 @@ from typing import Optional, List, Tuple, Dict, Any
 import gradio as gr
-# 安全のため、GUI不要の描画バックエンドを指定
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from pptx import Presentation
 from pptx.util import Inches, Pt
@@ -19,19 +20,73 @@ from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE
 from pptx.dml.color import RGBColor
 from PIL import Image
-# transformers は任意（未インストールでも動作させる）
 try:
     from transformers import pipeline
 except Exception:
     pipeline = None
-import requests  # Inference API を使う場合にのみ利用
 APP_NAME = "Auto-PPT Generator"
-# =========================
 # utils
-# =========================
 def safe_hex_to_rgb(hex_color: str):
     if not hex_color:
         return (59, 130, 246)  # default blue
@@ -45,19 +100,22 @@ def safe_hex_to_rgb(hex_color: str):
         return (r, g, b)
     return (59, 130, 246)
 def ensure_tmpdir():
     os.makedirs("/tmp", exist_ok=True)
-# =========================
-# LLM client (local / HF API)
-# =========================
 class LLMClient:
     def __init__(self, use_inference_api: bool = False):
         self.use_inference_api = use_inference_api
         self.hf_token = os.getenv("HF_TOKEN", None)
         self._local_pipes = {}
-    # ---------- Inference API helpers ----------
     def _hf_headers(self):
         if not self.hf_token:
             raise RuntimeError("HF_TOKEN is not set for Inference API usage.")
@@ -80,11 +138,11 @@ class LLMClient:
             return data[0]["generated_text"]
         if isinstance(data, dict) and "generated_text" in data:
             return data["generated_text"]
-        # summarization系モデルは list[0]['summary_text'] の場合も
         if isinstance(data, list) and data and "summary_text" in data[0]:
             return data[0]["summary_text"]
         return str(data)
     def _get_local_pipe(self, task: str, model: str):
         key = (task, model)
         if key in self._local_pipes:
@@ -104,8 +162,8 @@ class LLMClient:
             except Exception:
                 pass
-        # ローカル（transformers）試行
-        if pipeline is not None:
             try:
                 if "t5" in model.lower():
                     pipe = self._get_local_pipe("text2text-generation", model)
@@ -119,7 +177,7 @@ class LLMClient:
             except Exception:
                 pass
-        # フォールバック：先頭の短文を並べるだけ
         sents = re.split(r"[。\.!?]\s*", text)
         out = []
         for s in sents:
@@ -136,12 +194,13 @@ class LLMClient:
                 return self._hf_textgen(model, prompt, max_new_tokens=max_new_tokens)
             except Exception:
                 return ""
-        return ""  # 本実装ではルールベースに依存
-# =========================
-# text processing
-# =========================
 LIST_BULLET = re.compile(r"^(?:[-*•・]|\d+\.|\d+\))\s+(.*)")
 KEYVAL_LINE = re.compile(r"^\s*([^:：]+?)\s*[:：]\s*([^\n]+?)\s*$")
 LABEL_NUM = re.compile(r"^\s*([^:：]+?)\s*[:：]\s*([+-]?\d+(?:\.\d+)?)\s*$")
@@ -176,7 +235,7 @@ def naive_section_split(text: str, target_chars: int = 1200) -> List[Tuple[str,
         sections = [("本文", text)]
     return sections
-def extract_bullets(section_text: str, max_items: int = 8) -> List[str]:
     bullets: List[str] = []
     for line in section_text.splitlines():
         m = LIST_BULLET.match(line.strip())
@@ -203,7 +262,7 @@ def extract_keyval_table(section_text: str) -> List[Tuple[str, str]]:
                 pairs.append((k, v))
     return pairs
-def extract_chart_data(section_text: str, top_k: int = 10) -> List[Tuple[str, float]]:
     data: List[Tuple[str, float]] = []
     for line in section_text.splitlines():
         m = LABEL_NUM.match(line)
@@ -262,9 +321,11 @@ def process_text(text: str,
         "charts": charts,
     }
-# =========================
-# pptx builder
-# =========================
 def _add_logo(prs: Presentation, slide, logo_bytes: Optional[bytes]):
     if not logo_bytes:
         return
@@ -322,17 +383,22 @@ def _title_slide(prs, title_text: str, theme_rgb, logo_bytes):
 def _summary_slide(prs, summary: str):
     if not summary:
         return
-    slide = prs.slides.add_slide(prs.slide_layouts[1])
     slide.shapes.title.text = "エグゼクティブサマリー"
     tf = slide.placeholders[1].text_frame
     tf.clear()
     lines = [ln.strip() for ln in summary.splitlines() if ln.strip()]
     if not lines:
-        lines = [summary]
     for i, ln in enumerate(lines):
         p = tf.add_paragraph() if i > 0 else tf.paragraphs[0]
         p.text = ln
         p.level = 0
 def _section_slide(prs, title: str, bullets: List[str]):
     slide = prs.slides.add_slide(prs.slide_layouts[1])
@@ -341,45 +407,85 @@ def _section_slide(prs, title: str, bullets: List[str]):
     tf.clear()
     if not bullets:
         bullets = ["(要点なし)"]
-    for i, b in enumerate(bullets[:12]):
         p = tf.add_paragraph() if i > 0 else tf.paragraphs[0]
         p.text = b
         p.level = 0
 def _table_slide(prs, title: str, pairs: List[tuple]):
-    slide = prs.slides.add_slide(prs.slide_layouts[5])
-    slide.shapes.title.text = title
-    rows = len(pairs) + 1
-    cols = 2
-    left = Inches(0.5)
-    top = Inches(1.8)
-    width = prs.slide_width - Inches(1.0)
-    height = prs.slide_height - Inches(2.6)
-    table = slide.shapes.add_table(rows, cols, left, top, width, height).table
-    table.cell(0, 0).text = "項目"
-    table.cell(0, 1).text = "値"
-    for r, (k, v) in enumerate(pairs, start=1):
-        table.cell(r, 0).text = str(k)
-        table.cell(r, 1).text = str(v)
 def _chart_slide(prs, title: str, series: List[tuple]):
-    slide = prs.slides.add_slide(prs.slide_layouts[5])
-    slide.shapes.title.text = title
-    labels = [x[0] for x in series]
-    values = [x[1] for x in series]
-    fig = plt.figure(figsize=(8, 4.5))
-    plt.bar(range(len(values)), values)
-    plt.xticks(range(len(labels)), labels, rotation=20, ha='right')
-    plt.tight_layout()
     buf = io.BytesIO()
-    fig.savefig(buf, format='png', dpi=200)
     plt.close(fig)
     buf.seek(0)
     left = Inches(0.5)
     top = Inches(1.6)
     width = prs.slide_width - Inches(1.0)
-    height = prs.slide_height - Inches(2.2)
-    slide.shapes.add_picture(buf, left, top, width=width, height=height)
 def _add_footer(prs, theme_rgb):
     for idx, slide in enumerate(prs.slides, start=1):
@@ -420,9 +526,11 @@ def build_presentation(output_path: str,
     _add_footer(prs, theme_rgb)
     prs.save(output_path)
-# =========================
 # Gradio App
-# =========================
 def generate_pptx(long_text: str,
                   title: str,
                   theme_hex: str,
@@ -512,7 +620,7 @@ def ui():
         **Tips**
         - 日本語要約には `sonoisa/t5-base-japanese` を推奨（`text2text-generation`）。
         - Inference API を使う場合は、Space の Secrets に `HF_TOKEN` を設定してください。
-        - チャートは `Label: 123` 形式の行を自動検出して棒グラフを作成します。
         """)
     return demo

 import gradio as gr
+# ---- Matplotlib をGUI非依存で動作させる（必ず pyplot より先に実行）----
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
+from matplotlib import font_manager
 from pptx import Presentation
 from pptx.util import Inches, Pt
 from pptx.dml.color import RGBColor
 from PIL import Image
+# transformers は任意（未インストールでも動作可）
 try:
     from transformers import pipeline
 except Exception:
     pipeline = None
+import requests  # Inference API を使う場合のみ実使用
 APP_NAME = "Auto-PPT Generator"
+# ======================================================
 # utils
+# ======================================================
+FALLBACK_FONT_PATH = os.getenv("JP_FONT_PATH", "./assets/fonts/IPAexGothic.ttf")
def set_jp_font():
    """Configure Matplotlib so Japanese chart labels do not render as tofu boxes.

    Resolution order:

    1. Use the first candidate family that is already registered with
       Matplotlib's font manager.
    2. Otherwise, if the file at ``FALLBACK_FONT_PATH`` exists, register it
       and use the family name it reports.

    When neither step yields a font, ``font.family`` is left untouched.
    ``axes.unicode_minus`` is always set to ``False``.
    """
    candidates = [
        "IPAexGothic", "Noto Sans CJK JP", "Noto Sans JP",
        "Source Han Sans", "源ノ角ゴシック", "Yu Gothic", "Hiragino Sans"
    ]
    installed = {f.name for f in font_manager.fontManager.ttflist}
    # Sorted so the pick is deterministic when several installed families
    # contain the same candidate substring.
    installed_sorted = sorted(installed)

    chosen = None
    for name in candidates:
        if name in installed:
            chosen = name
            break
        # Candidates are matched as substrings (e.g. "Source Han Sans" inside
        # "Source Han Sans JP"), so select the installed family itself:
        # rcParams needs a name the font manager can actually resolve.
        chosen = next((fam for fam in installed_sorted if name in fam), None)
        if chosen:
            break

    if not chosen and os.path.exists(FALLBACK_FONT_PATH):
        try:
            font_manager.fontManager.addfont(FALLBACK_FONT_PATH)
            chosen = font_manager.FontProperties(fname=FALLBACK_FONT_PATH).get_name()
        except Exception:
            # Best effort: an unreadable font file must not break chart rendering.
            chosen = None

    if chosen:
        plt.rcParams["font.family"] = chosen
    matplotlib.rcParams["axes.unicode_minus"] = False
def wrap_label(s: str, width: int = 6, max_lines: int = 2) -> str:
    """Wrap a long label onto several lines and truncate what does not fit.

    The label is cut into fixed-size pieces of ``width`` characters (no word
    boundary detection, which suits Japanese text). At most ``max_lines``
    pieces are kept; when pieces are dropped an ellipsis ("…") is appended to
    the last kept line.

    Args:
        s: Label to wrap; non-string values are converted with ``str``.
        width: Maximum characters per line. Values below 1 are treated as 1.
        max_lines: Maximum number of lines. Values below 1 are treated as 1.

    Returns:
        The label unchanged when it fits on one line, otherwise the kept
        pieces joined with newlines.
    """
    s = str(s)
    # Clamp instead of failing: width=0 would make range() raise and
    # max_lines=0 would index into an empty list below.
    width = max(1, width)
    max_lines = max(1, max_lines)
    if len(s) <= width:
        return s
    chunks = [s[i:i + width] for i in range(0, len(s), width)]
    if len(chunks) > max_lines:
        chunks = chunks[:max_lines]
        chunks[-1] += "…"
    return "\n".join(chunks)
def chunked(seq, n):
    """Yield successive lists of up to ``n`` items taken from ``seq``.

    Every chunk except possibly the last has exactly ``n`` items; an empty
    input yields nothing.

    Args:
        seq: Any iterable.
        n: Chunk size, at least 1.

    Raises:
        ValueError: If ``n`` is smaller than 1 (raised when iteration starts).
    """
    # Function-scope import so the block does not depend on the file header.
    from itertools import islice

    if n < 1:
        # A non-positive size would otherwise silently return everything as
        # a single chunk, defeating the purpose of pagination.
        raise ValueError("n must be at least 1")
    iterator = iter(seq)
    while True:
        buf = list(islice(iterator, n))
        if not buf:
            return
        yield buf
 def safe_hex_to_rgb(hex_color: str):
     if not hex_color:
         return (59, 130, 246)  # default blue
         return (r, g, b)
     return (59, 130, 246)
 def ensure_tmpdir() -> None:
     """Create the ``/tmp`` directory if it does not already exist."""
     os.makedirs("/tmp", exist_ok=True)
+# ======================================================
+# LLM client (local / HF Inference API)
+# ======================================================
 class LLMClient:
     def __init__(self, use_inference_api: bool = False):
         """Store the client configuration.

         Args:
             use_inference_api: Flag kept on the instance; presumably selects
                 the Hugging Face Inference API path over local models —
                 the code that reads it is not visible here, confirm there.
         """
         self.use_inference_api = use_inference_api
         # Token is read from the environment once; it may be None here and
         # _hf_headers raises RuntimeError when it is missing.
         self.hf_token = os.getenv("HF_TOKEN", None)
         # Looked up by (task, model) in _get_local_pipe.
         self._local_pipes = {}
+    # ---------- Inference API ----------
     def _hf_headers(self):
         if not self.hf_token:
             raise RuntimeError("HF_TOKEN is not set for Inference API usage.")
             return data[0]["generated_text"]
         if isinstance(data, dict) and "generated_text" in data:
             return data["generated_text"]
         if isinstance(data, list) and data and "summary_text" in data[0]:
             return data[0]["summary_text"]
         return str(data)
+    # ---------- Local transformers ----------
     def _get_local_pipe(self, task: str, model: str):
         key = (task, model)
         if key in self._local_pipes:
             except Exception:
                 pass
+        # ローカル（transformers）
+        if pipeline is not None and model:
             try:
                 if "t5" in model.lower():
                     pipe = self._get_local_pipe("text2text-generation", model)
             except Exception:
                 pass
+        # フォールバック：先頭の短文をつなぐ
         sents = re.split(r"[。\.!?]\s*", text)
         out = []
         for s in sents:
                 return self._hf_textgen(model, prompt, max_new_tokens=max_new_tokens)
             except Exception:
                 return ""
+        return ""  # 今回はルールベース中心
+# ======================================================
+# Text processing
+# ======================================================
 # List item: "-", "*", "•", "・", "1." or "1)" at line start, then whitespace;
 # group 1 is the item text.
 LIST_BULLET = re.compile(r"^(?:[-*•・]|\d+\.|\d+\))\s+(.*)")
 # "key: value" line using an ASCII ":" or full-width "：";
 # group 1 is the key, group 2 the value, both without surrounding whitespace.
 KEYVAL_LINE = re.compile(r"^\s*([^:：]+?)\s*[:：]\s*([^\n]+?)\s*$")
 # "label: number" line; group 2 is an optionally signed integer or decimal
 # (no thousands separators, units or exponents are accepted).
 LABEL_NUM = re.compile(r"^\s*([^:：]+?)\s*[:：]\s*([+-]?\d+(?:\.\d+)?)\s*$")
         sections = [("本文", text)]
     return sections
+def extract_bullets(section_text: str, max_items: int = 12) -> List[str]:
     bullets: List[str] = []
     for line in section_text.splitlines():
         m = LIST_BULLET.match(line.strip())
                 pairs.append((k, v))
     return pairs
+def extract_chart_data(section_text: str, top_k: int = 16) -> List[Tuple[str, float]]:
     data: List[Tuple[str, float]] = []
     for line in section_text.splitlines():
         m = LABEL_NUM.match(line)
         "charts": charts,
     }
+# ======================================================
+# PPTX builder
+# ======================================================
 def _add_logo(prs: Presentation, slide, logo_bytes: Optional[bytes]):
     if not logo_bytes:
         return
 def _summary_slide(prs, summary: str):
     """Append an executive-summary slide built from ``summary``.

     Each non-blank line of ``summary`` becomes one top-level paragraph. At
     most 12 lines are kept, and the font shrinks from 14 pt to 12 pt when
     more than 8 lines are shown.

     Nothing is added when ``summary`` is empty or contains only whitespace.

     Args:
         prs: Presentation the slide is appended to.
         summary: Summary text, one point per line.
     """
     MAX_LINES = 12
     if not summary:
         return
     lines = [ln.strip() for ln in summary.splitlines() if ln.strip()]
     if not lines:
         # Whitespace-only input: skip the slide rather than emit one with an
         # empty body.
         return
     lines = lines[:MAX_LINES]
     # Same size for every run, so compute it once outside the loop.
     font_size = Pt(14 if len(lines) <= 8 else 12)

     slide = prs.slides.add_slide(prs.slide_layouts[1])  # Title and Content
     slide.shapes.title.text = "エグゼクティブサマリー"
     tf = slide.placeholders[1].text_frame
     tf.clear()
     for i, ln in enumerate(lines):
         # The text frame provides a first paragraph; reuse it for line 0.
         p = tf.add_paragraph() if i > 0 else tf.paragraphs[0]
         p.text = ln
         p.level = 0
         for run in p.runs:
             run.font.size = font_size
 def _section_slide(prs, title: str, bullets: List[str]):
     slide = prs.slides.add_slide(prs.slide_layouts[1])
     tf.clear()
     if not bullets:
         bullets = ["(要点なし)"]
+    MAX_ITEMS = 12
+    bullets = bullets[:MAX_ITEMS]
+    for i, b in enumerate(bullets):
         p = tf.add_paragraph() if i > 0 else tf.paragraphs[0]
         p.text = b
         p.level = 0
+        for run in p.runs:
+            run.font.size = Pt(18 if len(bullets) <= 8 else 14)
 def _table_slide(prs, title: str, pairs: List[tuple]):
     """Add one or more two-column table slides listing ``pairs``.

     Rows are paginated: every slide carries a header row plus up to 12 data
     rows, and slides after the first get "（続き）" appended to the title.
     An empty ``pairs`` produces a single placeholder row.

     Args:
         prs: Presentation the slides are appended to.
         title: Slide title for the first page.
         pairs: ``(key, value)`` tuples; both are converted with ``str``.
     """
     rows_per_slide = 12  # header row + at most 12 data rows per slide
     n_cols = 2
     records = pairs if pairs else [("（データなし）", "-")]

     for page_no, batch in enumerate(chunked(records, rows_per_slide)):
         slide = prs.slides.add_slide(prs.slide_layouts[5])  # Title Only
         slide.shapes.title.text = f"{title}（続き）" if page_no != 0 else title

         n_rows = 1 + len(batch)
         x_pos = Inches(0.5)
         y_pos = Inches(1.8)
         box_w = prs.slide_width - Inches(1.0)
         box_h = prs.slide_height - Inches(2.6)
         grid = slide.shapes.add_table(n_rows, n_cols, x_pos, y_pos, box_w, box_h).table

         # Header row
         grid.cell(0, 0).text = "項目"
         grid.cell(0, 1).text = "値"

         # Data rows start below the header
         for row_no, (key, value) in enumerate(batch, start=1):
             grid.cell(row_no, 0).text = str(key)
             grid.cell(row_no, 1).text = str(value)

         # Uniform font size and word wrapping for every cell
         for row_no in range(n_rows):
             for col_no in range(n_cols):
                 frame = grid.cell(row_no, col_no).text_frame
                 frame.word_wrap = True
                 for para in frame.paragraphs:
                     for run in para.runs:
                         run.font.size = Pt(12)
 def _chart_slide(prs, title: str, series: List[tuple]):
     """Render ``series`` as a bar chart image and add it on a new slide.

     The chart is drawn with Matplotlib, saved to an in-memory PNG and placed
     on a title-only slide. The picture is fitted to the slide width while
     keeping its aspect ratio; if that would make it taller than the space
     below the title, it is scaled down and centred horizontally instead.

     Args:
         prs: Presentation the slide is appended to.
         title: Used both as the chart title and the slide title.
         series: ``(label, value)`` tuples; labels are converted with ``str``
             and values with ``float``.
     """
     # Configure a Japanese-capable font before any text is drawn.
     set_jp_font()

     # Tick labels are wrapped/truncated; the raw length still drives sizing.
     raw_labels = [str(x[0]) for x in series]
     labels = [wrap_label(lbl, width=6, max_lines=2) for lbl in raw_labels]
     values = [float(x[1]) for x in series]

     # Longer labels get a taller figure and a larger bottom margin.
     max_label_len = max((len(lbl) for lbl in raw_labels), default=0)
     base_h = 4.2
     fig_h = max(4.0, min(7.0, base_h + 0.10 * max_label_len))  # 4.0-7.0 inch
     bottom_margin = min(0.35, 0.18 + 0.012 * max_label_len)

     buf = io.BytesIO()
     fig = plt.figure(figsize=(8, fig_h))
     try:
         ax = fig.add_subplot(111)
         ax.bar(range(len(values)), values)
         ax.set_xticks(range(len(labels)))
         ax.set_xticklabels(labels, rotation=0, ha='center')
         fig.subplots_adjust(bottom=bottom_margin, left=0.10, right=0.98, top=0.90)
         ax.set_title(title)
         fig.savefig(buf, format='png', dpi=200, bbox_inches='tight')
     finally:
         # Always release the figure, even when plotting or saving fails;
         # pyplot keeps figures alive until they are closed.
         plt.close(fig)
     buf.seek(0)

     slide = prs.slides.add_slide(prs.slide_layouts[5])  # Title Only
     slide.shapes.title.text = title
     left = Inches(0.5)
     top = Inches(1.6)
     width = prs.slide_width - Inches(1.0)
     # Width only: the height follows from the image's aspect ratio.
     pic = slide.shapes.add_picture(buf, left, top, width=width)

     # A tall figure fitted to the width can run past the bottom of the slide.
     # NOTE(review): the 0.6 in kept free below the picture is assumed to be
     # enough for the footer — confirm against _add_footer.
     max_height = prs.slide_height - Inches(2.2)
     if pic.height > max_height:
         scale = max_height / pic.height
         new_width = int(pic.width * scale)
         pic.width = new_width
         pic.height = int(max_height)
         pic.left = int(left + (width - new_width) / 2)
 def _add_footer(prs, theme_rgb):
     for idx, slide in enumerate(prs.slides, start=1):
     _add_footer(prs, theme_rgb)
     prs.save(output_path)
+# ======================================================
 # Gradio App
+# ======================================================
 def generate_pptx(long_text: str,
                   title: str,
                   theme_hex: str,
         **Tips**
         - 日本語要約には `sonoisa/t5-base-japanese` を推奨（`text2text-generation`）。
         - Inference API を使う場合は、Space の Secrets に `HF_TOKEN` を設定してください。
+        - チャートは `ラベル: 数値` 形式の行を自動検出して棒グラフを作成します。
         """)
     return demo