Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- README.md +28 -12
- app.py +103 -0
- requirements.txt +11 -0
- runtime.txt +1 -0
README.md
CHANGED
|
@@ -1,12 +1,28 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Auto-PPT Generator (Hugging Face Space)
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
End-to-end pipeline: Long text → Summary → Sectioning → Bullets/Tables/Charts → PPTX export, with theme color and logo.
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
## Run on a Space
|
| 8 |
+
1. Create a Gradio Space and upload all files.
|
| 9 |
+
2. (Optional) **Settings → Variables & secrets**: add `HF_TOKEN` if you will use the Inference API.
|
| 10 |
+
3. Click **Run**.
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
## Models
|
| 14 |
+
- **Local CPU (English):** `sshleifer/distilbart-cnn-12-6` (summarization)
|
| 15 |
+
- **Local CPU (Japanese):** `sonoisa/t5-base-japanese` (runs through the `text2text-generation` pipeline; prefix the input with `要約:`)
|
| 16 |
+
- **Inference API:** any instruct/summarization model you have access to (e.g., `Qwen/Qwen2-7B-Instruct`, `elyza/ELYZA-japanese-Llama-2-7b-fast-instruct`).
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
## Input conventions
|
| 20 |
+
- **Tables:** Provide lines like `項目: 値` under a section to auto-build a 2-column table.
|
| 21 |
+
- **Charts:** Provide lines like `ラベル: 123` (numbers) to auto-build a bar chart.
|
| 22 |
+
- **Bullets:** Lines starting with `-`, `*`, `・`, or numbered lists are detected automatically.
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
## Notes
|
| 26 |
+
- Slide numbers are approximated (python-pptx lacks true auto-numbering fields).
|
| 27 |
+
- For corporate fonts, pre-install them or post-process the PPTX if required.
|
| 28 |
+
- For very large texts, model input is truncated to each model's token limit, but the rule-based extractors remain robust.
|
app.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import io
|
| 3 |
+
import time
|
| 4 |
+
import gradio as gr
|
| 5 |
+
from modules.text_processing import process_text
|
| 6 |
+
from modules.pptx_builder import build_presentation
|
| 7 |
+
from modules.utils import safe_hex_to_rgb, ensure_tmpdir
|
| 8 |
+
|
| 9 |
+
# Display name of the app; used as the Blocks page title and the top heading in ui().
APP_NAME = "Auto-PPT Generator"
|
| 10 |
+
|
| 11 |
+
def _read_logo_bytes(logo_file):
    """Return the raw bytes of an uploaded logo, or None when there is none.

    Accepts the shapes a file upload can arrive in: raw bytes, a filesystem
    path (str / os.PathLike), a readable binary file-like object, or an
    object exposing the path through a ``name`` attribute. An empty upload
    is treated as "no logo".
    """
    if not logo_file:
        return None
    if isinstance(logo_file, (bytes, bytearray)):
        return bytes(logo_file)

    if isinstance(logo_file, (str, os.PathLike)):
        path = logo_file
    else:
        reader = getattr(logo_file, "read", None)
        if callable(reader):
            data = reader()
            if isinstance(data, (bytes, bytearray)) and data:
                return bytes(data)
        # Text-mode or already-consumed handles: fall back to the path on disk.
        path = getattr(logo_file, "name", None)
        if not path or not isinstance(path, (str, os.PathLike)):
            return None

    with open(path, "rb") as handle:
        return handle.read() or None


def generate_pptx(long_text: str,
                  title: str,
                  theme_hex: str,
                  add_summary: bool,
                  add_charts: bool,
                  use_inference_api: bool,
                  summarize_model: str,
                  generator_model: str,
                  max_summary_words: int,
                  add_tables: bool = True,
                  logo_file=None):
    """Run the text pipeline on ``long_text`` and write the result as a PPTX.

    Args:
        long_text: Source text to turn into slides; must not be blank.
        title: Deck title; falls back to "Auto-PPT" when empty.
        theme_hex: Theme color as a hex string; falls back to "#3B82F6".
        add_summary: Forwarded to ``process_text`` as ``want_summary``.
        add_charts: Forwarded to ``process_text`` as ``want_charts``.
        use_inference_api: Forwarded to ``process_text``.
        summarize_model: Forwarded to ``process_text``.
        generator_model: Forwarded to ``process_text``.
        max_summary_words: Forwarded to ``process_text`` (coerced to int).
        add_tables: Forwarded to ``process_text`` as ``want_tables``.
        logo_file: Optional uploaded logo (path, bytes or file-like object).

    Returns:
        Path of the generated ``.pptx`` file under ``/tmp``.

    Raises:
        gr.Error: If ``long_text`` is empty or whitespace only.
    """
    if not long_text or not long_text.strip():
        raise gr.Error("入力テキストが空です。長文を貼り付けてください。")

    theme_rgb = safe_hex_to_rgb(theme_hex or "#3B82F6")

    # Read logo (optional)
    logo_bytes = _read_logo_bytes(logo_file)

    # Step 1-3: NLP pipeline (summary, sections, bullets, tables, chart data)
    results = process_text(
        text=long_text,
        use_inference_api=use_inference_api,
        summarize_model=summarize_model,
        generator_model=generator_model,
        want_summary=add_summary,
        want_tables=add_tables,
        want_charts=add_charts,
        max_summary_words=int(max_summary_words),
    )

    # Step 4: Build PPTX
    ensure_tmpdir()
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    out_path = f"/tmp/auto_ppt_{timestamp}.pptx"

    build_presentation(
        output_path=out_path,
        title=title or "Auto-PPT",
        theme_rgb=theme_rgb,
        logo_bytes=logo_bytes,
        executive_summary=results.get("summary"),
        sections=results.get("sections", []),
        bullets_by_section=results.get("bullets", {}),
        tables=results.get("tables", []),
        charts=results.get("charts", []),
    )

    # Return file path for download
    return out_path


def ui():
    """Build the Gradio Blocks interface and wire the button to generate_pptx.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks(title=APP_NAME) as demo:
        gr.Markdown(f"# {APP_NAME}\n長文→要約→セクション分割→箇条書き/表/図→**PPTX出力**まで自動化")
        with gr.Row():
            with gr.Column(scale=2):
                long_text = gr.Textbox(
                    label="長文テキスト(貼り付け)",
                    lines=20,
                    placeholder="ここに長文テキストを貼り付けてください...",
                )
                title = gr.Textbox(label="タイトル", value="自動生成スライド")
                theme_hex = gr.ColorPicker(label="テーマカラー", value="#3B82F6")
                logo = gr.File(label="ロゴ画像(任意)")
                with gr.Row():
                    add_summary = gr.Checkbox(label="要約スライドを追加", value=True)
                    add_tables = gr.Checkbox(label="表を検出して追加", value=True)
                    add_charts = gr.Checkbox(label="チャートを生成して追加", value=True)
            with gr.Column(scale=1):
                use_inference_api = gr.Checkbox(label="Hugging Face Inference APIを使用", value=False)
                summarize_model = gr.Textbox(label="要約モデル名(local or API)", value="sshleifer/distilbart-cnn-12-6")
                generator_model = gr.Textbox(label="生成モデル(API推奨,任意)", value="")
                max_summary_words = gr.Slider(
                    minimum=50,
                    maximum=600,
                    value=200,
                    step=10,
                    label="要約の最大単語数",
                )
                generate = gr.Button("PPTXを生成", variant="primary")
                output_file = gr.File(label="ダウンロード")

        # The order here must match the positional parameters of generate_pptx.
        generate.click(
            fn=generate_pptx,
            inputs=[
                long_text, title, theme_hex, add_summary, add_charts,
                use_inference_api, summarize_model, generator_model,
                max_summary_words, add_tables, logo,
            ],
            outputs=output_file,
        )

        gr.Markdown("""
        **Tips**
        - 日本語要約には`sonoisa/t5-base-japanese`を推奨('text2text-generation').
        - Inference API を使う場合は、 Spaceの Secret に `HF_TOKEN` を設定してください。
        - チャートは'Label: 123' 形式の行を自動検出して棒グラフを作成します。
        """)

    return demo
|
| 100 |
+
|
| 101 |
+
if __name__ == "__main__":
    # Script entry point: build the Blocks UI and start the Gradio server.
    ui().launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44.0
|
| 2 |
+
transformers>=4.44.0
|
| 3 |
+
sentencepiece>=0.1.99
|
| 4 |
+
accelerate>=0.33.0
|
| 5 |
+
torch>=2.2.0
|
| 6 |
+
python-pptx>=0.6.23
|
| 7 |
+
matplotlib>=3.8.4
|
| 8 |
+
pillow>=10.2.0
|
| 9 |
+
pandas>=2.2.2
|
| 10 |
+
numpy>=1.26.4
|
| 11 |
+
requests>=2.31.0
|
runtime.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
python-3.10
|