Spaces:

Armaggheddon
/

yolo11-document-layout

Running

App Files Files Community

Armaggheddon committed on Sep 29, 2025

Commit

81fc526

1 Parent(s): 3d862ee

Initial commit

Browse files

Files changed (8) hide show

.gitattributes +1 -0
README.md +8 -5
app.py +111 -0
requirements.txt +4 -0
samples/image1.png +3 -0
samples/image2.png +3 -0
samples/image3.png +3 -0
samples/image4.png +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,14 +1,17 @@
 ---
 title: Yolo11 Document Layout
-emoji: 🐨
-colorFrom: red
-colorTo: gray
 sdk: gradio
 sdk_version: 5.47.2
 app_file: app.py
 pinned: false
-license: mit
-short_description: Detect document layout elements in documents (PDF or Images)
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Yolo11 Document Layout
+emoji: 📄
+short_description: Detect document layout elements in documents (PDF or Images)
+colorFrom: purple
+colorTo: purple
 sdk: gradio
 sdk_version: 5.47.2
 app_file: app.py
 pinned: false
+preload_from_hub:
+    - Armaggheddon/yolo11-document-layout yolo11n_doc_layout.pt,yolo11s_doc_layout.pt,yolo11m_doc_layout.pt
+models:
+    - Armaggheddon/yolo11-document-layout
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+from pathlib import Path
+import gradio as gr
+import pymupdf
+from ultralytics import YOLO
+from PIL import Image
+from huggingface_hub import hf_hub_download
+SAMPLES = Path(__file__).parent / "samples"
+IMAGE_SAMPLES = [
+    SAMPLES / "image1.png",
+    SAMPLES / "image2.png",
+    SAMPLES / "image3.png",
+    SAMPLES / "image4.png",
+]
+AVAILABLE_MODELS = {
+    "yolo11n": ("Armaggheddon/yolo11-document-layout", "yolo11n_doc_layout.pt"),
+    "yolo11s": ("Armaggheddon/yolo11-document-layout", "yolo11s_doc_layout.pt"),
+    "yolo11m": ("Armaggheddon/yolo11-document-layout", "yolo11m_doc_layout.pt"),
+}
+current_model = "yolo11n"
+model = None
+def load_model(selected_model):
+    global model
+    if model is None or current_model != selected_model:
+        repo_id, filename = AVAILABLE_MODELS[selected_model]
+        model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+        model = YOLO(model_path)
+def model_runner(image, conf=0.25, iou=0.45):
+    result = model.predict(source=image, save=False, verbose=False, conf=conf, iou=iou, imgsz=1280)
+    result_img = result[0].plot()
+    return result_img
+def process_input(selected_model, pdf_input, image_input, conf=0.25, iou=0.45):
+    if pdf_input is None and image_input is None:
+        return gr.Error("Please upload a PDF or an image file.")
+    load_model(selected_model)
+    pages = []
+    if pdf_input is not None and pdf_input.endswith(".pdf"):
+        doc = pymupdf.open(pdf_input)
+        for page in doc:
+            pix = page.get_pixmap(dpi=200) # if A4 should result in above 1400px width
+            pil_img = pix.pil_image()
+            result_img = model_runner(pil_img)
+            pages.append(result_img)
+    elif image_input is not None and image_input.endswith((".png", ".jpg", ".jpeg")):
+        image = image_input
+        result_img = model_runner(image)
+        pages.append(result_img)
+    else:
+        return gr.Error("Unsupported file type. Please upload a PDF or an image file with .pdf, .jpg or .jpeg extension.")
+    return ((page, f"Page {i+1}") for i, page in enumerate(pages))
+with gr.Blocks() as demo:  # Build the whole UI inside a single Blocks context bound to `demo`.
+    gr.Markdown("# YOLO11 Document Layout 🔎📄")  # Page title.
+    gr.Markdown(  # Introductory text with links to the model repo and the GitHub project.
+"""
+Detects layout elements in documents (PDFs or images) using YOLOv11 models and the Ultralytics library.
+Upload a PDF or an image, select a model size, and click "Run" to see the detected layout elements.
+- Finetuned models available at [Armaggheddon/yolo11-document-layout](https://huggingface.co/Armaggheddon/yolo11-document-layout)
+- More available in the [GitHub Repository](https://github.com/Armaggheddon/yolo11_doc_layout)
+"""
+    )
+    with gr.Row():  # One row holding two columns: inputs first, outputs and settings second.
+        with gr.Column():  # Input column: file uploads plus the action buttons.
+            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], file_count="single")  # Single-file upload restricted to .pdf.
+            image_input = gr.Image(label="Upload Image", type="filepath")  # type="filepath": process_input calls .endswith() on this value.
+            clear_button = gr.Button("Clear")  # Wired below to reset both inputs and the gallery.
+            run_button = gr.Button("Run", variant="primary")  # Wired below to process_input.
+        with gr.Column():  # Output column: result gallery plus model/threshold controls.
+            outputs = gr.Gallery(label="Output Image")  # Receives the (image, caption) items returned by process_input.
+            with gr.Group():  # Groups the model selector and the two threshold sliders.
+                model_name = gr.Dropdown(
+                    list(AVAILABLE_MODELS.keys()),  # Choices are the keys of AVAILABLE_MODELS.
+                    value="yolo11n",  # Same value as the module-level current_model default.
+                    label="Model size",
+                )
+                conf = gr.Slider(0, 1, value=0.25, step=0.01, label="Confidence threshold")  # Default equals process_input's conf default.
+                iou = gr.Slider(0, 1, value=0.45, step=0.01, label="IOU threshold")  # Default equals process_input's iou default.
+    examples = gr.Examples(  # One example row per bundled sample image, each paired with "yolo11n".
+        examples=[[str(p), "yolo11n"] for p in IMAGE_SAMPLES],
+        inputs=[image_input, model_name],  # NOTE(review): order (image, model) differs from process_input's (model, pdf, image, ...) signature.
+        cache_examples=False,  # NOTE(review): presumably fn is never invoked for examples with caching off; confirm before enabling caching.
+        fn=process_input,
+        outputs=outputs,
+    )
+    run_button.click(  # Run detection with the current inputs and settings.
+        fn=process_input,
+        inputs=[model_name, pdf_input, image_input, conf, iou],  # Positional order matches process_input's parameters.
+        outputs=outputs,
+    )
+    clear_button.click(  # Reset the PDF input, the image input and the gallery.
+        fn=lambda: (None, None, None),  # One None per component listed in outputs below.
+        inputs=[],
+        outputs=[pdf_input, image_input, outputs],
+    )
+demo.launch()  # Runs at import time; there is no __main__ guard.