Armaggheddon committed on
Commit
81fc526
·
1 Parent(s): 3d862ee

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,17 @@
1
  ---
2
  title: Yolo11 Document Layout
3
- emoji: 🐨
4
- colorFrom: red
5
- colorTo: gray
 
6
  sdk: gradio
7
  sdk_version: 5.47.2
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: Detect document layout elements in documents (PDF or Images)
 
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Yolo11 Document Layout
3
+ emoji: 📄
4
+ short_description: Detect document layout elements in documents (PDF or Images)
5
+ colorFrom: purple
6
+ colorTo: purple
7
  sdk: gradio
8
  sdk_version: 5.47.2
9
  app_file: app.py
10
  pinned: false
11
+ preload_from_hub:
12
+ - Armaggheddon/yolo11-document-layout yolo11n_doc_layout.pt,yolo11s_doc_layout.pt,yolo11m_doc_layout.pt
13
+ models:
14
+ - Armaggheddon/yolo11-document-layout
15
  ---
16
 
17
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import gradio as gr
3
+ import pymupdf
4
+ from ultralytics import YOLO
5
+ from PIL import Image
6
+ from huggingface_hub import hf_hub_download
7
+
8
# Folder with the bundled example images; it sits next to this file.
SAMPLES = Path(__file__).parent / "samples"

# Example inputs: samples/image1.png through samples/image4.png.
IMAGE_SAMPLES = [SAMPLES / f"image{index}.png" for index in range(1, 5)]

# Maps each selectable model name to the (repo_id, filename) pair used to
# fetch its checkpoint from the Hugging Face Hub.
AVAILABLE_MODELS = {
    f"yolo11{size}": (
        "Armaggheddon/yolo11-document-layout",
        f"yolo11{size}_doc_layout.pt",
    )
    for size in ("n", "s", "m")
}

# Name of the default model, and the slot for the loaded model object
# (None until a model has been loaded).
current_model = "yolo11n"
model = None
24
+
25
def load_model(selected_model):
    """Make sure the module-level ``model`` holds the requested checkpoint.

    The checkpoint is fetched with ``hf_hub_download`` and wrapped in ``YOLO``
    only when no model is loaded yet or a different one was loaded last time.

    Args:
        selected_model: Key of ``AVAILABLE_MODELS`` naming the model to use.

    Raises:
        KeyError: If ``selected_model`` is not a key of ``AVAILABLE_MODELS``.
    """
    global model, current_model
    if model is not None and current_model == selected_model:
        return

    repo_id, filename = AVAILABLE_MODELS[selected_model]
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    model = YOLO(model_path)
    # Record what is actually loaded only after loading succeeded. Without
    # this update the cache check compares against a stale name, which can
    # keep serving the wrong weights or reload on every request.
    current_model = selected_model
31
+
32
def model_runner(image, conf=0.25, iou=0.45):
    """Run layout detection on one image and return the annotated rendering.

    Uses the module-level ``model``, which must already have been set
    (``process_input`` calls ``load_model`` before calling this function).

    Args:
        image: Passed to ``model.predict`` as ``source``; callers in this file
            supply either a PIL image or an image file path.
        conf: Confidence threshold forwarded to ``predict``.
        iou: IoU threshold forwarded to ``predict``.

    Returns:
        The first result's plot as an RGB-ordered array.
    """
    results = model.predict(
        source=image,
        save=False,
        verbose=False,
        conf=conf,
        iou=iou,
        imgsz=1280,
    )
    annotated_bgr = results[0].plot()
    # Ultralytics documents plot() as returning a BGR-ordered array; reverse
    # the channel axis so colours are correct when shown as RGB. The copy
    # gives downstream consumers a contiguous array.
    return annotated_bgr[..., ::-1].copy()
36
+
37
def process_input(selected_model, pdf_input, image_input, conf=0.25, iou=0.45):
    """Detect layout elements in an uploaded PDF or image.

    A PDF takes precedence when both inputs are supplied; each of its pages
    is rasterised and processed separately. Otherwise the single image is
    processed.

    Args:
        selected_model: Key of ``AVAILABLE_MODELS`` naming the model to use.
        pdf_input: Path of the uploaded PDF, or None.
        image_input: Path of the uploaded image, or None.
        conf: Confidence threshold forwarded to ``model_runner``.
        iou: IoU threshold forwarded to ``model_runner``.

    Returns:
        A list of ``(annotated_image, caption)`` tuples, one per page, with
        captions ``"Page 1"``, ``"Page 2"``, ...

    Raises:
        gr.Error: If nothing was uploaded or the file type is unsupported.
    """
    if pdf_input is None and image_input is None:
        # gr.Error is an exception: it has to be raised to show up in the UI.
        raise gr.Error("Please upload a PDF or an image file.")

    # Compare extensions case-insensitively so "SCAN.PDF" / "photo.JPG" work.
    is_pdf = pdf_input is not None and pdf_input.lower().endswith(".pdf")
    is_image = image_input is not None and image_input.lower().endswith(
        (".png", ".jpg", ".jpeg")
    )
    if not (is_pdf or is_image):
        raise gr.Error(
            "Unsupported file type. Please upload a PDF or an image file "
            "with .pdf, .png, .jpg or .jpeg extension."
        )

    # Validate first, so a bad upload never triggers a model download/load.
    load_model(selected_model)

    if is_pdf:
        # The context manager closes the document once every page is rendered.
        with pymupdf.open(pdf_input) as doc:
            pages = [
                model_runner(
                    # if A4 should result in above 1400px width
                    page.get_pixmap(dpi=200).pil_image(),
                    conf=conf,
                    iou=iou,
                )
                for page in doc
            ]
    else:
        pages = [model_runner(image_input, conf=conf, iou=iou)]

    # Return a concrete list rather than a one-shot generator.
    return [(page, f"Page {number}") for number, page in enumerate(pages, start=1)]
59
+
60
# Gradio UI: inputs on the left, gallery plus model settings on the right.
# NOTE(review): the container nesting below was reconstructed from statement
# order; confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# YOLO11 Document Layout 🔎📄")
    gr.Markdown(
        """
        Detects layout elements in documents (PDFs or images) using YOLOv11 models and the Ultralytics library.
        Upload a PDF or an image, select a model size, and click "Run" to see the detected layout elements.
        - Finetuned models available at [Armaggheddon/yolo11-document-layout](https://huggingface.co/Armaggheddon/yolo11-document-layout)
        - More available in the [GitHub Repository](https://github.com/Armaggheddon/yolo11_doc_layout)
        """
    )
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], file_count="single")
            # type="filepath" hands process_input a path string.
            image_input = gr.Image(label="Upload Image", type="filepath")
            clear_button = gr.Button("Clear")
            run_button = gr.Button("Run", variant="primary")
        with gr.Column():
            outputs = gr.Gallery(label="Output Image")
            with gr.Group():
                model_name = gr.Dropdown(
                    list(AVAILABLE_MODELS.keys()),
                    value="yolo11n",
                    label="Model size",
                )
                conf = gr.Slider(0, 1, value=0.25, step=0.01, label="Confidence threshold")
                iou = gr.Slider(0, 1, value=0.45, step=0.01, label="IOU threshold")

    # Clicking an example only fills the image and model inputs.
    # NOTE(review): `inputs` does not match process_input's parameter order;
    # this looks harmless only while cache_examples stays False - confirm
    # before enabling caching or run-on-click.
    examples = gr.Examples(
        examples=[[str(p), "yolo11n"] for p in IMAGE_SAMPLES],
        inputs=[image_input, model_name],
        cache_examples=False,
        fn=process_input,
        outputs=outputs,
    )

    run_button.click(
        fn=process_input,
        inputs=[model_name, pdf_input, image_input, conf, iou],
        outputs=outputs,
    )

    # Reset both upload widgets and the gallery.
    clear_button.click(
        fn=lambda: (None, None, None),
        inputs=[],
        outputs=[pdf_input, image_input, outputs],
    )

demo.launch()
108
+
109
+
110
+
111
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ultralytics
2
+ pymupdf
3
+ pillow
4
+ gradio
samples/image1.png ADDED

Git LFS Details

  • SHA256: c74e536ff7027f785cb907426998551eafab9dfc5b9daf8fc3fc99d56813af06
  • Pointer size: 131 Bytes
  • Size of remote file: 278 kB
samples/image2.png ADDED

Git LFS Details

  • SHA256: a191e19194839e4f2b0f8d8fc525a002c28b8b8a1b1caea9e96d15d85c261667
  • Pointer size: 131 Bytes
  • Size of remote file: 199 kB
samples/image3.png ADDED

Git LFS Details

  • SHA256: fd814bb17d25b99a876ad23b8c6b2af5e6378e6e49b0726d825624149241e6c3
  • Pointer size: 131 Bytes
  • Size of remote file: 263 kB
samples/image4.png ADDED

Git LFS Details

  • SHA256: bdc9cd81f4b54dfb8e81ab012be8cd1f550f7abf03bb1d00ededc96138f7665a
  • Pointer size: 131 Bytes
  • Size of remote file: 282 kB