saneowl committed on
Commit
0753607
·
verified ·
1 Parent(s): 5b43897

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import zipfile
4
+ import tempfile
5
+ import shutil
6
+
7
CHUNK_SIZE = 20 * 1024 * 1024  # default per-part budget: 20 MiB of uncompressed data


def _claim_name(name, taken):
    """Return *name*, or a numbered variant of it, that is not yet in *taken*.

    The chosen name is added to *taken* so later calls cannot reuse it.
    """
    stem, ext = os.path.splitext(name)
    candidate = name
    counter = 0
    while candidate in taken:
        counter += 1
        candidate = f"{stem}_{counter}{ext}"
    taken.add(candidate)
    return candidate


def split_zip_streaming(input_zip, chunk_size=CHUNK_SIZE):
    """Split a ZIP archive into size-limited part archives bundled in one ZIP.

    Entries are visited in archive order. An entry whose uncompressed size
    exceeds ``chunk_size`` is extracted as a standalone file (named after the
    entry's basename). Every other entry is copied into the current
    ``<base>_partN.zip``; a new part is started when adding the entry would
    push the part's total uncompressed size past ``chunk_size``. Part archives
    are created only when there is an entry to put in them. Finally all parts
    and standalone files are packed into ``<base>_split_output.zip``.

    Args:
        input_zip: Filesystem path of the ZIP archive to split. A falsy value
            (e.g. ``None`` when nothing was uploaded) yields ``None``.
        chunk_size: Maximum total uncompressed size, in bytes, of the entries
            stored in one part archive. Defaults to ``CHUNK_SIZE``.

    Returns:
        Path of the bundle archive, located in a freshly created temporary
        directory, or ``None`` when ``input_zip`` is falsy.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        zipfile.BadZipFile: If ``input_zip`` is not a valid ZIP archive.
    """
    if not input_zip:
        return None
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    base_name = os.path.splitext(os.path.basename(input_zip))[0]

    with tempfile.TemporaryDirectory() as temp_dir:
        parts_dir = os.path.join(temp_dir, "parts")
        os.makedirs(parts_dir, exist_ok=True)

        # Every file created in parts_dir is registered here so that neither
        # two standalone files nor a standalone file and a part archive can
        # silently overwrite each other.
        taken = set()
        part_num = 0
        current_size = 0
        current_zip = None

        try:
            with zipfile.ZipFile(input_zip, "r") as zin:
                for info in zin.infolist():
                    file_size = info.file_size

                    # Case 1: oversized entry -> standalone, uncompressed file.
                    if file_size > chunk_size:
                        # basename() drops the directory part, so a crafted
                        # entry name cannot escape parts_dir.
                        filename = _claim_name(
                            os.path.basename(info.filename), taken
                        )
                        standalone_path = os.path.join(parts_dir, filename)
                        with zin.open(info, "r") as src, open(standalone_path, "wb") as dst:
                            shutil.copyfileobj(src, dst)
                        continue

                    # Case 2: entry goes into a part archive. Open the first
                    # part lazily, and roll over once the budget would be
                    # exceeded by a part that already holds data.
                    part_is_full = (
                        current_size > 0 and current_size + file_size > chunk_size
                    )
                    if current_zip is None or part_is_full:
                        if current_zip is not None:
                            current_zip.close()
                            current_zip = None
                        part_num += 1
                        part_name = _claim_name(
                            f"{base_name}_part{part_num}.zip", taken
                        )
                        current_zip = zipfile.ZipFile(
                            os.path.join(parts_dir, part_name),
                            "w",
                            compression=zipfile.ZIP_DEFLATED,
                        )
                        current_size = 0

                    # Read through the ZipInfo object (not the name) so that
                    # archives containing duplicate entry names copy the
                    # right member. The payload is at most chunk_size bytes.
                    data = zin.read(info)
                    current_zip.writestr(info.filename, data)
                    current_size += file_size
        finally:
            # Close the open part even when reading the input fails.
            if current_zip is not None:
                current_zip.close()

        # Bundle all parts + standalone files into one final zip. It lives in
        # its own temporary directory so concurrent calls (or two uploads with
        # the same file name) never overwrite each other's result.
        out_dir = tempfile.mkdtemp(prefix="zip_split_")
        final_zip_path = os.path.join(out_dir, f"{base_name}_split_output.zip")
        with zipfile.ZipFile(final_zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zout:
            # Sorted for a deterministic entry order in the bundle.
            for name in sorted(os.listdir(parts_dir)):
                zout.write(os.path.join(parts_dir, name), arcname=name)

        return final_zip_path
56
+
57
+
58
# Gradio front end: an upload widget, a download widget and a button that
# feeds the uploaded path to split_zip_streaming and shows the returned file.
# NOTE(review): layout nesting is reconstructed; the button is placed below
# the row of file widgets -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(
        value="# 📦 Large ZIP Splitter (Streaming)\nUpload a large `.zip` file and get back a single archive containing ~20MB zips and any oversized files as standalone."
    )

    with gr.Row():
        # type="filepath" makes the callback receive a path string.
        inp = gr.File(type="filepath", label="Upload ZIP file")
        out = gr.File(label="Download Split Archive")

    btn = gr.Button(value="Process & Split")
    btn.click(split_zip_streaming, inputs=inp, outputs=out)


# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()