# zip-splitter / app.py
# saneowl's picture
# Create app.py
# 0753607 verified
import gradio as gr
import os
import zipfile
import tempfile
import shutil
CHUNK_SIZE = 20 * 1024 * 1024  # 20 MB: default uncompressed payload per part


def _reserve_name(name, used_names):
    """Return a file name derived from *name* that is not in *used_names*.

    The chosen name is recorded in *used_names* (case-folded, so names that
    differ only by case are also treated as clashes on case-insensitive
    filesystems). On a clash, ``_1``, ``_2``, ... is inserted before the
    extension.
    """
    stem, ext = os.path.splitext(name)
    candidate = name
    suffix = 0
    while candidate.casefold() in used_names:
        suffix += 1
        candidate = f"{stem}_{suffix}{ext}"
    used_names.add(candidate.casefold())
    return candidate


def split_zip_streaming(input_zip, chunk_size=CHUNK_SIZE):
    """Split a ZIP archive into size-limited part archives, bundled as one ZIP.

    Members are processed in archive order:

    * A member whose uncompressed size exceeds ``chunk_size`` is extracted
      as a standalone, uncompressed file named after its basename (a numeric
      suffix is added if that name is already taken).
    * Every other member is copied into ``<base>_partN.zip``. A new part is
      started when adding the member would push the current part's total
      uncompressed size past ``chunk_size``.

    All parts and standalone files are then written into a single archive
    named ``<base>_split_output.zip`` in the current working directory.

    Args:
        input_zip: Path to the ZIP file to split.
        chunk_size: Maximum total uncompressed size, in bytes, of the members
            stored in one part. Defaults to ``CHUNK_SIZE``.

    Returns:
        The path of the bundled output archive (relative to the current
        working directory).

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        zipfile.BadZipFile: If ``input_zip`` is not a valid ZIP archive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")

    base_name = os.path.splitext(os.path.basename(input_zip))[0]
    final_zip_path = f"{base_name}_split_output.zip"

    with tempfile.TemporaryDirectory() as temp_dir:
        parts_dir = os.path.join(temp_dir, "parts")
        os.makedirs(parts_dir, exist_ok=True)

        used_names = set()   # every file name already created in parts_dir
        part_num = 0
        current_size = 0
        current_zip = None   # opened lazily so no empty part is ever produced

        try:
            with zipfile.ZipFile(input_zip, "r") as zin:
                for info in zin.infolist():
                    file_size = info.file_size

                    # Case 1: oversized member -> standalone, unzipped file.
                    if file_size > chunk_size:
                        flat_name = os.path.basename(info.filename)
                        if flat_name in ("", ".", ".."):
                            # Not usable as a file name inside parts_dir.
                            flat_name = "unnamed"
                        flat_name = _reserve_name(flat_name, used_names)
                        standalone_path = os.path.join(parts_dir, flat_name)
                        with zin.open(info, "r") as src, \
                                open(standalone_path, "wb") as dst:
                            shutil.copyfileobj(src, dst)
                        continue

                    # Case 2: member goes into a part archive. Roll over to a
                    # new part when this member would overflow a non-empty one.
                    if (current_zip is not None and current_size > 0
                            and current_size + file_size > chunk_size):
                        current_zip.close()
                        current_zip = None

                    if current_zip is None:
                        part_num += 1
                        current_size = 0
                        part_name = _reserve_name(
                            f"{base_name}_part{part_num}.zip", used_names
                        )
                        current_zip = zipfile.ZipFile(
                            os.path.join(parts_dir, part_name),
                            "w",
                            compression=zipfile.ZIP_DEFLATED,
                        )

                    # Read via the ZipInfo (not the name) so archives with
                    # duplicate member names copy the right entry. The member
                    # is held in memory, which is bounded by chunk_size.
                    current_zip.writestr(info.filename, zin.read(info))
                    current_size += file_size
        finally:
            # Close the open part even if reading the input failed midway.
            if current_zip is not None:
                current_zip.close()

        # Bundle all parts + standalone files into one final zip. Sorted so
        # the entry order does not depend on the filesystem's listing order.
        with zipfile.ZipFile(
            final_zip_path, "w", compression=zipfile.ZIP_DEFLATED
        ) as zout:
            for entry in sorted(os.listdir(parts_dir)):
                zout.write(os.path.join(parts_dir, entry), arcname=entry)

    return final_zip_path


# Gradio UI: one upload field, one download field, and a button that runs
# split_zip_streaming on the uploaded file's path.
with gr.Blocks() as demo:
    gr.Markdown("# 📦 Large ZIP Splitter (Streaming)\nUpload a large `.zip` file and get back a single archive containing ~20MB zips and any oversized files as standalone.")
    with gr.Row():
        # type="filepath" hands the handler a path string rather than a file object.
        inp = gr.File(label="Upload ZIP file", type="filepath")
        out = gr.File(label="Download Split Archive")
    btn = gr.Button("Process & Split")
    btn.click(fn=split_zip_streaming, inputs=inp, outputs=out)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()