"""Gradio app that splits an uploaded ZIP archive into ~20 MB ZIP parts.

Entries whose uncompressed size exceeds ``CHUNK_SIZE`` are extracted as
standalone files; everything else is repacked into numbered part archives.
All results are bundled into a single downloadable ZIP.
"""

import os
import shutil
import tempfile
import zipfile

import gradio as gr

CHUNK_SIZE = 20 * 1024 * 1024  # 20 MB (measured on uncompressed entry sizes)


def _unique_name(name, used):
    """Return *name*, suffixed with ``_<n>`` if necessary, and record it.

    Args:
        name: Desired file name (no directory component).
        used: Set of case-folded names already taken; updated in place.

    Returns:
        A file name whose case-folded form was not previously in *used*.
    """
    stem, ext = os.path.splitext(name)
    candidate = name
    counter = 1
    # Compare case-folded so names cannot collide on case-insensitive
    # file systems either.
    while candidate.casefold() in used:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    used.add(candidate.casefold())
    return candidate


def split_zip_streaming(input_zip):
    """Split *input_zip* into part archives and bundle them into one ZIP.

    Entries are processed in archive order:

    * An entry larger than ``CHUNK_SIZE`` (uncompressed) is extracted as a
      standalone file named after its base name (de-duplicated on clash).
    * Any other entry is added to the current ``<base>_part<N>.zip``; a new
      part is started when adding the entry would push the part's total
      uncompressed size past ``CHUNK_SIZE``.

    Args:
        input_zip: Filesystem path of the uploaded ZIP archive.

    Returns:
        Path to ``<base>_split_output.zip`` inside a freshly created
        temporary directory.

    Raises:
        gr.Error: If no file was supplied or the file is not a readable
            ZIP archive.
    """
    if not input_zip:
        raise gr.Error("Please upload a ZIP file first.")

    base_name = os.path.splitext(os.path.basename(input_zip))[0]

    with tempfile.TemporaryDirectory() as temp_dir:
        parts_dir = os.path.join(temp_dir, "parts")
        os.makedirs(parts_dir, exist_ok=True)

        used_names = set()   # case-folded names already present in parts_dir
        part_names = set()   # names of the part zips created below
        part_num = 0
        current_size = 0
        current_zip = None   # opened lazily so no empty part is ever created

        try:
            with zipfile.ZipFile(input_zip, "r") as zin:
                for info in zin.infolist():
                    file_size = info.file_size

                    # Case 1: big file -> standalone, unzipped.
                    if file_size > CHUNK_SIZE:
                        # Only the base name is used, so entries cannot
                        # escape parts_dir; _unique_name keeps same-named
                        # files from overwriting each other.
                        filename = _unique_name(
                            os.path.basename(info.filename) or "unnamed",
                            used_names,
                        )
                        standalone_path = os.path.join(parts_dir, filename)
                        with zin.open(info, "r") as src, \
                                open(standalone_path, "wb") as dst:
                            shutil.copyfileobj(src, dst)
                        continue

                    # Case 2: fits into a zip part. Start a new part when
                    # none is open yet or the current one would overflow.
                    overflow = (
                        current_size > 0
                        and current_size + file_size > CHUNK_SIZE
                    )
                    if current_zip is None or overflow:
                        if current_zip is not None:
                            current_zip.close()
                            current_zip = None
                        part_num += 1
                        current_size = 0
                        part_name = _unique_name(
                            f"{base_name}_part{part_num}.zip", used_names
                        )
                        part_names.add(part_name)
                        current_zip = zipfile.ZipFile(
                            os.path.join(parts_dir, part_name),
                            "w",
                            compression=zipfile.ZIP_DEFLATED,
                        )

                    # Read via the ZipInfo object (not the name) so that
                    # duplicate names in the archive each yield their own
                    # data. The entry is at most CHUNK_SIZE bytes.
                    current_zip.writestr(info.filename, zin.read(info))
                    current_size += file_size
        except zipfile.BadZipFile as err:
            raise gr.Error(
                "The uploaded file is not a valid ZIP archive."
            ) from err
        finally:
            # Always release the open part, even when an entry fails.
            if current_zip is not None:
                current_zip.close()

        # Bundle all parts + standalone files into one final zip. The
        # result lives in its own temp directory (outside temp_dir, which
        # is deleted on exit) rather than the working directory, so
        # simultaneous requests with the same upload name cannot clobber
        # each other.
        output_dir = tempfile.mkdtemp(prefix="zip_split_")
        final_zip_path = os.path.join(
            output_dir, f"{base_name}_split_output.zip"
        )
        try:
            with zipfile.ZipFile(
                final_zip_path, "w", compression=zipfile.ZIP_DEFLATED
            ) as zout:
                for name in sorted(os.listdir(parts_dir)):
                    # Part zips are already deflated; store them as-is.
                    compress_type = (
                        zipfile.ZIP_STORED
                        if name in part_names
                        else zipfile.ZIP_DEFLATED
                    )
                    zout.write(
                        os.path.join(parts_dir, name),
                        arcname=name,
                        compress_type=compress_type,
                    )
        except BaseException:
            # Do not leave a half-written bundle behind.
            shutil.rmtree(output_dir, ignore_errors=True)
            raise

    return final_zip_path


with gr.Blocks() as demo:
    gr.Markdown(
        "# 📦 Large ZIP Splitter (Streaming)\n"
        "Upload a large `.zip` file and get back a single archive "
        "containing ~20MB zips and any oversized files as standalone."
    )

    with gr.Row():
        inp = gr.File(label="Upload ZIP file", type="filepath")
        out = gr.File(label="Download Split Archive")

    btn = gr.Button("Process & Split")
    btn.click(fn=split_zip_streaming, inputs=inp, outputs=out)


if __name__ == "__main__":
    demo.launch()