Spaces:

akazemian
/

audio-library

Sleeping

App Files Files Community

akazemian committed on Sep 19

Commit

6282cbe

verified ·

1 Parent(s): b3908ac

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +266 -61

app.py CHANGED Viewed

@@ -13,15 +13,16 @@ INDEX_FILENAME  = "index.csv"
 DB_PATH = "library.csv"
 ALLOWED_EXTS = {".html"}
-# Columns in DB (no model_name)
-EXTRA_COLS = ["category", "dataset"]
 BASE_COLS  = ["id","filename","path","tags","keywords","notes","uploaded_at"]
 ALL_DB_COLS = BASE_COLS + EXTRA_COLS
-# Columns shown in the table (no model_name)
 TABLE_COLS = ["id","filename","category","dataset",
               "tags","keywords","notes","uploaded_at"]
 # ---------- DB helpers ----------
 def _load_db() -> pd.DataFrame:
     if os.path.exists(DB_PATH):
@@ -29,11 +30,19 @@ def _load_db() -> pd.DataFrame:
         for c in ALL_DB_COLS:
             if c not in df.columns:
                 df[c] = ""
-        for c in ["tags","keywords","notes","category","dataset"]:
             df[c] = df[c].fillna("").astype(str)
         return df[ALL_DB_COLS]
     return pd.DataFrame(columns=ALL_DB_COLS)
 def _save_db(df: pd.DataFrame):
     """Write *df* to the CSV file at ``DB_PATH`` without the index column."""
     df.to_csv(path_or_buf=DB_PATH, index=False)
@@ -81,52 +90,114 @@ def _load_hf_index() -> pd.DataFrame:
 # ---------- Sync by model (prefix inside HF dataset) ----------
 def sync_model(model_name: str):
     """
     Load index.csv from HF, add rows for the selected model (by relpath prefix),
     store HF URIs in DB, and show only that model's files.

     Args:
         model_name: Folder prefix of the model inside the HF dataset. Surrounding
             whitespace is stripped; ``None`` is treated as an empty name.

     Returns:
         The tuple produced by ``refresh_view`` with the active model name
         appended. Early exits (empty name, index download failure, no matching
         files) show a ``gr.Info`` toast and return cleared values in a tuple
         meant to have that same length.
     """
     def _abort(message):
         # Show the toast, then hand back one value per output slot. The original
         # early exits returned one value fewer than the success path.
         # Assumes refresh_view yields five values, as its UI wiring suggests;
         # the trailing "" resets the active model.
         gr.Info(message)
         return None, None, None, None, "", ""
     model_name = (model_name or "").strip()
     if not model_name:
         return _abort("Please enter a model name.")
     try:
         idx = _load_hf_index()
     except Exception as e:
         traceback.print_exc()
         return _abort(f"Failed to load index from HF: {e}")
     # rows like "{model_name}/.../file.html"
     subset = idx[idx["relpath"].str.startswith(model_name + "/")]
     if subset.empty:
         return _abort(f"No HTML files found for model '{model_name}' on {HF_DATASET_REPO}")
     df = _load_db()
     now = datetime.datetime.now().isoformat(timespec="seconds")
     # Build the lookup once instead of scanning the whole column per index row.
     known_paths = set(df["path"])
     new_rows = []
     for _, r in subset.iterrows():
         hub_uri = f"hf://{HF_DATASET_REPO}/{r['relpath']}"
         if hub_uri in known_paths:
             continue
         known_paths.add(hub_uri)  # also skips a relpath repeated in the index
         new_rows.append({
             "id": r["id"] if r["id"] else uuid.uuid4().hex[:8],
             "filename": r["filename"],
             "path": hub_uri,                        # store HF URI
             "tags": r["tags"],
             "keywords": r["keywords"],
             "notes": r["notes"],
             "uploaded_at": r["uploaded_at"] or now,
             "category": r["category"],
             "dataset": r["dataset"],
         })
     if new_rows:
         df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
         _save_db(df)
     current_model = model_name  # remember which model prefix is active
     return refresh_view("", [], "", "", current_model) + (current_model,)
 # ---------- Search / filters ----------
 def refresh_view(query, tag_filters, category_filter, dataset_filter, current_model):
@@ -184,30 +255,14 @@ def refresh_view(query, tag_filters, category_filter, dataset_filter, current_mo
 def _iframe_from_html_string(raw_html: str, height_px: int = 720) -> str:
     srcdoc = raw_html.replace("&", "&amp;").replace('"', "&quot;")
     return f'<iframe style="width:100%;height:{height_px}px;border:1px solid #ddd;border-radius:8px;" srcdoc="{srcdoc}"></iframe>'
-def select_row(evt: gr.SelectData, table_value):
     try:
         view = _df_from_table_value(table_value)
         if view.empty:
             return "<em>No rows.</em>", ""
-        # resolve row
-        row_idx = None
-        ix = getattr(evt, "index", None)
-        if isinstance(ix, int):
-            row_idx = ix
-        elif isinstance(ix, (list, tuple)) and ix and isinstance(ix[0], int):
-            row_idx = ix[0]
-        if row_idx is None:
-            val = getattr(evt, "value", None)
-            if isinstance(val, dict) and "id" in val:
-                hits = view.index[view["id"] == val["id"]].tolist()
-                if hits: row_idx = hits[0]
-            elif isinstance(val, list) and len(val) >= 1:
-                hits = view.index[view["id"] == val[0]].tolist()
-                if hits: row_idx = hits[0]
-        if row_idx is None or not (0 <= row_idx < len(view)):
-            return "<em>Invalid selection.</em>", ""
         row = view.iloc[row_idx]
         sel_id = row["id"]
@@ -216,31 +271,93 @@ def select_row(evt: gr.SelectData, table_value):
         if rec.empty:
             return "<em>Could not find file for this row.</em>", ""
-        path_str = rec["path"].values[0]
-        # Hub-backed path → lazy download
-        if str(path_str).startswith("hf://"):
             _, rest = path_str.split("hf://", 1)
             repo_id, relpath = rest.split("/", 1)
             local_path = hf_hub_download(repo_id=repo_id, repo_type="dataset", filename=relpath)
             raw_html = Path(local_path).read_text(encoding="utf-8")
-        elif str(path_str).startswith("http"):
-            # if you ever swap to CDN URLs, iframe the URL directly
             iframe = f'<iframe style="width:100%;height:720px;border:1px solid #ddd;border-radius:8px;" src="{_py_html.escape(path_str)}"></iframe>'
             return iframe, f"📄 {row['filename']}"
-        else:
-            # local file fallback (not used for HF flow, kept for compatibility)
-            p = Path(path_str)
-            if not p.exists():
-                return f"<em>File not found:</em> <code>{_py_html.escape(str(p))}</code>", f"📄 {row['filename']}"
-            raw_html = p.read_text(encoding="utf-8")
         iframe = _iframe_from_html_string(raw_html, height_px=720)
         return iframe, f"📄 {row['filename']}"
     except Exception as e:
         traceback.print_exc()
         return f"<pre>Failed to render (see terminal):\n{_py_html.escape(str(e))}</pre>", ""
 # ---------- Save edits ----------
 def save_edits(edited_table, current_model):
     if edited_table is None or not len(edited_table):
@@ -298,9 +415,82 @@ custom_css = """
 }
 """
 with gr.Blocks(title="Audio HTML Library", css=custom_css) as demo:
     gr.Markdown("## 🎧 Audio Reconstruction Reports — sync • search • view")
     current_model = gr.State("")  # remembers active model prefix inside HF repo
     with gr.Row():
         with gr.Column(scale=1):
@@ -315,6 +505,15 @@ with gr.Blocks(title="Audio HTML Library", css=custom_css) as demo:
             tag_filter = gr.CheckboxGroup(choices=[], label="Filter by tags (AND)")
             category_filter = gr.Dropdown(choices=[], label="Category")
             dataset_filter  = gr.Dropdown(choices=[], label="Dataset")
             refresh_btn = gr.Button("Refresh", elem_id="refresh-btn")
         with gr.Column(scale=2):
@@ -348,14 +547,20 @@ with gr.Blocks(title="Audio HTML Library", css=custom_css) as demo:
         [table, tag_filter, category_filter, dataset_filter, count_md]
     )
-    for comp in (query, tag_filter, category_filter, dataset_filter):
         comp.change(
             refresh_view,
             [query, tag_filter, category_filter, dataset_filter, current_model],
             [table, tag_filter, category_filter, dataset_filter, count_md]
         )
-    table.select(select_row, [table], [preview_html, preview_label])
     save_btn.click(save_edits, [table, current_model], [table])
     # initial load (no model yet)

 DB_PATH = "library.csv"
 ALLOWED_EXTS = {".html"}
+# Columns in DB
+EXTRA_COLS = ["category", "dataset", "hf_path"]   # <-- add hf_path here
 BASE_COLS  = ["id","filename","path","tags","keywords","notes","uploaded_at"]
 ALL_DB_COLS = BASE_COLS + EXTRA_COLS
+# Columns shown in the table (don't show hf_path)
 TABLE_COLS = ["id","filename","category","dataset",
               "tags","keywords","notes","uploaded_at"]
 # ---------- DB helpers ----------
 def _load_db() -> pd.DataFrame:
     if os.path.exists(DB_PATH):
         for c in ALL_DB_COLS:
             if c not in df.columns:
                 df[c] = ""
+        for c in ["tags","keywords","notes","category","dataset","hf_path","path","filename","id","uploaded_at"]:
             df[c] = df[c].fillna("").astype(str)
         return df[ALL_DB_COLS]
     return pd.DataFrame(columns=ALL_DB_COLS)
+def _load_hf_index() -> pd.DataFrame:
+    p = hf_hub_download(repo_id=HF_DATASET_REPO, repo_type="dataset", filename=INDEX_FILENAME)
+    df = pd.read_csv(p)
+    for c in ["id","filename","relpath","category","dataset","tags","keywords","notes","uploaded_at"]:
+        if c not in df.columns:
+            df[c] = ""
+    return df.fillna("")
 def _save_db(df: pd.DataFrame):
     """Write *df* to the CSV file at ``DB_PATH`` without the index column."""
     df.to_csv(path_or_buf=DB_PATH, index=False)
 # ---------- Sync by model (prefix inside HF dataset) ----------
 def sync_model(model_name: str):
     """
     Sync the local DB with the HF index for one model and show that model's files.

     Steps:
       1. Load the HF index and keep rows whose ``relpath`` starts with
          ``"<model_name>/"``.
       2. Backfill ``hf_path`` on local DB rows whose filename appears in that
          subset (filenames are assumed unique per model).
       3. Append index rows that the local DB does not know yet, then save.

     Args:
         model_name: Folder prefix of the model inside the HF dataset. Surrounding
             whitespace is stripped; ``None`` is treated as an empty name.

     Returns:
         The tuple produced by ``refresh_view`` followed by the active model name
         and the source mode ``"HF"``. Early exits (empty name, index download
         failure, no matching files) show a ``gr.Info`` toast and return cleared
         values in a tuple meant to have that same length.
     """
     def _abort(message):
         # Show the toast, then hand back one value per output slot. The original
         # early exits returned one value fewer than the success path.
         # Assumes refresh_view yields five values, as its UI wiring suggests;
         # the last two slots are the active model and the source mode.
         gr.Info(message)
         return None, None, None, None, "", "", "HF"
     model_name = (model_name or "").strip()
     if not model_name:
         return _abort("Please enter a model name.")
     # 1) read index from HF and filter to this model prefix
     try:
         idx = _load_hf_index()
     except Exception as e:
         traceback.print_exc()
         return _abort(f"Failed to load index from HF: {e}")
     sub = idx[idx["relpath"].astype(str).str.startswith(f"{model_name}/")]
     if sub.empty:
         return _abort(f"No HTML files found for model '{model_name}' on {HF_DATASET_REPO}")
     # 2) load local DB, backfill hf_path for existing rows of this model
     db = _load_db()
     if not db.empty:
         # Build map: filename -> relpath (works if filenames are unique per model)
         rel_by_fname = dict(zip(sub["filename"].astype(str), sub["relpath"].astype(str)))
         mask_model_rows = db["filename"].isin(list(rel_by_fname))
         db.loc[mask_model_rows, "hf_path"] = db.loc[mask_model_rows, "filename"].map(
             lambda fn: f"hf://{HF_DATASET_REPO}/{rel_by_fname.get(fn, fn)}"
         )
     # 3) add any missing rows from HF index
     now = datetime.datetime.now().isoformat(timespec="seconds")
     # Build both lookups once instead of filtering the DataFrame per index row.
     existing_hf = set(db["hf_path"].astype(str))
     existing_fnames = set(db["filename"])
     new_rows = []
     for _, r in sub.iterrows():
         hf_uri = f"hf://{HF_DATASET_REPO}/{r['relpath']}"
         # Already known by HF URI, or by filename (hf_path was backfilled above).
         if hf_uri in existing_hf or r["filename"] in existing_fnames:
             continue
         existing_hf.add(hf_uri)  # also skips a relpath repeated in the index
         new_rows.append({
             "id": (r["id"] or uuid.uuid4().hex[:8]),
             "filename": r["filename"],
             "path": "",                         # no local path known
             "hf_path": hf_uri,
             "tags": r["tags"],
             "keywords": r["keywords"],
             "notes": r["notes"],
             "uploaded_at": r["uploaded_at"] or now,
             "category": r["category"],
             "dataset": r["dataset"],
         })
     if new_rows:
         db = pd.concat([db, pd.DataFrame(new_rows)], ignore_index=True)
     # Always write back: hf_path may have changed on existing rows even when
     # nothing new was appended.
     _save_db(db)
     current_model = model_name
     return refresh_view("", [], "", "", current_model) + (current_model, "HF")
+# def sync_model(model_name: str):
+#     """
+#     Load index.csv from HF, add rows for the selected model (by relpath prefix),
+#     store HF URIs in DB, and show only that model’s files.
+#     """
+#     model_name = (model_name or "").strip()
+#     if not model_name:
+#         return gr.Info("Please enter a model name."), None, None, None, ""
+#     try:
+#         idx = _load_hf_index()
+    # except Exception as e:
+    #     traceback.print_exc()
+    #     return gr.Info(f"Failed to load index from HF: {e}"), None, None, None, ""
+    # # rows like "{model_name}/.../file.html"
+    # subset = idx[idx["relpath"].str.startswith(model_name + "/")]
+    # if subset.empty:
+    #     return gr.Info(f"No HTML files found for model '{model_name}' on {HF_DATASET_REPO}"), None, None, None, ""
+    # df = _load_db()
+    # now = datetime.datetime.now().isoformat(timespec="seconds")
+    # new_rows = []
+    # for _, r in subset.iterrows():
+    #     relpath = r["relpath"]
+    #     hub_uri = f"hf://{HF_DATASET_REPO}/{relpath}"
+    #     if (df["path"] == hub_uri).any():
+    #         continue
+    #     new_rows.append({
+    #         "id": r["id"] if r["id"] else uuid.uuid4().hex[:8],
+    #         "filename": r["filename"],
+    #         "path": hub_uri,                        # store HF URI
+    #         "tags": r["tags"],
+    #         "keywords": r["keywords"],
+    #         "notes": r["notes"],
+    #         "uploaded_at": r["uploaded_at"] or now,
+    #         "category": r["category"],
+    #         "dataset": r["dataset"]
+    #     })
+    # if new_rows:
+    #     df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
+    #     _save_db(df)
+    # current_model = model_name  # remember which model prefix is active
+    # return refresh_view("", [], "", "", current_model) + (current_model,)
 # ---------- Search / filters ----------
 def refresh_view(query, tag_filters, category_filter, dataset_filter, current_model):
 def _iframe_from_html_string(raw_html: str, height_px: int = 720) -> str:
     srcdoc = raw_html.replace("&", "&amp;").replace('"', "&quot;")
     return f'<iframe style="width:100%;height:{height_px}px;border:1px solid #ddd;border-radius:8px;" srcdoc="{srcdoc}"></iframe>'
+def select_row(evt: gr.SelectData, table_value, source_mode):
     try:
         view = _df_from_table_value(table_value)
         if view.empty:
             return "<em>No rows.</em>", ""
+        # resolve row_idx ... (unchanged)
+        # ...
         row = view.iloc[row_idx]
         sel_id = row["id"]
         if rec.empty:
             return "<em>Could not find file for this row.</em>", ""
+        # pick which column to use
+        use_hf = (str(source_mode).upper() == "HF")
+        path_str = rec["hf_path"].values[0] if use_hf else rec["path"].values[0]
+        path_str = str(path_str or "")
+        if not path_str:
+            return "<em>No path available for this source.</em>", f"📄 {row['filename']}"
+        if path_str.startswith("hf://"):
             _, rest = path_str.split("hf://", 1)
             repo_id, relpath = rest.split("/", 1)
             local_path = hf_hub_download(repo_id=repo_id, repo_type="dataset", filename=relpath)
             raw_html = Path(local_path).read_text(encoding="utf-8")
+            iframe = _iframe_from_html_string(raw_html, height_px=720)
+            return iframe, f"📄 {row['filename']}"
+        if path_str.startswith("http"):
             iframe = f'<iframe style="width:100%;height:720px;border:1px solid #ddd;border-radius:8px;" src="{_py_html.escape(path_str)}"></iframe>'
             return iframe, f"📄 {row['filename']}"
+        # local fallback
+        p = Path(path_str)
+        if not p.exists():
+            return f"<em>File not found:</em> <code>{_py_html.escape(str(p))}</code>", f"📄 {row['filename']}"
+        raw_html = p.read_text(encoding="utf-8")
         iframe = _iframe_from_html_string(raw_html, height_px=720)
         return iframe, f"📄 {row['filename']}"
     except Exception as e:
         traceback.print_exc()
         return f"<pre>Failed to render (see terminal):\n{_py_html.escape(str(e))}</pre>", ""
+# def select_row(evt: gr.SelectData, table_value):
+#     try:
+#         view = _df_from_table_value(table_value)
+#         if view.empty:
+#             return "<em>No rows.</em>", ""
+#         # resolve row
+#         row_idx = None
+#         ix = getattr(evt, "index", None)
+#         if isinstance(ix, int):
+#             row_idx = ix
+#         elif isinstance(ix, (list, tuple)) and ix and isinstance(ix[0], int):
+#             row_idx = ix[0]
+#         if row_idx is None:
+#             val = getattr(evt, "value", None)
+#             if isinstance(val, dict) and "id" in val:
+#                 hits = view.index[view["id"] == val["id"]].tolist()
+#                 if hits: row_idx = hits[0]
+#             elif isinstance(val, list) and len(val) >= 1:
+#                 hits = view.index[view["id"] == val[0]].tolist()
+#                 if hits: row_idx = hits[0]
+#         if row_idx is None or not (0 <= row_idx < len(view)):
+#             return "<em>Invalid selection.</em>", ""
+#         row = view.iloc[row_idx]
+#         sel_id = row["id"]
+#         db = _load_db()
+#         rec = db[db["id"] == sel_id]
+#         if rec.empty:
+#             return "<em>Could not find file for this row.</em>", ""
+#         path_str = rec["path"].values[0]
+#         # Hub-backed path → lazy download
+#         if str(path_str).startswith("hf://"):
+#             _, rest = path_str.split("hf://", 1)
+#             repo_id, relpath = rest.split("/", 1)
+#             local_path = hf_hub_download(repo_id=repo_id, repo_type="dataset", filename=relpath)
+#             raw_html = Path(local_path).read_text(encoding="utf-8")
+#         elif str(path_str).startswith("http"):
+#             # if you ever swap to CDN URLs, iframe the URL directly
+#             iframe = f'<iframe style="width:100%;height:720px;border:1px solid #ddd;border-radius:8px;" src="{_py_html.escape(path_str)}"></iframe>'
+#             return iframe, f"📄 {row['filename']}"
+#         else:
+#             # local file fallback (not used for HF flow, kept for compatibility)
+#             p = Path(path_str)
+#             if not p.exists():
+#                 return f"<em>File not found:</em> <code>{_py_html.escape(str(p))}</code>", f"📄 {row['filename']}"
+#             raw_html = p.read_text(encoding="utf-8")
+#         iframe = _iframe_from_html_string(raw_html, height_px=720)
+#         return iframe, f"📄 {row['filename']}"
+#     except Exception as e:
+#         traceback.print_exc()
+#         return f"<pre>Failed to render (see terminal):\n{_py_html.escape(str(e))}</pre>", ""
 # ---------- Save edits ----------
 def save_edits(edited_table, current_model):
     if edited_table is None or not len(edited_table):
 }
 """
+# with gr.Blocks(title="Audio HTML Library", css=custom_css) as demo:
+#     gr.Markdown("## 🎧 Audio Reconstruction Reports — sync • search • view")
+#     current_model = gr.State("")  # remembers active model prefix inside HF repo
+#     source_mode = gr.State("HF")  # default
+#     with gr.Row():
+#         with gr.Column(scale=1):
+#             # Choose model & sync
+#             gr.Markdown(f"**Model prefix on HF dataset:** `{HF_DATASET_REPO}/<model_name>/...`")
+#             model_in = gr.Textbox(label="Model name", placeholder="e.g., WavCochV8192")
+#             sync_btn = gr.Button("Sync this model", elem_id="sync-btn")
+#             # Search & filters
+#             gr.Markdown("---\n**Search & filter**")
+#             query = gr.Textbox(label="Keyword search (filename/tags/notes/category/dataset)", placeholder="type to search…")
+#             tag_filter = gr.CheckboxGroup(choices=[], label="Filter by tags (AND)")
+#             category_filter = gr.Dropdown(choices=[], label="Category")
+#             dataset_filter  = gr.Dropdown(choices=[], label="Dataset")
+#             refresh_btn = gr.Button("Refresh", elem_id="refresh-btn")
+#         with gr.Column(scale=2):
+#             # Count of current view
+#             count_md = gr.Markdown("**Showing 0 file(s)**")
+#             gr.Markdown("**Library** (click a row to preview; edit cells and Save)")
+#             table = gr.Dataframe(
+#                 headers=TABLE_COLS,
+#                 datatype=["str"] * len(TABLE_COLS),
+#                 interactive=True,
+#                 wrap=True,
+#                 row_count=(0, "dynamic"),
+#                 col_count=(len(TABLE_COLS), "fixed")
+#             )
+#             with gr.Row():
+#                 save_btn = gr.Button("Save Edits", elem_id="save-btn")
+#             preview_label = gr.Markdown("")
+#             preview_html = gr.HTML("")
+#     # wiring: sync (also sets current_model)
+#     sync_btn.click(
+#         sync_model,
+#         [model_in],
+#         [table, tag_filter, category_filter, dataset_filter, count_md, current_model]
+#     )
+#     # wiring: refresh + live filters (respect current_model)
+#     refresh_btn.click(
+#         refresh_view,
+#         [query, tag_filter, category_filter, dataset_filter, current_model],
+#         [table, tag_filter, category_filter, dataset_filter, count_md]
+#     )
+#     for comp in (query, tag_filter, category_filter, dataset_filter):
+#         comp.change(
+#             refresh_view,
+#             [query, tag_filter, category_filter, dataset_filter, current_model],
+#             [table, tag_filter, category_filter, dataset_filter, count_md]
+#         )
+#     table.select(select_row, [table], [preview_html, preview_label])
+#     save_btn.click(save_edits, [table, current_model], [table])
+#     # initial load (no model yet)
+#     demo.load(
+#         refresh_view,
+#         [query, tag_filter, category_filter, dataset_filter, current_model],
+#         [table, tag_filter, category_filter, dataset_filter, count_md]
+#     )
+# if __name__ == "__main__":
+#     demo.launch(share=True)  # auth optional
 with gr.Blocks(title="Audio HTML Library", css=custom_css) as demo:
     gr.Markdown("## 🎧 Audio Reconstruction Reports — sync • search • view")
     current_model = gr.State("")  # remembers active model prefix inside HF repo
+    source_mode = gr.State("HF")  # default
     with gr.Row():
         with gr.Column(scale=1):
             tag_filter = gr.CheckboxGroup(choices=[], label="Filter by tags (AND)")
             category_filter = gr.Dropdown(choices=[], label="Category")
             dataset_filter  = gr.Dropdown(choices=[], label="Dataset")
+            # 🔽 Step 5: Source toggle (HF vs Local)
+            mode_radio = gr.Radio(
+                choices=["HF", "Local"],
+                value="HF",
+                label="Source",
+                info="Preview from HF dataset or local disk"
+            )
             refresh_btn = gr.Button("Refresh", elem_id="refresh-btn")
         with gr.Column(scale=2):
         [table, tag_filter, category_filter, dataset_filter, count_md]
     )
+    # Trigger refresh when any filter OR source mode changes
+    for comp in (query, tag_filter, category_filter, dataset_filter, mode_radio):
         comp.change(
             refresh_view,
             [query, tag_filter, category_filter, dataset_filter, current_model],
             [table, tag_filter, category_filter, dataset_filter, count_md]
         )
+    # Keep source_mode state in sync with the radio
+    mode_radio.change(lambda x: x, [mode_radio], [source_mode])
+    # Pass source_mode into select_row so it can choose hf_path vs path
+    table.select(select_row, [table, source_mode], [preview_html, preview_label])
     save_btn.click(save_edits, [table, current_model], [table])
     # initial load (no model yet)