"""
def _oxygen_compare_card(pred, conf, pub):
ok = (pub is not None and pred == pub)
badge = (f'✓ match'
if ok else f'△ mismatch') if pub else ""
pub_block = (
f'
Predicted growth conditions for microbes that have never been cultured. Pick one. Try the medium.
Five thousand candidates from GTDB scored against twenty-four DSMZ media.
""",
unsafe_allow_html=True,
)
# Top-level navigation: one tab per view, unpacked in display order.
_tab_labels = ["Catalog", "Test on a known genome", "Model accuracy"]
tab_catalog, tab_test, tab_about = st.tabs(_tab_labels)
# ──────────────────────────────────────────────────────────────────────
# Tab 1 — Catalog
# ──────────────────────────────────────────────────────────────────────
with tab_catalog:
# Catalog data. Work on a copy so the derived columns added below are never
# written onto the object returned by load_uncultured().
unc_all = load_uncultured().copy()
unc_all["phylum"] = unc_all["gtdb_taxonomy"].map(phylum_from_taxonomy)
# "Truly uncultured" means the NCBI organism name starts with "uncultured"
# (case-insensitive); a missing name counts as not uncultured.
unc_all["truly_uncultured"] = (
    unc_all["ncbi_organism_name"].fillna("").str.lower().str.startswith("uncultured")
)
n_focused = int(unc_all["truly_uncultured"].sum())
n_total = len(unc_all)
# Candidates whose name does not start with "uncultured"; shown in the
# broad-mode label.
n_named = n_total - n_focused

# First visit: start in focused mode with no phenotype filter.
if "mode" not in st.session_state:
    st.session_state["mode"] = "focused"
if "filter" not in st.session_state:
    st.session_state["filter"] = "all"

# Mode strip — two big pills with chrome shift
mode = st.session_state["mode"]
focused = mode == "focused"
mc1, mc2 = st.columns(2, gap="small")
# The counts in both labels come from the loaded catalog rather than from
# literals, so they cannot drift when the data file is regenerated. The
# explanatory suffix is only appended to the currently active pill.
with mc1:
    if st.button(
        f"{n_focused:,} truly never-cultured"
        + (" · NCBI name starts with \"uncultured\"" if focused else ""),
        key="mode_focused",
        type="primary" if focused else "secondary",
        use_container_width=True,
    ):
        st.session_state["mode"] = "focused"
        st.rerun()
with mc2:
    if st.button(
        f"{n_total:,} all candidates"
        + (f" · includes {n_named:,} named-but-absent-from-BacDive" if not focused else ""),
        key="mode_broad",
        type="primary" if not focused else "secondary",
        use_container_width=True,
    ):
        st.session_state["mode"] = "broad"
        st.rerun()
# ──────────────── Predict bar ────────────────
# Two raw-HTML markdown calls (unsafe_allow_html=True) that precede the predict
# inputs; the second one carries the "Predict a medium" heading text.
st.markdown('
', unsafe_allow_html=True)
st.markdown(
f'
Predict a medium
',
unsafe_allow_html=True,
)
# Predict inputs: free-text query, optional sequence upload and the submit
# button, laid out in a 5:2:2 column split. Widgets are created through the
# column objects directly instead of `with` blocks.
pcol1, pcol2, pcol3 = st.columns([5, 2, 2])
query = pcol1.text_input(
    label="predict query",
    label_visibility="collapsed",
    placeholder='Organism name, NCBI accession, or paste FASTA…',
    key="predict_query",
)
upload = pcol2.file_uploader(
    label="upload",
    label_visibility="collapsed",
    type=["fna", "fa", "fasta", "gz"],
    key="predict_upload",
)
submit = pcol3.button("🔎 Predict", type="primary", use_container_width=True)

# One-click examples. Each entry is (button label, widget key, target); a click
# stores the target and raises the "run_predict" flag in session state.
quick = st.columns([1, 1, 1, 6])
quick_picks = (
    ("Try: Thermus thermophilus", "qt_thermus", "Thermus thermophilus"),
    ("Try: E. coli K-12", "qt_ecoli", "GCF_000005845.2"),
    ("Try: B. subtilis 168", "qt_bsub", "GCF_000009045.1"),
)
# zip stops after the three picks, so the wide fourth column stays empty.
for quick_col, (quick_label, quick_key, quick_target) in zip(quick, quick_picks):
    if quick_col.button(quick_label, key=quick_key):
        st.session_state["predict_target"] = quick_target
        st.session_state["run_predict"] = True
# Run prediction if requested
# Resolve `target` for this script pass, in priority order:
#   1. an uploaded file submitted with the Predict button,
#   2. a submitted query that is already an accession,
#   3. a submitted free-text query (NCBI search; hits are stored for the picker),
#   4. a quick-pick button pressed on this pass.
target = None
query_text = query.strip()
if upload is not None and submit:
    upload_dir = ROOT / "data" / "_uploaded"
    upload_dir.mkdir(parents=True, exist_ok=True)
    # The file name is supplied by the client. Keep only its final path
    # component so a name such as "../../x" or "/abs/x" cannot place the
    # file outside the upload directory.
    tmp = upload_dir / (upload_dir / upload.name).name
    tmp.write_bytes(upload.getbuffer())
    target = str(tmp)
elif submit and query_text and is_accession(query_text):
    # Test the stripped text, i.e. the same value that is used as the target.
    target = query_text
elif submit and query_text:
    with st.spinner(f"Searching NCBI for '{query_text}'…"):
        hits = search_ncbi_assembly(query_text, retmax=10)
    if not hits:
        st.warning(f"No NCBI Assembly hits for '{query_text}'.")
    else:
        st.session_state["ncbi_hits"] = hits
elif st.session_state.pop("run_predict", False):
    target = st.session_state.pop("predict_target")
    if not is_accession(target):
        # Quick picks given by organism name resolve to the first NCBI hit;
        # if the search finds nothing the name itself is passed on unchanged.
        with st.spinner(f"Searching NCBI for '{target}'…"):
            hits = search_ncbi_assembly(target, retmax=5)
        if hits:
            target = hits[0]["accession"]
# Candidate picker: shown when a name search has stored hits in session state
# and no target was resolved on this pass.
hits = st.session_state.get("ncbi_hits", [])
if hits and not target:
st.markdown(f'
{len(hits)} NCBI matches
', unsafe_allow_html=True)
# One radio option per hit. The options are list indices, so `choice` can
# index `hits` directly; format_func maps each index to its display label.
labels = [f"{h['accession']} — {h['organism']} · {h['level']}" for h in hits]
choice = st.radio("pick", options=list(range(len(hits))), format_func=lambda i: labels[i],
label_visibility="collapsed", key="ncbi_choice")
# On click the selected accession becomes the target. The stored hit list is
# removed with a default, so a missing key is not an error.
if st.button("Run on selected", type="primary"):
target = hits[choice]["accession"]
st.session_state.pop("ncbi_hits", None)
# Run the model on the resolved target. A SystemExit raised by run_inference is
# shown as an error and ends this script run; a successful result is stored
# under "last_result" in session state.
if target:
    with st.spinner(f"Predicting for {target}…"):
        try:
            result = run_inference(target)
        except SystemExit as exit_err:
            st.error(str(exit_err))
            st.stop()
        else:
            st.session_state["last_result"] = result
# Render the most recent prediction kept in session state, if there is one.
result = st.session_state.get("last_result")
if result:
p = result["phenotypes"]
# Best-ranked medium, or None when the media list is empty.
top = result["media"][0] if result["media"] else None
# Per-phenotype prediction dicts; a missing phenotype becomes {} so the
# .get(...) lookups in the summary below fall back to their defaults.
T = p.get("optimal_temperature_c", {})
pH = p.get("optimal_ph", {})
O2 = p.get("oxygen_requirement", {})
salt = p.get("salt_tolerance_pct", {})
st.markdown(
f"""
Prediction · {result['accession']}
Try {top['name'] if top else '—'}{top['medium_id'] if top else ''}
T {T.get('prediction', 0):.0f}°CpH {pH.get('prediction', 0):.1f}O₂ {O2.get('prediction', '—')}salt {salt.get('prediction', 0):.1f}%
""",
unsafe_allow_html=True,
)
# Collapsed detail view: one column per phenotype. Each spec is
# (phenotype key, display label, unit, axis range); the oxygen entry has no
# axis range. strict=True makes zip raise if columns and specs differ in length.
with st.expander("Full prediction · phenotype intervals + ranked media", expanded=False):
ic = st.columns(4)
for col, (key, label, unit, scale) in zip(
ic,
[
("optimal_temperature_c", "T_opt", "°C", (0, 110)),
("optimal_ph", "pH", "", (2, 11)),
("oxygen_requirement", "O₂", "", None),
("salt_tolerance_pct", "salt", "%", (0, 25)),
],
strict=True,
):
# `or {}` also covers a phenotype that is present but None.
info = p.get(key) or {}
with col:
if info.get("task") == "regression":
v, lo, hi = info["prediction"], info.get("low_80"), info.get("high_80")
# Colour helper is chosen from the phenotype key: temperature first, then pH,
# otherwise salt.
c = (temp_color(v) if "temp" in key else
ph_color(v) if "ph" in key else salt_color(v))
st.markdown(
f"""
{label}
{v:.1f}{unit}
{interval_bar(v, lo or v, hi or v, scale[0], scale[1], c, unit, show_label=True)}
", unsafe_allow_html=True) # close lab-pad
# ──────────────── Quick filters ────────────────
# (session value, button label) for each phenotype shortcut, in display order.
filter_opts = [
    ("all", "All"),
    ("thermo", "Thermophiles · >55°C"),
    ("psychro", "Psychrophiles · <15°C"),
    ("anaerobe", "Anaerobes"),
    ("halo", "Halotolerant · >3% NaCl"),
]
fcols = st.columns([1, 1.4, 1.4, 1, 1.4, 4])
# zip stops after the five options, so the wide sixth column stays empty.
for filter_col, (opt_key, opt_label) in zip(fcols, filter_opts):
    is_active = st.session_state["filter"] == opt_key
    if filter_col.button(
        opt_label,
        key=f"filter_{opt_key}",
        type="primary" if is_active else "secondary",
        use_container_width=True,
    ):
        st.session_state["filter"] = opt_key
        st.rerun()

# Start from the rows selected by the mode strip, then apply the active
# phenotype filter. "all" (or any unknown value) has no mask and keeps every row.
unc = unc_all if not focused else unc_all[unc_all["truly_uncultured"]]
active_filter = st.session_state["filter"]
phenotype_masks = {
    "thermo": lambda df: df["pred_optimal_temperature_c"] > 55,
    "psychro": lambda df: df["pred_optimal_temperature_c"] < 15,
    "anaerobe": lambda df: df["pred_oxygen_requirement"].fillna("").str.contains("anaerobe", case=False),
    "halo": lambda df: df["pred_salt_tolerance_pct"] > 3,
}
build_mask = phenotype_masks.get(active_filter)
if build_mask is not None:
    unc = unc[build_mask(unc)]
# Search row
st.markdown('
', unsafe_allow_html=True)
# 4:1 split: organism-name search box on the left, row-count caption on the right.
sc1, sc2 = st.columns([4, 1])
with sc1:
search = st.text_input(
label="search",
label_visibility="collapsed",
placeholder="⌕ filter by organism name…",
key="catalog_search",
)
# NOTE(review): len(unc) is read here, before the search text has been applied
# to `unc`, so the caption reflects the mode and phenotype filters only. The
# "sorted by confidence" wording is also unconditional, while the sort itself
# only happens when a "top1_confidence" column exists.
with sc2:
st.markdown(
f'
'
f'showing {len(unc):,} · sorted by confidence
',
unsafe_allow_html=True,
)
st.markdown("
", unsafe_allow_html=True)
# Narrow by organism name. The box is a plain-text filter, so the typed text is
# matched literally (regex=False). As a regex, "." would match any character
# and an unbalanced "(" or "[" would raise re.error.
if search:
    name_matches = unc["ncbi_organism_name"].fillna("").str.contains(
        search, case=False, na=False, regex=False
    )
    unc = unc[name_matches]
# Highest media confidence first, when the column is available.
if "top1_confidence" in unc.columns:
    unc = unc.sort_values("top1_confidence", ascending=False)
# ──────────────── Top picks (cards) ────────────────
# The first six rows of the filtered catalog become cards; everything after
# them is left in `rest`.
featured = unc.head(6)
rest = unc.iloc[6:]
if len(featured):
st.markdown(
f'
'
f'
Top {len(featured)} picksby media confidence
'
f'
',
unsafe_allow_html=True,
)
# HTML fragments are collected in a list and joined once for a single
# markdown call.
cards_html = ['
']
for _, m in featured.iterrows():
T = float(m["pred_optimal_temperature_c"])
ph = float(m["pred_optimal_ph"])
slt = float(m["pred_salt_tolerance_pct"])
o2_lbl = m["pred_oxygen_requirement"] or "—"
o2_conf = float(m.get("pred_oxygen_requirement_confidence") or 0)
top_id = m["top1_medium_id"]
top_name = m["top1_medium_name"]
# NOTE(review): "top1_confidence" is read unconditionally here, although the
# sort before this section only runs when that column exists.
top_conf = float(m["top1_confidence"])
# Card title: organism name, falling back to the accession, capped at 80 chars.
# NOTE(review): a missing name stored as NaN is truthy, so the `or` fallback
# would not trigger and the slice would fail; confirm how missing names are stored.
short = (m["ncbi_organism_name"] or m["genome_accession"])[:80]
# The temperature bar spans T ± 5, clamped to the 0–110 axis.
cards_html.append(f"""
{m['genome_accession']} · {m['phylum']}
{short}
T_opt{T:.0f}°C
{interval_bar(T, max(0, T - 5), min(110, T + 5), 0, 110, temp_color(T))}
")
st.markdown("\n".join(cards_html), unsafe_allow_html=True)
# ──────────────── Rest as table ────────────────
# Rows that did not become cards are listed in an HTML table.
if len(rest):
st.markdown(
f'
'
f'
Remaining {len(rest):,}
'
f'
',
unsafe_allow_html=True,
)
# Only the first 80 remaining rows are rendered; each one becomes an HTML
# fragment that is joined into the table later.
table_rows = []
for _, m in rest.head(80).iterrows():
T = float(m["pred_optimal_temperature_c"])
ph = float(m["pred_optimal_ph"])
slt = float(m["pred_salt_tolerance_pct"])
o2_lbl = m["pred_oxygen_requirement"] or "—"
o2_conf = float(m.get("pred_oxygen_requirement_confidence") or 0)
# Organism name capped at 60 characters; a falsy name becomes "".
short = (m["ncbi_organism_name"] or "")[:60]
# Medium name is capped at 38 characters in the row.
table_rows.append(f"""
{m['genome_accession']}
{short}
{m['phylum']}
{m['top1_medium_id']}{m['top1_medium_name'][:38]}
{media_conf_bar(float(m['top1_confidence']))}
{T:.0f}°C
{ph:.1f}
{oxygen_conf_arc(o2_conf, size=20)}{o2_lbl}
{slt:.1f}%
{float(m['checkm_completeness']):.0f}
""")
st.markdown(
f'
'
f'
'
f'
{"".join(f"
{h}
" for h in ["Accession", "Organism", "Phylum", "Try this medium", "Conf.", "T", "pH", "O₂", "Salt", "CheckM"])}
'
f'{"".join(table_rows)}
'
f'
'
f'showing first 80 of {len(rest):,} remaining · use search and filters to narrow'
f'
Sanity-check the model on a microbe with published growth conditions.
'
f'
',
unsafe_allow_html=True,
)
# Raw-HTML markdown call that precedes the sanity-check organism cards.
st.markdown('
', unsafe_allow_html=True)
# One column per entry of SANITY_ORGANISMS; strict=True makes zip raise unless
# there are exactly three entries.
pcols = st.columns(3)
for col, org in zip(pcols, SANITY_ORGANISMS, strict=True):
with col:
# Published reference values for this organism (T_opt, pH and O2 are read here).
k = org["known"]
st.markdown(
f"""
{org['name']}
{org['accession']}
{k['T_opt']:.0f}°CpH {k['pH']:.1f}{k['O2']}
""",
unsafe_allow_html=True,
)
# The button label uses the first word of the organism name. A click stores the
# accession and the published values in session state and sets "test_run".
if st.button(f"Predict {org['name'].split()[0]}", key=f"sanity_{org['accession']}", use_container_width=True):
st.session_state["test_target"] = org["accession"]
st.session_state["test_known"] = org["known"]
st.session_state["test_run"] = True
st.markdown('', unsafe_allow_html=True)
# Manual entry form: free-text query, optional sequence upload and a Run
# button. clear_on_submit=False is passed explicitly for the form.
with st.form("test_form", clear_on_submit=False):
tcol1, tcol2 = st.columns([5, 2])
with tcol1:
# The box is pre-filled from "test_target" in session state when that key is
# present (it is written by the sanity-check buttons).
t_query = st.text_input(
label="test query",
label_visibility="collapsed",
placeholder="⌕ organism name or NCBI accession…",
value=st.session_state.get("test_target", ""),
)
with tcol2:
t_upload = st.file_uploader("test upload", type=["fna", "fa", "fasta", "gz"], label_visibility="collapsed")
t_submit = st.form_submit_button("Run", type="primary", use_container_width=True)
# One-shot flags written by the sanity-check buttons. They are popped so they
# apply to this script pass only.
auto = st.session_state.pop("test_run", False)
known = st.session_state.pop("test_known", None)

# Resolve `t_target`, in priority order: a submitted upload, a submitted
# accession, a submitted organism name (first NCBI hit), then a sanity-check
# button pressed on this pass.
t_target = None
t_query_text = t_query.strip()
if t_upload is not None and t_submit:
    # Gate on the form submit, as the catalog tab does. An uploaded file stays
    # attached to the widget across reruns; without the gate every rerun would
    # re-run inference on it, shadow the sanity-check buttons, and compare the
    # uploaded genome against another organism's published values.
    upload_dir = ROOT / "data" / "_uploaded"
    upload_dir.mkdir(parents=True, exist_ok=True)
    # The file name is supplied by the client. Keep only its final path
    # component so it cannot place the file outside the upload directory.
    tmp = upload_dir / (upload_dir / t_upload.name).name
    tmp.write_bytes(t_upload.getbuffer())
    t_target = str(tmp)
elif t_submit and t_query_text and is_accession(t_query_text):
    # Test the stripped text, i.e. the same value that is used as the target.
    t_target = t_query_text
elif t_submit and t_query_text:
    with st.spinner(f"Searching NCBI for '{t_query_text}'…"):
        t_hits = search_ncbi_assembly(t_query_text, retmax=5)
    if t_hits:
        t_target = t_hits[0]["accession"]
    else:
        st.warning(f"No NCBI hits for '{t_query_text}'.")
elif auto:
    t_target = st.session_state.get("test_target")

# Run the model. A SystemExit raised by run_inference is shown as an error and
# ends this script run.
if t_target:
    with st.spinner(f"Predicting {t_target}…"):
        try:
            t_result = run_inference(t_target)
        except SystemExit as e:
            st.error(str(e))
            st.stop()
# Phenotype predictions of the test run, followed by the raw-HTML heading that
# names the accession being compared.
p = t_result["phenotypes"]
st.markdown(
f''
f'
Predicted vs published — {t_result["accession"]}
',
unsafe_allow_html=True,
)
# Build one comparison card per predicted phenotype, in the order temperature,
# pH, salt, oxygen. `known` holds the published values when the run came from a
# sanity-check button, otherwise it is None and no published value is shown.
cards = []
T = p.get("optimal_temperature_c", {})
pH = p.get("optimal_ph", {})
slt = p.get("salt_tolerance_pct", {})
O2 = p.get("oxygen_requirement", {})

# (card title, prediction dict, key in `known`, unit, colour helper, axis min, axis max)
regression_cards = (
    ("Optimum temperature", T, "T_opt", "°C", temp_color, 0, 110),
    ("Optimum pH", pH, "pH", "", ph_color, 2, 11),
    ("Salt tolerance", slt, "salt", "%", salt_color, 0, 25),
)
for title, info, known_key, unit, color_for, axis_lo, axis_hi in regression_cards:
    if not info:
        continue
    v = info["prediction"]
    lo, hi = info.get("low_80"), info.get("high_80")
    # A bound may be absent or explicitly None. Collapse it onto the point
    # estimate so the range test below never compares against None.
    lo = v if lo is None else lo
    hi = v if hi is None else hi
    # .get: a published record without this phenotype yields no comparison
    # instead of a KeyError.
    pub = known.get(known_key) if known else None
    # "Match" means the published value lies inside the predicted interval.
    ok = pub is not None and lo <= pub <= hi
    cards.append(
        _compare_card_html(title, v, lo, hi, pub, unit, color_for(v), axis_lo, axis_hi, ok)
    )

# Oxygen requirement is categorical and has its own card builder.
if O2:
    cards.append(
        _oxygen_compare_card(
            O2.get("prediction", "—"),
            O2.get("confidence", 0),
            known.get("O2") if known else None,
        )
    )
# Emit the assembled comparison cards as one raw-HTML block, then the
# "Top media to try" heading.
st.markdown(
f'
'
f'{"".join(cards)}
',
unsafe_allow_html=True,
)
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Top media to try
', unsafe_allow_html=True)
for i, r in enumerate(t_result["media"][:5], 1):
st.markdown(
f"""