Spaces:

mwitiderrick
/

medicalbot

Build error

App Files Files Community

mwitiderrick committed on Jun 11

Commit

2304b58

verified ·

1 Parent(s): e2c5c85

Upload 4 files

Browse files

Files changed (4) hide show

app.py +30 -0
rag_dspy.py +74 -0
readme.md +43 -0
requirements.txt +8 -3

app.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import streamlit as st
+from rag_dspy import MedicalRAG
+st.set_page_config(page_title="Medical QA Bot", page_icon="🩺")
+st.title("🩺 Medical QA Bot")
+st.write("Ask a medical question and get an answer based on retrieved medical literature.")
+if "history" not in st.session_state:
+    st.session_state["history"] = []
+rag_chain = MedicalRAG()
+with st.form("chat_form"):
+    user_question = st.text_input("Enter your medical question:", "")
+    submitted = st.form_submit_button("Get Answer")
+if submitted and user_question.strip():
+    with st.spinner("Retrieving answer..."):
+        result = rag_chain.forward(user_question)
+        answer = result.final_answer
+        st.session_state["history"].append((user_question, answer))
+        st.markdown(f"**Answer:** {answer}")
+if st.session_state["history"]:
+    st.markdown("---")
+    st.markdown("### Conversation History")
+    for q, a in reversed(st.session_state["history"]):
+        st.markdown(f"**Q:** {q}")
+        st.markdown(f"**A:** {a}")

rag_dspy.py ADDED Viewed

	@@ -0,0 +1,74 @@

+# rag_dspy.py
+import dspy
+from dspy_qdrant import QdrantRM
+from qdrant_client import QdrantClient, models
+from dotenv import load_dotenv
+import os
+load_dotenv()
+# DSPy setup
+lm = dspy.LM("gpt-4", max_tokens=512,api_key=os.environ.get("OPENAI_API_KEY"))
+client = QdrantClient(url=os.environ.get("QDRANT_CLOUD_URL"), api_key=os.environ.get("QDRANT_API_KEY"))
+collection_name = "medical_chat_bot"
+rm = QdrantRM(
+    qdrant_collection_name=collection_name,
+    qdrant_client=client,
+    vector_name="dense",                 # <-- MATCHES your vector field in upsert
+    document_field="passage_text",        # <-- MATCHES your payload field in upsert
+    k=20)
+dspy.settings.configure(lm=lm, rm=rm)
+# Manual reranker: dense prefetch via Qdrant's native query API, then rerank on the ColBERT multivector field
+def rerank_with_colbert(query_text):
+    from fastembed import TextEmbedding, LateInteractionTextEmbedding
+    # Encode query once with both models
+    dense_model = TextEmbedding("BAAI/bge-small-en")
+    colbert_model = LateInteractionTextEmbedding("colbert-ir/colbertv2.0")
+    dense_query = list(dense_model.embed(query_text))[0]
+    colbert_query = list(colbert_model.embed(query_text))[0]
+    # Combined query: retrieve with dense, rerank with ColBERT
+    results = client.query_points(
+        collection_name=collection_name,
+        prefetch=models.Prefetch(
+            query=dense_query,
+            using="dense"
+        ),
+        query=colbert_query,
+        using="colbert",
+        limit=5,
+        with_payload=True
+    )
+    points = results.points
+    docs = []
+    for point in points:
+        docs.append(point.payload['passage_text'])
+    return docs
+# DSPy Signature and Module
+class MedicalAnswer(dspy.Signature):
+    question = dspy.InputField(desc="The medical question to answer")
+    context = dspy.OutputField(desc="The answer to the medical question")
+    final_answer = dspy.OutputField(desc="The answer to the medical question")
+class MedicalRAG(dspy.Module):
+    def __init__(self):
+        super().__init__()
+    def forward(self, question):
+        reranked_docs = rerank_with_colbert(question)
+        context_str = "\n".join(reranked_docs)
+        return dspy.ChainOfThought(MedicalAnswer)(
+            question=question,
+            context=context_str
+        )

readme.md ADDED Viewed

	@@ -0,0 +1,43 @@

+#  Medical QA Chatbot
+This is a Chain-of-Thought powered medical chatbot that:
+- Retrieves answers from a Qdrant Cloud vector DB using dense + ColBERT multivectors
+- Uses Stanford DSPy to reason step-by-step with retrieved context
+- Supports traceable source highlighting in Chainlit
+- Deployable on Hugging Face Spaces via Docker
+---
+##  How to Deploy
+- Add `OPENAI_API_KEY`, `QDRANT_CLOUD_URL`, and `QDRANT_API_KEY` as secret environment variables in the Hugging Face Space settings
+- Make sure the Qdrant Cloud instance those variables point to contains the `medical_chat_bot` collection that `rag_dspy.py` queries
+- Run the Space
+## Sample Questions
+### General Medical Knowledge
+- What are the most common symptoms of lupus?
+- How is type 2 diabetes usually managed in adults?
+- What is the difference between viral and bacterial pneumonia?
+### Treatment & Medication
+- What are the first-line medications for treating hypertension?
+- How does metformin work to lower blood sugar?
+### Diagnosis & Tests
+- What diagnostic tests are used to detect rheumatoid arthritis?
+- When is a colonoscopy recommended for cancer screening?
+### Hospital & Patient Care
+- What are the psychosocial challenges faced by cancer patients?
+- How do hospitals manage patients with multidrug-resistant infections?
+### Clinical Guidelines / Rare Topics
+- What is the recommended treatment for acute myocardial infarction in elderly patients?

requirements.txt CHANGED Viewed

@@ -1,3 +1,8 @@
-altair
-pandas
-streamlit

+datasets==3.6.0
+streamlit
+git+https://github.com/stanfordnlp/dspy.git
+python-dotenv==1.1.0
+cachetools
+cloudpickle
+qdrant-client[fastembed]>=1.14.2
+dspy-qdrant