Update app.py
Browse files
app.py
CHANGED
|
@@ -1,20 +1,29 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
from tempfile import NamedTemporaryFile
|
|
|
|
|
|
|
|
|
|
| 4 |
from langchain.chains import create_retrieval_chain
|
| 5 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 6 |
from langchain_core.prompts import ChatPromptTemplate
|
| 7 |
-
from langchain_openai import ChatOpenAI
|
| 8 |
-
from langchain_community.document_loaders import PyPDFLoader
|
| 9 |
-
from langchain_community.document_loaders import TextLoader
|
| 10 |
from langchain_community.vectorstores import FAISS
|
| 11 |
-
from langchain_openai import OpenAIEmbeddings
|
| 12 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 13 |
-
import re
|
| 14 |
-
import anthropic
|
| 15 |
|
| 16 |
# Function to remove code block markers from the answer
|
| 17 |
def remove_code_blocks(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
code_block_pattern = r"^```(?:\w+)?\n(.*?)\n```$"
|
| 19 |
match = re.match(code_block_pattern, text, re.DOTALL)
|
| 20 |
if match:
|
|
@@ -24,29 +33,48 @@ def remove_code_blocks(text):
|
|
| 24 |
|
| 25 |
# Function to process PDF, run Q&A, and return results
|
| 26 |
def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 28 |
|
|
|
|
| 29 |
with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
|
| 30 |
temp_pdf.write(uploaded_file.read())
|
| 31 |
temp_pdf_path = temp_pdf.name
|
| 32 |
|
|
|
|
| 33 |
loader = PyPDFLoader(temp_pdf_path)
|
| 34 |
docs = loader.load()
|
| 35 |
-
|
| 36 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
|
| 37 |
splits = text_splitter.split_documents(docs)
|
| 38 |
|
|
|
|
| 39 |
vectorstore = FAISS.from_documents(
|
| 40 |
-
documents=splits,
|
|
|
|
| 41 |
)
|
| 42 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
|
| 43 |
|
|
|
|
| 44 |
if os.path.exists(prompt_path):
|
| 45 |
with open(prompt_path, "r") as file:
|
| 46 |
system_prompt = file.read()
|
| 47 |
else:
|
| 48 |
raise FileNotFoundError(f"The specified file was not found: {prompt_path}")
|
| 49 |
|
|
|
|
| 50 |
prompt = ChatPromptTemplate.from_messages(
|
| 51 |
[
|
| 52 |
("system", system_prompt),
|
|
@@ -54,38 +82,60 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
|
|
| 54 |
]
|
| 55 |
)
|
| 56 |
|
|
|
|
| 57 |
llm = ChatOpenAI(model="gpt-4o")
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
| 60 |
|
|
|
|
| 61 |
if os.path.exists(questions_path):
|
| 62 |
with open(questions_path, "r") as file:
|
| 63 |
questions = [line.strip() for line in file.readlines() if line.strip()]
|
| 64 |
else:
|
| 65 |
raise FileNotFoundError(f"The specified file was not found: {questions_path}")
|
| 66 |
|
|
|
|
| 67 |
qa_results = []
|
| 68 |
for question in questions:
|
| 69 |
result = rag_chain.invoke({"input": question})
|
| 70 |
answer = result["answer"]
|
| 71 |
|
|
|
|
| 72 |
answer = remove_code_blocks(answer)
|
| 73 |
|
| 74 |
qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
|
| 75 |
qa_results.append(qa_text)
|
| 76 |
display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
|
| 77 |
|
|
|
|
| 78 |
os.remove(temp_pdf_path)
|
| 79 |
|
| 80 |
return qa_results
|
| 81 |
|
| 82 |
-
#
|
| 83 |
def process_multi_plan_qa(api_key, input_text, display_placeholder):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 85 |
|
| 86 |
# Load the existing vector store
|
| 87 |
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
|
| 88 |
-
vector_store = FAISS.load_local(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
# Convert the vector store to a retriever
|
| 91 |
retriever = vector_store.as_retriever(search_kwargs={"k": 50})
|
|
@@ -108,7 +158,9 @@ def process_multi_plan_qa(api_key, input_text, display_placeholder):
|
|
| 108 |
|
| 109 |
# Create the question-answering chain
|
| 110 |
llm = ChatOpenAI(model="gpt-4o")
|
| 111 |
-
question_answer_chain = create_stuff_documents_chain(
|
|
|
|
|
|
|
| 112 |
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
| 113 |
|
| 114 |
# Process the input text
|
|
@@ -118,14 +170,27 @@ def process_multi_plan_qa(api_key, input_text, display_placeholder):
|
|
| 118 |
# Display the answer
|
| 119 |
display_placeholder.markdown(f"**Answer:**\n{answer}")
|
| 120 |
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 123 |
|
| 124 |
# Directory containing individual vector stores
|
| 125 |
vectorstore_directory = "Individual_Summary_Vectorstores"
|
| 126 |
|
| 127 |
# List all vector store directories
|
| 128 |
-
vectorstore_names = [
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
# Initialize a list to collect all retrieved chunks
|
| 131 |
all_retrieved_chunks = []
|
|
@@ -136,13 +201,17 @@ def multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeholder):
|
|
| 136 |
|
| 137 |
# Load the vector store
|
| 138 |
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
|
| 139 |
-
vector_store = FAISS.load_local(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
# Convert the vector store to a retriever
|
| 142 |
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
|
| 143 |
|
| 144 |
# Retrieve relevant chunks for the input text
|
| 145 |
-
retrieved_chunks = retriever.invoke(
|
| 146 |
all_retrieved_chunks.extend(retrieved_chunks)
|
| 147 |
|
| 148 |
# Read the system prompt for multi-document QA
|
|
@@ -163,16 +232,30 @@ def multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeholder):
|
|
| 163 |
|
| 164 |
# Create the question-answering chain
|
| 165 |
llm = ChatOpenAI(model="gpt-4o")
|
| 166 |
-
question_answer_chain = create_stuff_documents_chain(
|
|
|
|
|
|
|
| 167 |
|
| 168 |
# Process the combined context
|
| 169 |
-
result = question_answer_chain.invoke({
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
# Display the answer
|
| 172 |
-
|
| 173 |
-
|
| 174 |
|
| 175 |
def load_documents_from_pdf(file):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
# Check if the file is a PDF
|
| 177 |
if not file.name.endswith('.pdf'):
|
| 178 |
raise ValueError("The uploaded file is not a PDF. Please upload a PDF file.")
|
|
@@ -187,51 +270,84 @@ def load_documents_from_pdf(file):
|
|
| 187 |
return docs
|
| 188 |
|
| 189 |
def load_vector_store_from_path(path):
|
| 190 |
-
|
| 191 |
-
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
-
# Function to compare
|
| 195 |
def process_one_to_many_query(api_key, focus_input, comparison_inputs, input_text, display_placeholder):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 197 |
print(comparison_inputs)
|
| 198 |
# Load focus documents or vector store
|
| 199 |
if isinstance(focus_input, st.runtime.uploaded_file_manager.UploadedFile):
|
|
|
|
| 200 |
focus_docs = load_documents_from_pdf(focus_input)
|
| 201 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
|
| 202 |
focus_splits = text_splitter.split_documents(focus_docs)
|
| 203 |
-
focus_vector_store = FAISS.from_documents(
|
|
|
|
|
|
|
|
|
|
| 204 |
focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 205 |
elif isinstance(focus_input, str) and os.path.isdir(focus_input):
|
|
|
|
| 206 |
focus_vector_store = load_vector_store_from_path(focus_input)
|
| 207 |
focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 208 |
else:
|
| 209 |
raise ValueError("Invalid focus input type. Must be a PDF file or a path to a vector store.")
|
| 210 |
|
|
|
|
| 211 |
focus_docs = focus_retriever.invoke(input_text)
|
| 212 |
|
|
|
|
| 213 |
comparison_chunks = []
|
| 214 |
for comparison_input in comparison_inputs:
|
| 215 |
if isinstance(comparison_input, st.runtime.uploaded_file_manager.UploadedFile):
|
|
|
|
| 216 |
comparison_docs = load_documents_from_pdf(comparison_input)
|
| 217 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
|
| 218 |
comparison_splits = text_splitter.split_documents(comparison_docs)
|
| 219 |
-
comparison_vector_store = FAISS.from_documents(
|
|
|
|
|
|
|
|
|
|
| 220 |
comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 221 |
elif isinstance(comparison_input, str) and os.path.isdir(comparison_input):
|
|
|
|
| 222 |
comparison_vector_store = load_vector_store_from_path(comparison_input)
|
| 223 |
comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 224 |
else:
|
| 225 |
raise ValueError("Invalid comparison input type. Must be a PDF file or a path to a vector store.")
|
| 226 |
|
|
|
|
| 227 |
comparison_docs = comparison_retriever.invoke(input_text)
|
| 228 |
comparison_chunks.extend(comparison_docs)
|
| 229 |
|
| 230 |
# Construct the combined context
|
| 231 |
-
combined_context =
|
| 232 |
-
focus_docs +
|
| 233 |
-
comparison_chunks
|
| 234 |
-
)
|
| 235 |
|
| 236 |
# Read the system prompt
|
| 237 |
prompt_path = "Prompts/comparison_prompt.md"
|
|
@@ -252,7 +368,7 @@ def process_one_to_many_query(api_key, focus_input, comparison_inputs, input_tex
|
|
| 252 |
# Create the question-answering chain
|
| 253 |
llm = ChatOpenAI(model="gpt-4o")
|
| 254 |
question_answer_chain = create_stuff_documents_chain(
|
| 255 |
-
llm,
|
| 256 |
prompt,
|
| 257 |
document_variable_name="context"
|
| 258 |
)
|
|
@@ -264,35 +380,67 @@ def process_one_to_many_query(api_key, focus_input, comparison_inputs, input_tex
|
|
| 264 |
})
|
| 265 |
|
| 266 |
# Display the answer
|
| 267 |
-
|
|
|
|
| 268 |
|
| 269 |
# Function to list vector store documents
|
| 270 |
def list_vector_store_documents():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
# Assuming documents are stored in the "Individual_All_Vectorstores" directory
|
| 272 |
directory_path = "Individual_All_Vectorstores"
|
| 273 |
if not os.path.exists(directory_path):
|
| 274 |
-
raise FileNotFoundError(
|
|
|
|
|
|
|
|
|
|
| 275 |
# List all available vector stores by document name
|
| 276 |
-
documents = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
return documents
|
| 278 |
|
|
|
|
| 279 |
def compare_with_long_context(api_key, anthropic_api_key, input_text, focus_plan_path, selected_summaries, display_placeholder):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 281 |
os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key
|
| 282 |
-
# Load the focus plan
|
| 283 |
|
| 284 |
-
# Load focus documents
|
| 285 |
if isinstance(focus_plan_path, st.runtime.uploaded_file_manager.UploadedFile):
|
|
|
|
| 286 |
focus_docs = load_documents_from_pdf(focus_plan_path)
|
| 287 |
elif isinstance(focus_plan_path, str):
|
|
|
|
| 288 |
focus_loader = PyPDFLoader(focus_plan_path)
|
| 289 |
focus_docs = focus_loader.load()
|
|
|
|
|
|
|
| 290 |
|
| 291 |
# Concatenate selected summary documents
|
| 292 |
summaries_directory = "CAPS_Summaries"
|
| 293 |
summaries_content = ""
|
| 294 |
for filename in selected_summaries:
|
| 295 |
-
|
|
|
|
|
|
|
| 296 |
summaries_content += file.read() + "\n\n"
|
| 297 |
|
| 298 |
# Prepare the context
|
|
@@ -300,17 +448,15 @@ def compare_with_long_context(api_key, anthropic_api_key, input_text, focus_plan
|
|
| 300 |
|
| 301 |
# Create the client and message
|
| 302 |
client = anthropic.Anthropic(api_key=anthropic_api_key)
|
| 303 |
-
|
| 304 |
-
model="claude-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
{"role": "user", "content": f"{input_text}\n\nFocus Document:\n{focus_context}\n\nSummaries:\n{summaries_content}"}
|
| 308 |
-
]
|
| 309 |
)
|
| 310 |
|
| 311 |
# Display the answer
|
| 312 |
-
|
| 313 |
-
|
| 314 |
|
| 315 |
# Streamlit app layout with tabs
|
| 316 |
st.title("Climate Policy Analysis Tool")
|
|
@@ -319,11 +465,21 @@ st.title("Climate Policy Analysis Tool")
|
|
| 319 |
api_key = st.text_input("Enter your OpenAI API key:", type="password", key="openai_key")
|
| 320 |
|
| 321 |
# Create tabs
|
| 322 |
-
tab1, tab2, tab3, tab4, tab5 = st.tabs([
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
# First tab: Summary Generation
|
| 325 |
with tab1:
|
| 326 |
-
uploaded_file = st.file_uploader(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
prompt_file_path = "Prompts/summary_tool_system_prompt.md"
|
| 329 |
questions_file_path = "Prompts/summary_tool_questions.md"
|
|
@@ -337,14 +493,19 @@ with tab1:
|
|
| 337 |
display_placeholder = st.empty()
|
| 338 |
with st.spinner("Processing..."):
|
| 339 |
try:
|
| 340 |
-
results = process_pdf(
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
markdown_text = "\n".join(results)
|
| 343 |
-
|
| 344 |
# Use the uploaded file's name for the download file
|
| 345 |
base_name = os.path.splitext(uploaded_file.name)[0]
|
| 346 |
download_file_name = f"{base_name}_Summary.md"
|
| 347 |
-
|
| 348 |
st.download_button(
|
| 349 |
label="Download Results as Markdown",
|
| 350 |
data=markdown_text,
|
|
@@ -355,7 +516,7 @@ with tab1:
|
|
| 355 |
except Exception as e:
|
| 356 |
st.error(f"An error occurred: {e}")
|
| 357 |
|
| 358 |
-
# Second tab: Multi-Plan QA
|
| 359 |
with tab2:
|
| 360 |
input_text = st.text_input("Ask a question:", key="multi_plan_input")
|
| 361 |
if st.button("Ask", key="multi_plan_qa_button"):
|
|
@@ -375,7 +536,7 @@ with tab2:
|
|
| 375 |
except Exception as e:
|
| 376 |
st.error(f"An error occurred: {e}")
|
| 377 |
|
| 378 |
-
|
| 379 |
with tab3:
|
| 380 |
user_input = st.text_input("Ask a question:", key="multi_vectorstore_input")
|
| 381 |
if st.button("Ask", key="multi_vectorstore_qa_button"):
|
|
@@ -387,7 +548,7 @@ with tab3:
|
|
| 387 |
display_placeholder3 = st.empty()
|
| 388 |
with st.spinner("Processing..."):
|
| 389 |
try:
|
| 390 |
-
|
| 391 |
api_key,
|
| 392 |
user_input,
|
| 393 |
display_placeholder3
|
|
@@ -403,10 +564,18 @@ with tab4:
|
|
| 403 |
vectorstore_documents = list_vector_store_documents()
|
| 404 |
|
| 405 |
# Option to upload a new plan or select from existing vector stores
|
| 406 |
-
focus_option = st.radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
|
| 408 |
if focus_option == "Upload a new plan":
|
| 409 |
-
focus_uploaded_file = st.file_uploader(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
if focus_uploaded_file is not None:
|
| 411 |
# Directly use the uploaded file
|
| 412 |
focus_input = focus_uploaded_file
|
|
@@ -414,21 +583,49 @@ with tab4:
|
|
| 414 |
focus_input = None
|
| 415 |
else:
|
| 416 |
# Select a focus plan from existing vector stores
|
| 417 |
-
selected_focus_plan = st.selectbox(
|
| 418 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
|
| 420 |
# Option to upload comparison documents or select from existing vector stores
|
| 421 |
-
comparison_option = st.radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
if comparison_option == "Upload new documents":
|
| 424 |
-
comparison_files = st.file_uploader(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
comparison_inputs = comparison_files
|
| 426 |
else:
|
| 427 |
# Select comparison documents from existing vector stores
|
| 428 |
-
selected_comparison_plans = st.multiselect(
|
| 429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
|
| 431 |
-
input_text = st.text_input(
|
|
|
|
|
|
|
|
|
|
| 432 |
|
| 433 |
if st.button("Compare", key="compare_button"):
|
| 434 |
if not api_key:
|
|
@@ -444,8 +641,13 @@ with tab4:
|
|
| 444 |
with st.spinner("Processing..."):
|
| 445 |
try:
|
| 446 |
# Call the process_one_to_many_query function
|
| 447 |
-
process_one_to_many_query(
|
| 448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
except Exception as e:
|
| 450 |
st.error(f"An error occurred: {e}")
|
| 451 |
|
|
@@ -454,30 +656,56 @@ with tab5:
|
|
| 454 |
st.header("Plan Comparison with Long Context Model")
|
| 455 |
|
| 456 |
# Anthropic API Key Input
|
| 457 |
-
anthropic_api_key = st.text_input(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
|
| 459 |
# Option to upload a new plan or select from a list
|
| 460 |
-
focus_option = st.radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
|
| 462 |
if focus_option == "Upload a new plan":
|
| 463 |
-
focus_uploaded_file = st.file_uploader(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
if focus_uploaded_file is not None:
|
| 465 |
# Directly use the uploaded file
|
| 466 |
focus_plan_path = focus_uploaded_file
|
| 467 |
else:
|
| 468 |
focus_plan_path = None
|
| 469 |
else:
|
| 470 |
-
#
|
| 471 |
plan_list = [f.replace(".pdf", "") for f in os.listdir("CAPS") if f.endswith('.pdf')]
|
| 472 |
-
selected_focus_plan = st.selectbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
focus_plan_path = os.path.join("CAPS", f"{selected_focus_plan}.pdf")
|
| 474 |
|
| 475 |
# List available summary documents for selection
|
| 476 |
summaries_directory = "CAPS_Summaries"
|
| 477 |
-
summary_files = [
|
| 478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
-
input_text = st.text_input(
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
if st.button("Compare with Long Context", key="compare_button_long_context"):
|
| 483 |
if not api_key:
|
|
@@ -492,6 +720,13 @@ with tab5:
|
|
| 492 |
display_placeholder = st.empty()
|
| 493 |
with st.spinner("Processing..."):
|
| 494 |
try:
|
| 495 |
-
compare_with_long_context(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
except Exception as e:
|
| 497 |
-
st.error(f"An error occurred: {e}")
|
|
|
|
| 1 |
import os
|
| 2 |
+
import re
|
| 3 |
import streamlit as st
|
| 4 |
from tempfile import NamedTemporaryFile
|
| 5 |
+
import anthropic
|
| 6 |
+
|
| 7 |
+
# Import necessary modules from LangChain
|
| 8 |
from langchain.chains import create_retrieval_chain
|
| 9 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 10 |
from langchain_core.prompts import ChatPromptTemplate
|
| 11 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
| 12 |
+
from langchain_community.document_loaders import PyPDFLoader, TextLoader
|
|
|
|
| 13 |
from langchain_community.vectorstores import FAISS
|
|
|
|
| 14 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Function to remove code block markers from the answer
|
| 17 |
def remove_code_blocks(text):
|
| 18 |
+
"""
|
| 19 |
+
Removes code block markers from the answer text.
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
text (str): The text from which code block markers should be removed.
|
| 23 |
+
|
| 24 |
+
Returns:
|
| 25 |
+
str: The text without code block markers.
|
| 26 |
+
"""
|
| 27 |
code_block_pattern = r"^```(?:\w+)?\n(.*?)\n```$"
|
| 28 |
match = re.match(code_block_pattern, text, re.DOTALL)
|
| 29 |
if match:
|
|
|
|
| 33 |
|
| 34 |
# Function to process PDF, run Q&A, and return results
|
| 35 |
def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
|
| 36 |
+
"""
|
| 37 |
+
Processes a PDF file, runs Q&A, and returns the results.
|
| 38 |
+
|
| 39 |
+
Args:
|
| 40 |
+
api_key (str): OpenAI API key.
|
| 41 |
+
uploaded_file: Uploaded PDF file.
|
| 42 |
+
questions_path (str): Path to the questions file.
|
| 43 |
+
prompt_path (str): Path to the system prompt file.
|
| 44 |
+
display_placeholder: Streamlit placeholder for displaying results.
|
| 45 |
+
|
| 46 |
+
Returns:
|
| 47 |
+
list: List of QA results.
|
| 48 |
+
"""
|
| 49 |
+
# Set the OpenAI API key
|
| 50 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 51 |
|
| 52 |
+
# Save the uploaded PDF to a temporary file
|
| 53 |
with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
|
| 54 |
temp_pdf.write(uploaded_file.read())
|
| 55 |
temp_pdf_path = temp_pdf.name
|
| 56 |
|
| 57 |
+
# Load and split the PDF into documents
|
| 58 |
loader = PyPDFLoader(temp_pdf_path)
|
| 59 |
docs = loader.load()
|
|
|
|
| 60 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
|
| 61 |
splits = text_splitter.split_documents(docs)
|
| 62 |
|
| 63 |
+
# Create a vector store from the documents
|
| 64 |
vectorstore = FAISS.from_documents(
|
| 65 |
+
documents=splits,
|
| 66 |
+
embedding=OpenAIEmbeddings(model="text-embedding-3-large")
|
| 67 |
)
|
| 68 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
|
| 69 |
|
| 70 |
+
# Load the system prompt
|
| 71 |
if os.path.exists(prompt_path):
|
| 72 |
with open(prompt_path, "r") as file:
|
| 73 |
system_prompt = file.read()
|
| 74 |
else:
|
| 75 |
raise FileNotFoundError(f"The specified file was not found: {prompt_path}")
|
| 76 |
|
| 77 |
+
# Create the prompt template
|
| 78 |
prompt = ChatPromptTemplate.from_messages(
|
| 79 |
[
|
| 80 |
("system", system_prompt),
|
|
|
|
| 82 |
]
|
| 83 |
)
|
| 84 |
|
| 85 |
+
# Initialize the language model
|
| 86 |
llm = ChatOpenAI(model="gpt-4o")
|
| 87 |
+
|
| 88 |
+
# Create the question-answering chain
|
| 89 |
+
question_answer_chain = create_stuff_documents_chain(
|
| 90 |
+
llm, prompt, document_variable_name="context"
|
| 91 |
+
)
|
| 92 |
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
| 93 |
|
| 94 |
+
# Load the questions
|
| 95 |
if os.path.exists(questions_path):
|
| 96 |
with open(questions_path, "r") as file:
|
| 97 |
questions = [line.strip() for line in file.readlines() if line.strip()]
|
| 98 |
else:
|
| 99 |
raise FileNotFoundError(f"The specified file was not found: {questions_path}")
|
| 100 |
|
| 101 |
+
# Process each question
|
| 102 |
qa_results = []
|
| 103 |
for question in questions:
|
| 104 |
result = rag_chain.invoke({"input": question})
|
| 105 |
answer = result["answer"]
|
| 106 |
|
| 107 |
+
# Remove code block markers
|
| 108 |
answer = remove_code_blocks(answer)
|
| 109 |
|
| 110 |
qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
|
| 111 |
qa_results.append(qa_text)
|
| 112 |
display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
|
| 113 |
|
| 114 |
+
# Clean up temporary PDF file
|
| 115 |
os.remove(temp_pdf_path)
|
| 116 |
|
| 117 |
return qa_results
|
| 118 |
|
| 119 |
+
# Function to perform multi-plan QA using an existing vector store
|
| 120 |
def process_multi_plan_qa(api_key, input_text, display_placeholder):
|
| 121 |
+
"""
|
| 122 |
+
Performs multi-plan QA using an existing shared vector store.
|
| 123 |
+
|
| 124 |
+
Args:
|
| 125 |
+
api_key (str): OpenAI API key.
|
| 126 |
+
input_text (str): The question to ask.
|
| 127 |
+
display_placeholder: Streamlit placeholder for displaying results.
|
| 128 |
+
"""
|
| 129 |
+
# Set the OpenAI API key
|
| 130 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 131 |
|
| 132 |
# Load the existing vector store
|
| 133 |
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
|
| 134 |
+
vector_store = FAISS.load_local(
|
| 135 |
+
"Combined_Summary_Vectorstore",
|
| 136 |
+
embeddings,
|
| 137 |
+
allow_dangerous_deserialization=True
|
| 138 |
+
)
|
| 139 |
|
| 140 |
# Convert the vector store to a retriever
|
| 141 |
retriever = vector_store.as_retriever(search_kwargs={"k": 50})
|
|
|
|
| 158 |
|
| 159 |
# Create the question-answering chain
|
| 160 |
llm = ChatOpenAI(model="gpt-4o")
|
| 161 |
+
question_answer_chain = create_stuff_documents_chain(
|
| 162 |
+
llm, prompt, document_variable_name="context"
|
| 163 |
+
)
|
| 164 |
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
| 165 |
|
| 166 |
# Process the input text
|
|
|
|
| 170 |
# Display the answer
|
| 171 |
display_placeholder.markdown(f"**Answer:**\n{answer}")
|
| 172 |
|
| 173 |
+
# Function to perform multi-plan QA using multiple individual vector stores
|
| 174 |
+
def process_multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeholder):
|
| 175 |
+
"""
|
| 176 |
+
Performs multi-plan QA using multiple individual vector stores.
|
| 177 |
+
|
| 178 |
+
Args:
|
| 179 |
+
api_key (str): OpenAI API key.
|
| 180 |
+
input_text (str): The question to ask.
|
| 181 |
+
display_placeholder: Streamlit placeholder for displaying results.
|
| 182 |
+
"""
|
| 183 |
+
# Set the OpenAI API key
|
| 184 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 185 |
|
| 186 |
# Directory containing individual vector stores
|
| 187 |
vectorstore_directory = "Individual_Summary_Vectorstores"
|
| 188 |
|
| 189 |
# List all vector store directories
|
| 190 |
+
vectorstore_names = [
|
| 191 |
+
d for d in os.listdir(vectorstore_directory)
|
| 192 |
+
if os.path.isdir(os.path.join(vectorstore_directory, d))
|
| 193 |
+
]
|
| 194 |
|
| 195 |
# Initialize a list to collect all retrieved chunks
|
| 196 |
all_retrieved_chunks = []
|
|
|
|
| 201 |
|
| 202 |
# Load the vector store
|
| 203 |
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
|
| 204 |
+
vector_store = FAISS.load_local(
|
| 205 |
+
vectorstore_path,
|
| 206 |
+
embeddings,
|
| 207 |
+
allow_dangerous_deserialization=True
|
| 208 |
+
)
|
| 209 |
|
| 210 |
# Convert the vector store to a retriever
|
| 211 |
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
|
| 212 |
|
| 213 |
# Retrieve relevant chunks for the input text
|
| 214 |
+
retrieved_chunks = retriever.invoke(input_text)
|
| 215 |
all_retrieved_chunks.extend(retrieved_chunks)
|
| 216 |
|
| 217 |
# Read the system prompt for multi-document QA
|
|
|
|
| 232 |
|
| 233 |
# Create the question-answering chain
|
| 234 |
llm = ChatOpenAI(model="gpt-4o")
|
| 235 |
+
question_answer_chain = create_stuff_documents_chain(
|
| 236 |
+
llm, prompt, document_variable_name="context"
|
| 237 |
+
)
|
| 238 |
|
| 239 |
# Process the combined context
|
| 240 |
+
result = question_answer_chain.invoke({
|
| 241 |
+
"input": input_text,
|
| 242 |
+
"context": all_retrieved_chunks
|
| 243 |
+
})
|
| 244 |
|
| 245 |
# Display the answer
|
| 246 |
+
answer = result["answer"] if "answer" in result else result
|
| 247 |
+
display_placeholder.markdown(f"**Answer:**\n{answer}")
|
| 248 |
|
| 249 |
def load_documents_from_pdf(file):
|
| 250 |
+
"""
|
| 251 |
+
Loads documents from a PDF file.
|
| 252 |
+
|
| 253 |
+
Args:
|
| 254 |
+
file: Uploaded PDF file.
|
| 255 |
+
|
| 256 |
+
Returns:
|
| 257 |
+
list: List of documents.
|
| 258 |
+
"""
|
| 259 |
# Check if the file is a PDF
|
| 260 |
if not file.name.endswith('.pdf'):
|
| 261 |
raise ValueError("The uploaded file is not a PDF. Please upload a PDF file.")
|
|
|
|
| 270 |
return docs
|
| 271 |
|
| 272 |
def load_vector_store_from_path(path):
    """
    Load a saved FAISS vector store from disk.

    Args:
        path (str): Location passed straight through to ``FAISS.load_local``.

    Returns:
        FAISS: Whatever ``FAISS.load_local`` returns for ``path``.
    """
    # The store is opened with the "text-embedding-3-large" embedding model.
    embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

    # NOTE(review): the flag name signals that loading deserializes data in a
    # way the library considers unsafe for untrusted input -- confirm that
    # `path` only ever points at stores produced by this project.
    store = FAISS.load_local(
        path,
        embedding_model,
        allow_dangerous_deserialization=True,
    )
    return store
|
| 288 |
|
| 289 |
+
# Function to compare documents via one-to-many query approach
|
| 290 |
def process_one_to_many_query(api_key, focus_input, comparison_inputs, input_text, display_placeholder):
|
| 291 |
+
"""
|
| 292 |
+
Compares a focus document against multiple comparison documents using a one-to-many query approach.
|
| 293 |
+
|
| 294 |
+
Args:
|
| 295 |
+
api_key (str): OpenAI API key.
|
| 296 |
+
focus_input: Focus document (uploaded file or path to vector store).
|
| 297 |
+
comparison_inputs: List of comparison documents (uploaded files or paths to vector stores).
|
| 298 |
+
input_text (str): The comparison question to ask.
|
| 299 |
+
display_placeholder: Streamlit placeholder for displaying results.
|
| 300 |
+
"""
|
| 301 |
+
# Set the OpenAI API key
|
| 302 |
os.environ["OPENAI_API_KEY"] = api_key
|
| 303 |
print(comparison_inputs)
|
| 304 |
# Load focus documents or vector store
|
| 305 |
if isinstance(focus_input, st.runtime.uploaded_file_manager.UploadedFile):
|
| 306 |
+
# If focus_input is an uploaded PDF file
|
| 307 |
focus_docs = load_documents_from_pdf(focus_input)
|
| 308 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
|
| 309 |
focus_splits = text_splitter.split_documents(focus_docs)
|
| 310 |
+
focus_vector_store = FAISS.from_documents(
|
| 311 |
+
focus_splits,
|
| 312 |
+
OpenAIEmbeddings(model="text-embedding-3-large")
|
| 313 |
+
)
|
| 314 |
focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 315 |
elif isinstance(focus_input, str) and os.path.isdir(focus_input):
|
| 316 |
+
# If focus_input is a path to a vector store
|
| 317 |
focus_vector_store = load_vector_store_from_path(focus_input)
|
| 318 |
focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 319 |
else:
|
| 320 |
raise ValueError("Invalid focus input type. Must be a PDF file or a path to a vector store.")
|
| 321 |
|
| 322 |
+
# Retrieve relevant chunks from the focus document
|
| 323 |
focus_docs = focus_retriever.invoke(input_text)
|
| 324 |
|
| 325 |
+
# Initialize list to collect comparison chunks
|
| 326 |
comparison_chunks = []
|
| 327 |
for comparison_input in comparison_inputs:
|
| 328 |
if isinstance(comparison_input, st.runtime.uploaded_file_manager.UploadedFile):
|
| 329 |
+
# If comparison_input is an uploaded PDF file
|
| 330 |
comparison_docs = load_documents_from_pdf(comparison_input)
|
| 331 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
|
| 332 |
comparison_splits = text_splitter.split_documents(comparison_docs)
|
| 333 |
+
comparison_vector_store = FAISS.from_documents(
|
| 334 |
+
comparison_splits,
|
| 335 |
+
OpenAIEmbeddings(model="text-embedding-3-large")
|
| 336 |
+
)
|
| 337 |
comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 338 |
elif isinstance(comparison_input, str) and os.path.isdir(comparison_input):
|
| 339 |
+
# If comparison_input is a path to a vector store
|
| 340 |
comparison_vector_store = load_vector_store_from_path(comparison_input)
|
| 341 |
comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
|
| 342 |
else:
|
| 343 |
raise ValueError("Invalid comparison input type. Must be a PDF file or a path to a vector store.")
|
| 344 |
|
| 345 |
+
# Retrieve relevant chunks from the comparison document
|
| 346 |
comparison_docs = comparison_retriever.invoke(input_text)
|
| 347 |
comparison_chunks.extend(comparison_docs)
|
| 348 |
|
| 349 |
# Construct the combined context
|
| 350 |
+
combined_context = focus_docs + comparison_chunks
|
|
|
|
|
|
|
|
|
|
| 351 |
|
| 352 |
# Read the system prompt
|
| 353 |
prompt_path = "Prompts/comparison_prompt.md"
|
|
|
|
| 368 |
# Create the question-answering chain
|
| 369 |
llm = ChatOpenAI(model="gpt-4o")
|
| 370 |
question_answer_chain = create_stuff_documents_chain(
|
| 371 |
+
llm,
|
| 372 |
prompt,
|
| 373 |
document_variable_name="context"
|
| 374 |
)
|
|
|
|
| 380 |
})
|
| 381 |
|
| 382 |
# Display the answer
|
| 383 |
+
answer = result["answer"] if "answer" in result else result
|
| 384 |
+
display_placeholder.markdown(f"**Answer:**\n{answer}")
|
| 385 |
|
| 386 |
# Function to list vector store documents
|
| 387 |
def list_vector_store_documents():
    """
    Lists available vector store documents.

    Scans the "Individual_All_Vectorstores" directory for entries whose names
    end in "_vectorstore" and turns each entry name into a display name by
    dropping that trailing suffix and replacing underscores with spaces.

    Returns:
        list: Display names of the matching entries, sorted alphabetically.

    Raises:
        FileNotFoundError: If "Individual_All_Vectorstores" is missing or is
            not a directory.
    """
    directory_path = "Individual_All_Vectorstores"
    suffix = "_vectorstore"

    # isdir (rather than exists) so a stray regular file with this name is
    # reported here instead of failing later inside os.listdir().
    if not os.path.isdir(directory_path):
        raise FileNotFoundError(
            f"The directory '{directory_path}' does not exist. "
            "Run `create_and_save_individual_vector_stores()` to create it."
        )

    # os.listdir() returns entries in arbitrary order, so sort for a stable
    # listing. Only the trailing suffix is stripped: a name that happens to
    # contain "_vectorstore" in the middle keeps that part intact.
    return sorted(
        entry[: -len(suffix)].replace("_", " ")
        for entry in os.listdir(directory_path)
        if entry.endswith(suffix)
    )
|
| 408 |
|
| 409 |
+
# Function to compare plans using a long context model
|
| 410 |
def compare_with_long_context(api_key, anthropic_api_key, input_text, focus_plan_path, selected_summaries, display_placeholder):
    """
    Compares plans using a long context model.

    Loads the focus plan, concatenates the selected summary documents, sends
    the question plus both texts to Anthropic's Text Completions endpoint and
    renders the completion into `display_placeholder`.

    Args:
        api_key (str): OpenAI API key.
        anthropic_api_key (str): Anthropic API key.
        input_text (str): The comparison question to ask.
        focus_plan_path: Path to the focus plan or uploaded file.
        selected_summaries (list): List of selected summary documents
            (display names as shown in the selection UI).
        display_placeholder: Streamlit placeholder for displaying results.

    Raises:
        ValueError: If `focus_plan_path` is neither an uploaded file nor a
            string path.
        FileNotFoundError: If a selected summary cannot be found in the
            "CAPS_Summaries" directory.
    """
    # Set the API keys
    os.environ["OPENAI_API_KEY"] = api_key
    os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key

    # Load focus documents
    if isinstance(focus_plan_path, st.runtime.uploaded_file_manager.UploadedFile):
        # If focus_plan_path is an uploaded file
        focus_docs = load_documents_from_pdf(focus_plan_path)
    elif isinstance(focus_plan_path, str):
        # If focus_plan_path is a file path
        focus_loader = PyPDFLoader(focus_plan_path)
        focus_docs = focus_loader.load()
    else:
        raise ValueError("Invalid focus plan input type. Must be an uploaded file or a file path.")

    # Concatenate selected summary documents
    summaries_directory = "CAPS_Summaries"
    summary_texts = []
    for filename in selected_summaries:
        summary_path = _resolve_summary_path(summaries_directory, filename)
        # Explicit encoding so the result does not depend on the platform default.
        with open(summary_path, 'r', encoding="utf-8") as file:
            summary_texts.append(file.read() + "\n\n")
    summaries_content = "".join(summary_texts)

    # Prepare the context
    # NOTE(review): the statement that builds `focus_context` is not visible in
    # the reviewed excerpt; joining each document's page_content is the
    # presumed original behaviour -- confirm against the full file.
    focus_context = "\n\n".join(doc.page_content for doc in focus_docs)

    # Create the client and message
    client = anthropic.Anthropic(api_key=anthropic_api_key)
    # The legacy Text Completions endpoint rejects prompts that are not framed
    # as a "\n\nHuman:" turn followed by a trailing "\n\nAssistant:" turn.
    prompt = (
        f"\n\nHuman: {input_text}\n\nFocus Document:\n{focus_context}"
        f"\n\nSummaries:\n{summaries_content}\n\nAssistant:"
    )
    response = client.completions.create(
        model="claude-2",
        max_tokens_to_sample=1024,
        prompt=prompt
    )

    # Display the answer
    answer = response.completion
    # NOTE(review): the model output is rendered with unsafe_allow_html=True;
    # since it is derived from uploaded documents, confirm raw HTML is intended.
    display_placeholder.markdown(f"**Answer:**\n{answer}", unsafe_allow_html=True)


def _resolve_summary_path(summaries_directory, display_name):
    """Return the path of the summary file that `display_name` refers to."""
    # Fast path: the existing convention of restoring only the '_Summary' suffix.
    direct_path = os.path.join(
        summaries_directory,
        f"{display_name.replace(' Summary', '_Summary')}.md"
    )
    if os.path.exists(direct_path):
        return direct_path
    # Fallback: the selection UI shows each file name with ".md" removed and
    # every underscore replaced by a space, so match on that same transform
    # for files whose names contain additional underscores.
    if os.path.isdir(summaries_directory):
        for candidate in sorted(os.listdir(summaries_directory)):
            if not candidate.endswith(".md"):
                continue
            if candidate.replace(".md", "").replace("_", " ") == display_name:
                return os.path.join(summaries_directory, candidate)
    # Nothing matched: return the conventional path so open() raises a
    # FileNotFoundError naming the file that was expected.
    return direct_path
|
| 460 |
|
| 461 |
# Streamlit app layout with tabs
|
| 462 |
st.title("Climate Policy Analysis Tool")
|
|
|
|
| 465 |
api_key = st.text_input("Enter your OpenAI API key:", type="password", key="openai_key")
|
| 466 |
|
| 467 |
# Create tabs
|
| 468 |
+
# Tab titles, listed in the order they are shown in the UI
tab_titles = [
    "Summary Generation",
    "Multi-Plan QA (Shared Vectorstore)",
    "Multi-Plan QA (Multi-Vectorstore)",
    "Plan Comparison Tool",
    "Plan Comparison with Long Context Model",
]
tab1, tab2, tab3, tab4, tab5 = st.tabs(tab_titles)
|
| 475 |
|
| 476 |
# First tab: Summary Generation
|
| 477 |
with tab1:
|
| 478 |
+
uploaded_file = st.file_uploader(
|
| 479 |
+
"Upload a Climate Action Plan in PDF format",
|
| 480 |
+
type="pdf",
|
| 481 |
+
key="upload_file"
|
| 482 |
+
)
|
| 483 |
|
| 484 |
prompt_file_path = "Prompts/summary_tool_system_prompt.md"
|
| 485 |
questions_file_path = "Prompts/summary_tool_questions.md"
|
|
|
|
| 493 |
display_placeholder = st.empty()
|
| 494 |
with st.spinner("Processing..."):
|
| 495 |
try:
|
| 496 |
+
results = process_pdf(
|
| 497 |
+
api_key,
|
| 498 |
+
uploaded_file,
|
| 499 |
+
questions_file_path,
|
| 500 |
+
prompt_file_path,
|
| 501 |
+
display_placeholder
|
| 502 |
+
)
|
| 503 |
markdown_text = "\n".join(results)
|
| 504 |
+
|
| 505 |
# Use the uploaded file's name for the download file
|
| 506 |
base_name = os.path.splitext(uploaded_file.name)[0]
|
| 507 |
download_file_name = f"{base_name}_Summary.md"
|
| 508 |
+
|
| 509 |
st.download_button(
|
| 510 |
label="Download Results as Markdown",
|
| 511 |
data=markdown_text,
|
|
|
|
| 516 |
except Exception as e:
|
| 517 |
st.error(f"An error occurred: {e}")
|
| 518 |
|
| 519 |
+
# Second tab: Multi-Plan QA (Shared Vectorstore)
|
| 520 |
with tab2:
|
| 521 |
input_text = st.text_input("Ask a question:", key="multi_plan_input")
|
| 522 |
if st.button("Ask", key="multi_plan_qa_button"):
|
|
|
|
| 536 |
except Exception as e:
|
| 537 |
st.error(f"An error occurred: {e}")
|
| 538 |
|
| 539 |
+
# Third tab: Multi-Plan QA (Multi-Vectorstore)
|
| 540 |
with tab3:
|
| 541 |
user_input = st.text_input("Ask a question:", key="multi_vectorstore_input")
|
| 542 |
if st.button("Ask", key="multi_vectorstore_qa_button"):
|
|
|
|
| 548 |
display_placeholder3 = st.empty()
|
| 549 |
with st.spinner("Processing..."):
|
| 550 |
try:
|
| 551 |
+
process_multi_plan_qa_multi_vectorstore(
|
| 552 |
api_key,
|
| 553 |
user_input,
|
| 554 |
display_placeholder3
|
|
|
|
| 564 |
vectorstore_documents = list_vector_store_documents()
|
| 565 |
|
| 566 |
# Option to upload a new plan or select from existing vector stores
|
| 567 |
+
focus_option = st.radio(
|
| 568 |
+
"Choose a focus plan:",
|
| 569 |
+
("Select from existing vector stores", "Upload a new plan"),
|
| 570 |
+
key="focus_option"
|
| 571 |
+
)
|
| 572 |
|
| 573 |
if focus_option == "Upload a new plan":
|
| 574 |
+
focus_uploaded_file = st.file_uploader(
|
| 575 |
+
"Upload a Climate Action Plan to compare",
|
| 576 |
+
type="pdf",
|
| 577 |
+
key="focus_upload"
|
| 578 |
+
)
|
| 579 |
if focus_uploaded_file is not None:
|
| 580 |
# Directly use the uploaded file
|
| 581 |
focus_input = focus_uploaded_file
|
|
|
|
| 583 |
focus_input = None
|
| 584 |
else:
|
| 585 |
# Select a focus plan from existing vector stores
|
| 586 |
+
selected_focus_plan = st.selectbox(
|
| 587 |
+
"Select a focus plan:",
|
| 588 |
+
vectorstore_documents,
|
| 589 |
+
key="select_focus_plan"
|
| 590 |
+
)
|
| 591 |
+
focus_input = os.path.join(
|
| 592 |
+
"Individual_All_Vectorstores",
|
| 593 |
+
f"{selected_focus_plan.replace(' Summary', '_Summary')}_vectorstore"
|
| 594 |
+
)
|
| 595 |
|
| 596 |
# Option to upload comparison documents or select from existing vector stores
|
| 597 |
+
# Let the user either pick stored vector stores or upload PDFs to compare against
comparison_choices = ("Select from existing vector stores", "Upload new documents")
comparison_option = st.radio(
    "Choose comparison documents:",
    comparison_choices,
    key="comparison_option"
)

if comparison_option != "Upload new documents":
    # Existing vector stores: map each chosen display name to its store path
    selected_comparison_plans = st.multiselect(
        "Select comparison documents:",
        vectorstore_documents,
        key="select_comparison_plans"
    )
    # NOTE(review): only ' Summary' is mapped back to '_Summary' here; verify
    # that stored names contain no other underscores.
    comparison_inputs = [
        os.path.join(
            "Individual_All_Vectorstores",
            f"{plan_name.replace(' Summary', '_Summary')}_vectorstore"
        )
        for plan_name in selected_comparison_plans
    ]
else:
    # Uploaded PDFs are passed through as-is
    comparison_files = st.file_uploader(
        "Upload comparison documents",
        type="pdf",
        accept_multiple_files=True,
        key="comparison_files"
    )
    comparison_inputs = comparison_files
|
| 624 |
|
| 625 |
+
input_text = st.text_input(
|
| 626 |
+
"Ask a comparison question:",
|
| 627 |
+
key="comparison_input"
|
| 628 |
+
)
|
| 629 |
|
| 630 |
if st.button("Compare", key="compare_button"):
|
| 631 |
if not api_key:
|
|
|
|
| 641 |
with st.spinner("Processing..."):
|
| 642 |
try:
|
| 643 |
# Call the process_one_to_many_query function
|
| 644 |
+
process_one_to_many_query(
|
| 645 |
+
api_key,
|
| 646 |
+
focus_input,
|
| 647 |
+
comparison_inputs,
|
| 648 |
+
input_text,
|
| 649 |
+
display_placeholder4
|
| 650 |
+
)
|
| 651 |
except Exception as e:
|
| 652 |
st.error(f"An error occurred: {e}")
|
| 653 |
|
|
|
|
| 656 |
st.header("Plan Comparison with Long Context Model")
|
| 657 |
|
| 658 |
# Anthropics API Key Input
|
| 659 |
+
anthropic_api_key = st.text_input(
|
| 660 |
+
"Enter your Anthropic API key:",
|
| 661 |
+
type="password",
|
| 662 |
+
key="anthropic_key"
|
| 663 |
+
)
|
| 664 |
|
| 665 |
# Option to upload a new plan or select from a list
|
| 666 |
+
# Choose the focus plan: either an uploaded PDF or one of the PDFs in "CAPS"
focus_option = st.radio(
    "Choose a focus plan:",
    ("Select from existing plans", "Upload a new plan"),
    key="focus_option_long_context"
)

if focus_option == "Upload a new plan":
    focus_uploaded_file = st.file_uploader(
        "Upload a Climate Action Plan to compare",
        type="pdf",
        key="focus_upload_long_context"
    )
    # Directly use the uploaded file; this stays None until something is uploaded
    focus_plan_path = focus_uploaded_file
else:
    # List of existing plans in CAPS. Only the file extension is stripped
    # (str.replace would also drop a ".pdf" inside the name), and the list is
    # sorted because os.listdir() returns entries in arbitrary order.
    plan_list = sorted(
        os.path.splitext(f)[0] for f in os.listdir("CAPS") if f.endswith('.pdf')
    )
    selected_focus_plan = st.selectbox(
        "Select a focus plan:",
        plan_list,
        key="select_focus_plan_long_context"
    )
    # With no plans available the selectbox yields None; avoid building the
    # non-existent path "CAPS/None.pdf" in that case.
    if selected_focus_plan is not None:
        focus_plan_path = os.path.join("CAPS", f"{selected_focus_plan}.pdf")
    else:
        focus_plan_path = None
|
| 692 |
|
| 693 |
# List available summary documents for selection
|
| 694 |
# Offer every markdown summary in the summaries folder for comparison
summaries_directory = "CAPS_Summaries"
markdown_names = filter(
    lambda name: name.endswith('.md'),
    os.listdir(summaries_directory)
)
# Display names drop ".md" and show underscores as spaces
summary_files = [
    name.replace(".md", "").replace("_", " ") for name in markdown_names
]
selected_summaries = st.multiselect(
    "Select summary documents for comparison:",
    summary_files,
    key="selected_summaries"
)
|
| 704 |
|
| 705 |
+
input_text = st.text_input(
|
| 706 |
+
"Ask a comparison question:",
|
| 707 |
+
key="comparison_input_long_context"
|
| 708 |
+
)
|
| 709 |
|
| 710 |
if st.button("Compare with Long Context", key="compare_button_long_context"):
|
| 711 |
if not api_key:
|
|
|
|
| 720 |
display_placeholder = st.empty()
|
| 721 |
with st.spinner("Processing..."):
|
| 722 |
try:
|
| 723 |
+
compare_with_long_context(
|
| 724 |
+
api_key,
|
| 725 |
+
anthropic_api_key,
|
| 726 |
+
input_text,
|
| 727 |
+
focus_plan_path,
|
| 728 |
+
selected_summaries,
|
| 729 |
+
display_placeholder
|
| 730 |
+
)
|
| 731 |
except Exception as e:
|
| 732 |
+
st.error(f"An error occurred: {e}")
|