Update app.py
app.py CHANGED
@@ -121,6 +121,12 @@ df2 = pd.read_csv("cleaned2.csv")
 embeddings = torch.load("embeddings1.pt")
 embeddings2 = torch.load("embeddings2.pt")
 
+# Pre-extract DataFrame columns to avoid repeated iloc calls
+df_questions = df["question"].values
+df_links = df["link"].values
+df2_questions = df2["question"].values
+df2_links = df2["link"].values
+
 def predict(text):
     if not text or text.strip() == "":
         return "No query provided"
@@ -131,32 +137,37 @@ def predict(text):
     sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
     sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
 
-    # Get top 3 indices
-    top3_idx1 = sim_scores1.topk(3)
-    top3_idx2 = sim_scores2.topk(3)
+    # Get top 3 values and indices in one call
+    top3_scores1, top3_idx1 = sim_scores1.topk(3)
+    top3_scores2, top3_idx2 = sim_scores2.topk(3)
+
+    # Convert to CPU once
+    top3_idx1_cpu = top3_idx1.cpu().numpy()
+    top3_idx2_cpu = top3_idx2.cpu().numpy()
+    top3_scores1_cpu = top3_scores1.cpu().numpy()
+    top3_scores2_cpu = top3_scores2.cpu().numpy()
 
-    # Prepare results
+    # Prepare results using pre-extracted arrays
     results = {
-        "top1": [],
-        "top2": []
+        "top1": [
+            {
+                "question": df_questions[idx],
+                "link": df_links[idx],
+                "score": float(score)
+            }
+            for idx, score in zip(top3_idx1_cpu, top3_scores1_cpu)
+        ],
+        "top2": [
+            {
+                "question": df2_questions[idx],
+                "link": df2_links[idx],
+                "score": float(score)
+            }
+            for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
+        ]
     }
 
-    for idx in top3_idx1:
-        results["top1"].append({
-            "question": df.iloc[idx]["question"],
-            "link": df.iloc[idx]["link"],
-            "score": float(sim_scores1[idx])
-        })
-
-    for idx in top3_idx2:
-        results["top2"].append({
-            "question": df2.iloc[idx]["question"],
-            "link": df2.iloc[idx]["link"],
-            "score": float(sim_scores2[idx])
-        })
-
     return results
-
 
 # Match the EXACT structure of your working translation app
 title = "Search CSV"
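
For context, the sketch below shows the retrieval pattern the new code adopts, reduced to a single corpus: pre-extract the DataFrame columns once, take the top-k scores and indices with one topk call, move both to the CPU once, and build the result dicts with a comprehension. It is a minimal stand-in rather than the Space's actual app.py: the synthetic DataFrame, the 384-dimensional random embeddings, and the stubbed query embedding replace the cleaned CSVs, embeddings1.pt, and model.encode(text); only util.pytorch_cos_sim and the overall structure come from the diff above.

# Minimal sketch (not the Space's app.py): synthetic data stands in for the
# cleaned CSV and the precomputed embeddings1.pt, and a random vector stands
# in for model.encode(text). The pattern mirrors the diff above.
import pandas as pd
import torch
from sentence_transformers import util

# Stand-in corpus: 10 rows with the same columns the Space reads.
df = pd.DataFrame({
    "question": [f"question {i}" for i in range(10)],
    "link": [f"https://example.com/{i}" for i in range(10)],
})
# Stand-in for torch.load("embeddings1.pt"): one 384-dim vector per row.
embeddings = torch.randn(len(df), 384)

# Pre-extract columns once so the hot path indexes NumPy arrays, not .iloc.
df_questions = df["question"].values
df_links = df["link"].values

def predict(text):
    if not text or text.strip() == "":
        return "No query provided"

    # Stand-in for model.encode(text, convert_to_tensor=True).
    query_embedding = torch.randn(1, 384)

    sim_scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]

    # topk returns (values, indices) in one call; move both to CPU once.
    top3_scores, top3_idx = sim_scores.topk(3)
    top3_idx = top3_idx.cpu().numpy()
    top3_scores = top3_scores.cpu().numpy()

    # Build the result list with a comprehension over the pre-extracted arrays.
    return [
        {
            "question": df_questions[idx],
            "link": df_links[idx],
            "score": float(score),
        }
        for idx, score in zip(top3_idx, top3_scores)
    ]

print(predict("how do I filter a csv?"))

Besides skipping per-row iloc lookups, this shape sidesteps a subtle bug in the removed loop: sim_scores1.topk(3) returns a (values, indices) named tuple, so "for idx in top3_idx1" iterated over that pair rather than over the three indices, and on a GPU each float(sim_scores1[idx]) would have forced a separate device-to-host transfer.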