Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- README.md +23 -75
- app.py +1 -0
- lightweight_rag.py +820 -0
- requirements.txt +1 -0
- venue_ai_complete.py +146 -89
README.md
CHANGED
|
@@ -1,86 +1,34 @@
|
|
| 1 |
-
|
| 2 |
-
title: Yerevan Venue AI Assistant
|
| 3 |
-
emoji: 🍽️
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: purple
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.34.2
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
license: mit
|
| 11 |
-
---
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- **958 Venues**: Comprehensive database of restaurants, bars, pubs, cafes, and clubs
|
| 21 |
-
- **5-Star Reviews**: Integrated reviews from 727 venues with 5-star ratings
|
| 22 |
-
- **Smart Filtering**: Filter by rating, price range, and distance
|
| 23 |
-
- **Location-Aware**: Search by specific streets, landmarks, and districts
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
- **Distance Calculation**: Accurate distance measurements from user location
|
| 29 |
-
- **Bilingual Location Support**: Recognize locations in both Armenian and English
|
| 30 |
-
|
| 31 |
-
### 💬 Conversational AI
|
| 32 |
-
- **Natural Conversations**: Engage in casual chat and small talk
|
| 33 |
-
- **Bilingual Support**: Communicate in Armenian (Հայերեն) or English
|
| 34 |
-
- **Template-Based Responses**: Fast, contextual responses
|
| 35 |
-
- **Smart Query Detection**: Automatically detects venue requests vs casual conversation
|
| 36 |
-
|
| 37 |
-
### 🇦🇲 Armenian Language Support
|
| 38 |
-
- **Native Armenian**: Full support for Armenian text input and output
|
| 39 |
-
- **Cultural Context**: Understanding of Armenian venue culture and preferences
|
| 40 |
-
- **Bilingual Categories**: Recognize venue types in both languages (փաբ, ռեստորան, բար, etc.)
|
| 41 |
-
|
| 42 |
-
## ๐ How to Use
|
| 43 |
-
|
| 44 |
-
### Venue Recommendations
|
| 45 |
-
```
|
| 46 |
-
"Find me a good pub on Pushkin Street"
|
| 47 |
-
"Restaurants near Opera House with rating above 4"
|
| 48 |
-
"բարեր Մաշտոցի պողոտայում" (bars on Mashtots Avenue)
|
| 49 |
-
"փաբեր Նալբանդյան փողոցում" (pubs on Nalbandyan Street)
|
| 50 |
-
```
|
| 51 |
-
|
| 52 |
-
### Casual Conversation
|
| 53 |
-
```
|
| 54 |
-
"Hello! How are you?"
|
| 55 |
-
"What can you help me with?"
|
| 56 |
-
"բարև ձեզ, ինչպես եք?" (Hello, how are you?)
|
| 57 |
-
"Thanks for your help!"
|
| 58 |
-
```
|
| 59 |
-
|
| 60 |
-
### Location-Based Queries
|
| 61 |
-
```
|
| 62 |
-
"Any good restaurants near Cascade?"
|
| 63 |
-
"Bars on Saryan Street"
|
| 64 |
-
"սրճարաններ Հանրապետության հրապարակի մոտ" (cafes near Republic Square)
|
| 65 |
```
|
| 66 |
|
| 67 |
-
##
|
| 68 |
-
|
| 69 |
-
### Smart Filtering Options
|
| 70 |
-
- **Minimum Rating**: 0-5 stars (default: 3.0)
|
| 71 |
-
- **Price Range**: Budget, Mid-range, Expensive, or All
|
| 72 |
-
- **Maximum Distance**: 0.5-20 km from specified location
|
| 73 |
|
| 74 |
-
|
| 75 |
-
-
|
| 76 |
-
-
|
| 77 |
-
-
|
| 78 |
|
| 79 |
-
|
| 80 |
-
- Automatically detects input language
|
| 81 |
-
- Provides responses in the same language as the query
|
| 82 |
-
- Supports mixed-language conversations
|
| 83 |
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
*Built with ❤️ for the Yerevan community. Combining traditional Armenian hospitality with modern AI technology.*
|
|
|
|
| 1 |
+
# Yerevan Venue AI Assistant
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
Bilingual venue recommendation system for Yerevan, Armenia with RAG-enhanced search capabilities.
|
| 4 |
|
| 5 |
+
## Features
|
| 6 |
|
| 7 |
+
- **Bilingual Support**: Armenian and English
|
| 8 |
+
- **Location-Aware**: 158+ known locations and landmarks
|
| 9 |
+
- **Smart Search**: RAG-enhanced with synonym expansion
|
| 10 |
+
- **Review Integration**: review analysis
|
| 11 |
+
- **Conversational**: Chat interface with natural language
|
| 12 |
|
| 13 |
+
## Quick Start
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
+
```bash
|
| 16 |
+
pip install -r requirements.txt
|
| 17 |
+
python app.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
```
|
| 19 |
|
| 20 |
+
## Usage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
- **Chat**: "Hello! How are you?"
|
| 23 |
+
- **Find venues**: "bars near Opera House"
|
| 24 |
+
- **Armenian**: "բարեր Մաշտոցի մոտ"
|
| 25 |
+
- **Specific**: "craft beer pubs walking distance from Republic Square"
|
| 26 |
|
| 27 |
+
## Files
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
- `app.py` - Main application
|
| 30 |
+
- `venue_ai_complete.py` - Core AI engine
|
| 31 |
+
- `lightweight_rag.py` - RAG system
|
| 32 |
+
- `requirements.txt` - Dependencies
|
| 33 |
+
- `yerevan_*.json/csv` - Venue data
|
| 34 |
|
|
|
app.py
CHANGED
|
@@ -51,4 +51,5 @@ def main():
|
|
| 51 |
sys.exit(1)
|
| 52 |
|
| 53 |
if __name__ == "__main__":
|
|
|
|
| 54 |
main()
|
|
|
|
| 51 |
sys.exit(1)
|
| 52 |
|
| 53 |
if __name__ == "__main__":
|
| 54 |
+
|
| 55 |
main()
|
lightweight_rag.py
ADDED
|
@@ -0,0 +1,820 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
import logging
import math
import re
from collections import Counter, defaultdict
from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple

from geopy.distance import geodesic
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
class LightweightRAGEnhancer:
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def __init__(self):
|
| 16 |
+
self.geo_landmarks = self._initialize_comprehensive_geo_data()
|
| 17 |
+
self.synonym_map = self._initialize_smart_synonyms()
|
| 18 |
+
self.query_patterns = self._initialize_query_patterns()
|
| 19 |
+
self.user_preferences = defaultdict(float)
|
| 20 |
+
self.query_history = []
|
| 21 |
+
|
| 22 |
+
def _initialize_comprehensive_geo_data(self) -> Dict[str, Dict]:
|
| 23 |
+
return {
|
| 24 |
+
# Government & Administrative
|
| 25 |
+
"Republic Square": {
|
| 26 |
+
"coords": (40.1761, 44.5126),
|
| 27 |
+
"aliases": ["ีฐีกีถึีกีบีฅีฟีธึีฉีตีกีถ ีฐึีกีบีกึีกีฏ", "republic", "central square", "main square"],
|
| 28 |
+
"category": "landmark",
|
| 29 |
+
"importance": 10,
|
| 30 |
+
"description": "Central square of Yerevan, heart of the city"
|
| 31 |
+
},
|
| 32 |
+
"Presidential Palace": {
|
| 33 |
+
"coords": (40.1789, 44.5145),
|
| 34 |
+
"aliases": ["ีถีกีญีกีฃีกีฐีกีฏีกีถ", "presidential", "palace"],
|
| 35 |
+
"category": "government",
|
| 36 |
+
"importance": 8,
|
| 37 |
+
"description": "Official residence of Armenian President"
|
| 38 |
+
},
|
| 39 |
+
"National Assembly": {
|
| 40 |
+
"coords": (40.1823, 44.5167),
|
| 41 |
+
"aliases": ["ีกีฆีฃีกีตีซีถ ีชีธีฒีธีพ", "parliament", "assembly"],
|
| 42 |
+
"category": "government",
|
| 43 |
+
"importance": 7,
|
| 44 |
+
"description": "Armenian Parliament building"
|
| 45 |
+
},
|
| 46 |
+
|
| 47 |
+
# Cultural & Historical Sites
|
| 48 |
+
"Opera House": {
|
| 49 |
+
"coords": (40.1836, 44.5098),
|
| 50 |
+
"aliases": ["ึ
ีบีฅึีก", "ึ
ีบีฅึีกีตีซ ีฟีธึีถ", "opera", "opera house", "spendiaryan"],
|
| 51 |
+
"category": "cultural",
|
| 52 |
+
"importance": 10,
|
| 53 |
+
"description": "Armenian National Opera and Ballet Theatre"
|
| 54 |
+
},
|
| 55 |
+
"Cascade": {
|
| 56 |
+
"coords": (40.1876, 44.5086),
|
| 57 |
+
"aliases": ["ีฏีกีฝีฏีกีค", "cascade complex", "cafesjian", "art center"],
|
| 58 |
+
"category": "cultural",
|
| 59 |
+
"importance": 10,
|
| 60 |
+
"description": "Giant stairway and cultural center with modern art"
|
| 61 |
+
},
|
| 62 |
+
"Matenadaran": {
|
| 63 |
+
"coords": (40.1901, 44.5167),
|
| 64 |
+
"aliases": ["ีดีกีฟีฅีถีกีคีกึีกีถ", "manuscript repository", "mesrop mashtots"],
|
| 65 |
+
"category": "cultural",
|
| 66 |
+
"importance": 9,
|
| 67 |
+
"description": "Ancient manuscript repository and museum"
|
| 68 |
+
},
|
| 69 |
+
"Blue Mosque": {
|
| 70 |
+
"coords": (40.1733, 44.5151),
|
| 71 |
+
"aliases": ["ีฏีกีบีธึีตีฟ ีดีฆีฏีซีฉ", "blue mosque", "gรถk medrese"],
|
| 72 |
+
"category": "religious",
|
| 73 |
+
"importance": 8,
|
| 74 |
+
"description": "Historic 18th century mosque"
|
| 75 |
+
},
|
| 76 |
+
"Mother Armenia": {
|
| 77 |
+
"coords": (40.1856, 44.5098),
|
| 78 |
+
"aliases": ["ีดีกีตึ ีฐีกีตีกีฝีฟีกีถ", "mother armenia", "victory park"],
|
| 79 |
+
"category": "monument",
|
| 80 |
+
"importance": 9,
|
| 81 |
+
"description": "Iconic statue overlooking Yerevan"
|
| 82 |
+
},
|
| 83 |
+
"Erebuni Fortress": {
|
| 84 |
+
"coords": (40.1234, 44.5345),
|
| 85 |
+
"aliases": ["ีงึีฅีขีธึีถีซ", "erebuni", "fortress", "ancient yerevan"],
|
| 86 |
+
"category": "historical",
|
| 87 |
+
"importance": 8,
|
| 88 |
+
"description": "Ancient Urartian fortress, birthplace of Yerevan"
|
| 89 |
+
},
|
| 90 |
+
"Saint Gregory Cathedral": {
|
| 91 |
+
"coords": (40.1756, 44.5089),
|
| 92 |
+
"aliases": ["ีฝีธึึีข ีฃึีซีฃีธึ", "cathedral", "gregory illuminator"],
|
| 93 |
+
"category": "religious",
|
| 94 |
+
"importance": 8,
|
| 95 |
+
"description": "Largest Armenian Apostolic cathedral"
|
| 96 |
+
},
|
| 97 |
+
|
| 98 |
+
# Shopping & Commercial
|
| 99 |
+
"Northern Avenue": {
|
| 100 |
+
"coords": (40.1792, 44.5146),
|
| 101 |
+
"aliases": ["ีฐีตีธึีฝีซีฝีกีตีซีถ ีบีธีฒีธีฟีก", "northern", "pedestrian street"],
|
| 102 |
+
"category": "shopping",
|
| 103 |
+
"importance": 9,
|
| 104 |
+
"description": "Main pedestrian shopping street"
|
| 105 |
+
},
|
| 106 |
+
"Vernissage Market": {
|
| 107 |
+
"coords": (40.1823, 44.5134),
|
| 108 |
+
"aliases": ["ีพีฅึีถีซีฝีกีช", "vernissage", "flea market", "weekend market"],
|
| 109 |
+
"category": "shopping",
|
| 110 |
+
"importance": 8,
|
| 111 |
+
"description": "Famous weekend arts and crafts market"
|
| 112 |
+
},
|
| 113 |
+
"Dalma Garden Mall": {
|
| 114 |
+
"coords": (40.1567, 44.4789),
|
| 115 |
+
"aliases": ["ีคีกีฌีดีก", "dalma", "mall", "shopping center"],
|
| 116 |
+
"category": "shopping",
|
| 117 |
+
"importance": 7,
|
| 118 |
+
"description": "Large shopping and entertainment complex"
|
| 119 |
+
},
|
| 120 |
+
"Yerevan Mall": {
|
| 121 |
+
"coords": (40.1934, 44.4823),
|
| 122 |
+
"aliases": ["yerevan mall", "mall", "shopping"],
|
| 123 |
+
"category": "shopping",
|
| 124 |
+
"importance": 7,
|
| 125 |
+
"description": "Major shopping mall in Yerevan"
|
| 126 |
+
},
|
| 127 |
+
"Rossia Mall": {
|
| 128 |
+
"coords": (40.1612, 44.4934),
|
| 129 |
+
"aliases": ["ีผีธีฝีซีก", "rossia", "russia mall"],
|
| 130 |
+
"category": "shopping",
|
| 131 |
+
"importance": 6,
|
| 132 |
+
"description": "Shopping center with various stores"
|
| 133 |
+
},
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
"Zvartnots Airport": {
|
| 137 |
+
"coords": (40.1473, 44.3959),
|
| 138 |
+
"aliases": ["ีฆีพีกึีฉีถีธึ", "airport", "international airport"],
|
| 139 |
+
"category": "transport",
|
| 140 |
+
"importance": 9,
|
| 141 |
+
"description": "Main international airport of Armenia"
|
| 142 |
+
},
|
| 143 |
+
"Central Railway Station": {
|
| 144 |
+
"coords": (40.1567, 44.4912),
|
| 145 |
+
"aliases": ["ีฅึีฏีกีฉีฃีฎีกีตีซีถ", "train station", "railway"],
|
| 146 |
+
"category": "transport",
|
| 147 |
+
"importance": 6,
|
| 148 |
+
"description": "Main railway station"
|
| 149 |
+
},
|
| 150 |
+
"Kilikia Bus Station": {
|
| 151 |
+
"coords": (40.1645, 44.4823),
|
| 152 |
+
"aliases": ["ีฏีซีฌีซีฏีซีก", "bus station", "central bus"],
|
| 153 |
+
"category": "transport",
|
| 154 |
+
"importance": 7,
|
| 155 |
+
"description": "Central bus terminal"
|
| 156 |
+
},
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
"Victory Park": {
|
| 160 |
+
"coords": (40.1876, 44.5098),
|
| 161 |
+
"aliases": ["ีฐีกีฒีฉีกีถีกีฏีซ ีกีตีฃีซ", "victory", "park", "amusement park"],
|
| 162 |
+
"category": "park",
|
| 163 |
+
"importance": 8,
|
| 164 |
+
"description": "Large park with amusement rides and lake"
|
| 165 |
+
},
|
| 166 |
+
"Lovers Park": {
|
| 167 |
+
"coords": (40.1823, 44.5089),
|
| 168 |
+
"aliases": ["ีฝีซึีกีฐีกึีถีฅึีซ ีกีตีฃีซ", "lovers", "romantic park"],
|
| 169 |
+
"category": "park",
|
| 170 |
+
"importance": 7,
|
| 171 |
+
"description": "Romantic park popular for dates"
|
| 172 |
+
},
|
| 173 |
+
"English Park": {
|
| 174 |
+
"coords": (40.1789, 44.5178),
|
| 175 |
+
"aliases": ["ีกีถีฃีฌีซีกีฏีกีถ ีกีตีฃีซ", "english", "park"],
|
| 176 |
+
"category": "park",
|
| 177 |
+
"importance": 6,
|
| 178 |
+
"description": "Quiet park in city center"
|
| 179 |
+
},
|
| 180 |
+
"Children's Park": {
|
| 181 |
+
"coords": (40.1845, 44.5134),
|
| 182 |
+
"aliases": ["ีฅึีฅีญีกีถีฅึีซ ีกีตีฃีซ", "children", "kids park"],
|
| 183 |
+
"category": "park",
|
| 184 |
+
"importance": 6,
|
| 185 |
+
"description": "Family-friendly park with playgrounds"
|
| 186 |
+
},
|
| 187 |
+
"Circular Park": {
|
| 188 |
+
"coords": (40.1823, 44.5201),
|
| 189 |
+
"aliases": ["ีทึีปีกีถีกีตีซีถ ีกีตีฃีซ", "circular", "round park"],
|
| 190 |
+
"category": "park",
|
| 191 |
+
"importance": 5,
|
| 192 |
+
"description": "Circular park around city center"
|
| 193 |
+
},
|
| 194 |
+
|
| 195 |
+
# Universities & Education
|
| 196 |
+
"American University": {
|
| 197 |
+
"coords": (40.1934, 44.4912),
|
| 198 |
+
"aliases": ["ีกีดีฅึีซีฏีตีกีถ ีฐีกีดีกีฌีฝีกึีกีถ", "aua", "american uni"],
|
| 199 |
+
"category": "education",
|
| 200 |
+
"importance": 7,
|
| 201 |
+
"description": "American University of Armenia"
|
| 202 |
+
},
|
| 203 |
+
"Yerevan State University": {
|
| 204 |
+
"coords": (40.1789, 44.5189),
|
| 205 |
+
"aliases": ["ีฅึึีกีถีซ ีบีฅีฟีกีฏีกีถ", "ysu", "state university"],
|
| 206 |
+
"category": "education",
|
| 207 |
+
"importance": 8,
|
| 208 |
+
"description": "Main state university of Armenia"
|
| 209 |
+
},
|
| 210 |
+
"French University": {
|
| 211 |
+
"coords": (40.1756, 44.5234),
|
| 212 |
+
"aliases": ["ึึีกีถีฝีซีกีฏีกีถ ีฐีกีดีกีฌีฝีกึีกีถ", "french uni", "ufar"],
|
| 213 |
+
"category": "education",
|
| 214 |
+
"importance": 6,
|
| 215 |
+
"description": "French University of Armenia"
|
| 216 |
+
},
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
"Armenia Marriott": {
|
| 220 |
+
"coords": (40.1761, 44.5145),
|
| 221 |
+
"aliases": ["ีดีกึีซีธีฟ", "marriott", "luxury hotel"],
|
| 222 |
+
"category": "hotel",
|
| 223 |
+
"importance": 8,
|
| 224 |
+
"description": "Luxury hotel on Republic Square"
|
| 225 |
+
},
|
| 226 |
+
"Tufenkian Historic Hotel": {
|
| 227 |
+
"coords": (40.1789, 44.5156),
|
| 228 |
+
"aliases": ["ีฟีธึึีฅีถีฏีตีกีถ", "tufenkian", "historic hotel"],
|
| 229 |
+
"category": "hotel",
|
| 230 |
+
"importance": 7,
|
| 231 |
+
"description": "Boutique historic hotel"
|
| 232 |
+
},
|
| 233 |
+
"Grand Hotel Yerevan": {
|
| 234 |
+
"coords": (40.1823, 44.5123),
|
| 235 |
+
"aliases": ["ีฃึีกีถีค ีฐีธีฟีฅีฌ", "grand hotel"],
|
| 236 |
+
"category": "hotel",
|
| 237 |
+
"importance": 7,
|
| 238 |
+
"description": "Centrally located grand hotel"
|
| 239 |
+
},
|
| 240 |
+
|
| 241 |
+
# Markets & Food
|
| 242 |
+
"GUM Market": {
|
| 243 |
+
"coords": (40.1789, 44.5178),
|
| 244 |
+
"aliases": ["ีฃีธึีด", "central market", "covered market"],
|
| 245 |
+
"category": "market",
|
| 246 |
+
"importance": 8,
|
| 247 |
+
"description": "Historic covered market"
|
| 248 |
+
},
|
| 249 |
+
"Pak Shuka": {
|
| 250 |
+
"coords": (40.1567, 44.5289),
|
| 251 |
+
"aliases": ["ึีกีฏ ีทีธึีฏีก", "closed market", "weekend market"],
|
| 252 |
+
"category": "market",
|
| 253 |
+
"importance": 6,
|
| 254 |
+
"description": "Large weekend market"
|
| 255 |
+
},
|
| 256 |
+
"Fish Market": {
|
| 257 |
+
"coords": (40.1634, 44.5167),
|
| 258 |
+
"aliases": ["ีฑีฏีกีถ ีทีธึีฏีก", "fish", "seafood market"],
|
| 259 |
+
"category": "market",
|
| 260 |
+
"importance": 5,
|
| 261 |
+
"description": "Specialized fish and seafood market"
|
| 262 |
+
},
|
| 263 |
+
|
| 264 |
+
# Entertainment & Nightlife Districts
|
| 265 |
+
"Saryan Street": {
|
| 266 |
+
"coords": (40.1851, 44.5086),
|
| 267 |
+
"aliases": ["ีฝีกึีตีกีถีซ", "saryan", "martiros saryan", "nightlife street"],
|
| 268 |
+
"category": "district",
|
| 269 |
+
"importance": 9,
|
| 270 |
+
"description": "Popular street with bars, restaurants and nightlife"
|
| 271 |
+
},
|
| 272 |
+
"Abovyan Street": {
|
| 273 |
+
"coords": (40.1776, 44.5146),
|
| 274 |
+
"aliases": ["ีกีขีธีพีตีกีถีซ", "abovyan", "main street"],
|
| 275 |
+
"category": "district",
|
| 276 |
+
"importance": 8,
|
| 277 |
+
"description": "Historic street with shops and cafes"
|
| 278 |
+
},
|
| 279 |
+
"Tumanyan Street": {
|
| 280 |
+
"coords": (40.1822, 44.5149),
|
| 281 |
+
"aliases": ["ีฉีธึีดีกีถีตีกีถีซ", "tumanyan", "hovhannes tumanyan"],
|
| 282 |
+
"category": "district",
|
| 283 |
+
"importance": 7,
|
| 284 |
+
"description": "Cultural street with bookstores and cafes"
|
| 285 |
+
},
|
| 286 |
+
|
| 287 |
+
# Specific Neighborhoods
|
| 288 |
+
"Kentron District": {
|
| 289 |
+
"coords": (40.1792, 44.5146),
|
| 290 |
+
"aliases": ["ีฏีฅีถีฟึีธีถ", "center", "downtown", "city center"],
|
| 291 |
+
"category": "district",
|
| 292 |
+
"importance": 10,
|
| 293 |
+
"description": "Central district of Yerevan"
|
| 294 |
+
},
|
| 295 |
+
"Arabkir": {
|
| 296 |
+
"coords": (40.2089, 44.4856),
|
| 297 |
+
"aliases": ["ีกึีกีขีฏีซึ", "arabkir district"],
|
| 298 |
+
"category": "district",
|
| 299 |
+
"importance": 6,
|
| 300 |
+
"description": "Northern residential district"
|
| 301 |
+
},
|
| 302 |
+
"Avan": {
|
| 303 |
+
"coords": (40.2156, 44.5489),
|
| 304 |
+
"aliases": ["ีกีพีกีถ", "avan district"],
|
| 305 |
+
"category": "district",
|
| 306 |
+
"importance": 5,
|
| 307 |
+
"description": "Northern district of Yerevan"
|
| 308 |
+
},
|
| 309 |
+
"Erebuni": {
|
| 310 |
+
"coords": (40.1345, 44.5234),
|
| 311 |
+
"aliases": ["ีงึีฅีขีธึีถีซ", "erebuni district"],
|
| 312 |
+
"category": "district",
|
| 313 |
+
"importance": 6,
|
| 314 |
+
"description": "Southern district with historical sites"
|
| 315 |
+
},
|
| 316 |
+
|
| 317 |
+
# Sports & Recreation
|
| 318 |
+
"Republican Stadium": {
|
| 319 |
+
"coords": (40.1856, 44.5178),
|
| 320 |
+
"aliases": ["ีฐีกีถึีกีบีฅีฟีกีฏีกีถ", "stadium", "football stadium"],
|
| 321 |
+
"category": "sports",
|
| 322 |
+
"importance": 7,
|
| 323 |
+
"description": "Main football stadium of Armenia"
|
| 324 |
+
},
|
| 325 |
+
"Karen Demirchyan Complex": {
|
| 326 |
+
"coords": (40.1923, 44.5089),
|
| 327 |
+
"aliases": ["ีคีฅีดีซึีณีตีกีถ", "sports complex", "hamalir"],
|
| 328 |
+
"category": "sports",
|
| 329 |
+
"importance": 7,
|
| 330 |
+
"description": "Large sports and concert complex"
|
| 331 |
+
},
|
| 332 |
+
"Tennis Academy": {
|
| 333 |
+
"coords": (40.1789, 44.4967),
|
| 334 |
+
"aliases": ["ีฉีฅีถีซีฝีซ ีกีฏีกีคีฅีดีซีก", "tennis", "sports academy"],
|
| 335 |
+
"category": "sports",
|
| 336 |
+
"importance": 5,
|
| 337 |
+
"description": "Professional tennis training facility"
|
| 338 |
+
},
|
| 339 |
+
|
| 340 |
+
# Business Centers
|
| 341 |
+
"Business Center Yerevan": {
|
| 342 |
+
"coords": (40.1823, 44.5201),
|
| 343 |
+
"aliases": ["ีขีซีฆีถีฅีฝ ีฏีฅีถีฟึีธีถ", "business center", "office complex"],
|
| 344 |
+
"category": "business",
|
| 345 |
+
"importance": 6,
|
| 346 |
+
"description": "Modern business and office complex"
|
| 347 |
+
},
|
| 348 |
+
"Kentron Business Center": {
|
| 349 |
+
"coords": (40.1789, 44.5167),
|
| 350 |
+
"aliases": ["ีฏีฅีถีฟึีธีถ ีขีซีฆีถีฅีฝ", "central business"],
|
| 351 |
+
"category": "business",
|
| 352 |
+
"importance": 5,
|
| 353 |
+
"description": "Central business district offices"
|
| 354 |
+
},
|
| 355 |
+
|
| 356 |
+
# Medical Centers
|
| 357 |
+
"Nairi Medical Center": {
|
| 358 |
+
"coords": (40.1867, 44.5123),
|
| 359 |
+
"aliases": ["ีถีกีซึีซ ีขีชีทีฏีกีฏีกีถ", "nairi", "medical center"],
|
| 360 |
+
"category": "medical",
|
| 361 |
+
"importance": 6,
|
| 362 |
+
"description": "Major private medical facility"
|
| 363 |
+
},
|
| 364 |
+
"Surb Grigor Hospital": {
|
| 365 |
+
"coords": (40.1756, 44.5201),
|
| 366 |
+
"aliases": ["ีฝีธึึีข ีฃึีซีฃีธึ", "hospital", "medical"],
|
| 367 |
+
"category": "medical",
|
| 368 |
+
"importance": 6,
|
| 369 |
+
"description": "Major hospital in Yerevan"
|
| 370 |
+
},
|
| 371 |
+
|
| 372 |
+
# Additional Landmarks
|
| 373 |
+
"Swan Lake": {
|
| 374 |
+
"coords": (40.1837, 44.5135),
|
| 375 |
+
"aliases": ["ีฏีกึีกีบีซ ีฌีซีณ", "swan lake", "lake"],
|
| 376 |
+
"category": "landmark",
|
| 377 |
+
"importance": 7,
|
| 378 |
+
"description": "Artificial lake in city center"
|
| 379 |
+
},
|
| 380 |
+
"Freedom Square": {
|
| 381 |
+
"coords": (40.1834, 44.5089),
|
| 382 |
+
"aliases": ["ีกีฆีกีฟีธึีฉีตีกีถ ีฐึีกีบีกึีกีฏ", "freedom", "liberty square"],
|
| 383 |
+
"category": "landmark",
|
| 384 |
+
"importance": 7,
|
| 385 |
+
"description": "Historic square near Opera House"
|
| 386 |
+
},
|
| 387 |
+
"Charles Aznavour Square": {
|
| 388 |
+
"coords": (40.1845, 44.5101),
|
| 389 |
+
"aliases": ["ีกีฆีถีกีพีธึึ", "aznavour", "charles aznavour"],
|
| 390 |
+
"category": "landmark",
|
| 391 |
+
"importance": 6,
|
| 392 |
+
"description": "Square dedicated to famous Armenian-French singer"
|
| 393 |
+
}
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
def _initialize_smart_synonyms(self) -> Dict[str, Set[str]]:
|
| 397 |
+
"""Initialize smart synonym mapping for better search"""
|
| 398 |
+
return {
|
| 399 |
+
# Venue types
|
| 400 |
+
"pub": {"bar", "tavern", "brewpub", "beerhouse", "ale house", "ีบีกีข", "ึีกีข"},
|
| 401 |
+
"bar": {"pub", "lounge", "cocktail bar", "wine bar", "ีขีกึ", "ีขีกีผ"},
|
| 402 |
+
"restaurant": {"dining", "eatery", "bistro", "cafe", "ีผีฅีฝีฟีธึีกีถ"},
|
| 403 |
+
"cafe": {"coffee shop", "coffeehouse", "bistro", "ีฝึีณีกึีกีถ"},
|
| 404 |
+
"club": {"nightclub", "disco", "dance club", "ีกีฏีธึีดีข"},
|
| 405 |
+
"hookah": {"shisha", "waterpipe", "ีฐีธึีฏีก", "ีถีกึีฃีซีฌีฅ"},
|
| 406 |
+
|
| 407 |
+
# Food & Drink
|
| 408 |
+
"beer": {"ale", "lager", "draft", "tap", "brew", "ีฃีกึีฅีปีธึึ"},
|
| 409 |
+
"draft": {"tap", "on tap", "draught", "fresh beer"},
|
| 410 |
+
"craft": {"artisan", "microbrewery", "specialty", "handcrafted"},
|
| 411 |
+
"cocktail": {"mixed drink", "martini", "mojito", "ีฏีธีฏีฟีฅีตีฌ"},
|
| 412 |
+
"wine": {"vino", "vintage", "grape", "ีฃีซีถีซ"},
|
| 413 |
+
"coffee": {"espresso", "cappuccino", "latte", "ีฝีธึึีณ"},
|
| 414 |
+
|
| 415 |
+
# Atmosphere
|
| 416 |
+
"romantic": {"intimate", "cozy", "date night", "couples"},
|
| 417 |
+
"lively": {"energetic", "vibrant", "busy", "active"},
|
| 418 |
+
"quiet": {"peaceful", "calm", "relaxed", "tranquil"},
|
| 419 |
+
"outdoor": {"terrace", "patio", "garden", "rooftop"},
|
| 420 |
+
|
| 421 |
+
# Location terms
|
| 422 |
+
"near": {"close to", "by", "next to", "around", "ีดีธีฟ", "ีฏีธีฒึีซีถ"},
|
| 423 |
+
"center": {"central", "downtown", "middle", "ีฏีฅีถีฟึีธีถ"},
|
| 424 |
+
"walking": {"on foot", "pedestrian", "walk", "ึีกีตีฌีฅีฌีธีพ"},
|
| 425 |
+
|
| 426 |
+
# Quality descriptors
|
| 427 |
+
"best": {"top", "excellent", "finest", "premium", "ีฌีกีพีกีฃีธึีตีถ"},
|
| 428 |
+
"good": {"nice", "decent", "quality", "ีฌีกีพ"},
|
| 429 |
+
"cheap": {"affordable", "budget", "inexpensive", "ีงีชีกีถ"},
|
| 430 |
+
"expensive": {"pricey", "upscale", "luxury", "ีฉีกีถีฏ"}
|
| 431 |
+
}
|
| 432 |
+
|
| 433 |
+
def _initialize_query_patterns(self) -> Dict[str, str]:
|
| 434 |
+
"""Initialize common query patterns for better understanding"""
|
| 435 |
+
return {
|
| 436 |
+
r"near|close to|by|next to|around|ีดีธีฟ|ีฏีธีฒึีซีถ": "proximity",
|
| 437 |
+
r"best|top|finest|excellent|ีฌีกีพีกีฃีธึีตีถ": "quality_high",
|
| 438 |
+
r"cheap|affordable|budget|ีงีชีกีถ": "price_low",
|
| 439 |
+
r"expensive|upscale|luxury|ีฉีกีถีฏ": "price_high",
|
| 440 |
+
r"walking distance|walk|on foot|ึีกีตีฌีฅีฌีธีพ": "walking",
|
| 441 |
+
r"romantic|date|intimate|ีผีธีดีกีถีฟีซีฏ": "romantic",
|
| 442 |
+
r"group|friends|party|ีญีธึีดีข": "social",
|
| 443 |
+
r"quiet|peaceful|calm|ีฐีกีถีฃีซีฝีฟ": "quiet",
|
| 444 |
+
r"lively|busy|energetic|ีฏีฅีถีคีกีถีซ": "lively",
|
| 445 |
+
r"outdoor|terrace|patio|ีขีกึึ
ีฉีตีก": "outdoor"
|
| 446 |
+
}
|
| 447 |
+
|
| 448 |
+
def enhance_query(self, query: str) -> Dict[str, Any]:
    """Analyse a raw user query and bundle every derived signal.

    Args:
        query: The user's query text, passed unchanged to each analysis
            helper.

    Returns:
        A dict with the keys ``"original_query"``, ``"expanded_terms"``,
        ``"geo_context"``, ``"query_intent"``, ``"scoring_weights"`` and
        ``"search_radius"``, each holding the result of the corresponding
        helper method.
    """
    enhanced_data = {
        "original_query": query,
        "expanded_terms": self._expand_query_terms(query),
        "geo_context": self._extract_geo_context(query),
        "query_intent": self._analyze_query_intent(query),
        "scoring_weights": self._calculate_scoring_weights(query),
        "search_radius": self._determine_search_radius(query),
    }

    # Feed the finished analysis to the preference tracker before returning;
    # the same dict object is handed over, not a copy.
    self._update_user_preferences(query, enhanced_data)

    return enhanced_data
|
| 465 |
+
|
| 466 |
+
def _expand_query_terms(self, query: str) -> List[str]:
|
| 467 |
+
"""Expand query with synonyms and related terms"""
|
| 468 |
+
query_lower = query.lower()
|
| 469 |
+
expanded = set([query_lower])
|
| 470 |
+
|
| 471 |
+
# Add synonyms
|
| 472 |
+
for term, synonyms in self.synonym_map.items():
|
| 473 |
+
if term in query_lower:
|
| 474 |
+
expanded.update(synonyms)
|
| 475 |
+
# Add partial matches
|
| 476 |
+
for synonym in synonyms:
|
| 477 |
+
if len(synonym) > 3: # Avoid very short terms
|
| 478 |
+
expanded.add(synonym)
|
| 479 |
+
|
| 480 |
+
# Add morphological variations (simple stemming)
|
| 481 |
+
words = query_lower.split()
|
| 482 |
+
for word in words:
|
| 483 |
+
if len(word) > 4:
|
| 484 |
+
# Add common endings
|
| 485 |
+
expanded.add(word + "s")
|
| 486 |
+
expanded.add(word + "ing")
|
| 487 |
+
if word.endswith("s"):
|
| 488 |
+
expanded.add(word[:-1])
|
| 489 |
+
if word.endswith("ing"):
|
| 490 |
+
expanded.add(word[:-3])
|
| 491 |
+
|
| 492 |
+
return list(expanded)
|
| 493 |
+
|
| 494 |
+
def _extract_geo_context(self, query: str) -> Dict[str, any]:
|
| 495 |
+
"""Extract geographical context from query"""
|
| 496 |
+
query_lower = query.lower()
|
| 497 |
+
geo_context = {
|
| 498 |
+
"landmarks": [],
|
| 499 |
+
"proximity_terms": [],
|
| 500 |
+
"radius_hints": 1.0, # Default 1km
|
| 501 |
+
"coordinates": None
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
# Find mentioned landmarks
|
| 505 |
+
for landmark, data in self.geo_landmarks.items():
|
| 506 |
+
landmark_lower = landmark.lower()
|
| 507 |
+
if landmark_lower in query_lower:
|
| 508 |
+
geo_context["landmarks"].append({
|
| 509 |
+
"name": landmark,
|
| 510 |
+
"coords": data["coords"],
|
| 511 |
+
"importance": data["importance"],
|
| 512 |
+
"category": data["category"]
|
| 513 |
+
})
|
| 514 |
+
continue
|
| 515 |
+
|
| 516 |
+
# Check aliases
|
| 517 |
+
for alias in data["aliases"]:
|
| 518 |
+
if alias.lower() in query_lower:
|
| 519 |
+
geo_context["landmarks"].append({
|
| 520 |
+
"name": landmark,
|
| 521 |
+
"coords": data["coords"],
|
| 522 |
+
"importance": data["importance"],
|
| 523 |
+
"category": data["category"]
|
| 524 |
+
})
|
| 525 |
+
break
|
| 526 |
+
|
| 527 |
+
# Extract proximity terms
|
| 528 |
+
proximity_patterns = [
|
| 529 |
+
r"within (\d+)\s*(km|kilometers|miles?)",
|
| 530 |
+
r"(\d+)\s*(km|kilometers|miles?) (from|of|near)",
|
| 531 |
+
r"close to|near|by|next to|around|ีดีธีฟ|ีฏีธีฒึีซีถ"
|
| 532 |
+
]
|
| 533 |
+
|
| 534 |
+
for pattern in proximity_patterns:
|
| 535 |
+
matches = re.findall(pattern, query_lower)
|
| 536 |
+
if matches:
|
| 537 |
+
geo_context["proximity_terms"].extend(matches)
|
| 538 |
+
# Extract radius if specified
|
| 539 |
+
for match in matches:
|
| 540 |
+
if isinstance(match, tuple) and len(match) >= 2:
|
| 541 |
+
try:
|
| 542 |
+
radius = float(match[0])
|
| 543 |
+
unit = match[1].lower()
|
| 544 |
+
if "mile" in unit:
|
| 545 |
+
radius *= 1.609 # Convert to km
|
| 546 |
+
geo_context["radius_hints"] = radius
|
| 547 |
+
except (ValueError, IndexError):
|
| 548 |
+
pass
|
| 549 |
+
|
| 550 |
+
# Set primary coordinate if landmark found
|
| 551 |
+
if geo_context["landmarks"]:
|
| 552 |
+
# Use highest importance landmark as primary
|
| 553 |
+
primary = max(geo_context["landmarks"], key=lambda x: x["importance"])
|
| 554 |
+
geo_context["coordinates"] = primary["coords"]
|
| 555 |
+
|
| 556 |
+
return geo_context
|
| 557 |
+
|
| 558 |
+
def _analyze_query_intent(self, query: str) -> Dict[str, float]:
|
| 559 |
+
"""Analyze query intent with confidence scores"""
|
| 560 |
+
intent_scores = defaultdict(float)
|
| 561 |
+
query_lower = query.lower()
|
| 562 |
+
|
| 563 |
+
for pattern, intent in self.query_patterns.items():
|
| 564 |
+
if re.search(pattern, query_lower, re.IGNORECASE):
|
| 565 |
+
intent_scores[intent] += 1.0
|
| 566 |
+
|
| 567 |
+
# Normalize scores
|
| 568 |
+
if intent_scores:
|
| 569 |
+
max_score = max(intent_scores.values())
|
| 570 |
+
for intent in intent_scores:
|
| 571 |
+
intent_scores[intent] /= max_score
|
| 572 |
+
|
| 573 |
+
return dict(intent_scores)
|
| 574 |
+
|
| 575 |
+
def _calculate_scoring_weights(self, query: str) -> Dict[str, float]:
|
| 576 |
+
"""Calculate dynamic scoring weights based on query"""
|
| 577 |
+
weights = {
|
| 578 |
+
"name_match": 1.0,
|
| 579 |
+
"category_match": 1.0,
|
| 580 |
+
"summary_match": 1.0,
|
| 581 |
+
"location_match": 1.0,
|
| 582 |
+
"rating_boost": 1.0,
|
| 583 |
+
"distance_penalty": 1.0
|
| 584 |
+
}
|
| 585 |
+
|
| 586 |
+
query_lower = query.lower()
|
| 587 |
+
|
| 588 |
+
# Boost location matching for geo queries
|
| 589 |
+
geo_terms = ["near", "close", "by", "walking", "distance", "ีดีธีฟ", "ีฏีธีฒึีซีถ"]
|
| 590 |
+
if any(term in query_lower for term in geo_terms):
|
| 591 |
+
weights["location_match"] = 2.0
|
| 592 |
+
weights["distance_penalty"] = 1.5
|
| 593 |
+
|
| 594 |
+
# Boost name matching for specific venue searches
|
| 595 |
+
if len(query.split()) <= 3 and not any(term in query_lower for term in geo_terms):
|
| 596 |
+
weights["name_match"] = 2.0
|
| 597 |
+
|
| 598 |
+
# Boost category for type-specific searches
|
| 599 |
+
category_terms = ["pub", "bar", "restaurant", "cafe", "club"]
|
| 600 |
+
if any(term in query_lower for term in category_terms):
|
| 601 |
+
weights["category_match"] = 1.5
|
| 602 |
+
|
| 603 |
+
# Boost rating for quality searches
|
| 604 |
+
quality_terms = ["best", "top", "excellent", "good", "ีฌีกีพีกีฃีธึีตีถ"]
|
| 605 |
+
if any(term in query_lower for term in quality_terms):
|
| 606 |
+
weights["rating_boost"] = 1.5
|
| 607 |
+
|
| 608 |
+
return weights
|
| 609 |
+
|
| 610 |
+
def _determine_search_radius(self, query: str) -> float:
|
| 611 |
+
"""Determine appropriate search radius based on query"""
|
| 612 |
+
query_lower = query.lower()
|
| 613 |
+
|
| 614 |
+
# Walking distance queries
|
| 615 |
+
if any(term in query_lower for term in ["walk", "walking", "on foot", "ึีกีตีฌีฅีฌีธีพ"]):
|
| 616 |
+
return 0.5 # 500m
|
| 617 |
+
|
| 618 |
+
# Neighborhood queries
|
| 619 |
+
if any(term in query_lower for term in ["neighborhood", "area", "district", "ีฉีกีฒีกีดีกีฝ"]):
|
| 620 |
+
return 2.0 # 2km
|
| 621 |
+
|
| 622 |
+
# City-wide queries
|
| 623 |
+
if any(term in query_lower for term in ["yerevan", "city", "ีฅึึีกีถ", "ึีกีฒีกึ"]):
|
| 624 |
+
return 10.0 # 10km
|
| 625 |
+
|
| 626 |
+
# Default radius
|
| 627 |
+
return 1.5 # 1.5km
|
| 628 |
+
|
| 629 |
+
def _update_user_preferences(self, query: str, enhanced_data: Dict):
|
| 630 |
+
"""Learn from user query patterns (lightweight learning)"""
|
| 631 |
+
self.query_history.append({
|
| 632 |
+
"query": query,
|
| 633 |
+
"timestamp": len(self.query_history), # Simple timestamp
|
| 634 |
+
"geo_context": enhanced_data["geo_context"],
|
| 635 |
+
"intent": enhanced_data["query_intent"]
|
| 636 |
+
})
|
| 637 |
+
|
| 638 |
+
# Keep only recent history (memory efficient)
|
| 639 |
+
if len(self.query_history) > 100:
|
| 640 |
+
self.query_history = self.query_history[-50:]
|
| 641 |
+
|
| 642 |
+
# Update preferences based on patterns
|
| 643 |
+
for intent, score in enhanced_data["query_intent"].items():
|
| 644 |
+
self.user_preferences[intent] += score * 0.1 # Small learning rate
|
| 645 |
+
|
| 646 |
+
def calculate_enhanced_score(self, venue: Dict, enhanced_query: Dict) -> Tuple[float, Dict]:
    """
    Calculate a venue's relevance score together with a per-component breakdown.

    Args:
        venue: Venue record. Reads ``name``, ``category``, ``summary``,
            ``latitude``, ``longitude`` and ``rating``; missing or ``None``
            values are treated as empty / absent.
        enhanced_query: Analysis bundle. Reads ``scoring_weights``,
            ``expanded_terms``, ``geo_context`` and ``search_radius``.

    Returns:
        ``(score, explanation)``. ``score`` is never negative.
        ``explanation`` holds the weighted contribution of each component
        (``name_match``, ``category_match``, ``summary_match``,
        ``location_match``, ``rating_boost``, ``distance_penalty``) plus
        ``total``, which equals the returned score.
    """
    weights = enhanced_query["scoring_weights"]
    expanded_terms = enhanced_query["expanded_terms"]
    geo_context = enhanced_query["geo_context"]

    # `or ''` covers keys that are present but hold None.
    venue_name = str(venue.get('name') or '').lower()
    venue_category = str(venue.get('category') or '').lower()
    venue_summary = str(venue.get('summary') or '').lower()

    explanation = {
        "name_match": 0,
        "category_match": 0,
        "summary_match": 0,
        "location_match": 0,
        "rating_boost": 0,
        "distance_penalty": 0,
        "total": 0
    }

    # Name matching: 5 points per expanded term found in the name.
    name_score = 5 * sum(1 for term in expanded_terms if term in venue_name)
    explanation["name_match"] = name_score * weights["name_match"]

    # Category matching: 3 points per expanded term found in the category.
    category_score = 3 * sum(1 for term in expanded_terms if term in venue_category)
    explanation["category_match"] = category_score * weights["category_match"]

    # Summary matching on whole whitespace-separated tokens. Each occurrence
    # is worth 1 point until a term appears more than 10 times, after which
    # the per-occurrence weight decays.
    summary_words = venue_summary.split()
    summary_score = 0
    for term in expanded_terms:
        term_count = summary_words.count(term)
        if term_count > 0:
            term_weight = min(3.0, 1.0 / max(1, term_count * 0.1))
            summary_score += term_count * term_weight
    explanation["summary_match"] = summary_score * weights["summary_match"]

    # Location/proximity scoring - only when both the query and the venue
    # have coordinates.
    location_score = 0
    distance_penalty = 0
    anchor = geo_context["coordinates"]
    latitude = venue.get('latitude')
    longitude = venue.get('longitude')
    if anchor and latitude and longitude:
        distance = geodesic(anchor, (latitude, longitude)).kilometers
        search_radius = enhanced_query["search_radius"]

        # Closer venues get higher scores; the radius guard avoids a
        # division by zero.
        if search_radius > 0 and distance <= search_radius:
            location_score = max(0, 10 * (1 - distance / search_radius))

        # The penalty starts at half the search radius and grows linearly.
        if distance > search_radius * 0.5:
            distance_penalty = (distance - search_radius * 0.5) * 2

    explanation["location_match"] = location_score * weights["location_match"]
    explanation["distance_penalty"] = distance_penalty * weights["distance_penalty"]

    # Rating boost: only ratings above 3.0 contribute; unparseable ratings
    # are ignored.
    try:
        rating = float(venue.get('rating') or 0)
    except (ValueError, TypeError):
        rating = 0.0
    if rating > 0:
        rating_boost = (rating - 3.0) * 2
        explanation["rating_boost"] = max(0, rating_boost * weights["rating_boost"])

    score = (
        explanation["name_match"]
        + explanation["category_match"]
        + explanation["summary_match"]
        + explanation["location_match"]
        - explanation["distance_penalty"]
        + explanation["rating_boost"]
    )

    # Clamp once so the reported total always agrees with the returned score.
    score = max(0, score)
    explanation["total"] = score
    return score, explanation
|
| 732 |
+
|
| 733 |
+
def get_search_explanation(self, query: str, top_venues: List[Tuple[Dict, float, Dict]]) -> str:
    """Render a short, human-readable explanation of the top search results.

    Args:
        query: The query the results were produced for.
        top_venues: ``(venue, score, details)`` triples; only the first
            three are described. ``details`` is a per-component score
            breakdown.

    Returns:
        A header line followed by one numbered line per venue, joined with
        newlines, or a fixed "no venues" sentence when ``top_venues`` is
        empty.
    """
    if not top_venues:
        return "No venues found matching your criteria."

    # (breakdown key, label shown to the user), in display order.
    scored_components = (
        ("name_match", "name match"),
        ("category_match", "category match"),
        ("summary_match", "content match"),
        ("location_match", "location match"),
    )

    lines = [f"๐ Search results for: '{query}'\n"]

    for rank, (venue, score, details) in enumerate(top_venues[:3], 1):
        venue_name = venue.get('name', 'Unknown')
        rating = venue.get('rating', 'N/A')

        reasons = [
            f"{label} ({details[key]:.1f})"
            for key, label in scored_components
            if details[key] > 0
        ]
        # The rating reason shows the venue's rating, not the boost value.
        if details['rating_boost'] > 0:
            reasons.append(f"high rating ({rating}โญ)")

        reason_text = ", ".join(reasons) or "general match"
        lines.append(f"{rank}. **{venue_name}** (Score: {score:.1f}) - {reason_text}")

    return "\n".join(lines)
|
| 761 |
+
|
| 762 |
+
|
| 763 |
+
# Integration with your existing system
|
| 764 |
+
def integrate_lightweight_rag(venue_ai_instance):
    """Attach a LightweightRAGEnhancer to an existing venue AI object.

    Sets ``venue_ai_instance.rag_enhancer`` and replaces the instance's
    ``_smart_venue_search`` with a wrapper that over-fetches from the
    original search and lets the enhancer rerank the candidates.

    Args:
        venue_ai_instance: Object exposing ``_smart_venue_search(query, top_k)``.

    Returns:
        The same instance, modified in place.
    """
    venue_ai_instance.rag_enhancer = LightweightRAGEnhancer()

    # Capture the original bound method before it is replaced below.
    baseline_search = venue_ai_instance._smart_venue_search

    def enhanced_smart_search(query, top_k=20):
        # Fetch twice as many candidates so reranking has room to work.
        candidates = baseline_search(query, top_k * 2)
        # NOTE(review): enhance_search is not defined in the visible part of
        # the file - presumably it reranks and truncates to top_k; confirm.
        return venue_ai_instance.rag_enhancer.enhance_search(
            query, candidates, top_k
        )

    venue_ai_instance._smart_venue_search = enhanced_smart_search
    return venue_ai_instance
|
| 788 |
+
|
| 789 |
+
|
| 790 |
+
# Example usage
|
| 791 |
+
if __name__ == "__main__":
    # Manual smoke test: rerank two mock venues for a sample query and
    # print the enhancer's score and match reasons for each.
    enhancer = LightweightRAGEnhancer()

    test_venues = [
        {
            "name": "Dargett Craft Beer",
            "category": "pub",
            "summary": "Armenia's first craft brewery offering artisanal beers on tap",
            "rating": 4.6,
            "address": "72 Arami Street",
        },
        {
            "name": "Coffee Central",
            "category": "cafe",
            "summary": "Cozy coffee shop with outdoor seating",
            "rating": 4.2,
            "address": "15 Mashtots Avenue",
        },
    ]

    results = enhancer.enhance_search("find craft beer pub", test_venues)

    for result in results:
        reasons = ', '.join(result['match_reasons'])
        print(f"Venue: {result['name']}")
        print(f"Score: {result['enhanced_score']:.2f}")
        print(f"Reasons: {reasons}")
        print("-" * 30)
|
requirements.txt
CHANGED
|
@@ -5,3 +5,4 @@ geopy>=2.3.0
|
|
| 5 |
scikit-learn>=1.3.0
|
| 6 |
regex>=2023.6.3
|
| 7 |
huggingface_hub>=0.20.0
|
|
|
|
|
|
| 5 |
scikit-learn>=1.3.0
|
| 6 |
regex>=2023.6.3
|
| 7 |
huggingface_hub>=0.20.0
|
| 8 |
+
llama-cpp-python>=0.2.0
|
venue_ai_complete.py
CHANGED
|
@@ -9,6 +9,7 @@ import re
|
|
| 9 |
import gradio as gr
|
| 10 |
import random
|
| 11 |
from geopy.distance import geodesic
|
|
|
|
| 12 |
|
| 13 |
# Set up logging first
|
| 14 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -26,6 +27,9 @@ except Exception as e:
|
|
| 26 |
logger.warning(f"llama-cpp-python failed to load: {e}")
|
| 27 |
LLAMA_CPP_AVAILABLE = False
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
class CompleteYerevanVenueAI:
|
| 30 |
"""
|
| 31 |
Complete Bilingual (Armenian/English) AI Assistant for Yerevan Venue Recommendations
|
|
@@ -51,10 +55,13 @@ class CompleteYerevanVenueAI:
|
|
| 51 |
self.venue_categories = self._initialize_category_knowledge()
|
| 52 |
self.conversation_templates = self._initialize_enhanced_conversation_templates()
|
| 53 |
|
| 54 |
-
#
|
| 55 |
-
self.
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
logger.info("Initialized Complete YerevanVenueAI with
|
| 58 |
|
| 59 |
def _initialize_street_coordinates(self) -> Dict[str, Tuple[float, float]]:
|
| 60 |
"""Initialize street coordinates for distance calculation"""
|
|
@@ -385,21 +392,25 @@ class CompleteYerevanVenueAI:
|
|
| 385 |
return None
|
| 386 |
|
| 387 |
def _smart_venue_search(self, query: str, top_k: int = 20) -> List[Dict]:
|
| 388 |
-
"""
|
| 389 |
query_lower = query.lower()
|
| 390 |
results = []
|
| 391 |
|
|
|
|
|
|
|
|
|
|
| 392 |
query_words = set(query_lower.split())
|
|
|
|
| 393 |
|
| 394 |
# Detect category from query
|
| 395 |
language = self._detect_language(query)
|
| 396 |
detected_category = self._detect_category(query, language)
|
| 397 |
|
| 398 |
-
#
|
|
|
|
| 399 |
location_context = self._extract_enhanced_location_context(query)
|
| 400 |
|
| 401 |
for venue in self.venues_data:
|
| 402 |
-
score = 0
|
| 403 |
venue_name = venue.get('name', '')
|
| 404 |
venue_address = venue.get('address', '').lower()
|
| 405 |
|
|
@@ -414,6 +425,22 @@ class CompleteYerevanVenueAI:
|
|
| 414 |
venue_category = structured_info.iloc[0]['category']
|
| 415 |
venue_summary = str(structured_info.iloc[0]['venue_summary']).lower()
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
# JSON metadata scoring
|
| 418 |
venue_types = venue.get('types', [])
|
| 419 |
|
|
@@ -475,82 +502,24 @@ class CompleteYerevanVenueAI:
|
|
| 475 |
if keyword in venue_summary or keyword in venue_name.lower():
|
| 476 |
score += 5
|
| 477 |
|
| 478 |
-
#
|
| 479 |
-
|
| 480 |
-
'draft': {
|
| 481 |
-
'keywords': ['draft', 'tap', 'beer'],
|
| 482 |
-
'metadata': ['serves_beer'],
|
| 483 |
-
'bonus': 25
|
| 484 |
-
},
|
| 485 |
-
'craft': {
|
| 486 |
-
'keywords': ['craft', 'artisan', 'microbrewery'],
|
| 487 |
-
'metadata': ['serves_beer'],
|
| 488 |
-
'bonus': 20
|
| 489 |
-
},
|
| 490 |
-
'beer': {
|
| 491 |
-
'keywords': ['beer', 'brewery', 'ale', 'lager'],
|
| 492 |
-
'metadata': ['serves_beer'],
|
| 493 |
-
'bonus': 15
|
| 494 |
-
},
|
| 495 |
-
'cocktail': {
|
| 496 |
-
'keywords': ['cocktail', 'mixology', 'bartender'],
|
| 497 |
-
'metadata': ['serves_cocktails'],
|
| 498 |
-
'bonus': 15
|
| 499 |
-
},
|
| 500 |
-
'wine': {
|
| 501 |
-
'keywords': ['wine', 'vino', 'winery'],
|
| 502 |
-
'metadata': ['serves_wine'],
|
| 503 |
-
'bonus': 15
|
| 504 |
-
},
|
| 505 |
-
'coffee': {
|
| 506 |
-
'keywords': ['coffee', 'espresso', 'cappuccino', 'latte'],
|
| 507 |
-
'metadata': ['serves_coffee'],
|
| 508 |
-
'bonus': 15
|
| 509 |
-
},
|
| 510 |
-
'breakfast': {
|
| 511 |
-
'keywords': ['breakfast', 'brunch', 'morning'],
|
| 512 |
-
'metadata': ['serves_breakfast', 'serves_brunch'],
|
| 513 |
-
'bonus': 15
|
| 514 |
-
},
|
| 515 |
-
'live music': {
|
| 516 |
-
'keywords': ['live music', 'jazz', 'band', 'concert'],
|
| 517 |
-
'metadata': ['live_music'],
|
| 518 |
-
'bonus': 20
|
| 519 |
-
},
|
| 520 |
-
'romantic': {
|
| 521 |
-
'keywords': ['romantic', 'date', 'intimate', 'cozy'],
|
| 522 |
-
'metadata': ['romantic', 'good_for_date_night'],
|
| 523 |
-
'bonus': 15
|
| 524 |
-
},
|
| 525 |
-
'pub': {
|
| 526 |
-
'keywords': ['pub', 'tavern'],
|
| 527 |
-
'metadata': ['serves_beer', 'has_bar'],
|
| 528 |
-
'bonus': 20
|
| 529 |
-
},
|
| 530 |
-
'bar': {
|
| 531 |
-
'keywords': ['bar', 'lounge'],
|
| 532 |
-
'metadata': ['has_bar', 'serves_spirits'],
|
| 533 |
-
'bonus': 20
|
| 534 |
-
},
|
| 535 |
-
'restaurant': {
|
| 536 |
-
'keywords': ['restaurant', 'dining', 'cuisine'],
|
| 537 |
-
'metadata': ['serves_lunch', 'serves_dinner'],
|
| 538 |
-
'bonus': 15
|
| 539 |
-
}
|
| 540 |
-
}
|
| 541 |
|
| 542 |
-
#
|
| 543 |
-
for
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
# Venue name matching
|
| 556 |
venue_name_lower = venue_name.lower()
|
|
@@ -558,10 +527,16 @@ class CompleteYerevanVenueAI:
|
|
| 558 |
if word in venue_name_lower:
|
| 559 |
score += 8
|
| 560 |
|
| 561 |
-
# Summary matching (use the rich summary data)
|
| 562 |
for word in query_words:
|
| 563 |
if word in venue_summary:
|
| 564 |
-
score +=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
|
| 566 |
# Address matching
|
| 567 |
if venue.get('address'):
|
|
@@ -590,6 +565,8 @@ class CompleteYerevanVenueAI:
|
|
| 590 |
venue_copy['category'] = venue_category
|
| 591 |
venue_copy['summary'] = structured_info.iloc[0]['venue_summary']
|
| 592 |
venue_copy['exact_location_match'] = exact_location_match
|
|
|
|
|
|
|
| 593 |
results.append(venue_copy)
|
| 594 |
|
| 595 |
# Sort by exact location match first, then by score
|
|
@@ -713,6 +690,22 @@ class CompleteYerevanVenueAI:
|
|
| 713 |
response_parts.append("\n" + random.choice(self.conversation_templates[language]["endings"]))
|
| 714 |
|
| 715 |
return "\n".join(response_parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 716 |
|
| 717 |
def _detect_category(self, query: str, language: str) -> Optional[str]:
|
| 718 |
"""Detect venue category from query, respecting the detected language."""
|
|
@@ -738,7 +731,7 @@ class CompleteYerevanVenueAI:
|
|
| 738 |
return None
|
| 739 |
|
| 740 |
def _format_enhanced_venue_info(self, venue: Dict, language: str = "english") -> str:
|
| 741 |
-
"""Enhanced venue information formatting with 5-star reviews and metadata"""
|
| 742 |
if language == "armenian":
|
| 743 |
info_parts = [f"**{venue['name']}**"]
|
| 744 |
if venue.get('address'):
|
|
@@ -754,7 +747,7 @@ class CompleteYerevanVenueAI:
|
|
| 754 |
distance = venue['calculated_distance']
|
| 755 |
info_parts.append(f"๐ {distance:.1f} ีฏีด")
|
| 756 |
|
| 757 |
-
# Add category
|
| 758 |
if venue.get('category'):
|
| 759 |
category = venue['category']
|
| 760 |
category_map = {
|
|
@@ -773,6 +766,14 @@ class CompleteYerevanVenueAI:
|
|
| 773 |
if features:
|
| 774 |
info_parts.append(f"โจ {', '.join(features)}")
|
| 775 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 776 |
# Add 5-star review
|
| 777 |
venue_name = venue.get('name', '')
|
| 778 |
if venue_name in self.five_star_reviews:
|
|
@@ -790,7 +791,7 @@ class CompleteYerevanVenueAI:
|
|
| 790 |
distance = venue['calculated_distance']
|
| 791 |
info_parts.append(f"๐ {distance:.1f} km away")
|
| 792 |
|
| 793 |
-
# Add category
|
| 794 |
if venue.get('category'):
|
| 795 |
info_parts.append(f"๐ท๏ธ {venue['category']}")
|
| 796 |
|
|
@@ -805,6 +806,14 @@ class CompleteYerevanVenueAI:
|
|
| 805 |
if features:
|
| 806 |
info_parts.append(f"โจ {', '.join(features)}")
|
| 807 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 808 |
# Add 5-star review
|
| 809 |
venue_name = venue.get('name', '')
|
| 810 |
if venue_name in self.five_star_reviews:
|
|
@@ -1054,6 +1063,52 @@ Assistant:"""
|
|
| 1054 |
return True
|
| 1055 |
return False
|
| 1056 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1057 |
# Global AI instance
|
| 1058 |
ai_instance = None
|
| 1059 |
|
|
@@ -1229,15 +1284,17 @@ def create_gradio_interface():
|
|
| 1229 |
label="๐ฌ Try these examples:"
|
| 1230 |
)
|
| 1231 |
|
|
|
|
|
|
|
|
|
|
| 1232 |
search_btn.click(
|
| 1233 |
-
fn=
|
| 1234 |
inputs=[query_input, min_rating, price_range, max_distance],
|
| 1235 |
outputs=output
|
| 1236 |
)
|
| 1237 |
|
| 1238 |
-
# Auto-submit on Enter
|
| 1239 |
query_input.submit(
|
| 1240 |
-
fn=
|
| 1241 |
inputs=[query_input, min_rating, price_range, max_distance],
|
| 1242 |
outputs=output
|
| 1243 |
)
|
|
|
|
| 9 |
import gradio as gr
|
| 10 |
import random
|
| 11 |
from geopy.distance import geodesic
|
| 12 |
+
from collections import defaultdict
|
| 13 |
|
| 14 |
# Set up logging first
|
| 15 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 27 |
logger.warning(f"llama-cpp-python failed to load: {e}")
|
| 28 |
LLAMA_CPP_AVAILABLE = False
|
| 29 |
|
| 30 |
+
# Import the lightweight RAG enhancer
|
| 31 |
+
from lightweight_rag import LightweightRAGEnhancer
|
| 32 |
+
|
| 33 |
class CompleteYerevanVenueAI:
|
| 34 |
"""
|
| 35 |
Complete Bilingual (Armenian/English) AI Assistant for Yerevan Venue Recommendations
|
|
|
|
| 55 |
self.venue_categories = self._initialize_category_knowledge()
|
| 56 |
self.conversation_templates = self._initialize_enhanced_conversation_templates()
|
| 57 |
|
| 58 |
+
# Initialize lightweight RAG enhancer with comprehensive geo data
|
| 59 |
+
self.rag_enhancer = LightweightRAGEnhancer()
|
| 60 |
+
|
| 61 |
+
# Merge RAG geo data with existing street coordinates
|
| 62 |
+
self.street_coordinates = self._merge_geo_data()
|
| 63 |
|
| 64 |
+
logger.info("Initialized Complete YerevanVenueAI with RAG enhancement and comprehensive geo data")
|
| 65 |
|
| 66 |
def _initialize_street_coordinates(self) -> Dict[str, Tuple[float, float]]:
|
| 67 |
"""Initialize street coordinates for distance calculation"""
|
|
|
|
| 392 |
return None
|
| 393 |
|
| 394 |
def _smart_venue_search(self, query: str, top_k: int = 20) -> List[Dict]:
|
| 395 |
+
"""Enhanced search using RAG system with comprehensive geo data and smart scoring"""
|
| 396 |
query_lower = query.lower()
|
| 397 |
results = []
|
| 398 |
|
| 399 |
+
# Use RAG enhancer for query analysis
|
| 400 |
+
enhanced_query = self.rag_enhancer.enhance_query(query)
|
| 401 |
+
|
| 402 |
query_words = set(query_lower.split())
|
| 403 |
+
expanded_terms = set(enhanced_query["expanded_terms"])
|
| 404 |
|
| 405 |
# Detect category from query
|
| 406 |
language = self._detect_language(query)
|
| 407 |
detected_category = self._detect_category(query, language)
|
| 408 |
|
| 409 |
+
# Get enhanced location context from RAG
|
| 410 |
+
geo_context = enhanced_query["geo_context"]
|
| 411 |
location_context = self._extract_enhanced_location_context(query)
|
| 412 |
|
| 413 |
for venue in self.venues_data:
|
|
|
|
| 414 |
venue_name = venue.get('name', '')
|
| 415 |
venue_address = venue.get('address', '').lower()
|
| 416 |
|
|
|
|
| 425 |
venue_category = structured_info.iloc[0]['category']
|
| 426 |
venue_summary = str(structured_info.iloc[0]['venue_summary']).lower()
|
| 427 |
|
| 428 |
+
# Prepare venue data for RAG scoring
|
| 429 |
+
venue_for_rag = {
|
| 430 |
+
'name': venue_name,
|
| 431 |
+
'category': venue_category,
|
| 432 |
+
'summary': venue_summary,
|
| 433 |
+
'latitude': venue.get('latitude'),
|
| 434 |
+
'longitude': venue.get('longitude'),
|
| 435 |
+
'rating': venue.get('rating', 0)
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
# Get RAG enhanced score
|
| 439 |
+
rag_score, rag_explanation = self.rag_enhancer.calculate_enhanced_score(venue_for_rag, enhanced_query)
|
| 440 |
+
|
| 441 |
+
# Start with RAG score as base
|
| 442 |
+
score = rag_score
|
| 443 |
+
|
| 444 |
# JSON metadata scoring
|
| 445 |
venue_types = venue.get('types', [])
|
| 446 |
|
|
|
|
| 502 |
if keyword in venue_summary or keyword in venue_name.lower():
|
| 503 |
score += 5
|
| 504 |
|
| 505 |
+
# Additional scoring with expanded terms from RAG
|
| 506 |
+
additional_score = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
|
| 508 |
+
# Enhanced keyword matching with expanded terms
|
| 509 |
+
for term in expanded_terms:
|
| 510 |
+
# Check in venue name
|
| 511 |
+
if term in venue_name.lower():
|
| 512 |
+
additional_score += 3
|
| 513 |
+
|
| 514 |
+
# Check in venue summary
|
| 515 |
+
if term in venue_summary:
|
| 516 |
+
additional_score += 2
|
| 517 |
+
|
| 518 |
+
# Check in venue address
|
| 519 |
+
if term in venue_address:
|
| 520 |
+
additional_score += 1
|
| 521 |
+
|
| 522 |
+
score += additional_score
|
| 523 |
|
| 524 |
# Venue name matching
|
| 525 |
venue_name_lower = venue_name.lower()
|
|
|
|
| 527 |
if word in venue_name_lower:
|
| 528 |
score += 8
|
| 529 |
|
| 530 |
+
# Summary matching (use the rich summary data with higher scoring)
|
| 531 |
for word in query_words:
|
| 532 |
if word in venue_summary:
|
| 533 |
+
score += 5 # Increased score for summary matches
|
| 534 |
+
|
| 535 |
+
# Additional bonus for detailed summary matches
|
| 536 |
+
summary_bonus_keywords = ['draft', 'tap', 'craft', 'brewery', 'beer']
|
| 537 |
+
for keyword in summary_bonus_keywords:
|
| 538 |
+
if keyword in query_lower and keyword in venue_summary:
|
| 539 |
+
score += 15 # High bonus for specific beer-related terms in summary
|
| 540 |
|
| 541 |
# Address matching
|
| 542 |
if venue.get('address'):
|
|
|
|
| 565 |
venue_copy['category'] = venue_category
|
| 566 |
venue_copy['summary'] = structured_info.iloc[0]['venue_summary']
|
| 567 |
venue_copy['exact_location_match'] = exact_location_match
|
| 568 |
+
venue_copy['rag_score'] = rag_score
|
| 569 |
+
venue_copy['rag_explanation'] = rag_explanation
|
| 570 |
results.append(venue_copy)
|
| 571 |
|
| 572 |
# Sort by exact location match first, then by score
|
|
|
|
| 690 |
response_parts.append("\n" + random.choice(self.conversation_templates[language]["endings"]))
|
| 691 |
|
| 692 |
return "\n".join(response_parts)
|
| 693 |
+
|
| 694 |
+
def get_search_explanation(self, query: str, venues: List[Dict]) -> str:
    """Explain search results by delegating to the RAG enhancer.

    Only the first three venues are considered, and only those carrying a
    ``rag_explanation`` entry are passed on.

    Args:
        query: The query the results were produced for.
        venues: Result records, best first.

    Returns:
        The enhancer's explanation text; a generic "Found N venues" line
        when none of the first three venues has a ``rag_explanation``; or a
        fixed "no venues" sentence when ``venues`` is empty.
    """
    if not venues:
        return "No venues found matching your criteria."

    # (venue, score, breakdown) triples in the shape the enhancer expects.
    explained = [
        (venue, venue.get('similarity_score', 0), venue['rag_explanation'])
        for venue in venues[:3]
        if 'rag_explanation' in venue
    ]

    if not explained:
        return f"Found {len(venues)} venues matching '{query}'"
    return self.rag_enhancer.get_search_explanation(query, explained)
|
| 709 |
|
| 710 |
def _detect_category(self, query: str, language: str) -> Optional[str]:
|
| 711 |
"""Detect venue category from query, respecting the detected language."""
|
|
|
|
| 731 |
return None
|
| 732 |
|
| 733 |
def _format_enhanced_venue_info(self, venue: Dict, language: str = "english") -> str:
|
| 734 |
+
"""Enhanced venue information formatting with CSV summary, 5-star reviews and metadata"""
|
| 735 |
if language == "armenian":
|
| 736 |
info_parts = [f"**{venue['name']}**"]
|
| 737 |
if venue.get('address'):
|
|
|
|
| 747 |
distance = venue['calculated_distance']
|
| 748 |
info_parts.append(f"๐ {distance:.1f} ีฏีด")
|
| 749 |
|
| 750 |
+
# Add category
|
| 751 |
if venue.get('category'):
|
| 752 |
category = venue['category']
|
| 753 |
category_map = {
|
|
|
|
| 766 |
if features:
|
| 767 |
info_parts.append(f"โจ {', '.join(features)}")
|
| 768 |
|
| 769 |
+
# Add comprehensive venue summary from CSV
|
| 770 |
+
if venue.get('summary'):
|
| 771 |
+
summary = venue['summary']
|
| 772 |
+
# Truncate summary for readability but keep more detail
|
| 773 |
+
if len(summary) > 200:
|
| 774 |
+
summary = summary[:200] + "..."
|
| 775 |
+
info_parts.append(f"๐ {summary}")
|
| 776 |
+
|
| 777 |
# Add 5-star review
|
| 778 |
venue_name = venue.get('name', '')
|
| 779 |
if venue_name in self.five_star_reviews:
|
|
|
|
| 791 |
distance = venue['calculated_distance']
|
| 792 |
info_parts.append(f"๐ {distance:.1f} km away")
|
| 793 |
|
| 794 |
+
# Add category
|
| 795 |
if venue.get('category'):
|
| 796 |
info_parts.append(f"๐ท๏ธ {venue['category']}")
|
| 797 |
|
|
|
|
| 806 |
if features:
|
| 807 |
info_parts.append(f"โจ {', '.join(features)}")
|
| 808 |
|
| 809 |
+
# Add comprehensive venue summary from CSV
|
| 810 |
+
if venue.get('summary'):
|
| 811 |
+
summary = venue['summary']
|
| 812 |
+
# Truncate summary for readability but keep more detail
|
| 813 |
+
if len(summary) > 200:
|
| 814 |
+
summary = summary[:200] + "..."
|
| 815 |
+
info_parts.append(f"๐ {summary}")
|
| 816 |
+
|
| 817 |
# Add 5-star review
|
| 818 |
venue_name = venue.get('name', '')
|
| 819 |
if venue_name in self.five_star_reviews:
|
|
|
|
| 1063 |
return True
|
| 1064 |
return False
|
| 1065 |
|
| 1066 |
+
def _merge_geo_data(self) -> Dict[str, Tuple[float, float]]:
    """Build the location-name -> coordinates lookup.

    A built-in table of streets, districts and landmarks is combined with
    every entry of ``self.rag_enhancer.geo_landmarks``. The name of a RAG
    entry always replaces a built-in entry with the same name, whereas its
    first two aliases are registered only when the name is still free.

    Returns:
        Mapping of location name to an approximate ``(lat, lng)`` pair.
    """
    # Major streets, approximate centre points as (lat, lng).
    street_coords = {
        "Mashtots Avenue": (40.1845, 44.5117),
        "Abovyan Street": (40.1776, 44.5146),
        "Saryan Street": (40.1851, 44.5086),
        "Tumanyan Street": (40.1822, 44.5149),
        "Amiryan Street": (40.1798, 44.5139),
        "Pushkin Street": (40.1774, 44.5154),
        "Khorenatsi Street": (40.1751, 44.5181),
        "Teryan Street": (40.1828, 44.5163),
        "Paronyan Street": (40.1812, 44.5134),
        "Northern Avenue": (40.1792, 44.5146),
        "Sayat Nova Avenue": (40.1834, 44.5098),
        "Baghramyan Avenue": (40.1951, 44.5089),
        "Vazgen Sargsyan Street": (40.1823, 44.5201),
        "Tigran Mets Avenue": (40.1743, 44.5289),
        "Nalbandyan Street": (40.1800, 44.5182),
    }
    # Districts, approximate centres.
    district_coords = {
        "Kentron": (40.1792, 44.5146),
        "Arabkir": (40.2089, 44.4856),
        "Avan": (40.2156, 44.5489),
        "Davtashen": (40.2267, 44.4567),
        "Erebuni": (40.1345, 44.5234),
    }
    # Well-known landmarks.
    landmark_coords = {
        "Republic Square": (40.1761, 44.5126),
        "Opera House": (40.1836, 44.5098),
        "Cascade": (40.1876, 44.5086),
        "Swan Lake": (40.1837, 44.5135),
        "Blue Mosque": (40.1733, 44.5151),
    }
    locations = {**street_coords, **district_coords, **landmark_coords}

    # Layer the RAG enhancer's geo data on top of the built-in table.
    for name, entry in self.rag_enhancer.geo_landmarks.items():
        locations[name] = entry["coords"]

        # Only the two leading aliases are added, and never over an
        # existing key, so built-in and primary names keep priority.
        for alias in entry["aliases"][:2]:
            locations.setdefault(alias, entry["coords"])

    logger.info(f"Merged geo data: {len(locations)} locations available")
    return locations
|
| 1111 |
+
|
| 1112 |
# Global AI instance
|
| 1113 |
# Module-level holder for the global AI instance; unset (None) at import time.
# NOTE(review): the code that assigns the real instance is outside this excerpt.
ai_instance = None
|
| 1114 |
|
|
|
|
| 1284 |
label="๐ฌ Try these examples:"
|
| 1285 |
)
|
| 1286 |
|
| 1287 |
+
def handle_submit(query, min_rating, price_range, max_distance):
    """Shared callback for the search button and the Enter-key submit.

    Passes the query text and the three filter values straight through to
    ``get_recommendations`` and returns whatever it produces.
    """
    recommendations = get_recommendations(
        query, min_rating, price_range, max_distance
    )
    return recommendations
|
| 1289 |
+
|
| 1290 |
search_btn.click(
|
| 1291 |
+
fn=handle_submit,
|
| 1292 |
inputs=[query_input, min_rating, price_range, max_distance],
|
| 1293 |
outputs=output
|
| 1294 |
)
|
| 1295 |
|
|
|
|
| 1296 |
query_input.submit(
|
| 1297 |
+
fn=handle_submit,
|
| 1298 |
inputs=[query_input, min_rating, price_range, max_distance],
|
| 1299 |
outputs=output
|
| 1300 |
)
|