artush-habetyan committed on
Commit
b6d071a
·
verified ·
1 Parent(s): e55ffff

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ yerevan_pubs_bars_20250623_193205.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,86 @@
1
  ---
2
- title: Vibe Sip
3
- emoji: 🌖
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 5.34.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Yerevan Venue AI Assistant
3
+ emoji: 🍽️
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.34.2
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # 🇦🇲 Yerevan Venue AI Assistant
14
+
15
+ A bilingual (Armenian & English) conversational AI assistant that helps you discover the best venues in Yerevan, Armenia. This system combines comprehensive venue data with conversational AI capabilities to provide personalized recommendations and engage in casual conversation.
16
+
17
+ ## 🌟 Features
18
+
19
+ ### 🍽️ Venue Recommendations
20
+ - **958 Venues**: Comprehensive database of restaurants, bars, pubs, cafes, and clubs
21
+ - **5-Star Reviews**: Integrated reviews from 727 venues with 5-star ratings
22
+ - **Smart Filtering**: Filter by rating, price range, and distance
23
+ - **Location-Aware**: Search by specific streets, landmarks, and districts
24
+
25
+ ### 🗺️ Street-Aware Location Recognition
26
+ - **Major Streets**: Pushkin Street, Mashtots Avenue, Saryan Street, Nalbandyan Street
27
+ - **Landmarks**: Opera House, Republic Square, Cascade, Northern Avenue
28
+ - **Distance Calculation**: Accurate distance measurements from user location
29
+ - **Bilingual Location Support**: Recognize locations in both Armenian and English
30
+
31
+ ### 💬 Conversational AI
32
+ - **Natural Conversations**: Engage in casual chat and small talk
33
+ - **Bilingual Support**: Communicate in Armenian (Հայերեն) or English
34
+ - **Template-Based Responses**: Fast, contextual responses
35
+ - **Smart Query Detection**: Automatically detects venue requests vs casual conversation
36
+
37
+ ### 🇦🇲 Armenian Language Support
38
+ - **Native Armenian**: Full support for Armenian text input and output
39
+ - **Cultural Context**: Understanding of Armenian venue culture and preferences
40
+ - **Bilingual Categories**: Recognize venue types in both languages (փաբ, ռեստորան, բար, etc.)
41
+
42
+ ## 🚀 How to Use
43
+
44
+ ### Venue Recommendations
45
+ ```
46
+ "Find me a good pub on Pushkin Street"
47
+ "Restaurants near Opera House with rating above 4"
48
+ "բարեր Մաշտոցի պողոտայում" (bars on Mashtots Avenue)
49
+ "փաբեր Նալբանդյան փողոցում" (pubs on Nalbandyan Street)
50
+ ```
51
+
52
+ ### Casual Conversation
53
+ ```
54
+ "Hello! How are you?"
55
+ "What can you help me with?"
56
+ "բարև ձեզ, ինչպես եք?" (Hello, how are you?)
57
+ "Thanks for your help!"
58
+ ```
59
+
60
+ ### Location-Based Queries
61
+ ```
62
+ "Any good restaurants near Cascade?"
63
+ "Bars on Saryan Street"
64
+ "սրճարաններ Հանրապետության հրապարակի մոտ" (cafes near Republic Square)
65
+ ```
66
+
67
+ ## 🎯 Advanced Features
68
+
69
+ ### Smart Filtering Options
70
+ - **Minimum Rating**: 0-5 stars (default: 3.0)
71
+ - **Price Range**: Budget, Mid-range, Expensive, or All
72
+ - **Maximum Distance**: 0.5-20 km from specified location
73
+
74
+ ### Conversation History
75
+ - Maintains context across multiple interactions
76
+ - Remembers previous questions and preferences
77
+ - Provides personalized follow-up recommendations
78
+
79
+ ### Multilingual Query Processing
80
+ - Automatically detects input language
81
+ - Provides responses in the same language as the query
82
+ - Supports mixed-language conversations
83
+
84
+ ---
85
+
86
+ *Built with ❤️ for the Yerevan community. Combining traditional Armenian hospitality with modern AI technology.*
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Yerevan Venue AI Assistant - Hugging Face Spaces Deployment
4
+ Main application entry point
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import logging
10
+ from venue_ai_complete import CompleteYerevanVenueAI, create_gradio_interface, initialize_ai
11
+
12
+ # Set up logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ def main():
17
+ """Main application entry point for Hugging Face Spaces"""
18
+
19
+ print("🚀 Starting Yerevan Venue AI Assistant...")
20
+ print("📍 Bilingual support: Armenian & English")
21
+ print("🗺️ Street-aware location recognition")
22
+ print("⭐ 5-star review integration")
23
+ print("🔧 Advanced filtering options")
24
+ print("🎯 Smart venue recommendations")
25
+ print("💬 Conversational AI capabilities")
26
+ print("-" * 50)
27
+
28
+ try:
29
+ # Initialize the AI system
30
+ logger.info("Initializing AI system...")
31
+ initialize_ai()
32
+ logger.info("AI system initialized successfully!")
33
+
34
+ # Create and launch Gradio interface
35
+ logger.info("Creating Gradio interface...")
36
+ interface = create_gradio_interface()
37
+
38
+ # Launch with Hugging Face Spaces configuration
39
+ logger.info("Launching application...")
40
+ interface.launch(
41
+ server_name="0.0.0.0",
42
+ server_port=7860,
43
+ share=False, # HF Spaces handles sharing
44
+ show_error=True,
45
+ quiet=False
46
+ )
47
+
48
+ except Exception as e:
49
+ logger.error(f"Failed to start application: {e}")
50
+ print(f"❌ Error: {e}")
51
+ sys.exit(1)
52
+
53
+ if __name__ == "__main__":
54
+ main()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ pandas>=2.0.0
3
+ numpy>=1.24.0
4
+ geopy>=2.3.0
5
+ scikit-learn>=1.3.0
6
+ regex>=2023.6.3
7
+ --index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
8
+ llama-cpp-python==0.2.90
9
+ huggingface_hub>=0.20.0
venue_ai_complete.py ADDED
@@ -0,0 +1,1253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gzip
2
+ import json
3
+ import pandas as pd
4
+ import numpy as np
5
+ from typing import List, Dict, Optional, Tuple
6
+ import logging
7
+ from datetime import datetime
8
+ import re
9
+ import gradio as gr
10
+ import random
11
+ from geopy.distance import geodesic
12
+
13
+ # Add conversational LLM support
14
+ try:
15
+ from llama_cpp import Llama
16
+ LLAMA_CPP_AVAILABLE = True
17
+ except ImportError:
18
+ LLAMA_CPP_AVAILABLE = False
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+ class CompleteYerevanVenueAI:
23
+ """
24
+ Complete Bilingual (Armenian/English) AI Assistant for Yerevan Venue Recommendations
25
+ With enhanced templates, location parsing, filtering, distance calculation and metadata usage
26
+ """
27
+
28
+ def __init__(self, venues_json_path: str, venues_csv_path: str):
29
+ self.venues_json_path = venues_json_path
30
+ self.venues_csv_path = venues_csv_path
31
+
32
+ # Core data
33
+ self.venues_data = []
34
+ self.venues_structured = None
35
+ self.five_star_reviews = {}
36
+
37
+ # Conversational LLM
38
+ self.conversational_llm = None
39
+ self.conversation_history = []
40
+ self.max_conversation_history = 10
41
+
42
+ # Enhanced location and category knowledge
43
+ self.yerevan_streets = self._initialize_enhanced_street_knowledge()
44
+ self.venue_categories = self._initialize_category_knowledge()
45
+ self.conversation_templates = self._initialize_enhanced_conversation_templates()
46
+
47
+ # Street coordinates for distance calculation
48
+ self.street_coordinates = self._initialize_street_coordinates()
49
+
50
+ logger.info("Initialized Complete YerevanVenueAI with distance calculation and conversational capabilities")
51
+
52
+ def _initialize_street_coordinates(self) -> Dict[str, Tuple[float, float]]:
53
+ """Initialize street coordinates for distance calculation"""
54
+ return {
55
+ # Major streets with approximate center coordinates (lat, lng)
56
+ "Mashtots Avenue": (40.1845, 44.5117),
57
+ "Abovyan Street": (40.1776, 44.5146),
58
+ "Saryan Street": (40.1851, 44.5086),
59
+ "Tumanyan Street": (40.1822, 44.5149),
60
+ "Amiryan Street": (40.1798, 44.5139),
61
+ "Pushkin Street": (40.1774, 44.5154),
62
+ "Khorenatsi Street": (40.1751, 44.5181),
63
+ "Teryan Street": (40.1828, 44.5163),
64
+ "Paronyan Street": (40.1812, 44.5134),
65
+ "Northern Avenue": (40.1792, 44.5146),
66
+ "Sayat Nova Avenue": (40.1834, 44.5098),
67
+ "Baghramyan Avenue": (40.1951, 44.5089),
68
+ "Vazgen Sargsyan Street": (40.1823, 44.5201),
69
+ "Tigran Mets Avenue": (40.1743, 44.5289),
70
+ "Nalbandyan Street": (40.1800, 44.5182),
71
+ # Districts (approximate centers)
72
+ "Kentron": (40.1792, 44.5146),
73
+ "Arabkir": (40.2089, 44.4856),
74
+ "Avan": (40.2156, 44.5489),
75
+ "Davtashen": (40.2267, 44.4567),
76
+ "Erebuni": (40.1345, 44.5234),
77
+ # Landmarks
78
+ "Republic Square": (40.1761, 44.5126),
79
+ "Opera House": (40.1836, 44.5098),
80
+ "Cascade": (40.1876, 44.5086),
81
+ "Swan Lake": (40.1837, 44.5135),
82
+ "Blue Mosque": (40.1733, 44.5151)
83
+ }
84
+
85
+ def _initialize_enhanced_street_knowledge(self) -> Dict[str, Dict]:
86
+ """Enhanced Yerevan geography knowledge with Armenian names"""
87
+ return {
88
+ "streets": {
89
+ "Mashtots Avenue": ["մաշտոցի", "մաշտոց", "mashtots", "mesrop mashtots"],
90
+ "Abovyan Street": ["աբովյանի", "աբովյան", "abovyan"],
91
+ "Saryan Street": ["սարյանի", "սարյան", "saryan", "martiros saryan"],
92
+ "Tumanyan Street": ["թումանյանի", "թումանյան", "tumanyan", "hovhannes tumanyan"],
93
+ "Amiryan Street": ["ամիրյանի", "ամիրյան", "amiryan"],
94
+ "Pushkin Street": ["պուշկինի", "պուշկին", "pushkin"],
95
+ "Khorenatsi Street": ["խորենացի", "խորենաց", "khorenatsi"],
96
+ "Teryan Street": ["տերյանի", "տերյան", "teryan"],
97
+ "Paronyan Street": ["պարոնյանի", "պարոնյան", "paronyan"],
98
+ "Northern Avenue": ["հյուսիսային", "northern", "northern avenue"],
99
+ "Sayat Nova Avenue": ["սայաթ նովա", "sayat nova"],
100
+ "Baghramyan Avenue": ["բաղրամյանի", "բաղրամյան", "baghramyan"],
101
+ "Vazgen Sargsyan Street": ["վազգեն սարգսյանի", "vazgen sargsyan"],
102
+ "Tigran Mets Avenue": ["տիգրան մեծի", "tigran mets"],
103
+ "Nalbandyan Street": ["նալբանդյանի", "նալբանդյան", "nalbandyan"]
104
+ },
105
+ "districts": {
106
+ "Kentron": ["կենտրոն", "կենտրում", "center", "downtown", "central"],
107
+ "Arabkir": ["արաբկիր", "arabkir"],
108
+ "Avan": ["ավան", "avan"],
109
+ "Davtashen": ["դավթաշեն", "davtashen"],
110
+ "Erebuni": ["էրեբունի", "erebuni"],
111
+ "Kanaker-Zeytun": ["կանակեր", "զեյթուն", "kanaker", "zeytun"],
112
+ "Malatia-Sebastia": ["մալաթիա", "սեբաստիա", "malatia", "sebastia"],
113
+ "Nor Nork": ["նոր նորք", "nor nork"],
114
+ "Shengavit": ["շենգավիթ", "shengavit"],
115
+ "Ajapnyak": ["աջափնյակ", "ajapnyak"]
116
+ },
117
+ "landmarks": {
118
+ "Republic Square": ["հանրապետության հրապարակ", "հանրապետության", "republic square", "republic"],
119
+ "Opera House": ["օպերա", "օպերայի տուն", "opera", "opera house"],
120
+ "Cascade": ["կասկադ", "cascade"],
121
+ "Northern Avenue": ["հյուսիսային պողոտա", "northern avenue"],
122
+ "Swan Lake": ["կարապի լիճ", "swan lake"],
123
+ "Vernissage Market": ["վերնիսաժ", "vernissage"],
124
+ "Blue Mosque": ["կապույտ մզկիթ", "blue mosque"],
125
+ "Mother Armenia": ["մայր հայաստան", "mother armenia"],
126
+ "Matenadaran": ["մատենադարան", "matenadaran"],
127
+ "Cascade Complex": ["կասկադային համալիր", "cascade complex"]
128
+ }
129
+ }
130
+
131
+ def _initialize_category_knowledge(self) -> Dict[str, Dict]:
132
+ """Enhanced category knowledge with Armenian terms and JSON metadata"""
133
+ return {
134
+ "nightlife": {
135
+ "types": ["pub", "bar", "club", "hookah", "night_club"],
136
+ "json_types": ["bar", "night_club"],
137
+ "armenian_terms": ["բար", "պաբ", "փաբ", "փաբեր", "ակումբ", "հուկա", "գիշերային", "ժամանց"],
138
+ "keywords": ["drink", "beer", "cocktail", "party", "night", "dance", "draft", "tap", "alcohol", "whiskey", "vodka", "pub", "bar", "nightclub"],
139
+ "armenian_keywords": ["խմիչք", "գարեջուր", "կոկտեյլ", "պարտի", "գիշեր", "պար", "ալկոհոլ"],
140
+ "metadata_fields": ["serves_beer", "serves_spirits", "serves_cocktails", "serves_wine", "has_bar", "has_happy_hour", "good_for_dancing", "serves_happy_hour_drinks", "serves_late_night_food"]
141
+ },
142
+ "dining": {
143
+ "types": ["restaurant", "cafe", "fast_food", "bakery"],
144
+ "json_types": ["restaurant", "cafe"],
145
+ "armenian_terms": ["ռեստորան", "սրճարան", "արագ սնունդ", "հացագործություն"],
146
+ "keywords": ["food", "eat", "meal", "coffee", "breakfast", "lunch", "dinner", "cuisine", "dining", "restaurant", "cafe"],
147
+ "armenian_keywords": ["ուտելիք", "ուտել", "ճաշ", "սուրճ", "նախաճաշ", "ճաշ", "ընթրիք"],
148
+ "metadata_fields": ["serves_breakfast", "serves_brunch", "serves_lunch", "serves_dinner", "serves_coffee", "serves_dessert", "serves_vegetarian_food", "menu_for_children", "good_for_children", "good_for_groups"]
149
+ },
150
+ "culture": {
151
+ "types": ["cultural", "gallery", "theatre", "museum"],
152
+ "json_types": [],
153
+ "armenian_terms": ["մշակութային", "պատկերասրահ", "թատրոն", "թանգարան"],
154
+ "keywords": ["art", "culture", "museum", "gallery", "theater", "exhibition"],
155
+ "armenian_keywords": ["արվեստ", "մշակույթ", "թանգարան", "ցուցահանդես"],
156
+ "metadata_fields": []
157
+ },
158
+ "entertainment": {
159
+ "types": ["karaoke", "gaming", "music", "cinema"],
160
+ "json_types": [],
161
+ "armenian_terms": ["կարաոկե", "խաղ", "երաժշտություն", "կինո"],
162
+ "keywords": ["music", "karaoke", "game", "entertainment", "fun", "live music"],
163
+ "armenian_keywords": ["երաժշտություն", "կարաոկե", "խաղ", "ժամանց", "զվարճանք"],
164
+ "metadata_fields": ["live_music", "good_for_watching_sports", "good_for_business_meetings", "good_for_date_night"]
165
+ }
166
+ }
167
+
168
+ def _initialize_enhanced_conversation_templates(self) -> Dict[str, Dict]:
169
+ """Enhanced conversation templates for various scenarios"""
170
+ return {
171
+ "armenian": {
172
+ "greetings": [
173
+ "Բարև ձեզ! Ես ձեր անձնական ուղեցույցն եմ Երևանի լավագույն վայրերի համար:",
174
+ "Ողջույն! Ուրախ եմ օգնել ձեզ հայտնաբերել Երևանի հիանալի վայրերը:",
175
+ "Բարի գալուստ! Ես կօգնեմ ձեզ գտնել կատարյալ վայր Երևանում:"
176
+ ],
177
+ "recommendation_intros": [
178
+ "Ձեր հարցման համար ես գտա այս հիանալի վայրերը:",
179
+ "Ահա ինչ կարող եմ առաջարկել ձեզ:",
180
+ "Այս վայրերը կարող են ձեզ հետաքրքրել:"
181
+ ],
182
+ "location_contexts": {
183
+ "street": "Դուք փնտրում եք {location} փողոցում:",
184
+ "district": "Դուք փնտրում եք {location} թաղամասում:",
185
+ "landmark": "Դուք փնտրում եք {location} մոտակայքում:"
186
+ },
187
+ "category_matches": {
188
+ "nightlife": "Այս վայրերը հիանալի են գիշերային ժամանցի համար:",
189
+ "dining": "Այս ճաշարանները կամ սրճարանները ձեզ կհավանեն:",
190
+ "culture": "Այս մշակութային վայրերը հետաքրքիր են:",
191
+ "entertainment": "Այս ժամանցային վայրերը զվարճալի են:"
192
+ },
193
+ "endings": [
194
+ "Հուսով եմ, որ կգտնեք կատարյալ տարբերակ!",
195
+ "Բարի ժամանց!",
196
+ "Եթե հարցեր ունեք, ես այստեղ եմ:"
197
+ ]
198
+ },
199
+ "english": {
200
+ "greetings": [
201
+ "Hello! I'm your personal guide to the best places in Yerevan:",
202
+ "Welcome! I'm excited to help you discover amazing venues in Yerevan:",
203
+ "Hi there! Let me help you find the perfect spot in Yerevan:"
204
+ ],
205
+ "recommendation_intros": [
206
+ "For your query, I found these fantastic venues:",
207
+ "Here's what I can recommend for you:",
208
+ "These places might interest you:"
209
+ ],
210
+ "location_contexts": {
211
+ "street": "You're looking on {location}:",
212
+ "district": "You're exploring the {location} district:",
213
+ "landmark": "You're searching near {location}:"
214
+ },
215
+ "category_matches": {
216
+ "nightlife": "These venues are perfect for nightlife:",
217
+ "dining": "These restaurants and cafes will delight you:",
218
+ "culture": "These cultural venues are fascinating:",
219
+ "entertainment": "These entertainment spots are fun:"
220
+ },
221
+ "endings": [
222
+ "I hope you find the perfect match!",
223
+ "Enjoy your visit!",
224
+ "Feel free to ask if you need more recommendations!"
225
+ ]
226
+ }
227
+ }
228
+
229
+ def initialize(self):
230
+ """Initialize the complete venue AI system"""
231
+ logger.info("Loading venue data...")
232
+ self._load_venue_data()
233
+
234
+ logger.info("Processing 5-star reviews...")
235
+ self._process_five_star_reviews()
236
+
237
+ logger.info("Initializing conversational LLM...")
238
+ self._initialize_conversational_llm()
239
+
240
+ logger.info("Complete YerevanVenueAI initialization finished!")
241
+
242
+ def _load_venue_data(self):
243
+ """Load venue data from JSON and CSV files"""
244
+ with open(self.venues_json_path, 'r', encoding='utf-8') as f:
245
+ self.venues_data = json.load(f)
246
+
247
+ self.venues_structured = pd.read_csv(self.venues_csv_path)
248
+
249
+ logger.info(f"Loaded {len(self.venues_data)} venues from JSON")
250
+ logger.info(f"Loaded {len(self.venues_structured)} venues from CSV")
251
+
252
+ def _process_five_star_reviews(self):
253
+ """Extract and process 5-star reviews for each venue"""
254
+ for venue in self.venues_data:
255
+ venue_name = venue.get('name', '')
256
+ reviews = venue.get('reviews', [])
257
+
258
+ # Filter 5-star reviews
259
+ five_star = [review for review in reviews if review.get('rating') == 5]
260
+
261
+ if five_star:
262
+ # Separate reviews by language
263
+ english_reviews = []
264
+ armenian_reviews = []
265
+
266
+ for review in five_star:
267
+ text = review.get('text', '').strip()
268
+ language = review.get('language', 'en')
269
+ original_language = review.get('original_language', 'en')
270
+
271
+ if text and len(text) > 20: # Only meaningful reviews
272
+ if language == 'hy' or original_language == 'hy':
273
+ armenian_reviews.append(text)
274
+ else:
275
+ english_reviews.append(text)
276
+
277
+ # Store both language versions
278
+ if english_reviews or armenian_reviews:
279
+ self.five_star_reviews[venue_name] = {
280
+ 'english': english_reviews[:3], # Top 3 English reviews
281
+ 'armenian': armenian_reviews[:3] # Top 3 Armenian reviews
282
+ }
283
+
284
+ logger.info(f"Processed 5-star reviews for {len(self.five_star_reviews)} venues")
285
+
286
+ def _get_reviews_by_language(self, venue_name: str, language: str) -> List[str]:
287
+ """Get reviews in the specified language"""
288
+ if venue_name not in self.five_star_reviews:
289
+ return []
290
+
291
+ reviews_data = self.five_star_reviews[venue_name]
292
+
293
+ if language == "armenian" and reviews_data.get('armenian'):
294
+ return reviews_data['armenian']
295
+ elif reviews_data.get('english'):
296
+ return reviews_data['english']
297
+ else:
298
+ # Fallback to any available reviews
299
+ return reviews_data.get('armenian', []) + reviews_data.get('english', [])
300
+
301
+ def _detect_language(self, text: str) -> str:
302
+ """Enhanced language detection"""
303
+ armenian_chars = re.findall(r'[Ա-Ֆա-ֆ]', text)
304
+ armenian_ratio = len(armenian_chars) / len(text) if text else 0
305
+
306
+ armenian_keywords = ['բար', 'ռեստորան', 'սրճարան', 'ակումբ', 'հուկա', 'ուզում', 'գտնել', 'որտեղ', 'կարող', 'լավ', 'հետաքրքիր']
307
+ armenian_keyword_count = sum(1 for keyword in armenian_keywords if keyword in text.lower())
308
+
309
+ if armenian_ratio > 0.15 or armenian_keyword_count > 0:
310
+ return "armenian"
311
+ return "english"
312
+
313
+ def _extract_enhanced_location_context(self, query: str) -> Dict[str, List[str]]:
314
+ """Enhanced location extraction with comprehensive Armenian support"""
315
+ query_lower = query.lower()
316
+ context = {
317
+ "streets": [],
318
+ "districts": [],
319
+ "landmarks": []
320
+ }
321
+
322
+ # Enhanced street detection
323
+ for street_eng, variations in self.yerevan_streets["streets"].items():
324
+ for variation in variations:
325
+ if variation.lower() in query_lower:
326
+ context["streets"].append(street_eng)
327
+ break
328
+
329
+ # Enhanced district detection
330
+ for district_eng, variations in self.yerevan_streets["districts"].items():
331
+ for variation in variations:
332
+ if variation.lower() in query_lower:
333
+ context["districts"].append(district_eng)
334
+ break
335
+
336
+ # Enhanced landmark detection
337
+ for landmark_eng, variations in self.yerevan_streets["landmarks"].items():
338
+ for variation in variations:
339
+ if variation.lower() in query_lower:
340
+ context["landmarks"].append(landmark_eng)
341
+ break
342
+
343
+ return context
344
+
345
+ def _get_user_location_from_query(self, query: str) -> Optional[Tuple[float, float]]:
346
+ """Extract user location coordinates from street/landmark names in query"""
347
+ location_context = self._extract_enhanced_location_context(query)
348
+
349
+ # Check streets first
350
+ for street in location_context["streets"]:
351
+ if street in self.street_coordinates:
352
+ return self.street_coordinates[street]
353
+
354
+ # Check districts
355
+ for district in location_context["districts"]:
356
+ if district in self.street_coordinates:
357
+ return self.street_coordinates[district]
358
+
359
+ # Check landmarks
360
+ for landmark in location_context["landmarks"]:
361
+ if landmark in self.street_coordinates:
362
+ return self.street_coordinates[landmark]
363
+
364
+ return None
365
+
366
+ def _calculate_distance(self, user_location: Tuple[float, float], venue: Dict) -> Optional[float]:
367
+ """Calculate distance between user location and venue"""
368
+ try:
369
+ venue_lat = venue.get('latitude')
370
+ venue_lng = venue.get('longitude')
371
+
372
+ if venue_lat is not None and venue_lng is not None:
373
+ distance = geodesic(user_location, (venue_lat, venue_lng)).kilometers
374
+ return distance
375
+ except Exception as e:
376
+ logger.debug(f"Distance calculation error: {e}")
377
+
378
+ return None
379
+
380
+ def _smart_venue_search(self, query: str, top_k: int = 20) -> List[Dict]:
381
+ """Improved search using venue summaries, metadata, and category matching"""
382
+ query_lower = query.lower()
383
+ results = []
384
+
385
+ query_words = set(query_lower.split())
386
+
387
+ # Detect category from query
388
+ language = self._detect_language(query)
389
+ detected_category = self._detect_category(query, language)
390
+
391
+ # Detect location context for exact street matching
392
+ location_context = self._extract_enhanced_location_context(query)
393
+
394
+ for venue in self.venues_data:
395
+ score = 0
396
+ venue_name = venue.get('name', '')
397
+ venue_address = venue.get('address', '').lower()
398
+
399
+ # Get structured venue info
400
+ structured_info = self.venues_structured[
401
+ self.venues_structured['venue_name'] == venue_name
402
+ ]
403
+
404
+ if structured_info.empty:
405
+ continue
406
+
407
+ venue_category = structured_info.iloc[0]['category']
408
+ venue_summary = str(structured_info.iloc[0]['venue_summary']).lower()
409
+
410
+ # JSON metadata scoring
411
+ venue_types = venue.get('types', [])
412
+
413
+ # PRIORITY: Exact street/location matching (very high score)
414
+ exact_location_match = False
415
+ if location_context["streets"]:
416
+ for street in location_context["streets"]:
417
+ street_variations = self.yerevan_streets["streets"][street]
418
+ for variation in street_variations:
419
+ if variation.lower() in venue_address:
420
+ score += 100 # Very high score for exact street match
421
+ exact_location_match = True
422
+ break
423
+ if exact_location_match:
424
+ break
425
+
426
+ if location_context["districts"]:
427
+ for district in location_context["districts"]:
428
+ district_variations = self.yerevan_streets["districts"][district]
429
+ for variation in district_variations:
430
+ if variation.lower() in venue_address:
431
+ score += 80 # High score for district match
432
+ exact_location_match = True
433
+ break
434
+ if exact_location_match:
435
+ break
436
+
437
+ if location_context["landmarks"]:
438
+ for landmark in location_context["landmarks"]:
439
+ landmark_variations = self.yerevan_streets["landmarks"][landmark]
440
+ for variation in landmark_variations:
441
+ if variation.lower() in venue_address:
442
+ score += 90 # Very high score for landmark match
443
+ exact_location_match = True
444
+ break
445
+ if exact_location_match:
446
+ break
447
+
448
+ # Category matching (high priority)
449
+ if detected_category:
450
+ category_info = self.venue_categories[detected_category]
451
+
452
+ # Check CSV category
453
+ if venue_category in category_info["types"]:
454
+ score += 15 # High score for category match
455
+
456
+ # Check JSON types
457
+ for json_type in category_info["json_types"]:
458
+ if json_type in venue_types:
459
+ score += 20 # Even higher for JSON type match
460
+
461
+ # Check metadata fields for specific features
462
+ for metadata_field in category_info["metadata_fields"]:
463
+ if venue.get(metadata_field) is True:
464
+ score += 10 # Good score for feature match
465
+
466
+ # Extra points for specific matches
467
+ for keyword in category_info["keywords"]:
468
+ if keyword in venue_summary or keyword in venue_name.lower():
469
+ score += 5
470
+
471
+ # Enhanced keyword matching with metadata
472
+ special_keywords = {
473
+ 'draft': {
474
+ 'keywords': ['draft', 'tap', 'beer'],
475
+ 'metadata': ['serves_beer'],
476
+ 'bonus': 25
477
+ },
478
+ 'craft': {
479
+ 'keywords': ['craft', 'artisan', 'microbrewery'],
480
+ 'metadata': ['serves_beer'],
481
+ 'bonus': 20
482
+ },
483
+ 'beer': {
484
+ 'keywords': ['beer', 'brewery', 'ale', 'lager'],
485
+ 'metadata': ['serves_beer'],
486
+ 'bonus': 15
487
+ },
488
+ 'cocktail': {
489
+ 'keywords': ['cocktail', 'mixology', 'bartender'],
490
+ 'metadata': ['serves_cocktails'],
491
+ 'bonus': 15
492
+ },
493
+ 'wine': {
494
+ 'keywords': ['wine', 'vino', 'winery'],
495
+ 'metadata': ['serves_wine'],
496
+ 'bonus': 15
497
+ },
498
+ 'coffee': {
499
+ 'keywords': ['coffee', 'espresso', 'cappuccino', 'latte'],
500
+ 'metadata': ['serves_coffee'],
501
+ 'bonus': 15
502
+ },
503
+ 'breakfast': {
504
+ 'keywords': ['breakfast', 'brunch', 'morning'],
505
+ 'metadata': ['serves_breakfast', 'serves_brunch'],
506
+ 'bonus': 15
507
+ },
508
+ 'live music': {
509
+ 'keywords': ['live music', 'jazz', 'band', 'concert'],
510
+ 'metadata': ['live_music'],
511
+ 'bonus': 20
512
+ },
513
+ 'romantic': {
514
+ 'keywords': ['romantic', 'date', 'intimate', 'cozy'],
515
+ 'metadata': ['romantic', 'good_for_date_night'],
516
+ 'bonus': 15
517
+ },
518
+ 'pub': {
519
+ 'keywords': ['pub', 'tavern'],
520
+ 'metadata': ['serves_beer', 'has_bar'],
521
+ 'bonus': 20
522
+ },
523
+ 'bar': {
524
+ 'keywords': ['bar', 'lounge'],
525
+ 'metadata': ['has_bar', 'serves_spirits'],
526
+ 'bonus': 20
527
+ },
528
+ 'restaurant': {
529
+ 'keywords': ['restaurant', 'dining', 'cuisine'],
530
+ 'metadata': ['serves_lunch', 'serves_dinner'],
531
+ 'bonus': 15
532
+ }
533
+ }
534
+
535
+ # Apply special keyword scoring
536
+ for special_key, special_info in special_keywords.items():
537
+ if any(word in query_lower for word in special_info['keywords']):
538
+ # Check keywords in venue text
539
+ for keyword in special_info['keywords']:
540
+ if keyword in venue_summary or keyword in venue_name.lower():
541
+ score += special_info['bonus']
542
+
543
+ # Check metadata
544
+ for metadata_field in special_info['metadata']:
545
+ if venue.get(metadata_field) is True:
546
+ score += special_info['bonus']
547
+
548
+ # Venue name matching
549
+ venue_name_lower = venue_name.lower()
550
+ for word in query_words:
551
+ if word in venue_name_lower:
552
+ score += 8
553
+
554
+ # Summary matching (use the rich summary data)
555
+ for word in query_words:
556
+ if word in venue_summary:
557
+ score += 3
558
+
559
+ # Address matching
560
+ if venue.get('address'):
561
+ address_lower = venue['address'].lower()
562
+ for word in query_words:
563
+ if word in address_lower:
564
+ score += 2
565
+
566
+ # 5-star review matching
567
+ if venue_name in self.five_star_reviews:
568
+ reviews = self._get_reviews_by_language(venue_name, "english")
569
+ if reviews:
570
+ review_text = " ".join(reviews).lower()
571
+ for word in query_words:
572
+ if word in review_text:
573
+ score += 4
574
+
575
+ # JSON types matching
576
+ for venue_type in venue_types:
577
+ if venue_type in query_lower:
578
+ score += 12
579
+
580
+ if score > 0:
581
+ venue_copy = venue.copy()
582
+ venue_copy['similarity_score'] = score
583
+ venue_copy['category'] = venue_category
584
+ venue_copy['summary'] = structured_info.iloc[0]['venue_summary']
585
+ venue_copy['exact_location_match'] = exact_location_match
586
+ results.append(venue_copy)
587
+
588
+ # Sort by exact location match first, then by score
589
+ results.sort(key=lambda x: (x.get('exact_location_match', False), x['similarity_score']), reverse=True)
590
+ return results[:top_k]
591
+
592
+ def _filter_venues(self, venues: List[Dict], min_rating: float, price_range: str,
593
+ max_distance: float, location_context: Dict) -> List[Dict]:
594
+ """Filter venues based on criteria with distance calculation"""
595
+
596
+ filtered = []
597
+
598
+ # Get user location if specified in query
599
+ user_location = self._get_user_location_from_query_context(location_context)
600
+
601
+ for venue in venues:
602
+ # Rating filter
603
+ rating = venue.get('rating')
604
+ if rating is None:
605
+ rating = 0.0
606
+ try:
607
+ rating = float(rating)
608
+ except (ValueError, TypeError):
609
+ rating = 0.0
610
+
611
+ if rating < min_rating:
612
+ continue
613
+
614
+ # Price range filter
615
+ venue_price = str(venue.get('price_level', 'all')).lower()
616
+ if price_range != 'all' and venue_price != 'all' and venue_price != price_range:
617
+ continue
618
+
619
+ # Distance filter
620
+ if user_location:
621
+ venue_location = self._get_venue_coordinates(venue)
622
+ if venue_location:
623
+ distance = self._calculate_distance(user_location, venue)
624
+ if distance is not None and distance <= max_distance:
625
+ venue['calculated_distance'] = distance
626
+ filtered.append(venue)
627
+ else:
628
+ # If venue has no coordinates but has exact location match (street-based search),
629
+ # include it anyway since it was found via street matching
630
+ if venue.get('exact_location_match', False):
631
+ venue['calculated_distance'] = None # Mark as no distance data
632
+ filtered.append(venue)
633
+ # Otherwise exclude venues without coordinates when location is specified
634
+ else:
635
+ # If no location in query, add all venues that pass other filters
636
+ filtered.append(venue)
637
+
638
+ return filtered
639
+
640
+ def _get_user_location_from_query_context(self, location_context: Dict) -> Optional[Tuple[float, float]]:
641
+ """Get user location from extracted query context"""
642
+
643
+ # Prioritize streets, then landmarks, then districts
644
+ for loc_type in ["streets", "landmarks", "districts"]:
645
+ if location_context.get(loc_type):
646
+ # Use the first identified location of the highest priority type
647
+ location_name = location_context[loc_type][0]
648
+ return self.street_coordinates.get(location_name)
649
+
650
+ return None
651
+
652
+ def _get_venue_coordinates(self, venue: Dict) -> Optional[Tuple[float, float]]:
653
+ """Get coordinates for a venue"""
654
+ lat = venue.get('latitude')
655
+ lng = venue.get('longitude')
656
+ if lat is not None and lng is not None:
657
+ try:
658
+ return (float(lat), float(lng))
659
+ except (ValueError, TypeError):
660
+ return None
661
+ return None
662
+
663
+ def _calculate_distance(self, user_location: Tuple[float, float], venue: Dict) -> Optional[float]:
664
+ """Calculate distance in km between user and venue"""
665
+ venue_location = self._get_venue_coordinates(venue)
666
+ if user_location and venue_location:
667
+ return geodesic(user_location, venue_location).kilometers
668
+ return None
669
+
670
+ def _create_enhanced_response(self, venues: List[Dict], language: str, user_query: str, location_context: Dict) -> str:
671
+ """Create an enhanced, user-friendly response with location and category context"""
672
+
673
+ if not venues:
674
+ if language == 'armenian':
675
+ return "Ցավոք, ձեր հարցմանը համապատասխանող վենու չի գտնվել: Փորձեք փոխել որոնման պարամետրերը:"
676
+ return "Sorry, no venues found matching your criteria. Try adjusting your search parameters."
677
+
678
+ response_parts = []
679
+
680
+ # Get intro based on language
681
+ intro = self.conversation_templates[language]["recommendation_intros"]
682
+ response_parts.append(random.choice(intro))
683
+
684
+ # Add location context
685
+ if location_context["streets"]:
686
+ loc_str = self.conversation_templates[language]["location_contexts"]["street"].format(location=location_context["streets"][0])
687
+ response_parts.append(f"\n📍 {loc_str}")
688
+ elif location_context["landmarks"]:
689
+ loc_str = self.conversation_templates[language]["location_contexts"]["landmark"].format(location=location_context["landmarks"][0])
690
+ response_parts.append(f"\n📍 {loc_str}")
691
+ elif location_context["districts"]:
692
+ loc_str = self.conversation_templates[language]["location_contexts"]["district"].format(location=location_context["districts"][0])
693
+ response_parts.append(f"\n📍 {loc_str}")
694
+
695
+ # Add category context
696
+ detected_category = self._detect_category(user_query, language)
697
+ if detected_category:
698
+ category_str = self.conversation_templates[language]["category_matches"].get(detected_category)
699
+ if category_str:
700
+ response_parts.append(f"🏷️ {category_str}")
701
+
702
+ for i, venue in enumerate(venues[:5]):
703
+ response_parts.append(f"\n{i+1}. {self._format_enhanced_venue_info(venue, language)}")
704
+
705
+ # Add ending
706
+ response_parts.append("\n" + random.choice(self.conversation_templates[language]["endings"]))
707
+
708
+ return "\n".join(response_parts)
709
+
710
+ def _detect_category(self, query: str, language: str) -> Optional[str]:
711
+ """Detect venue category from query, respecting the detected language."""
712
+ query_lower = query.lower()
713
+
714
+ for category, info in self.venue_categories.items():
715
+ if language == "armenian":
716
+ search_terms = info.get("armenian_terms", []) + info.get("armenian_keywords", [])
717
+ else:
718
+ search_terms = info.get("keywords", [])
719
+
720
+ for term in search_terms:
721
+ if term.lower() in query_lower:
722
+ return category
723
+
724
+ # If no language-specific match, do a general search
725
+ for category, info in self.venue_categories.items():
726
+ all_terms = info.get("keywords", []) + info.get("armenian_terms", [])
727
+ for term in all_terms:
728
+ if term.lower() in query_lower:
729
+ return category
730
+
731
+ return None
732
+
733
+ def _format_enhanced_venue_info(self, venue: Dict, language: str = "english") -> str:
734
+ """Enhanced venue information formatting with 5-star reviews and metadata"""
735
+ if language == "armenian":
736
+ info_parts = [f"**{venue['name']}**"]
737
+ if venue.get('address'):
738
+ info_parts.append(f"📍 {venue['address']}")
739
+
740
+ # Safe rating display
741
+ rating = venue.get('rating')
742
+ if rating is not None and rating > 0:
743
+ info_parts.append(f"⭐ {rating}")
744
+
745
+ # Add distance
746
+ if venue.get('calculated_distance'):
747
+ distance = venue['calculated_distance']
748
+ info_parts.append(f"🚗 {distance:.1f} կմ")
749
+
750
+ # Add category and summary
751
+ if venue.get('category'):
752
+ category = venue['category']
753
+ category_map = {
754
+ "pub": "պաբ", "bar": "բար", "restaurant": "ռեստորան",
755
+ "cafe": "սրճարան", "club": "ակումբ", "hookah": "հուկա բար"
756
+ }
757
+ armenian_category = category_map.get(category, category)
758
+ info_parts.append(f"🏷️ {armenian_category}")
759
+
760
+ # Add metadata features
761
+ features = []
762
+ if venue.get('serves_beer'): features.append("գարեջուր")
763
+ if venue.get('serves_cocktails'): features.append("կոկտեյլ")
764
+ if venue.get('live_music'): features.append("կենդանի երաժշտություն")
765
+ if venue.get('outdoor_seating'): features.append("բացօթյա նստարան")
766
+ if features:
767
+ info_parts.append(f"✨ {', '.join(features)}")
768
+
769
+ # Add 5-star review
770
+ venue_name = venue.get('name', '')
771
+ if venue_name in self.five_star_reviews:
772
+ reviews = self._get_reviews_by_language(venue_name, language)
773
+ if reviews:
774
+ info_parts.append(f"💬 5⭐ \"{reviews[0][:150]}...\"")
775
+
776
+ else:
777
+ info_parts = [f"**{venue['name']}** - {venue.get('rating', 'N/A')}⭐"]
778
+ if venue.get('address'):
779
+ info_parts.append(f"📍 {venue['address']}")
780
+
781
+ # Add distance
782
+ if venue.get('calculated_distance'):
783
+ distance = venue['calculated_distance']
784
+ info_parts.append(f"🚗 {distance:.1f} km away")
785
+
786
+ # Add category and summary
787
+ if venue.get('category'):
788
+ info_parts.append(f"🏷️ {venue['category']}")
789
+
790
+ # Add metadata features
791
+ features = []
792
+ if venue.get('serves_beer'): features.append("serves beer")
793
+ if venue.get('serves_cocktails'): features.append("cocktails")
794
+ if venue.get('live_music'): features.append("live music")
795
+ if venue.get('outdoor_seating'): features.append("outdoor seating")
796
+ if venue.get('good_for_date_night'): features.append("romantic")
797
+ if venue.get('good_for_groups'): features.append("good for groups")
798
+ if features:
799
+ info_parts.append(f"✨ {', '.join(features)}")
800
+
801
+ # Add 5-star review
802
+ venue_name = venue.get('name', '')
803
+ if venue_name in self.five_star_reviews:
804
+ reviews = self._get_reviews_by_language(venue_name, language)
805
+ if reviews:
806
+ info_parts.append(f"💬 5⭐ \"{reviews[0][:150]}...\"")
807
+
808
+ return "\n".join(info_parts)
809
+
810
+ def get_enhanced_recommendations(self, user_query: str, min_rating: float = 3.0,
811
+ price_range: str = "all", max_distance: float = 10.0) -> Dict:
812
+ """
813
+ Enhanced recommendation system with conversational capabilities
814
+ Handles both venue queries and casual conversation
815
+ """
816
+ # Detect language
817
+ language = self._detect_language(user_query)
818
+
819
+ # Check if this is a venue-related query or casual conversation
820
+ is_venue_query = self._is_venue_related_query(user_query)
821
+ is_greeting_or_casual = self._detect_greeting_or_casual(user_query)
822
+
823
+ # Handle conversational queries
824
+ if not is_venue_query or is_greeting_or_casual:
825
+ conversational_response = self._generate_conversational_response(user_query, language)
826
+
827
+ # Add to conversation history
828
+ self._add_to_conversation_history(user_query, conversational_response)
829
+
830
+ # Return conversational response format
831
+ return {
832
+ "language": language,
833
+ "query": user_query,
834
+ "response_type": "conversational",
835
+ "conversational_response": conversational_response,
836
+ "venue_suggestions": [],
837
+ "total_found": 0,
838
+ "is_venue_query": False,
839
+ "location_context": {}
840
+ }
841
+
842
+ # Handle venue queries with the existing logic
843
+ location_context = self._extract_enhanced_location_context(user_query)
844
+
845
+ # Perform venue search
846
+ venues = self._smart_venue_search(user_query, top_k=50)
847
+
848
+ # Filter venues
849
+ filtered_venues = self._filter_venues(venues, min_rating, price_range, max_distance, location_context)
850
+
851
+ # Create response
852
+ response_text = self._create_enhanced_response(filtered_venues, language, user_query, location_context)
853
+
854
+ # Add venue recommendations to conversation history
855
+ self._add_to_conversation_history(user_query, f"Found {len(filtered_venues)} venues. {response_text[:100]}...")
856
+
857
+ return {
858
+ "language": language,
859
+ "query": user_query,
860
+ "response_type": "venue_recommendation",
861
+ "recommended_venues": filtered_venues[:10],
862
+ "response_text": response_text,
863
+ "total_found": len(filtered_venues),
864
+ "location_context": location_context,
865
+ "is_venue_query": True
866
+ }
867
+
868
+ def _initialize_conversational_llm(self):
869
+ """Initialize the conversational LLM for chat-like responses"""
870
+ if not LLAMA_CPP_AVAILABLE:
871
+ logger.warning("llama-cpp-python not available. Conversational features will be limited.")
872
+ return
873
+
874
+ try:
875
+ # Use TinyLlama for CPU deployment - much smaller and faster
876
+ try:
877
+ from huggingface_hub import hf_hub_download
878
+ logger.info("Downloading TinyLlama model from Hugging Face Hub...")
879
+
880
+ # Download smaller, CPU-optimized model
881
+ model_path = hf_hub_download(
882
+ repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
883
+ filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
884
+ cache_dir="./model_cache"
885
+ )
886
+
887
+ logger.info(f"TinyLlama model downloaded to: {model_path}")
888
+ model_paths = [model_path]
889
+
890
+ except Exception as e:
891
+ logger.error(f"Failed to download TinyLlama from HF Hub: {e}")
892
+ # Fallback - no local model available
893
+ logger.warning("No conversational model available. Using template responses.")
894
+ return
895
+
896
+ for model_path in model_paths:
897
+ try:
898
+ logger.info(f"Attempting to load conversational model: {model_path}")
899
+ self.conversational_llm = Llama(
900
+ model_path=model_path,
901
+ n_ctx=1024, # Smaller context window for CPU
902
+ n_threads=2, # Limit CPU threads
903
+ n_gpu_layers=0, # CPU only
904
+ verbose=False,
905
+ use_mmap=True, # Memory mapping for efficiency
906
+ use_mlock=False # Don't lock memory
907
+ )
908
+ logger.info(f"Successfully loaded TinyLlama model: {model_path}")
909
+ return # Exit after successful load
910
+ except Exception as e:
911
+ logger.warning(f"Failed to load model {model_path}: {e}")
912
+
913
+ logger.error("Could not load any conversational model. Using template responses.")
914
+
915
+ except Exception as e:
916
+ logger.error(f"Error initializing conversational LLM: {e}")
917
+ self.conversational_llm = None
918
+
919
+ def _add_to_conversation_history(self, user_message: str, ai_response: str):
920
+ """Add a user message and AI response to the conversation history"""
921
+ self.conversation_history.append({"user": user_message, "ai": ai_response})
922
+ # Keep history to a reasonable size
923
+ if len(self.conversation_history) > self.max_conversation_history:
924
+ self.conversation_history.pop(0)
925
+
926
+ def _get_conversation_context(self) -> str:
927
+ """Get the recent conversation history as a formatted string"""
928
+ context = ""
929
+ for turn in self.conversation_history:
930
+ context += f"User: {turn['user']}\nAI: {turn['ai']}\n"
931
+ return context
932
+
933
+ def _is_venue_related_query(self, query: str) -> bool:
934
+ """Determine if a query is related to finding venues"""
935
+ query_lower = query.lower()
936
+
937
+ # Keywords that indicate a venue search
938
+ venue_keywords = [
939
+ 'find', 'where', 'recommend', 'any', 'good', 'best', 'search',
940
+ 'restaurant', 'bar', 'pub', 'cafe', 'club', 'hookah',
941
+ 'ռեստորան', 'բար', 'պաբ', 'փաբ', 'սրճարան', 'ակումբ', 'հուկա',
942
+ 'գտնել', 'որտեղ', 'խորհուրդ', 'կա', 'լավ'
943
+ ]
944
+
945
+ # Location keywords
946
+ location_keywords = [
947
+ 'street', 'avenue', 'square', 'near', 'on', 'at',
948
+ 'փողոց', 'պողոտա', 'հրապարակ', 'մոտ'
949
+ ]
950
+
951
+ # Check for venue keywords
952
+ if any(keyword in query_lower for keyword in venue_keywords):
953
+ return True
954
+
955
+ # Check for location keywords
956
+ if any(keyword in query_lower for keyword in location_keywords):
957
+ return True
958
+
959
+ # Check against the known streets and landmarks
960
+ for street_info in self.yerevan_streets.values():
961
+ for variations in street_info.values():
962
+ if any(variation.lower() in query_lower for variation in variations):
963
+ return True
964
+
965
+ return False
966
+
967
+ def _generate_conversational_response(self, query: str, language: str) -> str:
968
+ """Generate a conversational response using the LLM or templates"""
969
+ if not self.conversational_llm:
970
+ return self._generate_template_response(query, language)
971
+
972
+ try:
973
+ context = self._get_conversation_context()
974
+
975
+ # Optimized prompt for TinyLlama
976
+ if language == 'armenian':
977
+ prompt = f"""You are a helpful assistant for Yerevan, Armenia. Be brief and friendly.
978
+ User: {query}
979
+ Assistant:"""
980
+ else:
981
+ prompt = f"""You are a helpful assistant for Yerevan, Armenia. Be brief and friendly.
982
+ User: {query}
983
+ Assistant:"""
984
+
985
+ response = self.conversational_llm(
986
+ prompt,
987
+ max_tokens=50, # Shorter responses for CPU efficiency
988
+ stop=["User:", "Assistant:", "\n"],
989
+ temperature=0.7,
990
+ echo=False,
991
+ )
992
+
993
+ generated_text = response['choices'][0]['text'].strip()
994
+ return generated_text if generated_text else self._generate_template_response(query, language)
995
+
996
+ except Exception as e:
997
+ logger.error(f"Error generating conversational response: {e}")
998
+ return self._generate_template_response(query, language)
999
+
1000
+ def _generate_template_response(self, query: str, language: str) -> str:
1001
+ """Generate template-based responses when LLM is not available"""
1002
+ query_lower = query.lower()
1003
+
1004
+ # Greeting responses
1005
+ if any(word in query_lower for word in ['hi', 'hello', 'hey', 'բարև', 'ողջույն']):
1006
+ if language == "armenian":
1007
+ return "Բարև ձեզ! Ես Երևանի վենուների ուղեցույցն եմ: Ինչ եք փնտրում?"
1008
+ return "Hello! I'm your Yerevan venue guide. What are you looking for?"
1009
+
1010
+ # How are you responses
1011
+ if any(phrase in query_lower for phrase in ['how are you', 'ինչպես ես', 'ոնց ես']):
1012
+ if language == "armenian":
1013
+ return "Շնորհակալություն հարցնելու համար! Ես պատրաստ եմ օգնել ձեզ գտնել լավագույն վայրերը Երևանում:"
1014
+ return "Thanks for asking! I'm ready to help you find the best venues in Yerevan!"
1015
+
1016
+ # What can you do responses
1017
+ if any(phrase in query_lower for phrase in ['what can you', 'ինչ կարող ես', 'քո մասին']):
1018
+ if language == "armenian":
1019
+ return "Ես կարող եմ օգնել ձեզ գտնել ռեստորաններ, բարեր, սրճարաններ և այլ վայրեր Երևանում: Ինչ եք փնտրում?"
1020
+ return "I can help you find restaurants, bars, cafes and other venues in Yerevan! What are you looking for?"
1021
+
1022
+ # Thanks responses
1023
+ if any(word in query_lower for word in ['thanks', 'thank you', 'շնորհակալություն']):
1024
+ if language == "armenian":
1025
+ return "Խնդրեմ! Ուրախ եմ, որ կարողացա օգնել:"
1026
+ return "You're welcome! Happy to help!"
1027
+
1028
+ # Default responses
1029
+ if language == "armenian":
1030
+ return "Ես կարող եմ օգնել ձեզ գտնել վայրեր Երևանում: Ինչ եք փնտրում?"
1031
+ return "I can help you find venues in Yerevan! What are you looking for?"
1032
+
1033
+ def _detect_greeting_or_casual(self, query: str) -> bool:
1034
+ """Detect if the query is a greeting or casual conversation"""
1035
+ casual_patterns = [
1036
+ # English
1037
+ r'\b(hi|hello|hey|good morning|good evening|how are you|what\'s up|thanks|thank you)\b',
1038
+ r'\b(who are you|what can you do|help|about you)\b',
1039
+ # Armenian
1040
+ r'\b(բարև|ողջույն|բարի լույս|բարի երեկո|ինչպես ես|ինչ կա|շնորհակալություն)\b',
1041
+ r'\b(ով ես|ինչ կարող ես|օգնություն|քո մասին)\b'
1042
+ ]
1043
+
1044
+ query_lower = query.lower()
1045
+ for pattern in casual_patterns:
1046
+ if re.search(pattern, query_lower):
1047
+ return True
1048
+ return False
1049
+
1050
# Global AI instance (created lazily by initialize_ai)
ai_instance = None


def initialize_ai():
    """Create and initialize the global AI instance if it does not exist yet.

    Returns:
        The global ``ai_instance``.

    Raises:
        FileNotFoundError: When one of the two venue data files is missing.
        Exception: Any error raised while constructing or initializing the
            instance is logged and re-raised. The global is reset to None.
    """
    global ai_instance

    if ai_instance is not None:
        return ai_instance

    try:
        import os

        # Data files are expected next to the working directory.
        venues_json = "yerevan_pubs_bars_20250623_193205.json"
        venues_csv = "yerevan_venues_structured.csv"

        # Fail early, with a clear message, when a data file is missing.
        required_files = (("JSON", venues_json), ("CSV", venues_csv))
        for file_kind, file_path in required_files:
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"Venue {file_kind} file not found: {file_path}")

        logger.info("Creating CompleteYerevanVenueAI instance...")
        ai_instance = CompleteYerevanVenueAI(venues_json, venues_csv)

        logger.info("Initializing venue data...")
        ai_instance.initialize()

        logger.info("Global AI instance initialized successfully")

    except Exception as e:
        logger.error(f"Failed to initialize AI instance: {e}")
        ai_instance = None
        raise e

    return ai_instance
1084
+
1085
def get_recommendations(query, min_rating, price_range, max_distance):
    """Gradio callback: answer a chat message or venue request as plain text.

    Initializes the global AI instance on first use. Every failure is turned
    into a user-facing message rather than an exception.
    """
    global ai_instance

    if not query.strip():
        return "Please enter a question or venue request."

    # Ensure AI instance is initialized
    if ai_instance is None:
        try:
            initialize_ai()
        except Exception as e:
            logger.error(f"Failed to initialize AI: {e}")
            return f"Sorry, I'm having trouble starting up. Error: {str(e)}"

    # Double check AI instance exists
    if ai_instance is None:
        return "Sorry, the AI system is not available right now. Please try again later."

    # Which result field carries the text for each response type.
    text_field_by_type = {
        "conversational": "conversational_response",
        "venue_recommendation": "response_text",
    }

    try:
        # Handles both conversational and venue queries
        result = ai_instance.get_enhanced_recommendations(
            user_query=query,
            min_rating=min_rating,
            price_range=price_range,
            max_distance=max_distance
        )

        text_field = text_field_by_type.get(result.get("response_type"))
        if text_field is None:
            # Fallback for an unknown response type
            return "I can help you find venues in Yerevan or have a casual conversation. What would you like to know?"
        return result[text_field]

    except Exception as e:
        logger.error(f"Error in get_recommendations: {e}")
        return f"Sorry, I encountered an error: {str(e)}"
1128
+
1129
def create_gradio_interface():
    """Build the Gradio Blocks UI for the assistant and return it (not launched).

    The layout is a header, a query box with rating/distance/price controls and
    a search button, a tips column, an output box and clickable examples. Both
    the button and pressing Enter in the query box call ``get_recommendations``.
    """
    custom_css = """
    .gradio-container {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    }
    .gr-button-primary {
        background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
        border: none;
    }
    """

    header_markdown = """
        # 🇦🇲 Yerevan Venue AI Assistant
        ### Your Conversational Guide to Yerevan's Best Venues

        I can help you with:
        - 🍽️ **Restaurant & Bar Recommendations** - Find the perfect dining spot
        - 🗺️ **Location-Based Search** - Venues near specific streets or landmarks
        - 💬 **Casual Conversation** - Ask me anything or just say hello!
        - 🇦🇲 **Bilingual Support** - Chat in Armenian or English

        **Examples:**
        - "Hello! How are you?"
        - "Find me a good pub on Pushkin Street"
        - "բարեր Մաշտոցի մոտ" (bars near Mashtots)
        - "What can you help me with?"
        """

    tips_markdown = """
                ### 💡 Tips:
                - **Start a conversation**: "Hi", "Hello", "How are you?"
                - **Ask about me**: "What can you do?", "Who are you?"
                - **Get venue help**: "Find restaurants", "Bars near Opera"
                - **Use Armenian**: "բարև", "ռեստորան", "բար"
                - **Be specific**: Include location, cuisine type, or atmosphere

                ### 🗺️ Known Locations:
                Pushkin Street, Mashtots Avenue, Saryan Street, Republic Square, Opera House, Cascade, Northern Avenue, Nalbandyan Street
                """

    example_queries = (
        "Hello! How are you today?",
        "What can you help me with?",
        "Find me a good pub with draft beer",
        "Restaurants near Opera House",
        "բարև ձեզ, ինչպես եք?",
        "բարեր Պուշկին փողոցում",
        "pubs on Nalbandyan street",
        "Thanks for your help!",
    )

    with gr.Blocks(
        title="🇦🇲 Yerevan Venue AI Assistant",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as interface:

        gr.Markdown(header_markdown)

        with gr.Row():
            # Left column: query and search controls
            with gr.Column(scale=3):
                query_input = gr.Textbox(
                    label="💬 Ask me anything or request venue recommendations",
                    placeholder="Try: 'Hello!' or 'Find me a restaurant near Opera House' or 'բարեր Պուշկին փողոցում'",
                    lines=2
                )

                with gr.Row():
                    min_rating = gr.Slider(
                        minimum=0, maximum=5, value=3.0, step=0.1,
                        label="⭐ Minimum Rating (for venue searches)"
                    )
                    max_distance = gr.Slider(
                        minimum=0.5, maximum=20, value=5.0, step=0.5,
                        label="📍 Max Distance (km, for venue searches)"
                    )

                price_range = gr.Radio(
                    choices=["all", "budget", "mid", "expensive"],
                    value="all",
                    label="💰 Price Range (for venue searches)"
                )

                search_btn = gr.Button("🔍 Chat / Search", variant="primary", size="lg")

            # Right column: usage tips
            with gr.Column(scale=2):
                gr.Markdown(tips_markdown)

        output = gr.Textbox(
            label="🤖 AI Response",
            lines=15,
            max_lines=20,
            show_copy_button=True
        )

        # Examples for quick testing
        gr.Examples(
            examples=[[text] for text in example_queries],
            inputs=[query_input],
            label="💬 Try these examples:"
        )

        # The button click and Enter in the textbox share one handler.
        search_btn.click(
            fn=get_recommendations,
            inputs=[query_input, min_rating, price_range, max_distance],
            outputs=output
        )
        query_input.submit(
            fn=get_recommendations,
            inputs=[query_input, min_rating, price_range, max_distance],
            outputs=output
        )

    return interface
1239
+
1240
if __name__ == "__main__":
    print("Launching Yerevan Venue AI Assistant with Conversational Capabilities...")

    # Load venue data up front so the first request does not pay for it.
    initialize_ai()

    # Server settings: listen on all interfaces, port 7861, with a share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7861,
        "share": True,
        "show_error": True,
    }

    interface = create_gradio_interface()
    interface.launch(**launch_options)
yerevan_pubs_bars_20250623_193205.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e726d829e432c66821d25e28edbc28da9967e0041cb3a59a5965d586f152e2e0
3
+ size 31571522
yerevan_venues_structured.csv ADDED
The diff for this file is too large to render. See raw diff