# vibe_sip / venue_ai_complete.py
# artush-habetyan's picture
# Upload 8 files
# fb3a3b8 verified
import gzip
import json
import pandas as pd
import numpy as np
from typing import List, Dict, Optional, Tuple
import logging
from datetime import datetime
import re
import gradio as gr
import random
from geopy.distance import geodesic
from collections import defaultdict
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
try:
from llama_cpp import Llama
LLAMA_CPP_AVAILABLE = True
logger.info("llama-cpp-python successfully imported")
except ImportError as e:
logger.warning(f"llama-cpp-python not available due to import error: {e}")
LLAMA_CPP_AVAILABLE = False
except Exception as e:
logger.warning(f"llama-cpp-python failed to load: {e}")
LLAMA_CPP_AVAILABLE = False
from lightweight_rag import LightweightRAGEnhancer
# Ultra-lightweight LLM for optimal HF Spaces performance
try:
from ultra_lightweight_llm import UltraLightweightLLM
ULTRA_LIGHTWEIGHT_LLM_AVAILABLE = True
logger.info("Ultra-lightweight LLM available for optimal performance")
except ImportError as e:
logger.warning(f"Ultra-lightweight LLM not available: {e}")
ULTRA_LIGHTWEIGHT_LLM_AVAILABLE = False
class CompleteYerevanVenueAI:
"""
Complete Bilingual (Armenian/English) AI Assistant for Yerevan Venue Recommendations
With enhanced templates, location parsing, filtering, distance calculation and metadata usage
"""
def __init__(self, venues_json_path: str, venues_csv_path: str) -> None:
    """Store the data-file paths and build the static lookup tables.

    No venue file is read here: the data attributes start empty and are
    filled by ``initialize()``.

    Args:
        venues_json_path: Path of the venue JSON file (opened later by
            ``_load_venue_data``).
        venues_csv_path: Path of the venue CSV file (read later by
            ``_load_venue_data``).
    """
    self.venues_json_path = venues_json_path
    self.venues_csv_path = venues_csv_path
    # Core data (populated by _load_venue_data / _process_five_star_reviews)
    self.venues_data = []
    self.venues_structured = None
    self.five_star_reviews = {}
    # Conversational LLM; None means template-based replies are used
    self.conversational_llm = None
    self.conversation_history = []
    # Upper bound on stored turns, enforced by _add_to_conversation_history
    self.max_conversation_history = 10
    # Enhanced location and category knowledge
    self.yerevan_streets = self._initialize_enhanced_street_knowledge()
    self.venue_categories = self._initialize_category_knowledge()
    self.conversation_templates = self._initialize_enhanced_conversation_templates()
    # Initialize lightweight RAG enhancer with comprehensive geo data
    self.rag_enhancer = LightweightRAGEnhancer()
    # Merge RAG geo data with existing street coordinates.
    # NOTE(review): _merge_geo_data is defined outside this excerpt; it is
    # called after rag_enhancer and yerevan_streets are set, so keep this order.
    self.street_coordinates = self._merge_geo_data()
    logger.info("Initialized Complete YerevanVenueAI with RAG enhancement and comprehensive geo data")
def _initialize_street_coordinates(self) -> Dict[str, Tuple[float, float]]:
"""Initialize street coordinates for distance calculation"""
return {
# Major streets with approximate center coordinates (lat, lng)
"Mashtots Avenue": (40.1845, 44.5117),
"Abovyan Street": (40.1776, 44.5146),
"Saryan Street": (40.1851, 44.5086),
"Tumanyan Street": (40.1822, 44.5149),
"Amiryan Street": (40.1798, 44.5139),
"Pushkin Street": (40.1774, 44.5154),
"Khorenatsi Street": (40.1751, 44.5181),
"Teryan Street": (40.1828, 44.5163),
"Paronyan Street": (40.1812, 44.5134),
"Northern Avenue": (40.1792, 44.5146),
"Sayat Nova Avenue": (40.1834, 44.5098),
"Baghramyan Avenue": (40.1951, 44.5089),
"Vazgen Sargsyan Street": (40.1823, 44.5201),
"Tigran Mets Avenue": (40.1743, 44.5289),
"Nalbandyan Street": (40.1800, 44.5182),
# Districts (approximate centers)
"Kentron": (40.1792, 44.5146),
"Arabkir": (40.2089, 44.4856),
"Avan": (40.2156, 44.5489),
"Davtashen": (40.2267, 44.4567),
"Erebuni": (40.1345, 44.5234),
# Landmarks
"Republic Square": (40.1761, 44.5126),
"Opera House": (40.1836, 44.5098),
"Cascade": (40.1876, 44.5086),
"Swan Lake": (40.1837, 44.5135),
"Blue Mosque": (40.1733, 44.5151)
}
def _initialize_enhanced_street_knowledge(self) -> Dict[str, Dict]:
"""Enhanced Yerevan geography knowledge with Armenian names"""
return {
"streets": {
"Mashtots Avenue": ["մաշտոցի", "մաշտոց", "mashtots", "mesrop mashtots"],
"Abovyan Street": ["աբովյանի", "աբովյան", "abovyan"],
"Saryan Street": ["սարյանի", "սարյան", "saryan", "martiros saryan"],
"Tumanyan Street": ["թումանյանի", "թումանյան", "tumanyan", "hovhannes tumanyan"],
"Amiryan Street": ["ամիրյանի", "ամիրյան", "amiryan"],
"Pushkin Street": ["պուշկինի", "պուշկին", "pushkin"],
"Khorenatsi Street": ["խորենացի", "խորենաց", "khorenatsi"],
"Teryan Street": ["տերյանի", "տերյան", "teryan"],
"Paronyan Street": ["պարոնյանի", "պարոնյան", "paronyan"],
"Northern Avenue": ["հյուսիսային", "northern", "northern avenue"],
"Sayat Nova Avenue": ["սայաթ նովա", "sayat nova"],
"Baghramyan Avenue": ["բաղրամյանի", "բաղրամյան", "baghramyan"],
"Vazgen Sargsyan Street": ["վազգեն սարգսյանի", "vazgen sargsyan"],
"Tigran Mets Avenue": ["տիգրան մեծի", "tigran mets"],
"Nalbandyan Street": ["նալբանդյանի", "նալբանդյան", "nalbandyan"]
},
"districts": {
"Kentron": ["կենտրոն", "կենտրում", "center", "downtown", "central"],
"Arabkir": ["արաբկիր", "arabkir"],
"Avan": ["ավան", "avan"],
"Davtashen": ["դավթաշեն", "davtashen"],
"Erebuni": ["էրեբունի", "erebuni"],
"Kanaker-Zeytun": ["կանակեր", "զեյթուն", "kanaker", "zeytun"],
"Malatia-Sebastia": ["մալաթիա", "սեբաստիա", "malatia", "sebastia"],
"Nor Nork": ["նոր նորք", "nor nork"],
"Shengavit": ["շենգավիթ", "shengavit"],
"Ajapnyak": ["աջափնյակ", "ajapnyak"]
},
"landmarks": {
"Republic Square": ["հանրապետության հրապարակ", "հանրապետության", "republic square", "republic"],
"Opera House": ["օպերա", "օպերայի տուն", "opera", "opera house"],
"Cascade": ["կասկադ", "cascade"],
"Northern Avenue": ["հյուսիսային պողոտա", "northern avenue"],
"Swan Lake": ["կարապի լիճ", "swan lake"],
"Vernissage Market": ["վերնիսաժ", "vernissage"],
"Blue Mosque": ["կապույտ մզկիթ", "blue mosque"],
"Mother Armenia": ["մայր հայաստան", "mother armenia"],
"Matenadaran": ["մատենադարան", "matenadaran"],
"Cascade Complex": ["կասկադային համալիր", "cascade complex"]
}
}
def _initialize_category_knowledge(self) -> Dict[str, Dict]:
"""Enhanced category knowledge with Armenian terms and JSON metadata"""
return {
"nightlife": {
"types": ["pub", "bar", "club", "hookah", "night_club"],
"json_types": ["bar", "night_club"],
"armenian_terms": ["բար", "պաբ", "փաբ", "փաբեր", "ակումբ", "հուկա", "գիշերային", "ժամանց"],
"keywords": ["drink", "beer", "cocktail", "party", "night", "dance", "draft", "tap", "alcohol", "whiskey", "vodka", "pub", "bar", "nightclub"],
"armenian_keywords": ["խմիչք", "գարեջուր", "կոկտեյլ", "պարտի", "գիշեր", "պար", "ալկոհոլ"],
"metadata_fields": ["serves_beer", "serves_spirits", "serves_cocktails", "serves_wine", "has_bar", "has_happy_hour", "good_for_dancing", "serves_happy_hour_drinks", "serves_late_night_food"]
},
"dining": {
"types": ["restaurant", "cafe", "fast_food", "bakery"],
"json_types": ["restaurant", "cafe"],
"armenian_terms": ["ռեստորան", "սրճարան", "արագ սնունդ", "հացագործություն"],
"keywords": ["food", "eat", "meal", "coffee", "breakfast", "lunch", "dinner", "cuisine", "dining", "restaurant", "cafe"],
"armenian_keywords": ["ուտելիք", "ուտել", "ճաշ", "սուրճ", "նախաճաշ", "ճաշ", "ընթրիք"],
"metadata_fields": ["serves_breakfast", "serves_brunch", "serves_lunch", "serves_dinner", "serves_coffee", "serves_dessert", "serves_vegetarian_food", "menu_for_children", "good_for_children", "good_for_groups"]
},
"culture": {
"types": ["cultural", "gallery", "theatre", "museum"],
"json_types": [],
"armenian_terms": ["մշակութային", "պատկերասրահ", "թատրոն", "թանգարան"],
"keywords": ["art", "culture", "museum", "gallery", "theater", "exhibition"],
"armenian_keywords": ["արվեստ", "մշակույթ", "թանգարան", "ցուցահանդես"],
"metadata_fields": []
},
"entertainment": {
"types": ["karaoke", "gaming", "music", "cinema"],
"json_types": [],
"armenian_terms": ["կարաոկե", "խաղ", "երաժշտություն", "կինո"],
"keywords": ["music", "karaoke", "game", "entertainment", "fun", "live music"],
"armenian_keywords": ["երաժշտություն", "կարաոկե", "խաղ", "ժամանց", "զվարճանք"],
"metadata_fields": ["live_music", "good_for_watching_sports", "good_for_business_meetings", "good_for_date_night"]
}
}
def _initialize_enhanced_conversation_templates(self) -> Dict[str, Dict]:
"""Enhanced conversation templates for various scenarios"""
return {
"armenian": {
"greetings": [
"Բարև ձեզ! Ես ձեր անձնական ուղեցույցն եմ Երևանի լավագույն վայրերի համար:",
"Ողջույն! Ուրախ եմ օգնել ձեզ հայտնաբերել Երևանի հիանալի վայրերը:",
"Բարի գալուստ! Ես կօգնեմ ձեզ գտնել կատարյալ վայր Երևանում:"
],
"recommendation_intros": [
"Ձեր հարցման համար ես գտա այս հիանալի վայրերը:",
"Ահա ինչ կարող եմ առաջարկել ձեզ:",
"Այս վայրերը կարող են ձեզ հետաքրքրել:"
],
"location_contexts": {
"street": "Դուք փնտրում եք {location} փողոցում:",
"district": "Դուք փնտրում եք {location} թաղամասում:",
"landmark": "Դուք փնտրում եք {location} մոտակայքում:"
},
"category_matches": {
"nightlife": "Այս վայրերը հիանալի են գիշերային ժամանցի համար:",
"dining": "Այս ճաշարանները կամ սրճարանները ձեզ կհավանեն:",
"culture": "Այս մշակութային վայրերը հետաքրքիր են:",
"entertainment": "Այս ժամանցային վայրերը զվարճալի են:"
},
"endings": [
"Հուսով եմ, որ կգտնեք կատարյալ տարբերակ!",
"Բարի ժամանց!",
"Եթե հարցեր ունեք, ես այստեղ եմ:"
]
},
"english": {
"greetings": [
"Hello! I'm your personal guide to the best places in Yerevan:",
"Welcome! I'm excited to help you discover amazing venues in Yerevan:",
"Hi there! Let me help you find the perfect spot in Yerevan:"
],
"recommendation_intros": [
"For your query, I found these fantastic venues:",
"Here's what I can recommend for you:",
"These places might interest you:"
],
"location_contexts": {
"street": "You're looking on {location}:",
"district": "You're exploring the {location} district:",
"landmark": "You're searching near {location}:"
},
"category_matches": {
"nightlife": "These venues are perfect for nightlife:",
"dining": "These restaurants and cafes will delight you:",
"culture": "These cultural venues are fascinating:",
"entertainment": "These entertainment spots are fun:"
},
"endings": [
"I hope you find the perfect match!",
"Enjoy your visit!",
"Feel free to ask if you need more recommendations!"
]
}
}
def initialize(self):
    """Run the start-up steps in order: load data, index reviews, set up the LLM.

    Each step is announced on the module logger before it runs.
    """
    startup_steps = (
        ("Loading venue data...", "_load_venue_data"),
        ("Processing 5-star reviews...", "_process_five_star_reviews"),
        ("Initializing conversational LLM...", "_initialize_conversational_llm"),
    )
    for announcement, method_name in startup_steps:
        logger.info(announcement)
        # Resolve the method only when its turn comes, then call it.
        getattr(self, method_name)()
    logger.info("Complete YerevanVenueAI initialization finished!")
def _load_venue_data(self):
    """Read the venue JSON into ``venues_data`` and the CSV into ``venues_structured``.

    Both paths come from the constructor.  Errors from opening or parsing
    either file are not caught here.
    """
    with open(self.venues_json_path, 'r', encoding='utf-8') as json_file:
        self.venues_data = json.load(json_file)
    self.venues_structured = pd.read_csv(self.venues_csv_path)
    json_count = len(self.venues_data)
    csv_count = len(self.venues_structured)
    logger.info(f"Loaded {json_count} venues from JSON")
    logger.info(f"Loaded {csv_count} venues from CSV")
def _process_five_star_reviews(self):
    """Index up to three 5-star review texts per language for every venue.

    Reads ``self.venues_data`` and fills ``self.five_star_reviews`` with
    ``{venue_name: {'english': [...], 'armenian': [...]}}``.  Only reviews
    rated exactly 5 whose stripped text is longer than 20 characters are
    kept.  A review counts as Armenian when its ``language`` or
    ``original_language`` is ``'hy'``; everything else is filed as English.

    ``reviews`` or ``text`` values that are present but ``None`` (JSON
    ``null``) are treated as empty instead of raising.
    """
    for venue in self.venues_data:
        venue_name = venue.get('name', '')
        # `or []` also covers a key that exists with a null value.
        reviews = venue.get('reviews') or []
        english_reviews = []
        armenian_reviews = []
        for review in reviews:
            if review.get('rating') != 5:
                continue
            text = (review.get('text') or '').strip()
            if len(text) <= 20:  # Only meaningful reviews
                continue
            language = review.get('language', 'en')
            original_language = review.get('original_language', 'en')
            if language == 'hy' or original_language == 'hy':
                armenian_reviews.append(text)
            else:
                english_reviews.append(text)
        # Store both language versions, but only for venues that have any.
        if english_reviews or armenian_reviews:
            self.five_star_reviews[venue_name] = {
                'english': english_reviews[:3],  # First 3 English reviews
                'armenian': armenian_reviews[:3],  # First 3 Armenian reviews
            }
    logger.info(f"Processed 5-star reviews for {len(self.five_star_reviews)} venues")
def _get_reviews_by_language(self, venue_name: str, language: str) -> List[str]:
"""Get reviews in the specified language"""
if venue_name not in self.five_star_reviews:
return []
reviews_data = self.five_star_reviews[venue_name]
if language == "armenian" and reviews_data.get('armenian'):
return reviews_data['armenian']
elif reviews_data.get('english'):
return reviews_data['english']
else:
# Fallback to any available reviews
return reviews_data.get('armenian', []) + reviews_data.get('english', [])
def _detect_language(self, text: str) -> str:
"""Enhanced language detection"""
armenian_chars = re.findall(r'[Ա-Ֆա-ֆ]', text)
armenian_ratio = len(armenian_chars) / len(text) if text else 0
armenian_keywords = ['բար', 'ռեստորան', 'սրճարան', 'ակումբ', 'հուկա', 'ուզում', 'գտնել', 'որտեղ', 'կարող', 'լավ', 'հետաքրքիր']
armenian_keyword_count = sum(1 for keyword in armenian_keywords if keyword in text.lower())
if armenian_ratio > 0.15 or armenian_keyword_count > 0:
return "armenian"
return "english"
def _extract_enhanced_location_context(self, query: str) -> Dict[str, List[str]]:
"""Enhanced location extraction with comprehensive Armenian support"""
query_lower = query.lower()
context = {
"streets": [],
"districts": [],
"landmarks": []
}
# Enhanced street detection
for street_eng, variations in self.yerevan_streets["streets"].items():
for variation in variations:
if variation.lower() in query_lower:
context["streets"].append(street_eng)
break
# Enhanced district detection
for district_eng, variations in self.yerevan_streets["districts"].items():
for variation in variations:
if variation.lower() in query_lower:
context["districts"].append(district_eng)
break
# Enhanced landmark detection
for landmark_eng, variations in self.yerevan_streets["landmarks"].items():
for variation in variations:
if variation.lower() in query_lower:
context["landmarks"].append(landmark_eng)
break
return context
def _get_user_location_from_query(self, query: str) -> Optional[Tuple[float, float]]:
"""Extract user location coordinates from street/landmark names in query"""
location_context = self._extract_enhanced_location_context(query)
# Check streets first
for street in location_context["streets"]:
if street in self.street_coordinates:
return self.street_coordinates[street]
# Check districts
for district in location_context["districts"]:
if district in self.street_coordinates:
return self.street_coordinates[district]
# Check landmarks
for landmark in location_context["landmarks"]:
if landmark in self.street_coordinates:
return self.street_coordinates[landmark]
return None
def _calculate_distance(self, user_location: Tuple[float, float], venue: Dict) -> Optional[float]:
"""Calculate distance between user location and venue"""
try:
venue_lat = venue.get('latitude')
venue_lng = venue.get('longitude')
if venue_lat is not None and venue_lng is not None:
distance = geodesic(user_location, (venue_lat, venue_lng)).kilometers
return distance
except Exception as e:
logger.debug(f"Distance calculation error: {e}")
return None
def _smart_venue_search(self, query: str, top_k: int = 20) -> List[Dict]:
    """Score every loaded venue against ``query`` and return the best matches.

    A venue's score starts at the value returned by
    ``self.rag_enhancer.calculate_enhanced_score`` and is then increased by
    fixed bonuses for location, category, keyword, review and type matches
    (the individual weights are given inline).  Venues whose name has no row
    in ``self.venues_structured`` are skipped, and so are venues whose final
    score is not positive.

    Args:
        query: Raw user query text.
        top_k: Maximum number of results to return.

    Returns:
        Shallow copies of the matching venue dicts, each extended with
        ``similarity_score``, ``category``, ``summary``,
        ``exact_location_match``, ``rag_score`` and ``rag_explanation``.
        Exact location matches come first; within each group results are
        ordered by descending score.
    """
    query_lower = query.lower()
    results = []
    # Use RAG enhancer for query analysis
    enhanced_query = self.rag_enhancer.enhance_query(query)
    query_words = set(query_lower.split())
    expanded_terms = set(enhanced_query["expanded_terms"])
    # Detect category from query
    language = self._detect_language(query)
    detected_category = self._detect_category(query, language)
    # Get enhanced location context from RAG
    # NOTE(review): geo_context is assigned but not read again in this method.
    geo_context = enhanced_query["geo_context"]
    location_context = self._extract_enhanced_location_context(query)
    for venue in self.venues_data:
        venue_name = venue.get('name', '')
        venue_address = venue.get('address', '').lower()
        # Get structured venue info (a DataFrame filter is run for every venue)
        structured_info = self.venues_structured[
            self.venues_structured['venue_name'] == venue_name
        ]
        if structured_info.empty:
            continue
        # When several CSV rows share the name, only the first one is used.
        venue_category = structured_info.iloc[0]['category']
        venue_summary = str(structured_info.iloc[0]['venue_summary']).lower()
        # Prepare venue data for RAG scoring
        venue_for_rag = {
            'name': venue_name,
            'category': venue_category,
            'summary': venue_summary,
            'latitude': venue.get('latitude'),
            'longitude': venue.get('longitude'),
            'rating': venue.get('rating', 0)
        }
        # Get RAG enhanced score
        rag_score, rag_explanation = self.rag_enhancer.calculate_enhanced_score(venue_for_rag, enhanced_query)
        # Start with RAG score as base
        score = rag_score
        # JSON metadata scoring
        venue_types = venue.get('types', [])
        # PRIORITY: Exact street/location matching (very high score)
        # NOTE(review): exact_location_match is shared by the street, district
        # and landmark checks below.  Once an earlier check has set it, the
        # `if exact_location_match: break` of a later check leaves that outer
        # loop after its first item, whether or not that item matched.
        exact_location_match = False
        if location_context["streets"]:
            for street in location_context["streets"]:
                street_variations = self.yerevan_streets["streets"][street]
                for variation in street_variations:
                    if variation.lower() in venue_address:
                        score += 100  # Very high score for exact street match
                        exact_location_match = True
                        break
                if exact_location_match:
                    break
        if location_context["districts"]:
            for district in location_context["districts"]:
                district_variations = self.yerevan_streets["districts"][district]
                for variation in district_variations:
                    if variation.lower() in venue_address:
                        score += 80  # High score for district match
                        exact_location_match = True
                        break
                if exact_location_match:
                    break
        if location_context["landmarks"]:
            for landmark in location_context["landmarks"]:
                landmark_variations = self.yerevan_streets["landmarks"][landmark]
                for variation in landmark_variations:
                    if variation.lower() in venue_address:
                        score += 90  # Very high score for landmark match
                        exact_location_match = True
                        break
                if exact_location_match:
                    break
        # Category matching (high priority)
        if detected_category:
            category_info = self.venue_categories[detected_category]
            # Check CSV category
            if venue_category in category_info["types"]:
                score += 15  # High score for category match
            # Check JSON types
            for json_type in category_info["json_types"]:
                if json_type in venue_types:
                    score += 20  # Even higher for JSON type match
            # Check metadata fields for specific features (only a literal True counts)
            for metadata_field in category_info["metadata_fields"]:
                if venue.get(metadata_field) is True:
                    score += 10  # Good score for feature match
            # Extra points for specific matches
            for keyword in category_info["keywords"]:
                if keyword in venue_summary or keyword in venue_name.lower():
                    score += 5
        # Additional scoring with expanded terms from RAG
        additional_score = 0
        # Enhanced keyword matching with expanded terms
        for term in expanded_terms:
            # Check in venue name
            if term in venue_name.lower():
                additional_score += 3
            # Check in venue summary
            if term in venue_summary:
                additional_score += 2
            # Check in venue address
            if term in venue_address:
                additional_score += 1
        score += additional_score
        # Venue name matching (substring test of each query word)
        venue_name_lower = venue_name.lower()
        for word in query_words:
            if word in venue_name_lower:
                score += 8
        # Summary matching (use the rich summary data with higher scoring)
        for word in query_words:
            if word in venue_summary:
                score += 5  # Increased score for summary matches
        # Additional bonus for detailed summary matches
        summary_bonus_keywords = ['draft', 'tap', 'craft', 'brewery', 'beer']
        for keyword in summary_bonus_keywords:
            if keyword in query_lower and keyword in venue_summary:
                score += 15  # High bonus for specific beer-related terms in summary
        # Address matching
        if venue.get('address'):
            address_lower = venue['address'].lower()
            for word in query_words:
                if word in address_lower:
                    score += 2
        # 5-star review matching (always requested with "english" here)
        if venue_name in self.five_star_reviews:
            reviews = self._get_reviews_by_language(venue_name, "english")
            if reviews:
                review_text = " ".join(reviews).lower()
                for word in query_words:
                    if word in review_text:
                        score += 4
        # JSON types matching
        for venue_type in venue_types:
            if venue_type in query_lower:
                score += 12
        if score > 0:
            # Annotate a shallow copy so the loaded venue dict is left untouched.
            venue_copy = venue.copy()
            venue_copy['similarity_score'] = score
            venue_copy['category'] = venue_category
            venue_copy['summary'] = structured_info.iloc[0]['venue_summary']
            venue_copy['exact_location_match'] = exact_location_match
            venue_copy['rag_score'] = rag_score
            venue_copy['rag_explanation'] = rag_explanation
            results.append(venue_copy)
    # Sort by exact location match first, then by score
    results.sort(key=lambda x: (x.get('exact_location_match', False), x['similarity_score']), reverse=True)
    return results[:top_k]
def _filter_venues(self, venues: List[Dict], min_rating: float, price_range: str,
max_distance: float, location_context: Dict) -> List[Dict]:
"""Filter venues based on criteria with distance calculation"""
filtered = []
# Get user location if specified in query
user_location = self._get_user_location_from_query_context(location_context)
for venue in venues:
# Rating filter
rating = venue.get('rating')
if rating is None:
rating = 0.0
try:
rating = float(rating)
except (ValueError, TypeError):
rating = 0.0
if rating < min_rating:
continue
# Price range filter
venue_price = str(venue.get('price_level', 'all')).lower()
if price_range != 'all' and venue_price != 'all' and venue_price != price_range:
continue
# Distance filter
if user_location:
venue_location = self._get_venue_coordinates(venue)
if venue_location:
distance = self._calculate_distance(user_location, venue)
if distance is not None and distance <= max_distance:
venue['calculated_distance'] = distance
filtered.append(venue)
else:
# If venue has no coordinates but has exact location match (street-based search),
# include it anyway since it was found via street matching
if venue.get('exact_location_match', False):
venue['calculated_distance'] = None # Mark as no distance data
filtered.append(venue)
# Otherwise exclude venues without coordinates when location is specified
else:
# If no location in query, add all venues that pass other filters
filtered.append(venue)
return filtered
def _get_user_location_from_query_context(self, location_context: Dict) -> Optional[Tuple[float, float]]:
"""Get user location from extracted query context"""
# Prioritize streets, then landmarks, then districts
for loc_type in ["streets", "landmarks", "districts"]:
if location_context.get(loc_type):
# Use the first identified location of the highest priority type
location_name = location_context[loc_type][0]
return self.street_coordinates.get(location_name)
return None
def _get_venue_coordinates(self, venue: Dict) -> Optional[Tuple[float, float]]:
"""Get coordinates for a venue"""
lat = venue.get('latitude')
lng = venue.get('longitude')
if lat is not None and lng is not None:
try:
return (float(lat), float(lng))
except (ValueError, TypeError):
return None
return None
def _calculate_distance(self, user_location: Tuple[float, float], venue: Dict) -> Optional[float]:
"""Calculate distance in km between user and venue"""
venue_location = self._get_venue_coordinates(venue)
if user_location and venue_location:
return geodesic(user_location, venue_location).kilometers
return None
def _create_enhanced_response(self, venues: List[Dict], language: str, user_query: str, location_context: Dict) -> str:
"""Create an enhanced, user-friendly response with location and category context"""
if not venues:
if language == 'armenian':
return "Ցավոք, ձեր հարցմանը համապատասխանող վենու չի գտնվել: Փորձեք փոխել որոնման պարամետրերը:"
return "Sorry, no venues found matching your criteria. Try adjusting your search parameters."
response_parts = []
# Get intro based on language
intro = self.conversation_templates[language]["recommendation_intros"]
response_parts.append(random.choice(intro))
# Add location context
if location_context["streets"]:
loc_str = self.conversation_templates[language]["location_contexts"]["street"].format(location=location_context["streets"][0])
response_parts.append(f"\n📍 {loc_str}")
elif location_context["landmarks"]:
loc_str = self.conversation_templates[language]["location_contexts"]["landmark"].format(location=location_context["landmarks"][0])
response_parts.append(f"\n📍 {loc_str}")
elif location_context["districts"]:
loc_str = self.conversation_templates[language]["location_contexts"]["district"].format(location=location_context["districts"][0])
response_parts.append(f"\n📍 {loc_str}")
# Add category context
detected_category = self._detect_category(user_query, language)
if detected_category:
category_str = self.conversation_templates[language]["category_matches"].get(detected_category)
if category_str:
response_parts.append(f"🏷️ {category_str}")
for i, venue in enumerate(venues[:5]):
response_parts.append(f"\n{i+1}. {self._format_enhanced_venue_info(venue, language)}")
# Add ending
response_parts.append("\n" + random.choice(self.conversation_templates[language]["endings"]))
return "\n".join(response_parts)
def get_search_explanation(self, query: str, venues: List[Dict]) -> str:
    """Explain why the top results were returned for ``query``.

    The first three venues that carry a ``rag_explanation`` are handed to
    ``self.rag_enhancer.get_search_explanation`` as
    ``(venue, similarity_score, rag_explanation)`` tuples.  When none of
    them has one, a plain count message is returned instead.
    """
    if not venues:
        return "No venues found matching your criteria."
    # Prepare top venues with RAG explanations
    explained = [
        (venue, venue.get('similarity_score', 0), venue['rag_explanation'])
        for venue in venues[:3]
        if 'rag_explanation' in venue
    ]
    if not explained:
        return f"Found {len(venues)} venues matching '{query}'"
    return self.rag_enhancer.get_search_explanation(query, explained)
def _detect_category(self, query: str, language: str) -> Optional[str]:
"""Detect venue category from query, respecting the detected language."""
query_lower = query.lower()
for category, info in self.venue_categories.items():
if language == "armenian":
search_terms = info.get("armenian_terms", []) + info.get("armenian_keywords", [])
else:
search_terms = info.get("keywords", [])
for term in search_terms:
if term.lower() in query_lower:
return category
# If no language-specific match, do a general search
for category, info in self.venue_categories.items():
all_terms = info.get("keywords", []) + info.get("armenian_terms", [])
for term in all_terms:
if term.lower() in query_lower:
return category
return None
def _format_enhanced_venue_info(self, venue: Dict, language: str = "english") -> str:
"""Enhanced venue information formatting with CSV summary, 5-star reviews and metadata"""
if language == "armenian":
info_parts = [f"**{venue['name']}**"]
if venue.get('address'):
info_parts.append(f"📍 {venue['address']}")
# Safe rating display
rating = venue.get('rating')
if rating is not None and rating > 0:
info_parts.append(f"⭐ {rating}")
# Add distance
if venue.get('calculated_distance'):
distance = venue['calculated_distance']
info_parts.append(f"🚗 {distance:.1f} կմ")
# Add category
if venue.get('category'):
category = venue['category']
category_map = {
"pub": "պաբ", "bar": "բար", "restaurant": "ռեստորան",
"cafe": "սրճարան", "club": "ակումբ", "hookah": "հուկա բար"
}
armenian_category = category_map.get(category, category)
info_parts.append(f"🏷️ {armenian_category}")
# Add metadata features (skip common ones for pubs/bars)
features = []
venue_category = venue.get('category', '').lower()
# Only show beer for non-pub/bar venues
if venue.get('serves_beer') and venue_category not in ['pub', 'bar']:
features.append("գարեջուր")
if venue.get('serves_cocktails'): features.append("կոկտեյլ")
if venue.get('live_music'): features.append("կենդանի երաժշտություն")
if venue.get('outdoor_seating'): features.append("բացօթյա նստարան")
if venue.get('good_for_date_night'): features.append("ռոմանտիկ")
if venue.get('good_for_groups'): features.append("խմբերի համար")
if features:
info_parts.append(f"✨ {', '.join(features)}")
# Add comprehensive venue summary from CSV
if venue.get('summary'):
summary = venue['summary']
# Truncate summary for readability but keep much more detail
if len(summary) > 500:
summary = summary[:500] + "..."
info_parts.append(f"📋 {summary}")
# Add 5-star review
venue_name = venue.get('name', '')
if venue_name in self.five_star_reviews:
reviews = self._get_reviews_by_language(venue_name, language)
if reviews:
info_parts.append(f"💬 5⭐ \"{reviews[0][:300]}...\"")
else:
info_parts = [f"**{venue['name']}** - {venue.get('rating', 'N/A')}⭐"]
if venue.get('address'):
info_parts.append(f"📍 {venue['address']}")
# Add distance
if venue.get('calculated_distance'):
distance = venue['calculated_distance']
info_parts.append(f"🚗 {distance:.1f} km away")
# Add category
if venue.get('category'):
info_parts.append(f"🏷️ {venue['category']}")
# Add metadata features (skip common ones for pubs/bars)
features = []
venue_category = venue.get('category', '').lower()
# Only show beer for non-pub/bar venues
if venue.get('serves_beer') and venue_category not in ['pub', 'bar']:
features.append("serves beer")
if venue.get('serves_cocktails'): features.append("cocktails")
if venue.get('live_music'): features.append("live music")
if venue.get('outdoor_seating'): features.append("outdoor seating")
if venue.get('good_for_date_night'): features.append("romantic")
if venue.get('good_for_groups'): features.append("good for groups")
if features:
info_parts.append(f"✨ {', '.join(features)}")
# Add comprehensive venue summary from CSV
if venue.get('summary'):
summary = venue['summary']
# Truncate summary for readability but keep much more detail
if len(summary) > 500:
summary = summary[:500] + "..."
info_parts.append(f"📋 {summary}")
# Add 5-star review
venue_name = venue.get('name', '')
if venue_name in self.five_star_reviews:
reviews = self._get_reviews_by_language(venue_name, language)
if reviews:
info_parts.append(f"💬 5⭐ \"{reviews[0][:300]}...\"")
return "\n".join(info_parts)
def get_enhanced_recommendations(self, user_query: str, min_rating: float = 3.0,
                                 price_range: str = "all", max_distance: float = 10.0) -> Dict:
    """Answer a user message with either small talk or venue recommendations.

    The message is treated as conversation when it is not venue-related or
    when it is detected as a greeting/casual remark; otherwise a venue
    search is run and filtered.  Both outcomes are appended to the
    conversation history.

    Args:
        user_query: Raw user message.
        min_rating: Minimum venue rating passed to the filter.
        price_range: Price filter value (``"all"`` disables it).
        max_distance: Maximum distance in km passed to the filter.

    Returns:
        A dict whose ``response_type`` is ``"conversational"`` (reply text
        under ``conversational_response``) or ``"venue_recommendation"``
        (up to ten venues under ``recommended_venues`` and the reply under
        ``response_text``).
    """
    language = self._detect_language(user_query)
    # Both classifiers are always consulted before deciding how to answer.
    venue_intent = self._is_venue_related_query(user_query)
    small_talk = self._detect_greeting_or_casual(user_query)

    if small_talk or not venue_intent:
        # Conversational path
        reply = self._generate_conversational_response(user_query, language)
        self._add_to_conversation_history(user_query, reply)
        return {
            "language": language,
            "query": user_query,
            "response_type": "conversational",
            "conversational_response": reply,
            "venue_suggestions": [],
            "total_found": 0,
            "is_venue_query": False,
            "location_context": {}
        }

    # Venue path: extract places, search broadly, then filter.
    location_context = self._extract_enhanced_location_context(user_query)
    candidates = self._smart_venue_search(user_query, top_k=100)
    matches = self._filter_venues(candidates, min_rating, price_range, max_distance, location_context)
    response_text = self._create_enhanced_response(matches, language, user_query, location_context)
    # The history keeps only a short summary of the recommendation text.
    history_note = f"Found {len(matches)} venues. {response_text[:100]}..."
    self._add_to_conversation_history(user_query, history_note)
    return {
        "language": language,
        "query": user_query,
        "response_type": "venue_recommendation",
        "recommended_venues": matches[:10],
        "response_text": response_text,
        "total_found": len(matches),
        "location_context": location_context,
        "is_venue_query": True
    }
def _initialize_conversational_llm(self):
    """Set ``self.conversational_llm`` to the lightweight model when it can be built.

    When the optional module is unavailable or constructing the model
    raises, ``self.conversational_llm`` is set to ``None`` so that
    template-based replies are used.
    """
    llm_ready = False
    if ULTRA_LIGHTWEIGHT_LLM_AVAILABLE:
        try:
            logger.info("Initializing ultra-lightweight conversational system...")
            self.conversational_llm = UltraLightweightLLM()
            logger.info("Successfully initialized ultra-lightweight conversational system")
            llm_ready = True
        except Exception as e:
            logger.warning(f"Failed to initialize ultra-lightweight LLM: {e}")
    if not llm_ready:
        logger.info("Using template-based responses for optimal performance")
        self.conversational_llm = None
def _add_to_conversation_history(self, user_message: str, ai_response: str):
"""Add a user message and AI response to the conversation history"""
self.conversation_history.append({"user": user_message, "ai": ai_response})
# Keep history to a reasonable size
if len(self.conversation_history) > self.max_conversation_history:
self.conversation_history.pop(0)
def _get_conversation_context(self) -> str:
"""Get the recent conversation history as a formatted string"""
context = ""
for turn in self.conversation_history:
context += f"User: {turn['user']}\nAI: {turn['ai']}\n"
return context
def _is_venue_related_query(self, query: str) -> bool:
"""Determine if a query is related to finding venues"""
query_lower = query.lower()
# Keywords that indicate a venue search
venue_keywords = [
'find', 'where', 'recommend', 'any', 'good', 'best', 'search',
'restaurant', 'bar', 'pub', 'cafe', 'club', 'hookah',
'ռեստորան', 'բար', 'պաբ', 'փաբ', 'սրճարան', 'ակումբ', 'հուկա',
'գտնել', 'որտեղ', 'խորհուրդ', 'կա', 'լավ'
]
# Location keywords
location_keywords = [
'street', 'avenue', 'square', 'near', 'on', 'at',
'փողոց', 'պողոտա', 'հրապարակ', 'մոտ'
]
# Check for venue keywords
if any(keyword in query_lower for keyword in venue_keywords):
return True
# Check for location keywords
if any(keyword in query_lower for keyword in location_keywords):
return True
# Check against the known streets and landmarks
for street_info in self.yerevan_streets.values():
for variations in street_info.values():
if any(variation.lower() in query_lower for variation in variations):
return True
return False
def _generate_conversational_response(self, query: str, language: str) -> str:
"""Generate a conversational response using the LLM or templates"""
if not self.conversational_llm:
return self._generate_template_response(query, language)
try:
# Check if this is the new lightweight model
if hasattr(self.conversational_llm, 'generate_response'):
# Use the lightweight model's generate_response method
return self.conversational_llm.generate_response("", query, max_length=100)
else:
# Legacy llama-cpp model
context = self._get_conversation_context()
if language == 'armenian':
prompt = f"""You are a helpful assistant for Yerevan, Armenia. Be brief and friendly.
User: {query}
Assistant:"""
else:
prompt = f"""You are a helpful assistant for Yerevan, Armenia. Be brief and friendly.
User: {query}
Assistant:"""
response = self.conversational_llm(
prompt,
max_tokens=50,
stop=["User:", "Assistant:", "\n"],
temperature=0.7,
echo=False,
)
generated_text = response['choices'][0]['text'].strip()
return generated_text if generated_text else self._generate_template_response(query, language)
except Exception as e:
logger.error(f"Error generating conversational response: {e}")
return self._generate_template_response(query, language)
def _generate_template_response(self, query: str, language: str) -> str:
"""Generate template-based responses when LLM is not available"""
query_lower = query.lower()
# Greeting responses
if any(word in query_lower for word in ['hi', 'hello', 'hey', 'բարև', 'ողջույն']):
if language == "armenian":
return "Բարև ձեզ! Ես Երևանի վենուների ուղեցույցն եմ: Ինչ եք փնտրում?"
return "Hello! I'm your Yerevan venue guide. What are you looking for?"
# How are you responses
if any(phrase in query_lower for phrase in ['how are you', 'ինչպես ես', 'ոնց ես']):
if language == "armenian":
return "Շնորհակալություն հարցնելու համար! Ես պատրաստ եմ օգնել ձեզ գտնել լավագույն վայրերը Երևանում:"
return "Thanks for asking! I'm ready to help you find the best venues in Yerevan!"
# What can you do responses
if any(phrase in query_lower for phrase in ['what can you', 'ինչ կարող ես', 'քո մասին']):
if language == "armenian":
return "Ես կարող եմ օգնել ձեզ գտնել ռեստորաններ, բարեր, սրճարաններ և այլ վայրեր Երևանում: Ինչ եք փնտրում?"
return "I can help you find restaurants, bars, cafes and other venues in Yerevan! What are you looking for?"
# Thanks responses
if any(word in query_lower for word in ['thanks', 'thank you', 'շնորհակալություն']):
if language == "armenian":
return "Խնդրեմ! Ուրախ եմ, որ կարողացա օգնել:"
return "You're welcome! Happy to help!"
# Default responses
if language == "armenian":
return "Ես կարող եմ օգնել ձեզ գտնել վայրեր Երևանում: Ինչ եք փնտրում?"
return "I can help you find venues in Yerevan! What are you looking for?"
def _detect_greeting_or_casual(self, query: str) -> bool:
"""Detect if the query is a greeting or casual conversation"""
casual_patterns = [
# English
r'\b(hi|hello|hey|good morning|good evening|how are you|what\'s up|thanks|thank you)\b',
r'\b(who are you|what can you do|help|about you)\b',
# Armenian
r'\b(բարև|ողջույն|բարի լույս|բարի երեկո|ինչպես ես|ինչ կա|շնորհակալություն)\b',
r'\b(ով ես|ինչ կարող ես|օգնություն|քո մասին)\b'
]
query_lower = query.lower()
for pattern in casual_patterns:
if re.search(pattern, query_lower):
return True
return False
def _merge_geo_data(self) -> Dict[str, Tuple[float, float]]:
    """Build the name -> (lat, lng) lookup used for location matching.

    Starts from a built-in table of approximate coordinates for major
    streets, districts and landmarks, then layers on
    ``self.rag_enhancer.geo_landmarks``: each landmark's ``"coords"``
    replaces any built-in entry of the same name, and its first two
    ``"aliases"`` are added only where that name is not already present.

    Returns:
        The merged mapping of location name to coordinate pair.
    """
    # Major streets with approximate center coordinates (lat, lng)
    streets = {
        "Mashtots Avenue": (40.1845, 44.5117),
        "Abovyan Street": (40.1776, 44.5146),
        "Saryan Street": (40.1851, 44.5086),
        "Tumanyan Street": (40.1822, 44.5149),
        "Amiryan Street": (40.1798, 44.5139),
        "Pushkin Street": (40.1774, 44.5154),
        "Khorenatsi Street": (40.1751, 44.5181),
        "Teryan Street": (40.1828, 44.5163),
        "Paronyan Street": (40.1812, 44.5134),
        "Northern Avenue": (40.1792, 44.5146),
        "Sayat Nova Avenue": (40.1834, 44.5098),
        "Baghramyan Avenue": (40.1951, 44.5089),
        "Vazgen Sargsyan Street": (40.1823, 44.5201),
        "Tigran Mets Avenue": (40.1743, 44.5289),
        "Nalbandyan Street": (40.1800, 44.5182),
    }
    # Districts (approximate centers)
    districts = {
        "Kentron": (40.1792, 44.5146),
        "Arabkir": (40.2089, 44.4856),
        "Avan": (40.2156, 44.5489),
        "Davtashen": (40.2267, 44.4567),
        "Erebuni": (40.1345, 44.5234),
    }
    # Landmarks
    landmarks = {
        "Republic Square": (40.1761, 44.5126),
        "Opera House": (40.1836, 44.5098),
        "Cascade": (40.1876, 44.5086),
        "Swan Lake": (40.1837, 44.5135),
        "Blue Mosque": (40.1733, 44.5151),
    }
    locations = {**streets, **districts, **landmarks}

    # Layer the RAG enhancer's geo data over the built-in table.
    for name, info in self.rag_enhancer.geo_landmarks.items():
        coords = info["coords"]
        locations[name] = coords
        # Only the first two aliases are registered, and never over an
        # existing entry.
        for alias in info["aliases"][:2]:
            locations.setdefault(alias, coords)

    logger.info(f"Merged geo data: {len(locations)} locations available")
    return locations
# Global AI instance: module-level singleton shared by the Gradio handlers.
# Starts as None and is assigned by initialize_ai().
ai_instance = None
def initialize_ai():
    """Create and initialise the global ``ai_instance`` if needed.

    The venue JSON and CSV files are looked up by fixed relative file
    names. The global is assigned only after the instance has been
    constructed and its ``initialize()`` has completed, so other callers
    never observe a half-initialised object.

    Returns:
        The ready-to-use global AI instance.

    Raises:
        FileNotFoundError: If either venue data file is missing.
        Exception: Any error raised while constructing or initialising the
            instance is logged and re-raised; the global stays ``None``.
    """
    global ai_instance
    if ai_instance is not None:
        return ai_instance

    import os

    # Data paths are relative to the current working directory.
    venues_json = "yerevan_pubs_bars_20250623_193205.json"
    venues_csv = "yerevan_venues_structured.csv"

    try:
        # Fail early with a clear message when a data file is missing.
        if not os.path.exists(venues_json):
            raise FileNotFoundError(f"Venue JSON file not found: {venues_json}")
        if not os.path.exists(venues_csv):
            raise FileNotFoundError(f"Venue CSV file not found: {venues_csv}")

        logger.info("Creating CompleteYerevanVenueAI instance...")
        instance = CompleteYerevanVenueAI(venues_json, venues_csv)
        logger.info("Initializing venue data...")
        instance.initialize()
    except Exception as e:
        logger.error(f"Failed to initialize AI instance: {e}")
        # Bare raise keeps the original traceback intact.
        raise

    # Publish only a fully initialised instance.
    ai_instance = instance
    logger.info("Global AI instance initialized successfully")
    return ai_instance
def get_recommendations(query, min_rating, price_range, max_distance):
    """Gradio handler: answer a chat message or venue request as text.

    Args:
        query: The user's message; ``None``, empty or whitespace-only
            input yields a prompt to enter something.
        min_rating: Minimum rating filter, forwarded to the AI instance.
        price_range: Price filter, forwarded to the AI instance.
        max_distance: Maximum distance filter, forwarded to the AI instance.

    Returns:
        The reply text to display. Start-up and processing failures are
        reported as an apology message rather than raised.
    """
    global ai_instance

    # Guard against None as well as blank text before calling .strip().
    if not query or not query.strip():
        return "Please enter a question or venue request."

    # Ensure AI instance is initialized
    if ai_instance is None:
        try:
            initialize_ai()
        except Exception as e:
            logger.error(f"Failed to initialize AI: {e}")
            return f"Sorry, I'm having trouble starting up. Error: {str(e)}"

    # Double check AI instance exists
    if ai_instance is None:
        return "Sorry, the AI system is not available right now. Please try again later."

    try:
        # Get recommendations (handles both conversational and venue queries)
        result = ai_instance.get_enhanced_recommendations(
            user_query=query,
            min_rating=min_rating,
            price_range=price_range,
            max_distance=max_distance
        )
        response_type = result.get("response_type")

        # Handle conversational responses
        if response_type == "conversational":
            return result["conversational_response"]
        # Handle venue recommendations
        if response_type == "venue_recommendation":
            return result["response_text"]
        # Fallback for any unrecognised response type
        return "I can help you find venues in Yerevan or have a casual conversation. What would you like to know?"
    except Exception as e:
        logger.error(f"Error in get_recommendations: {e}")
        return f"Sorry, I encountered an error: {str(e)}"
def create_gradio_interface():
"""Create enhanced Gradio interface with conversational capabilities

Builds a ``gr.Blocks`` app containing a query textbox, rating and
distance sliders, a price-range radio, a submit button, a tips panel,
an output textbox and a set of clickable example queries. Both the
button click and pressing Enter in the textbox send the current input
values to ``get_recommendations`` and display its text result.

Returns the constructed ``gr.Blocks`` object; it is not launched here.
"""
# NOTE(review): the nesting of the with-blocks below (which components
# belong to which Row/Column) depends on indentation - confirm against the
# intended layout before restructuring.
# Top-level container with the Soft theme and custom gradient CSS.
with gr.Blocks(
title=" Yerevan Venue AI Assistant",
theme=gr.themes.Soft(),
css="""
.gradio-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
.gr-button-primary {
background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
border: none;
}
"""
) as interface:
# Header: what the assistant can do, plus sample queries.
gr.Markdown("""
# Yerevan Venue AI Assistant
### Your Conversational Guide to Yerevan's Best Venues
I can help you with:
- 🍽️ **Restaurant & Bar Recommendations** - Find the perfect dining spot
- 🗺️ **Location-Based Search** - Venues near specific streets or landmarks
- 💬 **Casual Conversation** - Ask me anything or just say hello!
- **Bilingual Support** - Chat in Armenian or English
**Examples:**
- "Hello! How are you?"
- "Find me a good pub on Pushkin Street"
- "բարեր Մաշտոցի մոտ" (bars near Mashtots)
- "What can you help me with?"
""")
# Input controls: free-text query plus the venue-search filters.
with gr.Row():
with gr.Column(scale=3):
query_input = gr.Textbox(
label="💬 Ask me anything or request venue recommendations",
placeholder="Try: 'Hello!' or 'Find me a restaurant near Opera House' or 'բարեր Պուշկին փողոցում'",
lines=2
)
with gr.Row():
# Rating filter: 0-5 in 0.1 steps, default 3.0.
min_rating = gr.Slider(
minimum=0, maximum=5, value=3.0, step=0.1,
label="⭐ Minimum Rating (for venue searches)"
)
# Distance filter: 0.5-20 km in 0.5 steps, default 5.0.
max_distance = gr.Slider(
minimum=0.5, maximum=20, value=5.0, step=0.5,
label="📍 Max Distance (km, for venue searches)"
)
# Price filter; "all" is the default choice.
price_range = gr.Radio(
choices=["all", "budget", "mid", "expensive"],
value="all",
label="💰 Price Range (for venue searches)"
)
search_btn = gr.Button("🔍 Chat / Search", variant="primary", size="lg")
# Side panel with usage tips and the advertised locations.
with gr.Column(scale=2):
gr.Markdown("""
### 💡 Tips:
- **Start a conversation**: "Hi", "Hello", "How are you?"
- **Ask about me**: "What can you do?", "Who are you?"
- **Get venue help**: "Find restaurants", "Bars near Opera"
- **Use Armenian**: "բարև", "ռեստորան", "բար"
- **Be specific**: Include location, cuisine type, or atmosphere
### 🗺️ Known Locations:
Pushkin Street, Mashtots Avenue, Saryan Street, Republic Square, Opera House, Cascade, Northern Avenue, Nalbandyan Street
""")
# Text area that receives the string returned by get_recommendations.
output = gr.Textbox(
label="🤖 AI Response",
lines=15,
max_lines=20,
show_copy_button=True
)
# Examples for quick testing
gr.Examples(
examples=[
["Hello! How are you today?"],
["What can you help me with?"],
["Find me a good pub with draft beer"],
["Restaurants near Opera House"],
["բարև ձեզ, ինչպես եք?"],
["բարեր Պուշկին փողոցում"],
["pubs on Nalbandyan street"],
["Thanks for your help!"]
],
inputs=[query_input],
label="💬 Try these examples:"
)
# Thin adapter so both events share one callback; the argument order
# matches the `inputs` lists below.
def handle_submit(query, min_rating, price_range, max_distance):
return get_recommendations(query, min_rating, price_range, max_distance)
# Button click and Enter in the textbox trigger the same handler.
search_btn.click(
fn=handle_submit,
inputs=[query_input, min_rating, price_range, max_distance],
outputs=output
)
query_input.submit(
fn=handle_submit,
inputs=[query_input, min_rating, price_range, max_distance],
outputs=output
)
return interface
if __name__ == "__main__":
    # Script entry point: load the venue data up front, then serve the UI.
    print("Launching Yerevan Venue AI Assistant with Conversational Capabilities...")

    # Fail fast here if the data files are missing or cannot be loaded.
    initialize_ai()

    # Build the UI and start the server on all interfaces, port 7861,
    # with a public share link and errors surfaced in the UI.
    interface = create_gradio_interface()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7861,
        "share": True,
        "show_error": True,
    }
    interface.launch(**launch_options)