Spaces:

artush-habetyan
/

vibe_sip

Sleeping

App Files Files Community

vibe_sip / venue_ai_complete.py

artush-habetyan

Upload 8 files

fb3a3b8 verified 6 months ago

raw

history blame contribute delete

60.8 kB

	import gzip
	import json
	import pandas as pd
	import numpy as np
	from typing import List, Dict, Optional, Tuple
	import logging
	from datetime import datetime
	import re
	import gradio as gr
	import random
	from geopy.distance import geodesic
	from collections import defaultdict


	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)


	try:
	from llama_cpp import Llama
	LLAMA_CPP_AVAILABLE = True
	logger.info("llama-cpp-python successfully imported")
	except ImportError as e:
	logger.warning(f"llama-cpp-python not available due to import error: {e}")
	LLAMA_CPP_AVAILABLE = False
	except Exception as e:
	logger.warning(f"llama-cpp-python failed to load: {e}")
	LLAMA_CPP_AVAILABLE = False

	from lightweight_rag import LightweightRAGEnhancer


	# Ultra-lightweight LLM for optimal HF Spaces performance
	try:
	from ultra_lightweight_llm import UltraLightweightLLM
	ULTRA_LIGHTWEIGHT_LLM_AVAILABLE = True
	logger.info("Ultra-lightweight LLM available for optimal performance")
	except ImportError as e:
	logger.warning(f"Ultra-lightweight LLM not available: {e}")
	ULTRA_LIGHTWEIGHT_LLM_AVAILABLE = False

	class CompleteYerevanVenueAI:
	"""
	Complete Bilingual (Armenian/English) AI Assistant for Yerevan Venue Recommendations
	With enhanced templates, location parsing, filtering, distance calculation and metadata usage
	"""

	def __init__(self, venues_json_path: str, venues_csv_path: str):
	"""Record the data-file paths and build the in-memory lookup tables.

	The venue files themselves are not opened by this constructor;
	``initialize()`` calls ``_load_venue_data()`` for that.

	Args:
	venues_json_path: Path of the venue JSON file read by ``_load_venue_data``.
	venues_csv_path: Path of the venue CSV file read by ``_load_venue_data``.
	"""
	self.venues_json_path = venues_json_path
	self.venues_csv_path = venues_csv_path

	# Core data
	# venues_data / venues_structured are filled by _load_venue_data();
	# five_star_reviews is filled by _process_five_star_reviews().
	self.venues_data = []
	self.venues_structured = None
	self.five_star_reviews = {}

	# Conversational LLM
	# conversational_llm stays None until _initialize_conversational_llm() runs;
	# the history list is trimmed to max_conversation_history entries by
	# _add_to_conversation_history().
	self.conversational_llm = None
	self.conversation_history = []
	self.max_conversation_history = 10

	# Enhanced location and category knowledge
	self.yerevan_streets = self._initialize_enhanced_street_knowledge()
	self.venue_categories = self._initialize_category_knowledge()
	self.conversation_templates = self._initialize_enhanced_conversation_templates()

	# Initialize lightweight RAG enhancer with comprehensive geo data
	self.rag_enhancer = LightweightRAGEnhancer()

	# Merge RAG geo data with existing street coordinates
	# NOTE(review): _merge_geo_data is defined outside this excerpt; it is called
	# after rag_enhancer and yerevan_streets are set, so it may depend on them -
	# keep this statement order unless that is confirmed otherwise.
	self.street_coordinates = self._merge_geo_data()

	logger.info("Initialized Complete YerevanVenueAI with RAG enhancement and comprehensive geo data")

	def _initialize_street_coordinates(self) -> Dict[str, Tuple[float, float]]:
	"""Initialize street coordinates for distance calculation"""
	return {
	# Major streets with approximate center coordinates (lat, lng)
	"Mashtots Avenue": (40.1845, 44.5117),
	"Abovyan Street": (40.1776, 44.5146),
	"Saryan Street": (40.1851, 44.5086),
	"Tumanyan Street": (40.1822, 44.5149),
	"Amiryan Street": (40.1798, 44.5139),
	"Pushkin Street": (40.1774, 44.5154),
	"Khorenatsi Street": (40.1751, 44.5181),
	"Teryan Street": (40.1828, 44.5163),
	"Paronyan Street": (40.1812, 44.5134),
	"Northern Avenue": (40.1792, 44.5146),
	"Sayat Nova Avenue": (40.1834, 44.5098),
	"Baghramyan Avenue": (40.1951, 44.5089),
	"Vazgen Sargsyan Street": (40.1823, 44.5201),
	"Tigran Mets Avenue": (40.1743, 44.5289),
	"Nalbandyan Street": (40.1800, 44.5182),
	# Districts (approximate centers)
	"Kentron": (40.1792, 44.5146),
	"Arabkir": (40.2089, 44.4856),
	"Avan": (40.2156, 44.5489),
	"Davtashen": (40.2267, 44.4567),
	"Erebuni": (40.1345, 44.5234),
	# Landmarks
	"Republic Square": (40.1761, 44.5126),
	"Opera House": (40.1836, 44.5098),
	"Cascade": (40.1876, 44.5086),
	"Swan Lake": (40.1837, 44.5135),
	"Blue Mosque": (40.1733, 44.5151)
	}

	def _initialize_enhanced_street_knowledge(self) -> Dict[str, Dict]:
	"""Enhanced Yerevan geography knowledge with Armenian names"""
	return {
	"streets": {
	"Mashtots Avenue": ["մաշտոցի", "մաշտոց", "mashtots", "mesrop mashtots"],
	"Abovyan Street": ["աբովյանի", "աբովյան", "abovyan"],
	"Saryan Street": ["սարյանի", "սարյան", "saryan", "martiros saryan"],
	"Tumanyan Street": ["թումանյանի", "թումանյան", "tumanyan", "hovhannes tumanyan"],
	"Amiryan Street": ["ամիրյանի", "ամիրյան", "amiryan"],
	"Pushkin Street": ["պուշկինի", "պուշկին", "pushkin"],
	"Khorenatsi Street": ["խորենացի", "խորենաց", "khorenatsi"],
	"Teryan Street": ["տերյանի", "տերյան", "teryan"],
	"Paronyan Street": ["պարոնյանի", "պարոնյան", "paronyan"],
	"Northern Avenue": ["հյուսիսային", "northern", "northern avenue"],
	"Sayat Nova Avenue": ["սայաթ նովա", "sayat nova"],
	"Baghramyan Avenue": ["բաղրամյանի", "բաղրամյան", "baghramyan"],
	"Vazgen Sargsyan Street": ["վազգեն սարգսյանի", "vazgen sargsyan"],
	"Tigran Mets Avenue": ["տիգրան մեծի", "tigran mets"],
	"Nalbandyan Street": ["նալբանդյանի", "նալբանդյան", "nalbandyan"]
	},
	"districts": {
	"Kentron": ["կենտրոն", "կենտրում", "center", "downtown", "central"],
	"Arabkir": ["արաբկիր", "arabkir"],
	"Avan": ["ավան", "avan"],
	"Davtashen": ["դավթաշեն", "davtashen"],
	"Erebuni": ["էրեբունի", "erebuni"],
	"Kanaker-Zeytun": ["կանակեր", "զեյթուն", "kanaker", "zeytun"],
	"Malatia-Sebastia": ["մալաթիա", "սեբաստիա", "malatia", "sebastia"],
	"Nor Nork": ["նոր նորք", "nor nork"],
	"Shengavit": ["շենգավիթ", "shengavit"],
	"Ajapnyak": ["աջափնյակ", "ajapnyak"]
	},
	"landmarks": {
	"Republic Square": ["հանրապետության հրապարակ", "հանրապետության", "republic square", "republic"],
	"Opera House": ["օպերա", "օպերայի տուն", "opera", "opera house"],
	"Cascade": ["կասկադ", "cascade"],
	"Northern Avenue": ["հյուսիսային պողոտա", "northern avenue"],
	"Swan Lake": ["կարապի լիճ", "swan lake"],
	"Vernissage Market": ["վերնիսաժ", "vernissage"],
	"Blue Mosque": ["կապույտ մզկիթ", "blue mosque"],
	"Mother Armenia": ["մայր հայաստան", "mother armenia"],
	"Matenadaran": ["մատենադարան", "matenadaran"],
	"Cascade Complex": ["կասկադային համալիր", "cascade complex"]
	}
	}

	def _initialize_category_knowledge(self) -> Dict[str, Dict]:
	"""Enhanced category knowledge with Armenian terms and JSON metadata"""
	return {
	"nightlife": {
	"types": ["pub", "bar", "club", "hookah", "night_club"],
	"json_types": ["bar", "night_club"],
	"armenian_terms": ["բար", "պաբ", "փաբ", "փաբեր", "ակումբ", "հուկա", "գիշերային", "ժամանց"],
	"keywords": ["drink", "beer", "cocktail", "party", "night", "dance", "draft", "tap", "alcohol", "whiskey", "vodka", "pub", "bar", "nightclub"],
	"armenian_keywords": ["խմիչք", "գարեջուր", "կոկտեյլ", "պարտի", "գիշեր", "պար", "ալկոհոլ"],
	"metadata_fields": ["serves_beer", "serves_spirits", "serves_cocktails", "serves_wine", "has_bar", "has_happy_hour", "good_for_dancing", "serves_happy_hour_drinks", "serves_late_night_food"]
	},
	"dining": {
	"types": ["restaurant", "cafe", "fast_food", "bakery"],
	"json_types": ["restaurant", "cafe"],
	"armenian_terms": ["ռեստորան", "սրճարան", "արագ սնունդ", "հացագործություն"],
	"keywords": ["food", "eat", "meal", "coffee", "breakfast", "lunch", "dinner", "cuisine", "dining", "restaurant", "cafe"],
	"armenian_keywords": ["ուտելիք", "ուտել", "ճաշ", "սուրճ", "նախաճաշ", "ճաշ", "ընթրիք"],
	"metadata_fields": ["serves_breakfast", "serves_brunch", "serves_lunch", "serves_dinner", "serves_coffee", "serves_dessert", "serves_vegetarian_food", "menu_for_children", "good_for_children", "good_for_groups"]
	},
	"culture": {
	"types": ["cultural", "gallery", "theatre", "museum"],
	"json_types": [],
	"armenian_terms": ["մշակութային", "պատկերասրահ", "թատրոն", "թանգարան"],
	"keywords": ["art", "culture", "museum", "gallery", "theater", "exhibition"],
	"armenian_keywords": ["արվեստ", "մշակույթ", "թանգարան", "ցուցահանդես"],
	"metadata_fields": []
	},
	"entertainment": {
	"types": ["karaoke", "gaming", "music", "cinema"],
	"json_types": [],
	"armenian_terms": ["կարաոկե", "խաղ", "երաժշտություն", "կինո"],
	"keywords": ["music", "karaoke", "game", "entertainment", "fun", "live music"],
	"armenian_keywords": ["երաժշտություն", "կարաոկե", "խաղ", "ժամանց", "զվարճանք"],
	"metadata_fields": ["live_music", "good_for_watching_sports", "good_for_business_meetings", "good_for_date_night"]
	}
	}

	def _initialize_enhanced_conversation_templates(self) -> Dict[str, Dict]:
	"""Enhanced conversation templates for various scenarios"""
	return {
	"armenian": {
	"greetings": [
	"Բարև ձեզ! Ես ձեր անձնական ուղեցույցն եմ Երևանի լավագույն վայրերի համար:",
	"Ողջույն! Ուրախ եմ օգնել ձեզ հայտնաբերել Երևանի հիանալի վայրերը:",
	"Բարի գալուստ! Ես կօգնեմ ձեզ գտնել կատարյալ վայր Երևանում:"
	],
	"recommendation_intros": [
	"Ձեր հարցման համար ես գտա այս հիանալի վայրերը:",
	"Ահա ինչ կարող եմ առաջարկել ձեզ:",
	"Այս վայրերը կարող են ձեզ հետաքրքրել:"
	],
	"location_contexts": {
	"street": "Դուք փնտրում եք {location} փողոցում:",
	"district": "Դուք փնտրում եք {location} թաղամասում:",
	"landmark": "Դուք փնտրում եք {location} մոտակայքում:"
	},
	"category_matches": {
	"nightlife": "Այս վայրերը հիանալի են գիշերային ժամանցի համար:",
	"dining": "Այս ճաշարանները կամ սրճարանները ձեզ կհավանեն:",
	"culture": "Այս մշակութային վայրերը հետաքրքիր են:",
	"entertainment": "Այս ժամանցային վայրերը զվարճալի են:"
	},
	"endings": [
	"Հուսով եմ, որ կգտնեք կատարյալ տարբերակ!",
	"Բարի ժամանց!",
	"Եթե հարցեր ունեք, ես այստեղ եմ:"
	]
	},
	"english": {
	"greetings": [
	"Hello! I'm your personal guide to the best places in Yerevan:",
	"Welcome! I'm excited to help you discover amazing venues in Yerevan:",
	"Hi there! Let me help you find the perfect spot in Yerevan:"
	],
	"recommendation_intros": [
	"For your query, I found these fantastic venues:",
	"Here's what I can recommend for you:",
	"These places might interest you:"
	],
	"location_contexts": {
	"street": "You're looking on {location}:",
	"district": "You're exploring the {location} district:",
	"landmark": "You're searching near {location}:"
	},
	"category_matches": {
	"nightlife": "These venues are perfect for nightlife:",
	"dining": "These restaurants and cafes will delight you:",
	"culture": "These cultural venues are fascinating:",
	"entertainment": "These entertainment spots are fun:"
	},
	"endings": [
	"I hope you find the perfect match!",
	"Enjoy your visit!",
	"Feel free to ask if you need more recommendations!"
	]
	}
	}

	def initialize(self):
	    """Run the start-up steps in order: load data, index reviews, set up the LLM.

	    Each step is announced on the module logger before it runs; a final
	    message is logged once all three have completed.
	    """
	    startup_steps = (
	        ("Loading venue data...", self._load_venue_data),
	        ("Processing 5-star reviews...", self._process_five_star_reviews),
	        ("Initializing conversational LLM...", self._initialize_conversational_llm),
	    )

	    for announcement, run_step in startup_steps:
	        logger.info(announcement)
	        run_step()

	    logger.info("Complete YerevanVenueAI initialization finished!")

	def _load_venue_data(self):
	    """Read the venue JSON into ``venues_data`` and the CSV into ``venues_structured``.

	    The JSON file is opened as UTF-8 text; the CSV is parsed with
	    ``pandas.read_csv``. The number of entries loaded from each source is
	    logged.
	    """
	    with open(self.venues_json_path, mode='r', encoding='utf-8') as json_handle:
	        parsed_venues = json.load(json_handle)
	    self.venues_data = parsed_venues

	    csv_frame = pd.read_csv(self.venues_csv_path)
	    self.venues_structured = csv_frame

	    logger.info(f"Loaded {len(self.venues_data)} venues from JSON")
	    logger.info(f"Loaded {len(self.venues_structured)} venues from CSV")

	def _process_five_star_reviews(self):
	    """Index up to three 5-star review texts per venue and language.

	    For every venue in ``self.venues_data`` the reviews rated exactly 5
	    whose stripped text is longer than 20 characters are collected. A
	    review counts as Armenian when its ``language`` or
	    ``original_language`` is ``'hy'``; everything else is filed as
	    English. Results are stored in ``self.five_star_reviews`` as
	    ``{venue_name: {'english': [...], 'armenian': [...]}}``; venues with
	    no qualifying review get no entry.

	    Null values are tolerated: a venue whose ``reviews`` is ``None`` and a
	    review whose ``text`` is missing, ``None`` or not a string are skipped
	    instead of raising.
	    """
	    for venue in self.venues_data:
	        venue_name = venue.get('name', '')
	        # `.get(key, default)` only applies the default when the key is
	        # absent, so an explicit null must be replaced separately.
	        reviews = venue.get('reviews') or []

	        english_reviews = []
	        armenian_reviews = []

	        for review in reviews:
	            if review.get('rating') != 5:
	                continue

	            text = review.get('text')
	            if not isinstance(text, str):
	                continue
	            text = text.strip()
	            if len(text) <= 20:  # Only meaningful reviews
	                continue

	            language = review.get('language', 'en')
	            original_language = review.get('original_language', 'en')
	            if language == 'hy' or original_language == 'hy':
	                armenian_reviews.append(text)
	            else:
	                english_reviews.append(text)

	        # Store both language versions, keeping the first three of each.
	        if english_reviews or armenian_reviews:
	            self.five_star_reviews[venue_name] = {
	                'english': english_reviews[:3],
	                'armenian': armenian_reviews[:3],
	            }

	    logger.info(f"Processed 5-star reviews for {len(self.five_star_reviews)} venues")

	def _get_reviews_by_language(self, venue_name: str, language: str) -> List[str]:
	"""Get reviews in the specified language"""
	if venue_name not in self.five_star_reviews:
	return []

	reviews_data = self.five_star_reviews[venue_name]

	if language == "armenian" and reviews_data.get('armenian'):
	return reviews_data['armenian']
	elif reviews_data.get('english'):
	return reviews_data['english']
	else:
	# Fallback to any available reviews
	return reviews_data.get('armenian', []) + reviews_data.get('english', [])

	def _detect_language(self, text: str) -> str:
	"""Enhanced language detection"""
	armenian_chars = re.findall(r'[Ա-Ֆա-ֆ]', text)
	armenian_ratio = len(armenian_chars) / len(text) if text else 0

	armenian_keywords = ['բար', 'ռեստորան', 'սրճարան', 'ակումբ', 'հուկա', 'ուզում', 'գտնել', 'որտեղ', 'կարող', 'լավ', 'հետաքրքիր']
	armenian_keyword_count = sum(1 for keyword in armenian_keywords if keyword in text.lower())

	if armenian_ratio > 0.15 or armenian_keyword_count > 0:
	return "armenian"
	return "english"

	def _extract_enhanced_location_context(self, query: str) -> Dict[str, List[str]]:
	"""Enhanced location extraction with comprehensive Armenian support"""
	query_lower = query.lower()
	context = {
	"streets": [],
	"districts": [],
	"landmarks": []
	}

	# Enhanced street detection
	for street_eng, variations in self.yerevan_streets["streets"].items():
	for variation in variations:
	if variation.lower() in query_lower:
	context["streets"].append(street_eng)
	break

	# Enhanced district detection
	for district_eng, variations in self.yerevan_streets["districts"].items():
	for variation in variations:
	if variation.lower() in query_lower:
	context["districts"].append(district_eng)
	break

	# Enhanced landmark detection
	for landmark_eng, variations in self.yerevan_streets["landmarks"].items():
	for variation in variations:
	if variation.lower() in query_lower:
	context["landmarks"].append(landmark_eng)
	break

	return context

	def _get_user_location_from_query(self, query: str) -> Optional[Tuple[float, float]]:
	"""Extract user location coordinates from street/landmark names in query"""
	location_context = self._extract_enhanced_location_context(query)

	# Check streets first
	for street in location_context["streets"]:
	if street in self.street_coordinates:
	return self.street_coordinates[street]

	# Check districts
	for district in location_context["districts"]:
	if district in self.street_coordinates:
	return self.street_coordinates[district]

	# Check landmarks
	for landmark in location_context["landmarks"]:
	if landmark in self.street_coordinates:
	return self.street_coordinates[landmark]

	return None

	def _calculate_distance(self, user_location: Tuple[float, float], venue: Dict) -> Optional[float]:
	"""Calculate distance between user location and venue"""
	try:
	venue_lat = venue.get('latitude')
	venue_lng = venue.get('longitude')

	if venue_lat is not None and venue_lng is not None:
	distance = geodesic(user_location, (venue_lat, venue_lng)).kilometers
	return distance
	except Exception as e:
	logger.debug(f"Distance calculation error: {e}")

	return None

	def _smart_venue_search(self, query: str, top_k: int = 20) -> List[Dict]:
	"""Score every venue against ``query`` and return the best ``top_k``.

	Each venue starts from the score returned by
	``self.rag_enhancer.calculate_enhanced_score`` and then collects fixed
	bonuses for: street / district / landmark aliases found in its address,
	agreement with the category detected in the query (CSV category, JSON
	types, metadata flags, category keywords), RAG-expanded terms, and
	query words found in the name, summary, address and 5-star reviews.
	All text comparisons are substring tests on lowercased strings.

	Venues whose name has no row in ``self.venues_structured`` are skipped,
	and only venues with a total score above zero are kept.

	Args:
	query: The raw user query.
	top_k: Maximum number of results to return.

	Returns:
	Shallow copies of the matching venue dicts extended with
	``similarity_score``, ``category``, ``summary``,
	``exact_location_match``, ``rag_score`` and ``rag_explanation``,
	sorted by exact-location match first and score second (both
	descending).
	"""
	query_lower = query.lower()
	results = []

	# Use RAG enhancer for query analysis
	enhanced_query = self.rag_enhancer.enhance_query(query)

	query_words = set(query_lower.split())
	expanded_terms = set(enhanced_query["expanded_terms"])

	# Detect category from query
	language = self._detect_language(query)
	detected_category = self._detect_category(query, language)

	# Get enhanced location context from RAG
	# NOTE(review): geo_context is read here but not referenced again in this method.
	geo_context = enhanced_query["geo_context"]
	location_context = self._extract_enhanced_location_context(query)

	for venue in self.venues_data:
	venue_name = venue.get('name', '')
	# NOTE(review): .lower() assumes 'address' holds a string whenever the key is present.
	venue_address = venue.get('address', '').lower()

	# Get structured venue info
	# NOTE(review): this scans the whole DataFrame once per venue; a
	# name -> row lookup built before the loop would avoid the repeated scan.
	structured_info = self.venues_structured[
	self.venues_structured['venue_name'] == venue_name
	]

	if structured_info.empty:
	continue

	# Only the first matching CSV row is used.
	venue_category = structured_info.iloc[0]['category']
	venue_summary = str(structured_info.iloc[0]['venue_summary']).lower()

	# Prepare venue data for RAG scoring
	venue_for_rag = {
	'name': venue_name,
	'category': venue_category,
	'summary': venue_summary,
	'latitude': venue.get('latitude'),
	'longitude': venue.get('longitude'),
	'rating': venue.get('rating', 0)
	}

	# Get RAG enhanced score
	rag_score, rag_explanation = self.rag_enhancer.calculate_enhanced_score(venue_for_rag, enhanced_query)

	# Start with RAG score as base
	score = rag_score

	# JSON metadata scoring
	venue_types = venue.get('types', [])

	# PRIORITY: Exact street/location matching (very high score)
	# exact_location_match is one flag shared by the street, district and
	# landmark checks below; once set it is never reset for this venue.
	exact_location_match = False
	if location_context["streets"]:
	for street in location_context["streets"]:
	street_variations = self.yerevan_streets["streets"][street]
	for variation in street_variations:
	if variation.lower() in venue_address:
	score += 100 # Very high score for exact street match
	exact_location_match = True
	break
	if exact_location_match:
	break

	if location_context["districts"]:
	for district in location_context["districts"]:
	district_variations = self.yerevan_streets["districts"][district]
	for variation in district_variations:
	if variation.lower() in venue_address:
	score += 80 # High score for district match
	exact_location_match = True
	break
	if exact_location_match:
	break

	if location_context["landmarks"]:
	for landmark in location_context["landmarks"]:
	landmark_variations = self.yerevan_streets["landmarks"][landmark]
	for variation in landmark_variations:
	if variation.lower() in venue_address:
	score += 90 # Very high score for landmark match
	exact_location_match = True
	break
	if exact_location_match:
	break

	# Category matching (high priority)
	if detected_category:
	category_info = self.venue_categories[detected_category]

	# Check CSV category
	if venue_category in category_info["types"]:
	score += 15 # High score for category match

	# Check JSON types
	for json_type in category_info["json_types"]:
	if json_type in venue_types:
	score += 20 # Even higher for JSON type match

	# Check metadata fields for specific features
	# Only a literal True counts; truthy non-boolean values do not.
	for metadata_field in category_info["metadata_fields"]:
	if venue.get(metadata_field) is True:
	score += 10 # Good score for feature match

	# Extra points for specific matches
	for keyword in category_info["keywords"]:
	if keyword in venue_summary or keyword in venue_name.lower():
	score += 5

	# Additional scoring with expanded terms from RAG
	additional_score = 0

	# Enhanced keyword matching with expanded terms
	for term in expanded_terms:
	# Check in venue name
	if term in venue_name.lower():
	additional_score += 3

	# Check in venue summary
	if term in venue_summary:
	additional_score += 2

	# Check in venue address
	if term in venue_address:
	additional_score += 1

	score += additional_score

	# Venue name matching
	# Substring test: a short query word also matches inside longer words.
	venue_name_lower = venue_name.lower()
	for word in query_words:
	if word in venue_name_lower:
	score += 8

	# Summary matching (use the rich summary data with higher scoring)
	for word in query_words:
	if word in venue_summary:
	score += 5 # Increased score for summary matches

	# Additional bonus for detailed summary matches
	summary_bonus_keywords = ['draft', 'tap', 'craft', 'brewery', 'beer']
	for keyword in summary_bonus_keywords:
	if keyword in query_lower and keyword in venue_summary:
	score += 15 # High bonus for specific beer-related terms in summary

	# Address matching
	if venue.get('address'):
	address_lower = venue['address'].lower()
	for word in query_words:
	if word in address_lower:
	score += 2

	# 5-star review matching
	# The "english" reviews are requested here regardless of the query language.
	if venue_name in self.five_star_reviews:
	reviews = self._get_reviews_by_language(venue_name, "english")
	if reviews:
	review_text = " ".join(reviews).lower()
	for word in query_words:
	if word in review_text:
	score += 4

	# JSON types matching
	for venue_type in venue_types:
	if venue_type in query_lower:
	score += 12

	if score > 0:
	# Annotate a shallow copy so the entry in self.venues_data is not rebound.
	venue_copy = venue.copy()
	venue_copy['similarity_score'] = score
	venue_copy['category'] = venue_category
	venue_copy['summary'] = structured_info.iloc[0]['venue_summary']
	venue_copy['exact_location_match'] = exact_location_match
	venue_copy['rag_score'] = rag_score
	venue_copy['rag_explanation'] = rag_explanation
	results.append(venue_copy)

	# Sort by exact location match first, then by score
	results.sort(key=lambda x: (x.get('exact_location_match', False), x['similarity_score']), reverse=True)
	return results[:top_k]

	def _filter_venues(self, venues: List[Dict], min_rating: float, price_range: str,
	max_distance: float, location_context: Dict) -> List[Dict]:
	"""Filter venues based on criteria with distance calculation"""

	filtered = []

	# Get user location if specified in query
	user_location = self._get_user_location_from_query_context(location_context)

	for venue in venues:
	# Rating filter
	rating = venue.get('rating')
	if rating is None:
	rating = 0.0
	try:
	rating = float(rating)
	except (ValueError, TypeError):
	rating = 0.0

	if rating < min_rating:
	continue

	# Price range filter
	venue_price = str(venue.get('price_level', 'all')).lower()
	if price_range != 'all' and venue_price != 'all' and venue_price != price_range:
	continue

	# Distance filter
	if user_location:
	venue_location = self._get_venue_coordinates(venue)
	if venue_location:
	distance = self._calculate_distance(user_location, venue)
	if distance is not None and distance <= max_distance:
	venue['calculated_distance'] = distance
	filtered.append(venue)
	else:
	# If venue has no coordinates but has exact location match (street-based search),
	# include it anyway since it was found via street matching
	if venue.get('exact_location_match', False):
	venue['calculated_distance'] = None # Mark as no distance data
	filtered.append(venue)
	# Otherwise exclude venues without coordinates when location is specified
	else:
	# If no location in query, add all venues that pass other filters
	filtered.append(venue)

	return filtered

	def _get_user_location_from_query_context(self, location_context: Dict) -> Optional[Tuple[float, float]]:
	"""Get user location from extracted query context"""

	# Prioritize streets, then landmarks, then districts
	for loc_type in ["streets", "landmarks", "districts"]:
	if location_context.get(loc_type):
	# Use the first identified location of the highest priority type
	location_name = location_context[loc_type][0]
	return self.street_coordinates.get(location_name)

	return None

	def _get_venue_coordinates(self, venue: Dict) -> Optional[Tuple[float, float]]:
	"""Get coordinates for a venue"""
	lat = venue.get('latitude')
	lng = venue.get('longitude')
	if lat is not None and lng is not None:
	try:
	return (float(lat), float(lng))
	except (ValueError, TypeError):
	return None
	return None

	def _calculate_distance(self, user_location: Tuple[float, float], venue: Dict) -> Optional[float]:
	"""Calculate distance in km between user and venue"""
	venue_location = self._get_venue_coordinates(venue)
	if user_location and venue_location:
	return geodesic(user_location, venue_location).kilometers
	return None

	def _create_enhanced_response(self, venues: List[Dict], language: str, user_query: str, location_context: Dict) -> str:
	"""Create an enhanced, user-friendly response with location and category context"""

	if not venues:
	if language == 'armenian':
	return "Ցավոք, ձեր հարցմանը համապատասխանող վենու չի գտնվել: Փորձեք փոխել որոնման պարամետրերը:"
	return "Sorry, no venues found matching your criteria. Try adjusting your search parameters."

	response_parts = []

	# Get intro based on language
	intro = self.conversation_templates[language]["recommendation_intros"]
	response_parts.append(random.choice(intro))

	# Add location context
	if location_context["streets"]:
	loc_str = self.conversation_templates[language]["location_contexts"]["street"].format(location=location_context["streets"][0])
	response_parts.append(f"\n📍 {loc_str}")
	elif location_context["landmarks"]:
	loc_str = self.conversation_templates[language]["location_contexts"]["landmark"].format(location=location_context["landmarks"][0])
	response_parts.append(f"\n📍 {loc_str}")
	elif location_context["districts"]:
	loc_str = self.conversation_templates[language]["location_contexts"]["district"].format(location=location_context["districts"][0])
	response_parts.append(f"\n📍 {loc_str}")

	# Add category context
	detected_category = self._detect_category(user_query, language)
	if detected_category:
	category_str = self.conversation_templates[language]["category_matches"].get(detected_category)
	if category_str:
	response_parts.append(f"🏷️ {category_str}")

	for i, venue in enumerate(venues[:5]):
	response_parts.append(f"\n{i+1}. {self._format_enhanced_venue_info(venue, language)}")

	# Add ending
	response_parts.append("\n" + random.choice(self.conversation_templates[language]["endings"]))

	return "\n".join(response_parts)

	def get_search_explanation(self, query: str, venues: List[Dict]) -> str:
	"""Get detailed explanation of search results using RAG system"""
	if not venues:
	return "No venues found matching your criteria."

	# Prepare top venues with RAG explanations
	top_venues = []
	for venue in venues[:3]:
	if 'rag_explanation' in venue:
	top_venues.append((venue, venue.get('similarity_score', 0), venue['rag_explanation']))

	if top_venues:
	return self.rag_enhancer.get_search_explanation(query, top_venues)
	else:
	return f"Found {len(venues)} venues matching '{query}'"

	def _detect_category(self, query: str, language: str) -> Optional[str]:
	"""Detect venue category from query, respecting the detected language."""
	query_lower = query.lower()

	for category, info in self.venue_categories.items():
	if language == "armenian":
	search_terms = info.get("armenian_terms", []) + info.get("armenian_keywords", [])
	else:
	search_terms = info.get("keywords", [])

	for term in search_terms:
	if term.lower() in query_lower:
	return category

	# If no language-specific match, do a general search
	for category, info in self.venue_categories.items():
	all_terms = info.get("keywords", []) + info.get("armenian_terms", [])
	for term in all_terms:
	if term.lower() in query_lower:
	return category

	return None

	def _format_enhanced_venue_info(self, venue: Dict, language: str = "english") -> str:
	"""Enhanced venue information formatting with CSV summary, 5-star reviews and metadata"""
	if language == "armenian":
	info_parts = [f"{venue['name']}"]
	if venue.get('address'):
	info_parts.append(f"📍 {venue['address']}")

	# Safe rating display
	rating = venue.get('rating')
	if rating is not None and rating > 0:
	info_parts.append(f"⭐ {rating}")

	# Add distance
	if venue.get('calculated_distance'):
	distance = venue['calculated_distance']
	info_parts.append(f"🚗 {distance:.1f} կմ")

	# Add category
	if venue.get('category'):
	category = venue['category']
	category_map = {
	"pub": "պաբ", "bar": "բար", "restaurant": "ռեստորան",
	"cafe": "սրճարան", "club": "ակումբ", "hookah": "հուկա բար"
	}
	armenian_category = category_map.get(category, category)
	info_parts.append(f"🏷️ {armenian_category}")

	# Add metadata features (skip common ones for pubs/bars)
	features = []
	venue_category = venue.get('category', '').lower()

	# Only show beer for non-pub/bar venues
	if venue.get('serves_beer') and venue_category not in ['pub', 'bar']:
	features.append("գարեջուր")
	if venue.get('serves_cocktails'): features.append("կոկտեյլ")
	if venue.get('live_music'): features.append("կենդանի երաժշտություն")
	if venue.get('outdoor_seating'): features.append("բացօթյա նստարան")
	if venue.get('good_for_date_night'): features.append("ռոմանտիկ")
	if venue.get('good_for_groups'): features.append("խմբերի համար")

	if features:
	info_parts.append(f"✨ {', '.join(features)}")

	# Add comprehensive venue summary from CSV
	if venue.get('summary'):
	summary = venue['summary']
	# Truncate summary for readability but keep much more detail
	if len(summary) > 500:
	summary = summary[:500] + "..."
	info_parts.append(f"📋 {summary}")

	# Add 5-star review
	venue_name = venue.get('name', '')
	if venue_name in self.five_star_reviews:
	reviews = self._get_reviews_by_language(venue_name, language)
	if reviews:
	info_parts.append(f"💬 5⭐ \"{reviews[0][:300]}...\"")

	else:
	info_parts = [f"{venue['name']} - {venue.get('rating', 'N/A')}⭐"]
	if venue.get('address'):
	info_parts.append(f"📍 {venue['address']}")

	# Add distance
	if venue.get('calculated_distance'):
	distance = venue['calculated_distance']
	info_parts.append(f"🚗 {distance:.1f} km away")

	# Add category
	if venue.get('category'):
	info_parts.append(f"🏷️ {venue['category']}")

	# Add metadata features (skip common ones for pubs/bars)
	features = []
	venue_category = venue.get('category', '').lower()

	# Only show beer for non-pub/bar venues
	if venue.get('serves_beer') and venue_category not in ['pub', 'bar']:
	features.append("serves beer")
	if venue.get('serves_cocktails'): features.append("cocktails")
	if venue.get('live_music'): features.append("live music")
	if venue.get('outdoor_seating'): features.append("outdoor seating")
	if venue.get('good_for_date_night'): features.append("romantic")
	if venue.get('good_for_groups'): features.append("good for groups")

	if features:
	info_parts.append(f"✨ {', '.join(features)}")

	# Add comprehensive venue summary from CSV
	if venue.get('summary'):
	summary = venue['summary']
	# Truncate summary for readability but keep much more detail
	if len(summary) > 500:
	summary = summary[:500] + "..."
	info_parts.append(f"📋 {summary}")

	# Add 5-star review
	venue_name = venue.get('name', '')
	if venue_name in self.five_star_reviews:
	reviews = self._get_reviews_by_language(venue_name, language)
	if reviews:
	info_parts.append(f"💬 5⭐ \"{reviews[0][:300]}...\"")

	return "\n".join(info_parts)

	def get_enhanced_recommendations(self, user_query: str, min_rating: float = 3.0,
	                                 price_range: str = "all", max_distance: float = 10.0) -> Dict:
	    """Answer a user message with either venue recommendations or small talk.

	    A message is treated as a venue search only when it looks
	    venue-related and is not a greeting / casual remark. Venue searches
	    are scored (top 100), filtered by rating, price and distance, and
	    rendered to text; anything else gets a conversational reply. Both
	    paths append the exchange to the conversation history.

	    Returns:
	        A dict whose ``response_type`` is ``"venue_recommendation"`` (with
	        ``recommended_venues`` limited to 10, ``response_text``,
	        ``total_found`` and ``location_context``) or ``"conversational"``
	        (with ``conversational_response`` and empty venue fields).
	    """
	    language = self._detect_language(user_query)

	    # Both classifiers are always evaluated, in this order.
	    looks_like_venue_search = self._is_venue_related_query(user_query)
	    is_small_talk = self._detect_greeting_or_casual(user_query)

	    if looks_like_venue_search and not is_small_talk:
	        location_context = self._extract_enhanced_location_context(user_query)

	        # Full search for comprehensive results, then narrow down.
	        candidates = self._smart_venue_search(user_query, top_k=100)
	        matches = self._filter_venues(candidates, min_rating, price_range, max_distance, location_context)

	        response_text = self._create_enhanced_response(matches, language, user_query, location_context)

	        history_entry = f"Found {len(matches)} venues. {response_text[:100]}..."
	        self._add_to_conversation_history(user_query, history_entry)

	        return {
	            "language": language,
	            "query": user_query,
	            "response_type": "venue_recommendation",
	            "recommended_venues": matches[:10],
	            "response_text": response_text,
	            "total_found": len(matches),
	            "location_context": location_context,
	            "is_venue_query": True
	        }

	    # Conversational path
	    reply = self._generate_conversational_response(user_query, language)
	    self._add_to_conversation_history(user_query, reply)

	    return {
	        "language": language,
	        "query": user_query,
	        "response_type": "conversational",
	        "conversational_response": reply,
	        "venue_suggestions": [],
	        "total_found": 0,
	        "is_venue_query": False,
	        "location_context": {}
	    }

	def _initialize_conversational_llm(self):
	    """Create the ultra-lightweight LLM if it is importable, else use templates.

	    When the optional module is unavailable, or constructing it raises,
	    ``self.conversational_llm`` is set to ``None`` so that template-based
	    responses are used.
	    """
	    if ULTRA_LIGHTWEIGHT_LLM_AVAILABLE:
	        try:
	            logger.info("Initializing ultra-lightweight conversational system...")
	            self.conversational_llm = UltraLightweightLLM()
	            logger.info("Successfully initialized ultra-lightweight conversational system")
	        except Exception as init_error:
	            # Fall through to the template-based fallback below.
	            logger.warning(f"Failed to initialize ultra-lightweight LLM: {init_error}")
	        else:
	            return

	    logger.info("Using template-based responses for optimal performance")
	    self.conversational_llm = None

	def _add_to_conversation_history(self, user_message: str, ai_response: str):
	"""Add a user message and AI response to the conversation history"""
	self.conversation_history.append({"user": user_message, "ai": ai_response})
	# Keep history to a reasonable size
	if len(self.conversation_history) > self.max_conversation_history:
	self.conversation_history.pop(0)

	def _get_conversation_context(self) -> str:
	"""Get the recent conversation history as a formatted string"""
	context = ""
	for turn in self.conversation_history:
	context += f"User: {turn['user']}\nAI: {turn['ai']}\n"
	return context

	def _is_venue_related_query(self, query: str) -> bool:
	"""Determine if a query is related to finding venues"""
	query_lower = query.lower()

	# Keywords that indicate a venue search
	venue_keywords = [
	'find', 'where', 'recommend', 'any', 'good', 'best', 'search',
	'restaurant', 'bar', 'pub', 'cafe', 'club', 'hookah',
	'ռեստորան', 'բար', 'պաբ', 'փաբ', 'սրճարան', 'ակումբ', 'հուկա',
	'գտնել', 'որտեղ', 'խորհուրդ', 'կա', 'լավ'
	]

	# Location keywords
	location_keywords = [
	'street', 'avenue', 'square', 'near', 'on', 'at',
	'փողոց', 'պողոտա', 'հրապարակ', 'մոտ'
	]

	# Check for venue keywords
	if any(keyword in query_lower for keyword in venue_keywords):
	return True

	# Check for location keywords
	if any(keyword in query_lower for keyword in location_keywords):
	return True

	# Check against the known streets and landmarks
	for street_info in self.yerevan_streets.values():
	for variations in street_info.values():
	if any(variation.lower() in query_lower for variation in variations):
	return True

	return False

	def _generate_conversational_response(self, query: str, language: str) -> str:
	"""Generate a conversational response using the LLM or templates"""
	if not self.conversational_llm:
	return self._generate_template_response(query, language)

	try:
	# Check if this is the new lightweight model
	if hasattr(self.conversational_llm, 'generate_response'):
	# Use the lightweight model's generate_response method
	return self.conversational_llm.generate_response("", query, max_length=100)
	else:
	# Legacy llama-cpp model
	context = self._get_conversation_context()

	if language == 'armenian':
	prompt = f"""You are a helpful assistant for Yerevan, Armenia. Be brief and friendly.
	User: {query}
	Assistant:"""
	else:
	prompt = f"""You are a helpful assistant for Yerevan, Armenia. Be brief and friendly.
	User: {query}
	Assistant:"""

	response = self.conversational_llm(
	prompt,
	max_tokens=50,
	stop=["User:", "Assistant:", "\n"],
	temperature=0.7,
	echo=False,
	)

	generated_text = response['choices'][0]['text'].strip()
	return generated_text if generated_text else self._generate_template_response(query, language)

	except Exception as e:
	logger.error(f"Error generating conversational response: {e}")
	return self._generate_template_response(query, language)

	def _generate_template_response(self, query: str, language: str) -> str:
	"""Generate template-based responses when LLM is not available"""
	query_lower = query.lower()

	# Greeting responses
	if any(word in query_lower for word in ['hi', 'hello', 'hey', 'բարև', 'ողջույն']):
	if language == "armenian":
	return "Բարև ձեզ! Ես Երևանի վենուների ուղեցույցն եմ: Ինչ եք փնտրում?"
	return "Hello! I'm your Yerevan venue guide. What are you looking for?"

	# How are you responses
	if any(phrase in query_lower for phrase in ['how are you', 'ինչպես ես', 'ոնց ես']):
	if language == "armenian":
	return "Շնորհակալություն հարցնելու համար! Ես պատրաստ եմ օգնել ձեզ գտնել լավագույն վայրերը Երևանում:"
	return "Thanks for asking! I'm ready to help you find the best venues in Yerevan!"

	# What can you do responses
	if any(phrase in query_lower for phrase in ['what can you', 'ինչ կարող ես', 'քո մասին']):
	if language == "armenian":
	return "Ես կարող եմ օգնել ձեզ գտնել ռեստորաններ, բարեր, սրճարաններ և այլ վայրեր Երևանում: Ինչ եք փնտրում?"
	return "I can help you find restaurants, bars, cafes and other venues in Yerevan! What are you looking for?"

	# Thanks responses
	if any(word in query_lower for word in ['thanks', 'thank you', 'շնորհակալություն']):
	if language == "armenian":
	return "Խնդրեմ! Ուրախ եմ, որ կարողացա օգնել:"
	return "You're welcome! Happy to help!"

	# Default responses
	if language == "armenian":
	return "Ես կարող եմ օգնել ձեզ գտնել վայրեր Երևանում: Ինչ եք փնտրում?"
	return "I can help you find venues in Yerevan! What are you looking for?"

	def _detect_greeting_or_casual(self, query: str) -> bool:
	"""Detect if the query is a greeting or casual conversation"""
	casual_patterns = [
	# English
	r'\b(hi\|hello\|hey\|good morning\|good evening\|how are you\|what\'s up\|thanks\|thank you)\b',
	r'\b(who are you\|what can you do\|help\|about you)\b',
	# Armenian
	r'\b(բարև\|ողջույն\|բարի լույս\|բարի երեկո\|ինչպես ես\|ինչ կա\|շնորհակալություն)\b',
	r'\b(ով ես\|ինչ կարող ես\|օգնություն\|քո մասին)\b'
	]

	query_lower = query.lower()
	for pattern in casual_patterns:
	if re.search(pattern, query_lower):
	return True
	return False

	def _merge_geo_data(self) -> Dict[str, Tuple[float, float]]:
	    """Combine the built-in coordinates with the RAG enhancer's landmarks.

	    Starts from a hard-coded table of approximate (lat, lng) centres for
	    major streets, districts and landmarks, then layers
	    ``self.rag_enhancer.geo_landmarks`` on top:

	    * each landmark name is stored with its ``"coords"``, replacing a
	      built-in entry of the same name;
	    * the first two entries of its ``"aliases"`` are added only when that
	      name is not already present.

	    Returns:
	        Mapping of location name to its coordinates.
	    """
	    coords_by_name = {
	        # Major streets with approximate center coordinates (lat, lng)
	        "Mashtots Avenue": (40.1845, 44.5117),
	        "Abovyan Street": (40.1776, 44.5146),
	        "Saryan Street": (40.1851, 44.5086),
	        "Tumanyan Street": (40.1822, 44.5149),
	        "Amiryan Street": (40.1798, 44.5139),
	        "Pushkin Street": (40.1774, 44.5154),
	        "Khorenatsi Street": (40.1751, 44.5181),
	        "Teryan Street": (40.1828, 44.5163),
	        "Paronyan Street": (40.1812, 44.5134),
	        "Northern Avenue": (40.1792, 44.5146),
	        "Sayat Nova Avenue": (40.1834, 44.5098),
	        "Baghramyan Avenue": (40.1951, 44.5089),
	        "Vazgen Sargsyan Street": (40.1823, 44.5201),
	        "Tigran Mets Avenue": (40.1743, 44.5289),
	        "Nalbandyan Street": (40.1800, 44.5182),
	        # Districts (approximate centers)
	        "Kentron": (40.1792, 44.5146),
	        "Arabkir": (40.2089, 44.4856),
	        "Avan": (40.2156, 44.5489),
	        "Davtashen": (40.2267, 44.4567),
	        "Erebuni": (40.1345, 44.5234),
	        # Landmarks
	        "Republic Square": (40.1761, 44.5126),
	        "Opera House": (40.1836, 44.5098),
	        "Cascade": (40.1876, 44.5086),
	        "Swan Lake": (40.1837, 44.5135),
	        "Blue Mosque": (40.1733, 44.5151),
	    }

	    for name, info in self.rag_enhancer.geo_landmarks.items():
	        # Landmark names from the enhancer take precedence over the table.
	        coords_by_name[name] = info["coords"]

	        # Aliases never displace a name that is already known.
	        leading_aliases = info["aliases"][:2]
	        for alias in leading_aliases:
	            if alias in coords_by_name:
	                continue
	            coords_by_name[alias] = info["coords"]

	    logger.info(f"Merged geo data: {len(coords_by_name)} locations available")
	    return coords_by_name

	# Global AI instance: module-level holder for the shared CompleteYerevanVenueAI.
	# It starts as None, is assigned by initialize_ai() (and reset to None there
	# if initialization fails), and is read by get_recommendations().
	ai_instance = None

	def initialize_ai():
	    """Create the shared ``CompleteYerevanVenueAI`` on first use and return it.

	    If the global ``ai_instance`` is already set it is returned unchanged.
	    Otherwise the two venue data files are checked for existence, the
	    assistant is constructed from them and its ``initialize()`` is called.

	    Returns:
	        The global ``ai_instance``.

	    Raises:
	        FileNotFoundError: If the venue JSON or CSV file does not exist.
	        Exception: Any error raised during construction or ``initialize()``
	            is logged, the global is reset to ``None`` and the error is
	            raised again.
	    """
	    global ai_instance

	    if ai_instance is not None:
	        return ai_instance

	    try:
	        import os

	        # Data files are looked up relative to the current working directory.
	        venues_json = "yerevan_pubs_bars_20250623_193205.json"
	        venues_csv = "yerevan_venues_structured.csv"

	        json_present = os.path.exists(venues_json)
	        if not json_present:
	            raise FileNotFoundError(f"Venue JSON file not found: {venues_json}")
	        csv_present = os.path.exists(venues_csv)
	        if not csv_present:
	            raise FileNotFoundError(f"Venue CSV file not found: {venues_csv}")

	        logger.info("Creating CompleteYerevanVenueAI instance...")
	        ai_instance = CompleteYerevanVenueAI(venues_json, venues_csv)

	        logger.info("Initializing venue data...")
	        ai_instance.initialize()

	        logger.info("Global AI instance initialized successfully")

	    except Exception as init_error:
	        logger.error(f"Failed to initialize AI instance: {init_error}")
	        # Do not leave a half-built instance behind for later callers.
	        ai_instance = None
	        raise init_error

	    return ai_instance

	def get_recommendations(query, min_rating, price_range, max_distance):
	    """Gradio interface function with conversational support.

	    Answers one user message as plain text. The shared assistant is created
	    on demand via ``initialize_ai()``; the message and filters are then
	    passed to ``get_enhanced_recommendations`` and the text matching the
	    result's ``"response_type"`` is returned.

	    Args:
	        query: User text; ``None`` or blank text yields a prompt to type
	            something.
	        min_rating: Minimum rating filter, forwarded unchanged.
	        price_range: Price filter, forwarded unchanged.
	        max_distance: Distance filter, forwarded unchanged.

	    Returns:
	        The reply text. Failures are reported as an apology string rather
	        than raised, so the UI always receives something to display.
	    """
	    global ai_instance

	    # A missing value (None) is treated like blank text instead of crashing
	    # on ``.strip()``.
	    if not query or not query.strip():
	        return "Please enter a question or venue request."

	    # Ensure AI instance is initialized
	    if ai_instance is None:
	        try:
	            initialize_ai()
	        except Exception as e:
	            logger.error(f"Failed to initialize AI: {e}")
	            return f"Sorry, I'm having trouble starting up. Error: {str(e)}"

	    # Double check AI instance exists
	    if ai_instance is None:
	        return "Sorry, the AI system is not available right now. Please try again later."

	    try:
	        # Get recommendations (handles both conversational and venue queries)
	        result = ai_instance.get_enhanced_recommendations(
	            user_query=query,
	            min_rating=min_rating,
	            price_range=price_range,
	            max_distance=max_distance
	        )

	        response_type = result.get("response_type")

	        # Handle conversational responses
	        if response_type == "conversational":
	            return result["conversational_response"]

	        # Handle venue recommendations
	        if response_type == "venue_recommendation":
	            return result["response_text"]

	        # Fallback for an unrecognised response type
	        return "I can help you find venues in Yerevan or have a casual conversation. What would you like to know?"

	    except Exception as e:
	        logger.error(f"Error in get_recommendations: {e}")
	        return f"Sorry, I encountered an error: {str(e)}"

	def create_gradio_interface():
	"""Create enhanced Gradio interface with conversational capabilities.

	Builds a ``gr.Blocks`` layout containing an introductory Markdown panel,
	a query textbox, minimum-rating and max-distance sliders, a price-range
	radio group, a search button, a tips panel, an output textbox and a set
	of clickable example queries.

	Both the button's click event and the textbox's submit event call
	``get_recommendations`` (through ``handle_submit``) with the query and
	the three filter values, and write the returned text to the output box.

	Returns:
	The constructed ``gr.Blocks`` object. It is not launched here.
	"""

	# Top-level container: page title, theme and custom CSS.
	with gr.Blocks(
	title=" Yerevan Venue AI Assistant",
	theme=gr.themes.Soft(),
	css="""
	.gradio-container {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	}
	.gr-button-primary {
	background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
	border: none;
	}
	"""
	) as interface:

	# Introductory text shown at the top of the page.
	gr.Markdown("""
	# Yerevan Venue AI Assistant
	### Your Conversational Guide to Yerevan's Best Venues

	I can help you with:
	- 🍽️ Restaurant & Bar Recommendations - Find the perfect dining spot
	- 🗺️ Location-Based Search - Venues near specific streets or landmarks
	- 💬 Casual Conversation - Ask me anything or just say hello!
	- Bilingual Support - Chat in Armenian or English

	Examples:
	- "Hello! How are you?"
	- "Find me a good pub on Pushkin Street"
	- "բարեր Մաշտոցի մոտ" (bars near Mashtots)
	- "What can you help me with?"
	""")

	# Two columns: inputs (scale 3) and a tips panel (scale 2).
	with gr.Row():
	with gr.Column(scale=3):
	query_input = gr.Textbox(
	label="💬 Ask me anything or request venue recommendations",
	placeholder="Try: 'Hello!' or 'Find me a restaurant near Opera House' or 'բարեր Պուշկին փողոցում'",
	lines=2
	)

	# Filter controls; their values are forwarded to get_recommendations.
	with gr.Row():
	min_rating = gr.Slider(
	minimum=0, maximum=5, value=3.0, step=0.1,
	label="⭐ Minimum Rating (for venue searches)"
	)
	max_distance = gr.Slider(
	minimum=0.5, maximum=20, value=5.0, step=0.5,
	label="📍 Max Distance (km, for venue searches)"
	)

	price_range = gr.Radio(
	choices=["all", "budget", "mid", "expensive"],
	value="all",
	label="💰 Price Range (for venue searches)"
	)

	search_btn = gr.Button("🔍 Chat / Search", variant="primary", size="lg")

	with gr.Column(scale=2):
	gr.Markdown("""
	### 💡 Tips:
	- Start a conversation: "Hi", "Hello", "How are you?"
	- Ask about me: "What can you do?", "Who are you?"
	- Get venue help: "Find restaurants", "Bars near Opera"
	- Use Armenian: "բարև", "ռեստորան", "բար"
	- Be specific: Include location, cuisine type, or atmosphere

	### 🗺️ Known Locations:
	Pushkin Street, Mashtots Avenue, Saryan Street, Republic Square, Opera House, Cascade, Northern Avenue, Nalbandyan Street
	""")

	# Text area that receives the reply from get_recommendations.
	output = gr.Textbox(
	label="🤖 AI Response",
	lines=15,
	max_lines=20,
	show_copy_button=True
	)

	# Examples for quick testing
	gr.Examples(
	examples=[
	["Hello! How are you today?"],
	["What can you help me with?"],
	["Find me a good pub with draft beer"],
	["Restaurants near Opera House"],
	["բարև ձեզ, ինչպես եք?"],
	["բարեր Պուշկին փողոցում"],
	["pubs on Nalbandyan street"],
	["Thanks for your help!"]
	],
	inputs=[query_input],
	label="💬 Try these examples:"
	)

	# Thin wrapper around get_recommendations. Its parameter names shadow the
	# component variables above; the parameter order matches the `inputs`
	# lists used in the two event bindings below.
	def handle_submit(query, min_rating, price_range, max_distance):
	return get_recommendations(query, min_rating, price_range, max_distance)

	# Button click and textbox submit are wired to the same handler.
	search_btn.click(
	fn=handle_submit,
	inputs=[query_input, min_rating, price_range, max_distance],
	outputs=output
	)

	query_input.submit(
	fn=handle_submit,
	inputs=[query_input, min_rating, price_range, max_distance],
	outputs=output
	)

	return interface

	if __name__ == "__main__":
	    # Script entry point: load the assistant, then serve the Gradio UI.
	    print("Launching Yerevan Venue AI Assistant with Conversational Capabilities...")

	    # initialize_ai() raises on failure, so a broken setup stops here,
	    # before any interface is built or served.
	    initialize_ai()

	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7861,
	        "share": True,
	        "show_error": True,
	    }

	    interface = create_gradio_interface()
	    interface.launch(**launch_options)