"""Lightweight query enhancement and re-ranking for Yerevan venue search.

The module expands a free-text query with synonyms, resolves landmarks
mentioned in it to coordinates, derives scoring weights and a search radius,
and scores venue dictionaries against the result.
"""

import re
import json
import math
import logging
from collections import defaultdict, Counter
from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple

try:
    from geopy.distance import geodesic
except ImportError:  # geopy is optional; a haversine fallback is used instead
    geodesic = None

logger = logging.getLogger(__name__)

# Building blocks for the radius expressions ("within 2.5 km", "500 m from").
_NUMBER = r"(\d+(?:\.\d+)?)"
# The negative lookahead keeps "5 min" from being read as "5 m".
_UNIT = r"(km|kilometers?|miles?|meters?|m)(?![a-z])"
_PROXIMITY_PATTERNS = (
    re.compile(rf"within\s+{_NUMBER}\s*{_UNIT}"),
    re.compile(rf"{_NUMBER}\s*{_UNIT}\s+(from|of|near)"),
    re.compile(r"(?<!\w)(?:close to|near|by|next to|around|մոտ|կողքին)"),
)


class LightweightRAGEnhancer:
    """Expand queries, extract geo context and score venues without a model.

    All text matching is done on lower-cased text. A term "matches" when it
    occurs at the start of a word, so inflected forms ("bars", "nearest",
    "walking") still hit while embedded fragments ("pub" in "republic",
    "top" in "rooftop") do not.
    """

    # Mean Earth radius used by the haversine fallback.
    _EARTH_RADIUS_KM = 6371.0088

    # Query words that carry no venue information and are not used as terms.
    _STOPWORDS = frozenset({
        "the", "and", "for", "with", "near", "from", "find", "show",
        "are", "any", "some", "that", "this", "where", "what",
    })

    def __init__(self):
        self.geo_landmarks = self._initialize_comprehensive_geo_data()
        self.synonym_map = self._initialize_smart_synonyms()
        self.query_patterns = self._initialize_query_patterns()
        self.user_preferences = defaultdict(float)
        self.query_history = []
        # Monotonic sequence number for history entries; unlike
        # len(query_history) it does not repeat after the history is trimmed.
        self._query_counter = 0

    # ------------------------------------------------------------------
    # Static data
    # ------------------------------------------------------------------
    def _initialize_comprehensive_geo_data(self) -> Dict[str, Dict]:
        """Return the landmark table keyed by landmark name.

        Each value holds ``coords`` (lat, lon), ``aliases``, ``category``,
        ``importance`` and ``description``.
        """
        return {
            # Government & Administrative
            "Republic Square": {
                "coords": (40.1761, 44.5126),
                "aliases": ["հանրապետության հրապարակ", "republic", "central square", "main square"],
                "category": "landmark",
                "importance": 10,
                "description": "Central square of Yerevan, heart of the city",
            },
            "Presidential Palace": {
                "coords": (40.1789, 44.5145),
                "aliases": ["նախագահական", "presidential", "palace"],
                "category": "government",
                "importance": 8,
                "description": "Official residence of Armenian President",
            },
            "National Assembly": {
                "coords": (40.1823, 44.5167),
                "aliases": ["ազգային ժողով", "parliament", "assembly"],
                "category": "government",
                "importance": 7,
                "description": "Armenian Parliament building",
            },
            # Cultural & Historical Sites
            "Opera House": {
                "coords": (40.1836, 44.5098),
                "aliases": ["օպերա", "օպերայի տուն", "opera", "opera house", "spendiaryan"],
                "category": "cultural",
                "importance": 10,
                "description": "Armenian National Opera and Ballet Theatre",
            },
            "Cascade": {
                "coords": (40.1876, 44.5086),
                "aliases": ["կասկադ", "cascade complex", "cafesjian", "art center"],
                "category": "cultural",
                "importance": 10,
                "description": "Giant stairway and cultural center with modern art",
            },
            "Matenadaran": {
                "coords": (40.1901, 44.5167),
                "aliases": ["մատենադարան", "manuscript repository", "mesrop mashtots"],
                "category": "cultural",
                "importance": 9,
                "description": "Ancient manuscript repository and museum",
            },
            "Blue Mosque": {
                "coords": (40.1733, 44.5151),
                "aliases": ["կապույտ մզկիթ", "blue mosque", "gök medrese"],
                "category": "religious",
                "importance": 8,
                "description": "Historic 18th century mosque",
            },
            "Mother Armenia": {
                "coords": (40.1856, 44.5098),
                "aliases": ["մայր հայաստան", "mother armenia", "victory park"],
                "category": "monument",
                "importance": 9,
                "description": "Iconic statue overlooking Yerevan",
            },
            "Erebuni Fortress": {
                "coords": (40.1234, 44.5345),
                "aliases": ["էրեբունի", "erebuni", "fortress", "ancient yerevan"],
                "category": "historical",
                "importance": 8,
                "description": "Ancient Urartian fortress, birthplace of Yerevan",
            },
            "Saint Gregory Cathedral": {
                "coords": (40.1756, 44.5089),
                "aliases": ["սուրբ գրիգոր", "cathedral", "gregory illuminator"],
                "category": "religious",
                "importance": 8,
                "description": "Largest Armenian Apostolic cathedral",
            },
            # Shopping & Commercial
            "Northern Avenue": {
                "coords": (40.1792, 44.5146),
                "aliases": ["հյուսիսային պողոտա", "northern", "pedestrian street"],
                "category": "shopping",
                "importance": 9,
                "description": "Main pedestrian shopping street",
            },
            "Vernissage Market": {
                "coords": (40.1823, 44.5134),
                "aliases": ["վերնիսաժ", "vernissage", "flea market", "weekend market"],
                "category": "shopping",
                "importance": 8,
                "description": "Famous weekend arts and crafts market",
            },
            "Dalma Garden Mall": {
                "coords": (40.1567, 44.4789),
                "aliases": ["դալմա", "dalma", "mall", "shopping center"],
                "category": "shopping",
                "importance": 7,
                "description": "Large shopping and entertainment complex",
            },
            "Yerevan Mall": {
                "coords": (40.1934, 44.4823),
                "aliases": ["yerevan mall", "mall", "shopping"],
                "category": "shopping",
                "importance": 7,
                "description": "Major shopping mall in Yerevan",
            },
            "Rossia Mall": {
                "coords": (40.1612, 44.4934),
                "aliases": ["ռոսիա", "rossia", "russia mall"],
                "category": "shopping",
                "importance": 6,
                "description": "Shopping center with various stores",
            },
            # Transport
            "Zvartnots Airport": {
                "coords": (40.1473, 44.3959),
                "aliases": ["զվարթնոց", "airport", "international airport"],
                "category": "transport",
                "importance": 9,
                "description": "Main international airport of Armenia",
            },
            "Central Railway Station": {
                "coords": (40.1567, 44.4912),
                "aliases": ["երկաթգծային", "train station", "railway"],
                "category": "transport",
                "importance": 6,
                "description": "Main railway station",
            },
            "Kilikia Bus Station": {
                "coords": (40.1645, 44.4823),
                "aliases": ["կիլիկիա", "bus station", "central bus"],
                "category": "transport",
                "importance": 7,
                "description": "Central bus terminal",
            },
            # Parks
            "Victory Park": {
                "coords": (40.1876, 44.5098),
                "aliases": ["հաղթանակի այգի", "victory", "park", "amusement park"],
                "category": "park",
                "importance": 8,
                "description": "Large park with amusement rides and lake",
            },
            "Lovers Park": {
                "coords": (40.1823, 44.5089),
                "aliases": ["սիրահարների այգի", "lovers", "romantic park"],
                "category": "park",
                "importance": 7,
                "description": "Romantic park popular for dates",
            },
            "English Park": {
                "coords": (40.1789, 44.5178),
                "aliases": ["անգլիական այգի", "english", "park"],
                "category": "park",
                "importance": 6,
                "description": "Quiet park in city center",
            },
            "Children's Park": {
                "coords": (40.1845, 44.5134),
                "aliases": ["երեխաների այգի", "children", "kids park"],
                "category": "park",
                "importance": 6,
                "description": "Family-friendly park with playgrounds",
            },
            "Circular Park": {
                "coords": (40.1823, 44.5201),
                "aliases": ["շրջանային այգի", "circular", "round park"],
                "category": "park",
                "importance": 5,
                "description": "Circular park around city center",
            },
            # Universities & Education
            "American University": {
                "coords": (40.1934, 44.4912),
                "aliases": ["ամերիկյան համալսարան", "aua", "american uni"],
                "category": "education",
                "importance": 7,
                "description": "American University of Armenia",
            },
            "Yerevan State University": {
                "coords": (40.1789, 44.5189),
                "aliases": ["երևանի պետական", "ysu", "state university"],
                "category": "education",
                "importance": 8,
                "description": "Main state university of Armenia",
            },
            "French University": {
                "coords": (40.1756, 44.5234),
                "aliases": ["ֆրանսիական համալսարան", "french uni", "ufar"],
                "category": "education",
                "importance": 6,
                "description": "French University of Armenia",
            },
            # Hotels
            "Armenia Marriott": {
                "coords": (40.1761, 44.5145),
                "aliases": ["մարիոտ", "marriott", "luxury hotel"],
                "category": "hotel",
                "importance": 8,
                "description": "Luxury hotel on Republic Square",
            },
            "Tufenkian Historic Hotel": {
                "coords": (40.1789, 44.5156),
                "aliases": ["տուֆենկյան", "tufenkian", "historic hotel"],
                "category": "hotel",
                "importance": 7,
                "description": "Boutique historic hotel",
            },
            "Grand Hotel Yerevan": {
                "coords": (40.1823, 44.5123),
                "aliases": ["գրանդ հոտել", "grand hotel"],
                "category": "hotel",
                "importance": 7,
                "description": "Centrally located grand hotel",
            },
            # Markets & Food
            "GUM Market": {
                "coords": (40.1789, 44.5178),
                "aliases": ["գում", "central market", "covered market"],
                "category": "market",
                "importance": 8,
                "description": "Historic covered market",
            },
            "Pak Shuka": {
                "coords": (40.1567, 44.5289),
                "aliases": ["փակ շուկա", "closed market", "weekend market"],
                "category": "market",
                "importance": 6,
                "description": "Large weekend market",
            },
            "Fish Market": {
                "coords": (40.1634, 44.5167),
                "aliases": ["ձկան շուկա", "fish", "seafood market"],
                "category": "market",
                "importance": 5,
                "description": "Specialized fish and seafood market",
            },
            # Entertainment & Nightlife Districts
            "Saryan Street": {
                "coords": (40.1851, 44.5086),
                "aliases": ["սարյանի", "saryan", "martiros saryan", "nightlife street"],
                "category": "district",
                "importance": 9,
                "description": "Popular street with bars, restaurants and nightlife",
            },
            "Abovyan Street": {
                "coords": (40.1776, 44.5146),
                "aliases": ["աբովյանի", "abovyan", "main street"],
                "category": "district",
                "importance": 8,
                "description": "Historic street with shops and cafes",
            },
            "Tumanyan Street": {
                "coords": (40.1822, 44.5149),
                "aliases": ["թումանյանի", "tumanyan", "hovhannes tumanyan"],
                "category": "district",
                "importance": 7,
                "description": "Cultural street with bookstores and cafes",
            },
            # Specific Neighborhoods
            "Kentron District": {
                "coords": (40.1792, 44.5146),
                "aliases": ["կենտրոն", "center", "downtown", "city center"],
                "category": "district",
                "importance": 10,
                "description": "Central district of Yerevan",
            },
            "Arabkir": {
                "coords": (40.2089, 44.4856),
                "aliases": ["արաբկիր", "arabkir district"],
                "category": "district",
                "importance": 6,
                "description": "Northern residential district",
            },
            "Avan": {
                "coords": (40.2156, 44.5489),
                "aliases": ["ավան", "avan district"],
                "category": "district",
                "importance": 5,
                "description": "Northern district of Yerevan",
            },
            "Erebuni": {
                "coords": (40.1345, 44.5234),
                "aliases": ["էրեբունի", "erebuni district"],
                "category": "district",
                "importance": 6,
                "description": "Southern district with historical sites",
            },
            # Sports & Recreation
            "Republican Stadium": {
                "coords": (40.1856, 44.5178),
                "aliases": ["հանրապետական", "stadium", "football stadium"],
                "category": "sports",
                "importance": 7,
                "description": "Main football stadium of Armenia",
            },
            "Karen Demirchyan Complex": {
                "coords": (40.1923, 44.5089),
                "aliases": ["դեմիրճյան", "sports complex", "hamalir"],
                "category": "sports",
                "importance": 7,
                "description": "Large sports and concert complex",
            },
            "Tennis Academy": {
                "coords": (40.1789, 44.4967),
                "aliases": ["թենիսի ակադեմիա", "tennis", "sports academy"],
                "category": "sports",
                "importance": 5,
                "description": "Professional tennis training facility",
            },
            # Business Centers
            "Business Center Yerevan": {
                "coords": (40.1823, 44.5201),
                "aliases": ["բիզնես կենտրոն", "business center", "office complex"],
                "category": "business",
                "importance": 6,
                "description": "Modern business and office complex",
            },
            "Kentron Business Center": {
                "coords": (40.1789, 44.5167),
                "aliases": ["կենտրոն բիզնես", "central business"],
                "category": "business",
                "importance": 5,
                "description": "Central business district offices",
            },
            # Medical Centers
            "Nairi Medical Center": {
                "coords": (40.1867, 44.5123),
                "aliases": ["նաիրի բժշկական", "nairi", "medical center"],
                "category": "medical",
                "importance": 6,
                "description": "Major private medical facility",
            },
            "Surb Grigor Hospital": {
                "coords": (40.1756, 44.5201),
                "aliases": ["սուրբ գրիգոր", "hospital", "medical"],
                "category": "medical",
                "importance": 6,
                "description": "Major hospital in Yerevan",
            },
            # Additional Landmarks
            "Swan Lake": {
                "coords": (40.1837, 44.5135),
                "aliases": ["կարապի լիճ", "swan lake", "lake"],
                "category": "landmark",
                "importance": 7,
                "description": "Artificial lake in city center",
            },
            "Freedom Square": {
                "coords": (40.1834, 44.5089),
                "aliases": ["ազատության հրապարակ", "freedom", "liberty square"],
                "category": "landmark",
                "importance": 7,
                "description": "Historic square near Opera House",
            },
            "Charles Aznavour Square": {
                "coords": (40.1845, 44.5101),
                "aliases": ["ազնավուր", "aznavour", "charles aznavour"],
                "category": "landmark",
                "importance": 6,
                "description": "Square dedicated to famous Armenian-French singer",
            },
        }

    def _initialize_smart_synonyms(self) -> Dict[str, Set[str]]:
        """Return the synonym table: trigger term -> set of related terms."""
        return {
            # Venue types
            "pub": {"bar", "tavern", "brewpub", "beerhouse", "ale house", "պաբ", "փաբ"},
            "bar": {"pub", "lounge", "cocktail bar", "wine bar", "բար", "բառ"},
            "restaurant": {"dining", "eatery", "bistro", "cafe", "ռեստորան"},
            "cafe": {"coffee shop", "coffeehouse", "bistro", "սրճարան"},
            "club": {"nightclub", "disco", "dance club", "ակումբ"},
            "hookah": {"shisha", "waterpipe", "հուկա", "նարգիլե"},
            # Food & Drink
            "beer": {"ale", "lager", "draft", "tap", "brew", "գարեջուր"},
            "draft": {"tap", "on tap", "draught", "fresh beer"},
            "craft": {"artisan", "microbrewery", "specialty", "handcrafted"},
            "cocktail": {"mixed drink", "martini", "mojito", "կոկտեյլ"},
            "wine": {"vino", "vintage", "grape", "գինի"},
            "coffee": {"espresso", "cappuccino", "latte", "սուրճ"},
            # Atmosphere
            "romantic": {"intimate", "cozy", "date night", "couples"},
            "lively": {"energetic", "vibrant", "busy", "active"},
            "quiet": {"peaceful", "calm", "relaxed", "tranquil"},
            "outdoor": {"terrace", "patio", "garden", "rooftop"},
            # Location terms
            "near": {"close to", "by", "next to", "around", "մոտ", "կողքին"},
            "center": {"central", "downtown", "middle", "կենտրոն"},
            "walking": {"on foot", "pedestrian", "walk", "քայլելով"},
            # Quality descriptors
            "best": {"top", "excellent", "finest", "premium", "լավագույն"},
            "good": {"nice", "decent", "quality", "լավ"},
            "cheap": {"affordable", "budget", "inexpensive", "էժան"},
            "expensive": {"pricey", "upscale", "luxury", "թանկ"},
        }

    def _initialize_query_patterns(self) -> Dict[str, str]:
        """Return the intent table: regex alternation -> intent label."""
        return {
            r"near|close to|by|next to|around|մոտ|կողքին": "proximity",
            r"best|top|finest|excellent|լավագույն": "quality_high",
            r"cheap|affordable|budget|էժան": "price_low",
            r"expensive|upscale|luxury|թանկ": "price_high",
            r"walking distance|walk|on foot|քայլելով": "walking",
            r"romantic|date|intimate|ռոմանտիկ": "romantic",
            r"group|friends|party|խումբ": "social",
            r"quiet|peaceful|calm|հանգիստ": "quiet",
            r"lively|busy|energetic|կենդանի": "lively",
            r"outdoor|terrace|patio|բացօթյա": "outdoor",
        }

    # ------------------------------------------------------------------
    # Matching helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _mentions(text: str, term: str) -> bool:
        """Return True if *term* occurs in *text* at the start of a word.

        Anchoring only the left edge keeps inflected forms ("bars",
        "nearest", suffixed Armenian words) while rejecting fragments buried
        inside other words ("pub" in "republic", "by" in "ruby").
        """
        if not term:
            return False
        return re.search(r"(?<!\w)" + re.escape(term), text) is not None

    @classmethod
    def _mentions_any(cls, text: str, terms) -> bool:
        """Return True if any of *terms* is mentioned in *text*."""
        return any(cls._mentions(text, term) for term in terms)

    @staticmethod
    def _radius_to_km(value: str, unit: str) -> Optional[float]:
        """Convert a parsed number and unit to kilometres; None if unusable."""
        try:
            radius = float(value)
        except (TypeError, ValueError):
            return None
        if unit.startswith("mile"):
            radius *= 1.609344
        elif unit in ("m", "meter", "meters"):
            radius /= 1000.0
        # A zero radius would later divide by zero in the location score.
        return radius if radius > 0 else None

    @staticmethod
    def _venue_coordinates(venue: Dict) -> Optional[Tuple[float, float]]:
        """Return the venue's (lat, lon) as floats, or None if absent/invalid."""
        lat, lon = venue.get("latitude"), venue.get("longitude")
        if lat is None or lon is None:
            return None
        try:
            lat, lon = float(lat), float(lon)
        except (TypeError, ValueError):
            return None
        # Written so that NaN fails the check as well.
        if not (-90.0 <= lat <= 90.0) or not (-180.0 <= lon <= 180.0):
            return None
        return lat, lon

    @classmethod
    def _distance_km(cls, origin: Tuple[float, float],
                     target: Tuple[float, float]) -> float:
        """Return the distance in km between two (lat, lon) points.

        Uses geopy's geodesic distance when geopy is installed; otherwise a
        haversine great-circle distance, which differs by well under 1%.
        """
        if geodesic is not None:
            return geodesic(origin, target).kilometers
        lat1, lon1 = math.radians(origin[0]), math.radians(origin[1])
        lat2, lon2 = math.radians(target[0]), math.radians(target[1])
        a = (math.sin((lat2 - lat1) / 2) ** 2
             + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
        return 2 * cls._EARTH_RADIUS_KM * math.asin(min(1.0, math.sqrt(a)))

    # ------------------------------------------------------------------
    # Query analysis
    # ------------------------------------------------------------------
    def enhance_query(self, query: str) -> Dict[str, Any]:
        """Analyse *query* and return everything needed to score venues.

        Returns a dict with ``original_query``, ``expanded_terms``,
        ``geo_context``, ``query_intent``, ``scoring_weights`` and
        ``search_radius`` (km). A radius stated in the query ("within 2 km")
        takes precedence over the keyword-based default. The call is also
        recorded in ``query_history`` / ``user_preferences``.
        """
        geo_context = self._extract_geo_context(query)
        if geo_context["radius_explicit"]:
            search_radius = geo_context["radius_hints"]
        else:
            search_radius = self._determine_search_radius(query)

        enhanced_data = {
            "original_query": query,
            "expanded_terms": self._expand_query_terms(query),
            "geo_context": geo_context,
            "query_intent": self._analyze_query_intent(query),
            "scoring_weights": self._calculate_scoring_weights(query),
            "search_radius": search_radius,
        }

        # Learn from user query patterns
        self._update_user_preferences(query, enhanced_data)
        return enhanced_data

    def _expand_query_terms(self, query: str) -> List[str]:
        """Return the sorted, de-duplicated search terms derived from *query*.

        The result contains the whole lower-cased query, its individual
        content words (three or more characters, not stop words), the
        synonyms of every synonym-table key mentioned in the query, and
        naive plural/gerund variants of words longer than four characters.
        """
        query_lower = query.lower().strip()
        expanded = set()
        if query_lower:
            expanded.add(query_lower)

        for term, synonyms in self.synonym_map.items():
            if self._mentions(query_lower, term):
                expanded.update(synonyms)

        for word in query_lower.split():
            # The query's own words must be searchable too; without them
            # "craft beer" could never match a venue named "Craft Beer".
            if len(word) >= 3 and word not in self._STOPWORDS:
                expanded.add(word)
            # Morphological variations (simple stemming)
            if len(word) > 4:
                expanded.add(word + "s")
                expanded.add(word + "ing")
                if word.endswith("s"):
                    expanded.add(word[:-1])
                if word.endswith("ing"):
                    expanded.add(word[:-3])

        # Sorted so the output does not depend on set iteration order.
        return sorted(expanded)

    def _extract_geo_context(self, query: str) -> Dict[str, Any]:
        """Extract landmarks, proximity wording and a radius from *query*.

        Returns a dict with:
          * ``landmarks`` - one entry per matched landmark (``name``,
            ``coords``, ``importance``, ``category``, plus ``matched_text``
            and ``name_match`` describing how it was matched);
          * ``proximity_terms`` - raw regex matches (tuples for radius
            expressions, strings for proximity words);
          * ``radius_hints`` - radius in km (1.0 unless stated in the query);
          * ``radius_explicit`` - True when the radius came from the query;
          * ``coordinates`` - coords of the primary landmark, or None.
        """
        query_lower = query.lower()
        geo_context = {
            "landmarks": [],
            "proximity_terms": [],
            "radius_hints": 1.0,  # Default 1km
            "radius_explicit": False,
            "coordinates": None,
        }

        for landmark, data in self.geo_landmarks.items():
            landmark_lower = landmark.lower()
            matched_text = None
            name_match = False
            if self._mentions(query_lower, landmark_lower):
                matched_text, name_match = landmark_lower, True
            else:
                alias_hits = [
                    alias.lower() for alias in data["aliases"]
                    if self._mentions(query_lower, alias.lower())
                ]
                if alias_hits:
                    matched_text = max(alias_hits, key=len)
            if matched_text is None:
                continue
            geo_context["landmarks"].append({
                "name": landmark,
                "coords": data["coords"],
                "importance": data["importance"],
                "category": data["category"],
                "matched_text": matched_text,
                "name_match": name_match,
            })

        for pattern in _PROXIMITY_PATTERNS:
            matches = pattern.findall(query_lower)
            if not matches:
                continue
            geo_context["proximity_terms"].extend(matches)
            for match in matches:
                # Only the radius patterns have groups and yield tuples.
                if isinstance(match, tuple) and len(match) >= 2:
                    radius = self._radius_to_km(match[0], match[1])
                    if radius is not None:
                        geo_context["radius_hints"] = radius
                        geo_context["radius_explicit"] = True

        if geo_context["landmarks"]:
            # A landmark named outright beats one reached through a generic
            # alias ("yerevan mall" must not resolve to another mall via
            # "mall"); importance, then match length, break remaining ties.
            primary = max(
                geo_context["landmarks"],
                key=lambda lm: (lm["name_match"], lm["importance"],
                                len(lm["matched_text"])),
            )
            geo_context["coordinates"] = primary["coords"]

        return geo_context

    def _analyze_query_intent(self, query: str) -> Dict[str, float]:
        """Return intent label -> confidence in (0, 1] for intents found."""
        intent_scores = defaultdict(float)
        query_lower = query.lower()

        for pattern, intent in self.query_patterns.items():
            # Anchor at a word start so "top" does not fire on "rooftop".
            if re.search(r"(?<!\w)(?:" + pattern + r")", query_lower):
                intent_scores[intent] += 1.0

        if intent_scores:
            max_score = max(intent_scores.values())
            for intent in intent_scores:
                intent_scores[intent] /= max_score

        return dict(intent_scores)

    def _calculate_scoring_weights(self, query: str) -> Dict[str, float]:
        """Return per-component score multipliers tuned to *query*."""
        weights = {
            "name_match": 1.0,
            "category_match": 1.0,
            "summary_match": 1.0,
            "location_match": 1.0,
            "rating_boost": 1.0,
            "distance_penalty": 1.0,
        }
        query_lower = query.lower()

        geo_terms = ["near", "close", "by", "walking", "distance", "մոտ", "կողքին"]
        is_geo_query = self._mentions_any(query_lower, geo_terms)

        # Boost location matching for geo queries
        if is_geo_query:
            weights["location_match"] = 2.0
            weights["distance_penalty"] = 1.5

        # Short non-geo queries are most likely a venue name
        if len(query.split()) <= 3 and not is_geo_query:
            weights["name_match"] = 2.0

        # Boost category for type-specific searches
        category_terms = ["pub", "bar", "restaurant", "cafe", "club"]
        if self._mentions_any(query_lower, category_terms):
            weights["category_match"] = 1.5

        # Boost rating for quality searches
        quality_terms = ["best", "top", "excellent", "good", "լավագույն"]
        if self._mentions_any(query_lower, quality_terms):
            weights["rating_boost"] = 1.5

        return weights

    def _determine_search_radius(self, query: str) -> float:
        """Return the keyword-based default search radius in km."""
        query_lower = query.lower()

        # Walking distance queries
        if self._mentions_any(query_lower, ["walk", "walking", "on foot", "քայլելով"]):
            return 0.5  # 500m

        # Neighborhood queries
        if self._mentions_any(query_lower, ["neighborhood", "area", "district", "թաղամաս"]):
            return 2.0  # 2km

        # City-wide queries
        if self._mentions_any(query_lower, ["yerevan", "city", "երևան", "քաղաք"]):
            return 10.0  # 10km

        return 1.5  # 1.5km

    def _update_user_preferences(self, query: str, enhanced_data: Dict):
        """Record the query and nudge ``user_preferences`` toward its intents."""
        self.query_history.append({
            "query": query,
            "timestamp": self._query_counter,  # Sequence number, not wall time
            "geo_context": enhanced_data["geo_context"],
            "intent": enhanced_data["query_intent"],
        })
        self._query_counter += 1

        # Keep only recent history (memory efficient)
        if len(self.query_history) > 100:
            self.query_history = self.query_history[-50:]

        for intent, score in enhanced_data["query_intent"].items():
            self.user_preferences[intent] += score * 0.1  # Small learning rate

    # ------------------------------------------------------------------
    # Scoring
    # ------------------------------------------------------------------
    def calculate_enhanced_score(self, venue: Dict, enhanced_query: Dict) -> Tuple[float, Dict]:
        """Score *venue* against the output of :meth:`enhance_query`.

        Args:
            venue: dict read via the keys ``name``, ``category``,
                ``summary``, ``latitude``, ``longitude`` and ``rating``;
                missing or ``None`` values are treated as absent.
            enhanced_query: dict providing ``scoring_weights``,
                ``expanded_terms``, ``geo_context`` and ``search_radius``.

        Returns:
            ``(score, explanation)`` where ``score`` is clamped to be
            non-negative and ``explanation`` holds each weighted component
            plus ``total`` (equal to the returned score).
        """
        explanation = {
            "name_match": 0,
            "category_match": 0,
            "summary_match": 0,
            "location_match": 0,
            "rating_boost": 0,
            "distance_penalty": 0,
            "total": 0,
        }

        venue_name = str(venue.get("name") or "").lower()
        venue_category = str(venue.get("category") or "").lower()
        venue_summary = str(venue.get("summary") or "").lower()

        weights = enhanced_query["scoring_weights"]
        # An empty term is a substring of everything; never let it score.
        terms = [term for term in enhanced_query["expanded_terms"] if term]

        # Name matching: 5 points per term found in the name
        name_hits = sum(1 for term in terms if term in venue_name)
        explanation["name_match"] = name_hits * 5 * weights["name_match"]

        # Category matching: 3 points per term found in the category
        category_hits = sum(1 for term in terms if term in venue_category)
        explanation["category_match"] = category_hits * 3 * weights["category_match"]

        # Summary matching: one point per occurrence, capped at 10 per term
        # so a keyword-stuffed summary cannot dominate. Tokenising on \w+
        # strips punctuation ("tap," still counts as "tap").
        word_counts = Counter(re.findall(r"\w+", venue_summary))
        summary_score = 0.0
        for term in terms:
            if re.fullmatch(r"\w+", term):
                occurrences = word_counts[term]
            else:
                # Multi-word or punctuated terms can only match as phrases.
                occurrences = venue_summary.count(term)
            summary_score += min(occurrences, 10)
        explanation["summary_match"] = summary_score * weights["summary_match"]

        # Location/proximity scoring
        location_score = 0.0
        distance_penalty = 0.0
        origin = enhanced_query["geo_context"]["coordinates"]
        venue_coords = self._venue_coordinates(venue)
        search_radius = enhanced_query["search_radius"]
        if origin and venue_coords is not None and search_radius > 0:
            distance = self._distance_km(origin, venue_coords)
            if distance <= search_radius:
                # Closer venues get higher scores
                location_score = max(0.0, 10 * (1 - distance / search_radius))
            # The penalty starts at half the radius and grows linearly,
            # including for venues outside the radius.
            if distance > search_radius * 0.5:
                distance_penalty = (distance - search_radius * 0.5) * 2
        explanation["location_match"] = location_score * weights["location_match"]
        explanation["distance_penalty"] = distance_penalty * weights["distance_penalty"]

        # Rating boost: only ratings above 3.0 add points
        try:
            rating = float(venue.get("rating") or 0)
        except (TypeError, ValueError):
            rating = 0.0
        if rating > 0:
            explanation["rating_boost"] = max(
                0, (rating - 3.0) * 2 * weights["rating_boost"]
            )

        score = (
            explanation["name_match"]
            + explanation["category_match"]
            + explanation["summary_match"]
            + explanation["location_match"]
            - explanation["distance_penalty"]
            + explanation["rating_boost"]
        )
        score = max(0, score)
        explanation["total"] = score
        return score, explanation

    @staticmethod
    def _match_reasons(venue: Dict, details: Dict) -> List[str]:
        """Return short human-readable reasons for the positive components."""
        rating = venue.get('rating', 'N/A')
        reasons = []
        if details['name_match'] > 0:
            reasons.append(f"name match ({details['name_match']:.1f})")
        if details['category_match'] > 0:
            reasons.append(f"category match ({details['category_match']:.1f})")
        if details['summary_match'] > 0:
            reasons.append(f"content match ({details['summary_match']:.1f})")
        if details['location_match'] > 0:
            reasons.append(f"location match ({details['location_match']:.1f})")
        if details['rating_boost'] > 0:
            reasons.append(f"high rating ({rating}⭐)")
        return reasons

    def enhance_search(self, query: str, venues: List[Dict],
                       top_k: Optional[int] = 10) -> List[Dict]:
        """Re-rank *venues* for *query* and return the best *top_k*.

        Each result is a shallow copy of the venue dict with three added
        keys: ``enhanced_score`` (float), ``match_reasons`` (list of str)
        and ``score_details`` (the per-component explanation). The input
        dicts are not modified. Items that are not dicts are skipped with a
        warning. ``top_k=None`` returns every scored venue.
        """
        enhanced_query = self.enhance_query(query)
        ranked = []
        for venue in venues or []:
            if not isinstance(venue, dict):
                logger.warning("Skipping non-dict venue of type %s",
                               type(venue).__name__)
                continue
            score, details = self.calculate_enhanced_score(venue, enhanced_query)
            result = dict(venue)
            result["enhanced_score"] = score
            result["match_reasons"] = self._match_reasons(venue, details)
            result["score_details"] = details
            ranked.append(result)

        # sorted() is stable, so equally scored venues keep their input order.
        ranked.sort(key=lambda item: item["enhanced_score"], reverse=True)
        if top_k is None:
            return ranked
        return ranked[:max(0, top_k)]

    def get_search_explanation(self, query: str, top_venues: List[Tuple[Dict, float, Dict]]) -> str:
        """Return a markdown-style summary of the first three results.

        *top_venues* holds ``(venue, score, details)`` tuples where
        ``details`` is the explanation dict from
        :meth:`calculate_enhanced_score`.
        """
        if not top_venues:
            return "No venues found matching your criteria."

        explanations = [f"🔍 Search results for: '{query}'\n"]
        for i, (venue, score, details) in enumerate(top_venues[:3], 1):
            venue_name = venue.get('name', 'Unknown')
            reasons = self._match_reasons(venue, details)
            explanation_text = ", ".join(reasons) if reasons else "general match"
            explanations.append(
                f"{i}. **{venue_name}** (Score: {score:.1f}) - {explanation_text}"
            )

        return "\n".join(explanations)


# Integration with your existing system
def integrate_lightweight_rag(venue_ai_instance):
    """Attach a :class:`LightweightRAGEnhancer` to *venue_ai_instance*.

    The instance's ``_smart_venue_search`` is wrapped: the original is asked
    for ``top_k * 2`` candidates, which are then re-ranked with
    :meth:`LightweightRAGEnhancer.enhance_search` and cut to ``top_k``.
    NOTE(review): this assumes the original search returns a list of venue
    dicts - confirm against the host class. Returns the same instance.
    """
    venue_ai_instance.rag_enhancer = LightweightRAGEnhancer()

    original_search = venue_ai_instance._smart_venue_search

    def enhanced_smart_search(query, top_k=20):
        # Fetch extra candidates so re-ranking has something to choose from.
        initial_results = original_search(query, top_k * 2)
        return venue_ai_instance.rag_enhancer.enhance_search(
            query, initial_results, top_k
        )

    venue_ai_instance._smart_venue_search = enhanced_smart_search
    return venue_ai_instance


# Example usage
if __name__ == "__main__":
    # Test the lightweight RAG
    enhancer = LightweightRAGEnhancer()

    # Mock venue data
    test_venues = [
        {
            'name': 'Dargett Craft Beer',
            'category': 'pub',
            'summary': "Armenia's first craft brewery offering artisanal beers on tap",
            'rating': 4.6,
            'address': '72 Arami Street',
        },
        {
            'name': 'Coffee Central',
            'category': 'cafe',
            'summary': 'Cozy coffee shop with outdoor seating',
            'rating': 4.2,
            'address': '15 Mashtots Avenue',
        },
    ]

    # Test search
    results = enhancer.enhance_search("find craft beer pub", test_venues)
    for result in results:
        print(f"Venue: {result['name']}")
        print(f"Score: {result['enhanced_score']:.2f}")
        print(f"Reasons: {', '.join(result['match_reasons'])}")
        print("-" * 30)