Spaces:
Sleeping
Sleeping
| import re | |
| import json | |
| import math | |
| from typing import List, Dict, Optional, Set, Tuple | |
| from collections import defaultdict, Counter | |
| from geopy.distance import geodesic | |
| import logging | |
| from datetime import datetime | |
| logger = logging.getLogger(__name__) | |
class LightweightRAGEnhancer:
    """Rule-based query enhancer and re-ranker for Yerevan venue search.

    The class keeps three static lookup tables (landmarks, synonyms and intent
    regexes) and uses them to:

    * expand a free-text query into a list of search terms,
    * detect landmarks and distance phrases mentioned in the query,
    * derive per-query scoring weights and a search radius,
    * score venue dicts against the enhanced query and explain the score.

    All keyword and alias matching is plain substring matching on the
    lower-cased query unless stated otherwise, so short aliases such as
    "park" or "mall" also hit longer words that contain them.
    """

    # Numeric distance phrases such as "within 2.5 km" or "3 miles from".
    # Group 1 is the number (decimals allowed), group 2 the unit.
    _DISTANCE_PATTERNS = (
        re.compile(r"within (\d+(?:\.\d+)?)\s*(km|kilometers|miles?)"),
        re.compile(r"(\d+(?:\.\d+)?)\s*(km|kilometers|miles?) (from|of|near)"),
    )
    # Free-form proximity wording.  "by" is anchored to word boundaries because
    # as a bare substring it also matches "rugby", "baby", "lobby", ...
    _PROXIMITY_WORDS = re.compile(r"close to|near|\bby\b|next to|around|մոտ|կողքին")
    # Terms that switch the scorer into "location matters" mode.
    _GEO_WEIGHT_TERMS = re.compile(r"near|close|\bby\b|walking|distance|մոտ|կողքին")
    # Value of geo_context["radius_hints"] when the query states no distance.
    _DEFAULT_RADIUS_HINT_KM = 1.0
    _KM_PER_MILE = 1.609

    def __init__(self):
        """Build the lookup tables and start with empty learning state."""
        self.geo_landmarks = self._initialize_comprehensive_geo_data()
        self.synonym_map = self._initialize_smart_synonyms()
        self.query_patterns = self._initialize_query_patterns()
        # intent name -> accumulated weight, nudged on every enhance_query call
        self.user_preferences = defaultdict(float)
        self.query_history = []
        # Monotonic sequence number used as the history "timestamp"; it keeps
        # increasing even after the history list is trimmed.
        self._query_seq = 0

    def _initialize_comprehensive_geo_data(self) -> Dict[str, Dict]:
        """Return the landmark table keyed by English landmark name.

        Each value holds ``coords`` as a (latitude, longitude) tuple,
        ``aliases`` (Armenian and English strings matched as substrings of the
        query), ``category``, ``importance`` (higher wins when several
        landmarks are mentioned) and a free-text ``description``.

        NOTE(review): coordinates are reproduced as found and have not been
        verified against a map; several entries share aliases ("mall", "park",
        "weekend market", "էրեբունի", "սուրբ գրիգոր").
        """
        return {
            # Government & Administrative
            "Republic Square": {
                "coords": (40.1761, 44.5126),
                "aliases": ["հանրապետության հրապարակ", "republic", "central square", "main square"],
                "category": "landmark",
                "importance": 10,
                "description": "Central square of Yerevan, heart of the city"
            },
            "Presidential Palace": {
                "coords": (40.1789, 44.5145),
                "aliases": ["նախագահական", "presidential", "palace"],
                "category": "government",
                "importance": 8,
                "description": "Official residence of Armenian President"
            },
            "National Assembly": {
                "coords": (40.1823, 44.5167),
                "aliases": ["ազգային ժողով", "parliament", "assembly"],
                "category": "government",
                "importance": 7,
                "description": "Armenian Parliament building"
            },
            # Cultural & Historical Sites
            "Opera House": {
                "coords": (40.1836, 44.5098),
                "aliases": ["օպերա", "օպերայի տուն", "opera", "opera house", "spendiaryan"],
                "category": "cultural",
                "importance": 10,
                "description": "Armenian National Opera and Ballet Theatre"
            },
            "Cascade": {
                "coords": (40.1876, 44.5086),
                "aliases": ["կասկադ", "cascade complex", "cafesjian", "art center"],
                "category": "cultural",
                "importance": 10,
                "description": "Giant stairway and cultural center with modern art"
            },
            "Matenadaran": {
                "coords": (40.1901, 44.5167),
                "aliases": ["մատենադարան", "manuscript repository", "mesrop mashtots"],
                "category": "cultural",
                "importance": 9,
                "description": "Ancient manuscript repository and museum"
            },
            "Blue Mosque": {
                "coords": (40.1733, 44.5151),
                "aliases": ["կապույտ մզկիթ", "blue mosque", "gök medrese"],
                "category": "religious",
                "importance": 8,
                "description": "Historic 18th century mosque"
            },
            "Mother Armenia": {
                "coords": (40.1856, 44.5098),
                "aliases": ["մայր հայաստան", "mother armenia", "victory park"],
                "category": "monument",
                "importance": 9,
                "description": "Iconic statue overlooking Yerevan"
            },
            "Erebuni Fortress": {
                "coords": (40.1234, 44.5345),
                "aliases": ["էրեբունի", "erebuni", "fortress", "ancient yerevan"],
                "category": "historical",
                "importance": 8,
                "description": "Ancient Urartian fortress, birthplace of Yerevan"
            },
            "Saint Gregory Cathedral": {
                "coords": (40.1756, 44.5089),
                "aliases": ["սուրբ գրիգոր", "cathedral", "gregory illuminator"],
                "category": "religious",
                "importance": 8,
                "description": "Largest Armenian Apostolic cathedral"
            },
            # Shopping & Commercial
            "Northern Avenue": {
                "coords": (40.1792, 44.5146),
                "aliases": ["հյուսիսային պողոտա", "northern", "pedestrian street"],
                "category": "shopping",
                "importance": 9,
                "description": "Main pedestrian shopping street"
            },
            "Vernissage Market": {
                "coords": (40.1823, 44.5134),
                "aliases": ["վերնիսաժ", "vernissage", "flea market", "weekend market"],
                "category": "shopping",
                "importance": 8,
                "description": "Famous weekend arts and crafts market"
            },
            "Dalma Garden Mall": {
                "coords": (40.1567, 44.4789),
                "aliases": ["դալմա", "dalma", "mall", "shopping center"],
                "category": "shopping",
                "importance": 7,
                "description": "Large shopping and entertainment complex"
            },
            "Yerevan Mall": {
                "coords": (40.1934, 44.4823),
                "aliases": ["yerevan mall", "mall", "shopping"],
                "category": "shopping",
                "importance": 7,
                "description": "Major shopping mall in Yerevan"
            },
            "Rossia Mall": {
                "coords": (40.1612, 44.4934),
                "aliases": ["ռոսիա", "rossia", "russia mall"],
                "category": "shopping",
                "importance": 6,
                "description": "Shopping center with various stores"
            },
            # Transport
            "Zvartnots Airport": {
                "coords": (40.1473, 44.3959),
                "aliases": ["զվարթնոց", "airport", "international airport"],
                "category": "transport",
                "importance": 9,
                "description": "Main international airport of Armenia"
            },
            "Central Railway Station": {
                "coords": (40.1567, 44.4912),
                "aliases": ["երկաթգծային", "train station", "railway"],
                "category": "transport",
                "importance": 6,
                "description": "Main railway station"
            },
            "Kilikia Bus Station": {
                "coords": (40.1645, 44.4823),
                "aliases": ["կիլիկիա", "bus station", "central bus"],
                "category": "transport",
                "importance": 7,
                "description": "Central bus terminal"
            },
            # Parks
            "Victory Park": {
                "coords": (40.1876, 44.5098),
                "aliases": ["հաղթանակի այգի", "victory", "park", "amusement park"],
                "category": "park",
                "importance": 8,
                "description": "Large park with amusement rides and lake"
            },
            "Lovers Park": {
                "coords": (40.1823, 44.5089),
                "aliases": ["սիրահարների այգի", "lovers", "romantic park"],
                "category": "park",
                "importance": 7,
                "description": "Romantic park popular for dates"
            },
            "English Park": {
                "coords": (40.1789, 44.5178),
                "aliases": ["անգլիական այգի", "english", "park"],
                "category": "park",
                "importance": 6,
                "description": "Quiet park in city center"
            },
            "Children's Park": {
                "coords": (40.1845, 44.5134),
                "aliases": ["երեխաների այգի", "children", "kids park"],
                "category": "park",
                "importance": 6,
                "description": "Family-friendly park with playgrounds"
            },
            "Circular Park": {
                "coords": (40.1823, 44.5201),
                "aliases": ["շրջանային այգի", "circular", "round park"],
                "category": "park",
                "importance": 5,
                "description": "Circular park around city center"
            },
            # Universities & Education
            "American University": {
                "coords": (40.1934, 44.4912),
                "aliases": ["ամերիկյան համալսարան", "aua", "american uni"],
                "category": "education",
                "importance": 7,
                "description": "American University of Armenia"
            },
            "Yerevan State University": {
                "coords": (40.1789, 44.5189),
                "aliases": ["երևանի պետական", "ysu", "state university"],
                "category": "education",
                "importance": 8,
                "description": "Main state university of Armenia"
            },
            "French University": {
                "coords": (40.1756, 44.5234),
                "aliases": ["ֆրանսիական համալսարան", "french uni", "ufar"],
                "category": "education",
                "importance": 6,
                "description": "French University of Armenia"
            },
            # Hotels
            "Armenia Marriott": {
                "coords": (40.1761, 44.5145),
                "aliases": ["մարիոտ", "marriott", "luxury hotel"],
                "category": "hotel",
                "importance": 8,
                "description": "Luxury hotel on Republic Square"
            },
            "Tufenkian Historic Hotel": {
                "coords": (40.1789, 44.5156),
                "aliases": ["տուֆենկյան", "tufenkian", "historic hotel"],
                "category": "hotel",
                "importance": 7,
                "description": "Boutique historic hotel"
            },
            "Grand Hotel Yerevan": {
                "coords": (40.1823, 44.5123),
                "aliases": ["գրանդ հոտել", "grand hotel"],
                "category": "hotel",
                "importance": 7,
                "description": "Centrally located grand hotel"
            },
            # Markets & Food
            "GUM Market": {
                "coords": (40.1789, 44.5178),
                "aliases": ["գում", "central market", "covered market"],
                "category": "market",
                "importance": 8,
                "description": "Historic covered market"
            },
            "Pak Shuka": {
                "coords": (40.1567, 44.5289),
                "aliases": ["փակ շուկա", "closed market", "weekend market"],
                "category": "market",
                "importance": 6,
                "description": "Large weekend market"
            },
            "Fish Market": {
                "coords": (40.1634, 44.5167),
                "aliases": ["ձկան շուկա", "fish", "seafood market"],
                "category": "market",
                "importance": 5,
                "description": "Specialized fish and seafood market"
            },
            # Entertainment & Nightlife Districts
            "Saryan Street": {
                "coords": (40.1851, 44.5086),
                "aliases": ["սարյանի", "saryan", "martiros saryan", "nightlife street"],
                "category": "district",
                "importance": 9,
                "description": "Popular street with bars, restaurants and nightlife"
            },
            "Abovyan Street": {
                "coords": (40.1776, 44.5146),
                "aliases": ["աբովյանի", "abovyan", "main street"],
                "category": "district",
                "importance": 8,
                "description": "Historic street with shops and cafes"
            },
            "Tumanyan Street": {
                "coords": (40.1822, 44.5149),
                "aliases": ["թումանյանի", "tumanyan", "hovhannes tumanyan"],
                "category": "district",
                "importance": 7,
                "description": "Cultural street with bookstores and cafes"
            },
            # Specific Neighborhoods
            "Kentron District": {
                "coords": (40.1792, 44.5146),
                "aliases": ["կենտրոն", "center", "downtown", "city center"],
                "category": "district",
                "importance": 10,
                "description": "Central district of Yerevan"
            },
            "Arabkir": {
                "coords": (40.2089, 44.4856),
                "aliases": ["արաբկիր", "arabkir district"],
                "category": "district",
                "importance": 6,
                "description": "Northern residential district"
            },
            "Avan": {
                "coords": (40.2156, 44.5489),
                "aliases": ["ավան", "avan district"],
                "category": "district",
                "importance": 5,
                "description": "Northern district of Yerevan"
            },
            "Erebuni": {
                "coords": (40.1345, 44.5234),
                "aliases": ["էրեբունի", "erebuni district"],
                "category": "district",
                "importance": 6,
                "description": "Southern district with historical sites"
            },
            # Sports & Recreation
            "Republican Stadium": {
                "coords": (40.1856, 44.5178),
                "aliases": ["հանրապետական", "stadium", "football stadium"],
                "category": "sports",
                "importance": 7,
                "description": "Main football stadium of Armenia"
            },
            "Karen Demirchyan Complex": {
                "coords": (40.1923, 44.5089),
                "aliases": ["դեմիրճյան", "sports complex", "hamalir"],
                "category": "sports",
                "importance": 7,
                "description": "Large sports and concert complex"
            },
            "Tennis Academy": {
                "coords": (40.1789, 44.4967),
                "aliases": ["թենիսի ակադեմիա", "tennis", "sports academy"],
                "category": "sports",
                "importance": 5,
                "description": "Professional tennis training facility"
            },
            # Business Centers
            "Business Center Yerevan": {
                "coords": (40.1823, 44.5201),
                "aliases": ["բիզնես կենտրոն", "business center", "office complex"],
                "category": "business",
                "importance": 6,
                "description": "Modern business and office complex"
            },
            "Kentron Business Center": {
                "coords": (40.1789, 44.5167),
                "aliases": ["կենտրոն բիզնես", "central business"],
                "category": "business",
                "importance": 5,
                "description": "Central business district offices"
            },
            # Medical Centers
            "Nairi Medical Center": {
                "coords": (40.1867, 44.5123),
                "aliases": ["նաիրի բժշկական", "nairi", "medical center"],
                "category": "medical",
                "importance": 6,
                "description": "Major private medical facility"
            },
            "Surb Grigor Hospital": {
                "coords": (40.1756, 44.5201),
                "aliases": ["սուրբ գրիգոր", "hospital", "medical"],
                "category": "medical",
                "importance": 6,
                "description": "Major hospital in Yerevan"
            },
            # Additional Landmarks
            "Swan Lake": {
                "coords": (40.1837, 44.5135),
                "aliases": ["կարապի լիճ", "swan lake", "lake"],
                "category": "landmark",
                "importance": 7,
                "description": "Artificial lake in city center"
            },
            "Freedom Square": {
                "coords": (40.1834, 44.5089),
                "aliases": ["ազատության հրապարակ", "freedom", "liberty square"],
                "category": "landmark",
                "importance": 7,
                "description": "Historic square near Opera House"
            },
            "Charles Aznavour Square": {
                "coords": (40.1845, 44.5101),
                "aliases": ["ազնավուր", "aznavour", "charles aznavour"],
                "category": "landmark",
                "importance": 6,
                "description": "Square dedicated to famous Armenian-French singer"
            }
        }

    def _initialize_smart_synonyms(self) -> Dict[str, Set[str]]:
        """Return the synonym table: trigger term -> set of related terms.

        A trigger fires when it occurs as a substring of the lower-cased query
        (see ``_expand_query_terms``).
        """
        return {
            # Venue types
            "pub": {"bar", "tavern", "brewpub", "beerhouse", "ale house", "պաբ", "փաբ"},
            "bar": {"pub", "lounge", "cocktail bar", "wine bar", "բար", "բառ"},
            "restaurant": {"dining", "eatery", "bistro", "cafe", "ռեստորան"},
            "cafe": {"coffee shop", "coffeehouse", "bistro", "սրճարան"},
            "club": {"nightclub", "disco", "dance club", "ակումբ"},
            "hookah": {"shisha", "waterpipe", "հուկա", "նարգիլե"},
            # Food & Drink
            "beer": {"ale", "lager", "draft", "tap", "brew", "գարեջուր"},
            "draft": {"tap", "on tap", "draught", "fresh beer"},
            "craft": {"artisan", "microbrewery", "specialty", "handcrafted"},
            "cocktail": {"mixed drink", "martini", "mojito", "կոկտեյլ"},
            "wine": {"vino", "vintage", "grape", "գինի"},
            "coffee": {"espresso", "cappuccino", "latte", "սուրճ"},
            # Atmosphere
            "romantic": {"intimate", "cozy", "date night", "couples"},
            "lively": {"energetic", "vibrant", "busy", "active"},
            "quiet": {"peaceful", "calm", "relaxed", "tranquil"},
            "outdoor": {"terrace", "patio", "garden", "rooftop"},
            # Location terms
            "near": {"close to", "by", "next to", "around", "մոտ", "կողքին"},
            "center": {"central", "downtown", "middle", "կենտրոն"},
            "walking": {"on foot", "pedestrian", "walk", "քայլելով"},
            # Quality descriptors
            "best": {"top", "excellent", "finest", "premium", "լավագույն"},
            "good": {"nice", "decent", "quality", "լավ"},
            "cheap": {"affordable", "budget", "inexpensive", "էժան"},
            "expensive": {"pricey", "upscale", "luxury", "թանկ"}
        }

    def _initialize_query_patterns(self) -> Dict[str, str]:
        """Return the intent table: regex (alternation of keywords) -> intent name.

        The patterns are unanchored, so keywords also match inside longer
        words; only "by" is restricted to whole words because it is too short
        to be meaningful as a substring.
        """
        return {
            r"near|close to|\bby\b|next to|around|մոտ|կողքին": "proximity",
            r"best|top|finest|excellent|լավագույն": "quality_high",
            r"cheap|affordable|budget|էժան": "price_low",
            r"expensive|upscale|luxury|թանկ": "price_high",
            r"walking distance|walk|on foot|քայլելով": "walking",
            r"romantic|date|intimate|ռոմանտիկ": "romantic",
            r"group|friends|party|խումբ": "social",
            r"quiet|peaceful|calm|հանգիստ": "quiet",
            r"lively|busy|energetic|կենդանի": "lively",
            r"outdoor|terrace|patio|բացօթյա": "outdoor"
        }

    def enhance_query(self, query: str) -> Dict[str, object]:
        """Analyse ``query`` and return everything the scorer needs.

        Returns a dict with the keys ``original_query``, ``expanded_terms``,
        ``geo_context``, ``query_intent``, ``scoring_weights`` and
        ``search_radius`` (km).

        Side effect: the query is appended to ``query_history`` and
        ``user_preferences`` is updated.
        """
        enhanced_data = {
            "original_query": query,
            "expanded_terms": self._expand_query_terms(query),
            "geo_context": self._extract_geo_context(query),
            "query_intent": self._analyze_query_intent(query),
            "scoring_weights": self._calculate_scoring_weights(query),
            "search_radius": self._determine_search_radius(query)
        }
        # Learn from user query patterns
        self._update_user_preferences(query, enhanced_data)
        return enhanced_data

    def enhance_search(self, query: str, venues: List[Dict], top_k: int = 20) -> List[Dict]:
        """Re-rank ``venues`` for ``query`` and return the best ``top_k``.

        Args:
            query: Free-text user query.
            venues: Candidate venues as dicts (the keys read are ``name``,
                ``category``, ``summary``, ``rating``, ``latitude`` and
                ``longitude``; all are optional).
            top_k: Maximum number of results to return.

        Returns:
            Shallow copies of the venue dicts, best first, each extended with
            ``enhanced_score`` (float), ``score_details`` (the per-component
            breakdown) and ``match_reasons`` (list of human-readable strings).
            The input dicts are not modified.  Ties keep their input order.
        """
        if not venues or top_k <= 0:
            return []

        enhanced_query = self.enhance_query(query)
        ranked = []
        for venue in venues:
            score, details = self.calculate_enhanced_score(venue, enhanced_query)
            result = dict(venue)  # copy so the caller's data stays untouched
            result["enhanced_score"] = score
            result["score_details"] = details
            reasons = self._match_reasons(details, venue.get('rating', 'N/A'))
            result["match_reasons"] = reasons or ["general match"]
            ranked.append(result)

        # list.sort is stable, so equally scored venues keep the upstream order.
        ranked.sort(key=lambda item: item["enhanced_score"], reverse=True)
        return ranked[:top_k]

    def _expand_query_terms(self, query: str) -> List[str]:
        """Expand ``query`` into a list of lower-cased search terms.

        The result contains the whole query, every synonym-table trigger found
        in the query together with its synonyms, and for each query word longer
        than four characters the word itself plus naive "s"/"ing" variants.
        Order is unspecified (the terms are collected in a set).
        """
        query_lower = query.lower()
        expanded = {query_lower}

        for term, synonyms in self.synonym_map.items():
            if term in query_lower:
                # Keep the trigger itself: without it a multi-word query such
                # as "craft beer pub" could never match category "pub".
                expanded.add(term)
                expanded.update(synonyms)

        # Morphological variations (very simple stemming)
        for word in query_lower.split():
            if len(word) <= 4:
                continue
            expanded.add(word)
            expanded.add(word + "s")
            expanded.add(word + "ing")
            if word.endswith("s"):
                expanded.add(word[:-1])
            if word.endswith("ing"):
                expanded.add(word[:-3])

        return list(expanded)

    def _parse_explicit_radius(self, query_lower: str) -> Optional[float]:
        """Return the distance stated in the query in km, or None.

        When several distances are present the last match of the last pattern
        wins.  Miles are converted with a factor of 1.609.  A distance of zero
        is ignored because it cannot serve as a search radius.
        """
        radius = None
        for pattern in self._DISTANCE_PATTERNS:
            for match in pattern.findall(query_lower):
                value = float(match[0])
                if "mile" in match[1]:
                    value *= self._KM_PER_MILE
                if value > 0:
                    radius = value
        return radius

    def _extract_geo_context(self, query: str) -> Dict[str, object]:
        """Extract landmarks and distance information mentioned in ``query``.

        Returns a dict with:
            ``landmarks``: one entry (name, coords, importance, category) per
                landmark whose name or any alias is a substring of the query;
            ``proximity_terms``: raw regex matches, tuples for numeric distance
                phrases and plain strings for proximity words;
            ``radius_hints``: the stated distance in km, or 1.0 if none;
            ``coordinates``: coords of the most important matched landmark
                (first one wins on ties), or None.
        """
        query_lower = query.lower()

        # Find mentioned landmarks (each landmark is reported at most once)
        landmarks = []
        for landmark, data in self.geo_landmarks.items():
            candidates = [landmark] + list(data["aliases"])
            if any(candidate.lower() in query_lower for candidate in candidates):
                landmarks.append({
                    "name": landmark,
                    "coords": data["coords"],
                    "importance": data["importance"],
                    "category": data["category"]
                })

        # Collect proximity wording
        proximity_terms = []
        for pattern in self._DISTANCE_PATTERNS + (self._PROXIMITY_WORDS,):
            proximity_terms.extend(pattern.findall(query_lower))

        explicit_radius = self._parse_explicit_radius(query_lower)

        coordinates = None
        if landmarks:
            # Use highest importance landmark as primary
            primary = max(landmarks, key=lambda item: item["importance"])
            coordinates = primary["coords"]

        return {
            "landmarks": landmarks,
            "proximity_terms": proximity_terms,
            "radius_hints": (
                explicit_radius if explicit_radius is not None
                else self._DEFAULT_RADIUS_HINT_KM
            ),
            "coordinates": coordinates
        }

    def _analyze_query_intent(self, query: str) -> Dict[str, float]:
        """Return ``{intent: confidence}`` for every intent regex found in ``query``.

        Each matching pattern contributes 1.0 to its intent; scores are then
        divided by the largest score, so the strongest intent is always 1.0.
        An empty dict means no intent keyword was found.
        """
        intent_scores = defaultdict(float)
        query_lower = query.lower()

        for pattern, intent in self.query_patterns.items():
            if re.search(pattern, query_lower, re.IGNORECASE):
                intent_scores[intent] += 1.0

        if not intent_scores:
            return {}

        max_score = max(intent_scores.values())
        return {intent: value / max_score for intent, value in intent_scores.items()}

    def _calculate_scoring_weights(self, query: str) -> Dict[str, float]:
        """Return per-component multipliers for ``calculate_enhanced_score``.

        Every weight starts at 1.0 and is raised when the query contains the
        corresponding kind of keyword (substring match on the lower-cased
        query, except "by" which must be a whole word).
        """
        weights = {
            "name_match": 1.0,
            "category_match": 1.0,
            "summary_match": 1.0,
            "location_match": 1.0,
            "rating_boost": 1.0,
            "distance_penalty": 1.0
        }
        query_lower = query.lower()
        is_geo_query = self._GEO_WEIGHT_TERMS.search(query_lower) is not None

        # Boost location matching for geo queries
        if is_geo_query:
            weights["location_match"] = 2.0
            weights["distance_penalty"] = 1.5

        # Short queries without geo wording are probably a venue name
        if len(query.split()) <= 3 and not is_geo_query:
            weights["name_match"] = 2.0

        # Boost category for type-specific searches
        if any(term in query_lower for term in ("pub", "bar", "restaurant", "cafe", "club")):
            weights["category_match"] = 1.5

        # Boost rating for quality searches
        if any(term in query_lower for term in ("best", "top", "excellent", "good", "լավագույն")):
            weights["rating_boost"] = 1.5

        return weights

    def _determine_search_radius(self, query: str) -> float:
        """Return the search radius in km implied by ``query``.

        A distance stated in the query ("within 2.5 km", "3 miles from") takes
        precedence.  Otherwise the first matching keyword group decides:
        walking -> 0.5, neighbourhood -> 2.0, city-wide -> 10.0, default 1.5.
        """
        query_lower = query.lower()

        explicit_radius = self._parse_explicit_radius(query_lower)
        if explicit_radius is not None:
            return explicit_radius

        keyword_radii = (
            (("walk", "walking", "on foot", "քայլելով"), 0.5),        # walking distance
            (("neighborhood", "area", "district", "թաղամաս"), 2.0),  # neighbourhood
            (("yerevan", "city", "երևան", "քաղաք"), 10.0),           # city-wide
        )
        for keywords, radius in keyword_radii:
            if any(keyword in query_lower for keyword in keywords):
                return radius

        return 1.5  # default

    def _update_user_preferences(self, query: str, enhanced_data: Dict):
        """Record ``query`` in the history and nudge the intent preferences.

        The history is cut back to its 50 most recent entries once it exceeds
        100.  ``timestamp`` is a per-instance sequence number, not wall-clock
        time; it keeps increasing across trims.
        """
        self.query_history.append({
            "query": query,
            "timestamp": self._query_seq,
            "geo_context": enhanced_data["geo_context"],
            "intent": enhanced_data["query_intent"]
        })
        self._query_seq += 1

        # Keep only recent history (memory efficient)
        if len(self.query_history) > 100:
            self.query_history = self.query_history[-50:]

        # Update preferences based on patterns
        for intent, score in enhanced_data["query_intent"].items():
            self.user_preferences[intent] += score * 0.1  # small learning rate

    def calculate_enhanced_score(self, venue: Dict, enhanced_query: Dict) -> Tuple[float, Dict]:
        """Score ``venue`` against the output of ``enhance_query``.

        Components (each multiplied by its weight from ``scoring_weights``):
            name: +5 per expanded term contained in the venue name;
            category: +3 per expanded term contained in the category;
            summary: +1 per occurrence of an expanded term as a whole
                whitespace-separated token (the per-term weight only drops
                below 1 beyond ten occurrences);
            location: up to +10, falling linearly to 0 at ``search_radius``;
            distance penalty: 2 points per km beyond half the search radius;
            rating: +2 per rating point above 3.0.

        The location components are only computed when the query named a
        landmark and the venue has both ``latitude`` and ``longitude``.

        Returns:
            ``(score, explanation)`` where ``score`` is never negative and
            ``explanation`` maps each component name to its weighted value,
            with ``total`` equal to the returned score.
        """
        weights = enhanced_query["scoring_weights"]
        expanded_terms = enhanced_query["expanded_terms"]
        geo_context = enhanced_query["geo_context"]

        explanation = {
            "name_match": 0,
            "category_match": 0,
            "summary_match": 0,
            "location_match": 0,
            "rating_boost": 0,
            "distance_penalty": 0,
            "total": 0
        }

        # ``or ''`` covers venues that carry the key with an explicit None.
        venue_name = str(venue.get('name') or '').lower()
        venue_category = str(venue.get('category') or '').lower()
        summary_words = str(venue.get('summary') or '').lower().split()

        # Name matching with expanded terms
        name_score = 5 * sum(1 for term in expanded_terms if term in venue_name)
        explanation["name_match"] = name_score * weights["name_match"]

        # Category matching
        category_score = 3 * sum(1 for term in expanded_terms if term in venue_category)
        explanation["category_match"] = category_score * weights["category_match"]

        # Summary matching on whole tokens
        summary_score = 0
        for term in expanded_terms:
            term_count = summary_words.count(term)
            if term_count > 0:
                term_weight = min(3.0, 1.0 / max(1, term_count * 0.1))
                summary_score += term_count * term_weight
        explanation["summary_match"] = summary_score * weights["summary_match"]

        # Location/proximity scoring
        location_score = 0
        distance_penalty = 0
        origin = geo_context["coordinates"]
        latitude = venue.get('latitude')
        longitude = venue.get('longitude')
        # Compare against None: 0.0 is a valid coordinate.
        if origin is not None and latitude is not None and longitude is not None:
            distance = geodesic(origin, (latitude, longitude)).kilometers
            search_radius = enhanced_query["search_radius"]
            if search_radius > 0 and distance <= search_radius:
                # Closer venues get higher scores
                location_score = max(0, 10 * (1 - distance / search_radius))
            # The penalty starts at half the radius, so venues in the outer
            # half of the search circle are already penalised.
            if distance > search_radius * 0.5:
                distance_penalty = (distance - search_radius * 0.5) * 2
        explanation["location_match"] = location_score * weights["location_match"]
        explanation["distance_penalty"] = distance_penalty * weights["distance_penalty"]

        # Rating boost (unparseable ratings are treated as "no rating")
        try:
            rating = float(venue.get('rating') or 0)
        except (ValueError, TypeError):
            rating = 0.0
        if rating > 0:
            rating_boost = (rating - 3.0) * 2  # boost for ratings above 3.0
            explanation["rating_boost"] = max(0, rating_boost * weights["rating_boost"])

        score = (
            explanation["name_match"]
            + explanation["category_match"]
            + explanation["summary_match"]
            + explanation["location_match"]
            - explanation["distance_penalty"]
            + explanation["rating_boost"]
        )
        final_score = max(0, score)
        explanation["total"] = final_score
        return final_score, explanation

    @staticmethod
    def _match_reasons(details: Dict, rating: object) -> List[str]:
        """Turn a score breakdown into short human-readable reason strings."""
        labelled_components = (
            ("name_match", "name match"),
            ("category_match", "category match"),
            ("summary_match", "content match"),
            ("location_match", "location match"),
        )
        reasons = [
            f"{label} ({details[key]:.1f})"
            for key, label in labelled_components
            if details[key] > 0
        ]
        if details['rating_boost'] > 0:
            reasons.append(f"high rating ({rating}⭐)")
        return reasons

    def get_search_explanation(self, query: str, top_venues: List[Tuple[Dict, float, Dict]]) -> str:
        """Generate a human-readable explanation of search results.

        Args:
            query: The query the results belong to.
            top_venues: ``(venue, score, details)`` tuples, best first, where
                ``details`` is the breakdown from ``calculate_enhanced_score``.

        Returns:
            A header line followed by one line for each of the first three
            venues, or a fixed message when ``top_venues`` is empty.
        """
        if not top_venues:
            return "No venues found matching your criteria."

        lines = [f"🔍 Search results for: '{query}'\n"]
        for i, (venue, score, details) in enumerate(top_venues[:3], 1):
            venue_name = venue.get('name', 'Unknown')
            reasons = self._match_reasons(details, venue.get('rating', 'N/A'))
            explanation_text = ", ".join(reasons) if reasons else "general match"
            lines.append(f"{i}. **{venue_name}** (Score: {score:.1f}) - {explanation_text}")
        return "\n".join(lines)
# Integration with your existing system
def integrate_lightweight_rag(venue_ai_instance):
    """Attach a LightweightRAGEnhancer to ``venue_ai_instance`` and wrap its search.

    The instance gains a ``rag_enhancer`` attribute, and its
    ``_smart_venue_search`` method is replaced by a wrapper that asks the
    original search for twice as many candidates and hands them to
    ``rag_enhancer.enhance_search(query, candidates, top_k)`` for re-ranking.

    NOTE(review): the wrapper relies on the enhancer exposing an
    ``enhance_search`` method and on the original search accepting
    ``(query, limit)`` positionally - confirm both against the host class.

    Returns:
        The same ``venue_ai_instance``, modified in place.
    """
    venue_ai_instance.rag_enhancer = LightweightRAGEnhancer()

    # Capture the unwrapped search before it is replaced below.
    original_search = venue_ai_instance._smart_venue_search

    def enhanced_smart_search(query, top_k=20):
        # Over-fetch so that re-ranking has spare candidates to choose from.
        candidates = original_search(query, top_k * 2)
        reranker = venue_ai_instance.rag_enhancer
        return reranker.enhance_search(query, candidates, top_k)

    venue_ai_instance._smart_venue_search = enhanced_smart_search
    return venue_ai_instance
# Example usage
if __name__ == "__main__":
    # Smoke test: re-rank two mock venues for a sample query and print them.
    enhancer = LightweightRAGEnhancer()

    test_venues = [
        dict(
            name="Dargett Craft Beer",
            category="pub",
            summary="Armenia's first craft brewery offering artisanal beers on tap",
            rating=4.6,
            address="72 Arami Street",
        ),
        dict(
            name="Coffee Central",
            category="cafe",
            summary="Cozy coffee shop with outdoor seating",
            rating=4.2,
            address="15 Mashtots Avenue",
        ),
    ]

    results = enhancer.enhance_search("find craft beer pub", test_venues)
    separator = "-" * 30
    for result in results:
        reasons = ', '.join(result['match_reasons'])
        print(f"Venue: {result['name']}")
        print(f"Score: {result['enhanced_score']:.2f}")
        print(f"Reasons: {reasons}")
        print(separator)