Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files- requirements.txt +0 -5
- ultra_lightweight_llm.py +92 -0
- venue_ai_complete.py +31 -47
requirements.txt
CHANGED
|
@@ -4,8 +4,3 @@ numpy>=1.24.0
|
|
| 4 |
geopy>=2.3.0
|
| 5 |
scikit-learn>=1.3.0
|
| 6 |
regex>=2023.6.3
|
| 7 |
-
huggingface_hub>=0.20.0
|
| 8 |
-
transformers>=4.35.0
|
| 9 |
-
torch>=2.0.0
|
| 10 |
-
accelerate>=0.20.0
|
| 11 |
-
bitsandbytes>=0.41.0
|
|
|
|
| 4 |
geopy>=2.3.0
|
| 5 |
scikit-learn>=1.3.0
|
| 6 |
regex>=2023.6.3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ultra_lightweight_llm.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
from typing import Optional

# Catalog of ultra-lightweight model profiles, all smaller than TinyLlama
# (1.1B). Keys are short selector names; each value records the Hugging Face
# repo id plus an approximate parameter count and memory footprint.
# NOTE(review): sizes/memory figures are informational labels only — nothing
# in this module loads the weights.
ULTRA_LIGHTWEIGHT_MODELS = {
    # MobiLlama 0.5B - 60% smaller than TinyLlama, shared-FFN architecture
    "mobillama": {
        "model_name": "mbzuai-oryx/MobiLlama-05B-Chat",
        "size": "0.5B",
        "memory": "~1.5GB",
        "description": "Shared FFN design, very efficient",
    },
    # GPT-2 base - a classic, far smaller option
    "gpt2": {
        "model_name": "gpt2",  # 124M parameters
        "size": "124M",
        "memory": "~500MB",
        "description": "Classic, very small, fast",
    },
    # Sum-small - extremely tiny summarization model
    "sum_small": {
        "model_name": "omi-health/sum-small",
        "size": "~100M",
        "memory": "~400MB",
        "description": "Ultra-compact summarization model",
    },
}
|
| 30 |
+
|
| 31 |
+
class UltraLightweightLLM:
    """Template-based conversational helper tuned for HF Spaces.

    Despite the name, no model weights are ever loaded: ``setup_model``
    always selects the template mode so responses stay sub-second. The
    ``model_choice`` argument only picks which profile from
    ``ULTRA_LIGHTWEIGHT_MODELS`` is reported in the logs.
    """

    def __init__(self, model_choice="gpt2"):
        self.model_choice = model_choice
        self.model = None
        self.tokenizer = None
        # Unknown choices silently fall back to the gpt2 profile.
        self.model_info = ULTRA_LIGHTWEIGHT_MODELS.get(
            model_choice, ULTRA_LIGHTWEIGHT_MODELS["gpt2"]
        )
        self.setup_model()

    def setup_model(self):
        """Activate template mode; no download or model loading happens."""
        try:
            # For HF Spaces we use template-based responses to avoid any
            # model loading. This ensures sub-second response times.
            logging.info("Ultra-lightweight mode: Using template responses for optimal HF Spaces performance")
            logging.info(f"Selected model profile: {self.model_info['description']} ({self.model_info['size']})")
            self.model = "template_based"  # Flag for template mode
        except Exception as e:
            logging.warning(f"Model setup failed, using template mode: {e}")
            self.model = "template_based"

    def generate_response(self, venue_context, user_query, max_length=200):
        """Return an ultra-fast canned response chosen from *user_query*.

        Args:
            venue_context: Unused here; accepted for interface compatibility
                with model-backed generators.
            user_query: Free-text user message; matched case-insensitively.
            max_length: Unused; reserved for model-backed implementations.

        Returns:
            A fixed English response string for the first matching intent
            (greeting, thanks, pub/bar, restaurant, cafe, known district),
            or a generic help message.
        """
        if not self.model:
            return "I can help you find venues in Yerevan! Please specify what type of place you're looking for."

        query_lower = user_query.lower()
        # BUGFIX: match whole words instead of raw substrings. Previously
        # 'hi' matched "chinese", 'pub' matched "republic", and 'bar'
        # matched "barbecue", routing such queries to the wrong template.
        words = {w.strip(".,!?;:'\"") for w in query_lower.split()}

        # Greeting responses. 'good' alone is too broad ("good pubs"), so it
        # only counts as a greeting in "good morning/afternoon/evening/day".
        if words & {'hello', 'hi', 'hey'} or query_lower.startswith(
            ('good morning', 'good afternoon', 'good evening', 'good day')
        ):
            return "Hello! I'm your Yerevan venue assistant. I can help you find great pubs, bars, restaurants, and cafes in Yerevan. What are you looking for today?"

        # Thank-you responses ('thank', 'thanks', 'thankyou', ...).
        if any(w.startswith('thank') for w in words):
            return "You're welcome! I'm here to help you discover the best venues in Yerevan. Feel free to ask for more recommendations!"

        # Venue-specific quick responses (singular and plural forms).
        if words & {'pub', 'pubs', 'bar', 'bars'}:
            return "Great choice! Yerevan has amazing pubs and bars. I can help you find places with craft beer, live music, outdoor seating, or cozy atmospheres. Any specific preferences?"

        if words & {'restaurant', 'restaurants', 'food'}:
            return "Yerevan's restaurant scene is fantastic! From traditional Armenian cuisine to international options, I can help you find the perfect dining spot. What type of cuisine interests you?"

        if words & {'cafe', 'cafes', 'coffee'}:
            return "Yerevan has wonderful cafes! Whether you want a quiet study spot, artisanal coffee, or a place to meet friends, I can suggest great options. Any location preferences?"

        # Location-based responses for well-known Yerevan districts.
        if words & {'opera', 'republic', 'cascade', 'northern'}:
            return "Excellent area choice! That's one of Yerevan's most vibrant districts with lots of great venues nearby. Let me help you find specific recommendations in that area."

        # Default helpful response.
        return "I'm here to help you discover Yerevan's best venues! Tell me what you're looking for - pubs, restaurants, cafes, or something specific, and I'll provide personalized recommendations."
|
| 84 |
+
|
| 85 |
+
def get_model_info():
    """Report the active response mode plus the available model profiles."""
    info = {
        "current_mode": "Ultra-lightweight template system",
        "response_time": "~100ms",
        "memory_usage": "~50MB",
        "models_available": ULTRA_LIGHTWEIGHT_MODELS,
    }
    return info
|
venue_ai_complete.py
CHANGED
|
@@ -30,13 +30,14 @@ except Exception as e:
|
|
| 30 |
from lightweight_rag import LightweightRAGEnhancer
|
| 31 |
|
| 32 |
|
|
|
|
| 33 |
try:
|
| 34 |
-
from
|
| 35 |
-
|
| 36 |
-
logger.info("
|
| 37 |
except ImportError as e:
|
| 38 |
-
logger.warning(f"
|
| 39 |
-
|
| 40 |
|
| 41 |
class CompleteYerevanVenueAI:
|
| 42 |
"""
|
|
@@ -765,12 +766,19 @@ class CompleteYerevanVenueAI:
|
|
| 765 |
armenian_category = category_map.get(category, category)
|
| 766 |
info_parts.append(f"🏷️ {armenian_category}")
|
| 767 |
|
| 768 |
-
# Add metadata features
|
| 769 |
features = []
|
| 770 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 771 |
if venue.get('serves_cocktails'): features.append("կոկտեյլ")
|
| 772 |
if venue.get('live_music'): features.append("կենդանի երաժշտություն")
|
| 773 |
if venue.get('outdoor_seating'): features.append("բացօթյա նստարան")
|
|
|
|
|
|
|
|
|
|
| 774 |
if features:
|
| 775 |
info_parts.append(f"✨ {', '.join(features)}")
|
| 776 |
|
|
@@ -803,14 +811,19 @@ class CompleteYerevanVenueAI:
|
|
| 803 |
if venue.get('category'):
|
| 804 |
info_parts.append(f"🏷️ {venue['category']}")
|
| 805 |
|
| 806 |
-
# Add metadata features
|
| 807 |
features = []
|
| 808 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
if venue.get('serves_cocktails'): features.append("cocktails")
|
| 810 |
if venue.get('live_music'): features.append("live music")
|
| 811 |
if venue.get('outdoor_seating'): features.append("outdoor seating")
|
| 812 |
if venue.get('good_for_date_night'): features.append("romantic")
|
| 813 |
if venue.get('good_for_groups'): features.append("good for groups")
|
|
|
|
| 814 |
if features:
|
| 815 |
info_parts.append(f"✨ {', '.join(features)}")
|
| 816 |
|
|
@@ -866,8 +879,8 @@ class CompleteYerevanVenueAI:
|
|
| 866 |
# Handle venue queries with the existing logic
|
| 867 |
location_context = self._extract_enhanced_location_context(user_query)
|
| 868 |
|
| 869 |
-
# Perform venue search
|
| 870 |
-
venues = self._smart_venue_search(user_query, top_k=
|
| 871 |
|
| 872 |
# Filter venues
|
| 873 |
filtered_venues = self._filter_venues(venues, min_rating, price_range, max_distance, location_context)
|
|
@@ -890,46 +903,17 @@ class CompleteYerevanVenueAI:
|
|
| 890 |
}
|
| 891 |
|
| 892 |
def _initialize_conversational_llm(self):
|
| 893 |
-
"""Initialize
|
| 894 |
-
|
| 895 |
-
if LIGHTWEIGHT_LLM_AVAILABLE:
|
| 896 |
try:
|
| 897 |
-
logger.info("Initializing lightweight conversational
|
| 898 |
-
self.conversational_llm =
|
| 899 |
-
logger.info("Successfully initialized lightweight conversational
|
| 900 |
return
|
| 901 |
except Exception as e:
|
| 902 |
-
logger.warning(f"Failed to initialize lightweight LLM: {e}")
|
| 903 |
-
|
| 904 |
-
# Legacy llama-cpp fallback (if available)
|
| 905 |
-
if LLAMA_CPP_AVAILABLE:
|
| 906 |
-
try:
|
| 907 |
-
from huggingface_hub import hf_hub_download
|
| 908 |
-
logger.info("Downloading TinyLlama model from Hugging Face Hub...")
|
| 909 |
-
|
| 910 |
-
model_path = hf_hub_download(
|
| 911 |
-
repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
|
| 912 |
-
filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
|
| 913 |
-
cache_dir="./model_cache"
|
| 914 |
-
)
|
| 915 |
-
|
| 916 |
-
from llama_cpp import Llama
|
| 917 |
-
self.conversational_llm = Llama(
|
| 918 |
-
model_path=model_path,
|
| 919 |
-
n_ctx=1024,
|
| 920 |
-
n_threads=2,
|
| 921 |
-
n_gpu_layers=0,
|
| 922 |
-
verbose=False,
|
| 923 |
-
use_mmap=True,
|
| 924 |
-
use_mlock=False
|
| 925 |
-
)
|
| 926 |
-
logger.info("Successfully loaded legacy TinyLlama model")
|
| 927 |
-
return
|
| 928 |
-
|
| 929 |
-
except Exception as e:
|
| 930 |
-
logger.warning(f"Failed to initialize legacy conversational LLM: {e}")
|
| 931 |
|
| 932 |
-
logger.info("
|
| 933 |
self.conversational_llm = None
|
| 934 |
|
| 935 |
def _add_to_conversation_history(self, user_message: str, ai_response: str):
|
|
|
|
| 30 |
from lightweight_rag import LightweightRAGEnhancer
|
| 31 |
|
| 32 |
|
| 33 |
+
# Ultra-lightweight LLM for optimal HF Spaces performance
|
| 34 |
try:
|
| 35 |
+
from ultra_lightweight_llm import UltraLightweightLLM
|
| 36 |
+
ULTRA_LIGHTWEIGHT_LLM_AVAILABLE = True
|
| 37 |
+
logger.info("Ultra-lightweight LLM available for optimal performance")
|
| 38 |
except ImportError as e:
|
| 39 |
+
logger.warning(f"Ultra-lightweight LLM not available: {e}")
|
| 40 |
+
ULTRA_LIGHTWEIGHT_LLM_AVAILABLE = False
|
| 41 |
|
| 42 |
class CompleteYerevanVenueAI:
|
| 43 |
"""
|
|
|
|
| 766 |
armenian_category = category_map.get(category, category)
|
| 767 |
info_parts.append(f"🏷️ {armenian_category}")
|
| 768 |
|
| 769 |
+
# Add metadata features (skip common ones for pubs/bars)
|
| 770 |
features = []
|
| 771 |
+
venue_category = venue.get('category', '').lower()
|
| 772 |
+
|
| 773 |
+
# Only show beer for non-pub/bar venues
|
| 774 |
+
if venue.get('serves_beer') and venue_category not in ['pub', 'bar']:
|
| 775 |
+
features.append("գարեջուր")
|
| 776 |
if venue.get('serves_cocktails'): features.append("կոկտեյլ")
|
| 777 |
if venue.get('live_music'): features.append("կենդանի երաժշտություն")
|
| 778 |
if venue.get('outdoor_seating'): features.append("բացօթյա նստարան")
|
| 779 |
+
if venue.get('good_for_date_night'): features.append("ռոմանտիկ")
|
| 780 |
+
if venue.get('good_for_groups'): features.append("խմբերի համար")
|
| 781 |
+
|
| 782 |
if features:
|
| 783 |
info_parts.append(f"✨ {', '.join(features)}")
|
| 784 |
|
|
|
|
| 811 |
if venue.get('category'):
|
| 812 |
info_parts.append(f"🏷️ {venue['category']}")
|
| 813 |
|
| 814 |
+
# Add metadata features (skip common ones for pubs/bars)
|
| 815 |
features = []
|
| 816 |
+
venue_category = venue.get('category', '').lower()
|
| 817 |
+
|
| 818 |
+
# Only show beer for non-pub/bar venues
|
| 819 |
+
if venue.get('serves_beer') and venue_category not in ['pub', 'bar']:
|
| 820 |
+
features.append("serves beer")
|
| 821 |
if venue.get('serves_cocktails'): features.append("cocktails")
|
| 822 |
if venue.get('live_music'): features.append("live music")
|
| 823 |
if venue.get('outdoor_seating'): features.append("outdoor seating")
|
| 824 |
if venue.get('good_for_date_night'): features.append("romantic")
|
| 825 |
if venue.get('good_for_groups'): features.append("good for groups")
|
| 826 |
+
|
| 827 |
if features:
|
| 828 |
info_parts.append(f"✨ {', '.join(features)}")
|
| 829 |
|
|
|
|
| 879 |
# Handle venue queries with the existing logic
|
| 880 |
location_context = self._extract_enhanced_location_context(user_query)
|
| 881 |
|
| 882 |
+
# Perform venue search (full search for comprehensive results)
|
| 883 |
+
venues = self._smart_venue_search(user_query, top_k=100)
|
| 884 |
|
| 885 |
# Filter venues
|
| 886 |
filtered_venues = self._filter_venues(venues, min_rating, price_range, max_distance, location_context)
|
|
|
|
| 903 |
}
|
| 904 |
|
| 905 |
def _initialize_conversational_llm(self):
    """Initialize ultra-lightweight conversational system"""
    # Prefer the ultra-lightweight helper when its module imported cleanly.
    if ULTRA_LIGHTWEIGHT_LLM_AVAILABLE:
        try:
            logger.info("Initializing ultra-lightweight conversational system...")
            self.conversational_llm = UltraLightweightLLM()
            logger.info("Successfully initialized ultra-lightweight conversational system")
            return
        except Exception as exc:
            logger.warning(f"Failed to initialize ultra-lightweight LLM: {exc}")

    # Fallback: no LLM object at all; callers use template-based responses.
    logger.info("Using template-based responses for optimal performance")
    self.conversational_llm = None
|