Spaces:
Runtime error
Runtime error
File size: 7,385 Bytes
eeb0f9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
"""
Check RAG System Status - Verify all vector stores
Checks all 7 specialized ChromaDB databases
"""
from pathlib import Path
import sys
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
# Vector store definitions
# Registry of the vector stores this script inspects, keyed by store id.
# Every entry carries:
#   name          - label printed in the report
#   path          - store directory, relative to the current working directory
#   expected_size - rough on-disk size in MB (printed for reference only)
#   test_query    - text used for the one-result smoke-test query
VECTOR_STORES = dict(
    medical_diseases=dict(
        name='ViMedical Diseases',
        path='rag/vector_store/medical_diseases',
        expected_size=50,  # MB
        test_query='đau đầu triệu chứng',
    ),
    mental_health=dict(
        name='Mental Health',
        path='rag/vector_store/mental_health',
        expected_size=80,
        test_query='stress anxiety depression',
    ),
    nutrition=dict(
        name='Nutrition Plans',
        path='rag/vector_store/nutrition',
        expected_size=20,
        test_query='diet meal plan calories',
    ),
    vietnamese_nutrition=dict(
        name='Vietnamese Food',
        path='rag/vector_store/vietnamese_nutrition',
        expected_size=5,
        test_query='phở cơm nutrition',
    ),
    fitness=dict(
        name='Fitness Exercises',
        path='rag/vector_store/fitness',
        expected_size=10,
        test_query='gym workout exercise',
    ),
    symptom_qa=dict(
        name='Medical Q&A',
        path='rag/vector_store/symptom_qa',
        expected_size=8,
        test_query='triệu chứng bệnh',
    ),
    general_health_qa=dict(
        name='General Health Q&A',
        path='rag/vector_store/general_health_qa',
        expected_size=7,
        test_query='sức khỏe tổng quát',
    ),
)
def check_vector_store(store_info):
"""Check individual vector store"""
print(f"\n📦 {store_info['name']}")
print("-" * 50)
store_path = Path(store_info['path'])
# Check existence
if not store_path.exists():
print(f"❌ Not found: {store_info['path']}")
print(f" Reason: Directory does not exist")
return {'status': False, 'reason': 'Directory not found'}
print(f"✅ Exists: {store_info['path']}")
# Check size
total_size = sum(f.stat().st_size for f in store_path.rglob('*') if f.is_file())
size_mb = total_size / (1024 * 1024)
expected = store_info['expected_size']
print(f"📊 Size: {size_mb:.1f} MB (expected ~{expected} MB)")
if size_mb < 0.1:
print("⚠️ Database seems empty")
print(" Reason: Database size < 0.1 MB (likely not built)")
return {'status': False, 'reason': 'Database empty or not built'}
# Try to load and query
try:
import chromadb
client = chromadb.PersistentClient(path=str(store_path))
collections = client.list_collections()
if not collections:
print("⚠️ No collections found")
print(" Reason: ChromaDB has no collections")
return {'status': False, 'reason': 'No collections in database'}
collection = collections[0]
count = collection.count()
print(f"📚 Documents: {count:,} chunks")
if count == 0:
print("⚠️ Collection is empty")
print(" Reason: Collection exists but has 0 documents")
return {'status': False, 'reason': 'Collection is empty (0 documents)'}
# Test query
try:
results = collection.query(
query_texts=[store_info['test_query']],
n_results=1
)
if results and results['documents'] and results['documents'][0]:
print("✅ Query test passed")
return {'status': True, 'reason': None}
else:
print("⚠️ Query returned no results")
print(" Reason: Query executed but found no matching documents")
return {'status': False, 'reason': 'Query returned no results'}
except Exception as e:
print(f"⚠️ Query test failed: {e}")
print(f" Reason: {str(e)}")
return {'status': False, 'reason': f'Query failed: {str(e)}'}
except ImportError:
print("⚠️ ChromaDB not installed")
print(" Reason: pip install chromadb")
return {'status': False, 'reason': 'ChromaDB package not installed'}
except Exception as e:
print(f"⚠️ Error: {e}")
print(f" Reason: {str(e)}")
return {'status': False, 'reason': f'Error loading database: {str(e)}'}
def check_rag_status():
    """Run the health check for every configured vector store and print a report.

    Returns:
        True when every store in VECTOR_STORES passes its check. False when
        the base directory 'rag/vector_store' is missing or at least one
        store fails; in that case rebuild hints are printed as well.
    """
    banner = "=" * 60
    print(banner)
    print("🔍 RAG System Status Check")
    print(banner)

    # Without the base directory no store can exist, so stop early.
    store_root = Path('rag/vector_store')
    if not store_root.exists():
        print("\n❌ Vector store directory not found!")
        print(f" Expected: {store_root}")
        print("\n💡 Solution:")
        print(" bash scripts/setup_rag.sh")
        return False
    print(f"\n✅ Base directory exists: {store_root}")

    # Check the stores in declaration order, keeping each result by store id.
    outcomes = {key: check_vector_store(spec) for key, spec in VECTOR_STORES.items()}
    failures = {key: outcome for key, outcome in outcomes.items() if not outcome['status']}

    # Summary: one line per store, plus the failure reason when there is one.
    print("\n" + banner)
    print("📊 Summary")
    print(banner)
    for key, outcome in outcomes.items():
        marker = "❌" if key in failures else "✅"
        label = VECTOR_STORES[key]['name']
        print(f"{marker} {label}")
        if key in failures and outcome['reason']:
            print(f" └─ {outcome['reason']}")

    print("\n" + banner)
    print(f"Result: {len(outcomes) - len(failures)}/{len(outcomes)} databases OK")

    if not failures:
        print("\n🎉 All vector stores are ready!")
        print("\nNext steps:")
        print(" python app.py")
        print(" Open http://localhost:7860")
        print(banner)
        return True

    print("\n⚠️ Some databases are missing or have issues")
    print("\n💡 Solutions:")
    print("\n1️⃣ Quick fix (rebuild all):")
    print(" bash scripts/setup_rag.sh")
    print("\n2️⃣ Rebuild specific databases:")

    # Command that rebuilds each store; both Q&A stores share one script.
    rebuild_commands = {
        'medical_diseases': 'python data_mining/mining_vimedical.py',
        'mental_health': 'python data_mining/mining_mentalchat.py',
        'nutrition': 'python data_mining/mining_nutrition.py',
        'vietnamese_nutrition': 'python data_mining/mining_vietnamese_food.py',
        'fitness': 'python data_mining/mining_fitness.py',
        'symptom_qa': 'python data_mining/mining_medical_qa.py',
        'general_health_qa': 'python data_mining/mining_medical_qa.py'
    }
    for key, outcome in failures.items():
        label = VECTOR_STORES[key]['name']
        command = rebuild_commands.get(key, 'Unknown')
        print(f"\n ❌ {label}:")
        print(f" Reason: {outcome['reason']}")
        print(f" Fix: {command}")
    print("\n" + banner)
    return False
if __name__ == '__main__':
    # Report the outcome through the process exit code: 0 when every vector
    # store passed, 1 otherwise.
    success = check_rag_status()
    # sys.exit is used instead of the bare exit() helper: exit() is installed
    # by the site module for interactive sessions and is not available when
    # the interpreter runs without site (for example `python -S`).
    sys.exit(0 if success else 1)
|