File size: 7,385 Bytes
eeb0f9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
"""
Check RAG System Status - Verify all vector stores
Checks all 7 specialized ChromaDB databases
"""

from pathlib import Path
import sys

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

# Registry of the vector stores this script checks, keyed by store id.
# Every entry holds a display name, the store directory path, the expected
# size in MB (shown next to the measured size) and a sample query string.
VECTOR_STORES = dict(
    medical_diseases=dict(
        name='ViMedical Diseases',
        path='rag/vector_store/medical_diseases',
        expected_size=50,  # MB
        test_query='đau đầu triệu chứng',
    ),
    mental_health=dict(
        name='Mental Health',
        path='rag/vector_store/mental_health',
        expected_size=80,
        test_query='stress anxiety depression',
    ),
    nutrition=dict(
        name='Nutrition Plans',
        path='rag/vector_store/nutrition',
        expected_size=20,
        test_query='diet meal plan calories',
    ),
    vietnamese_nutrition=dict(
        name='Vietnamese Food',
        path='rag/vector_store/vietnamese_nutrition',
        expected_size=5,
        test_query='phở cơm nutrition',
    ),
    fitness=dict(
        name='Fitness Exercises',
        path='rag/vector_store/fitness',
        expected_size=10,
        test_query='gym workout exercise',
    ),
    symptom_qa=dict(
        name='Medical Q&A',
        path='rag/vector_store/symptom_qa',
        expected_size=8,
        test_query='triệu chứng bệnh',
    ),
    general_health_qa=dict(
        name='General Health Q&A',
        path='rag/vector_store/general_health_qa',
        expected_size=7,
        test_query='sức khỏe tổng quát',
    ),
)

def check_vector_store(store_info: dict) -> dict:
    """Check a single vector store and print a human-readable report.

    The checks run in order and stop at the first failure: the path exists,
    its files total at least 0.1 MB, ChromaDB can open it and lists at least
    one collection, the first listed collection holds documents, and a
    one-result test query returns a document.

    Args:
        store_info: Mapping with the keys ``name``, ``path``,
            ``expected_size`` (MB, only displayed) and ``test_query``.

    Returns:
        A dict ``{'status': bool, 'reason': str or None}``; ``reason`` is
        ``None`` only when every check passed.
    """
    print(f"\n📦 {store_info['name']}")
    print("-" * 50)

    store_path = Path(store_info['path'])

    # Check existence
    if not store_path.exists():
        print(f"❌ Not found: {store_info['path']}")
        print("   Reason: Directory does not exist")
        return {'status': False, 'reason': 'Directory not found'}

    print(f"✅ Exists: {store_info['path']}")

    # Check size: sum of every regular file below the store directory.
    total_size = sum(f.stat().st_size for f in store_path.rglob('*') if f.is_file())
    size_mb = total_size / (1024 * 1024)
    expected = store_info['expected_size']

    print(f"📊 Size: {size_mb:.1f} MB (expected ~{expected} MB)")

    if size_mb < 0.1:
        print("⚠️  Database seems empty")
        print("   Reason: Database size < 0.1 MB (likely not built)")
        return {'status': False, 'reason': 'Database empty or not built'}

    # Try to load and query
    try:
        # Imported lazily so a missing package is reported per store
        # instead of crashing the whole script at import time.
        import chromadb

        client = chromadb.PersistentClient(path=str(store_path))
        collections = client.list_collections()

        if not collections:
            print("⚠️  No collections found")
            print("   Reason: ChromaDB has no collections")
            return {'status': False, 'reason': 'No collections in database'}

        # Only the first listed collection is inspected.
        collection = collections[0]
        # ChromaDB 0.6.x returns collection *names* from list_collections()
        # rather than Collection objects; resolve the name so that count()
        # and query() are called on a real collection in either case.
        if isinstance(collection, str):
            collection = client.get_collection(collection)

        count = collection.count()
        print(f"📚 Documents: {count:,} chunks")

        if count == 0:
            print("⚠️  Collection is empty")
            print("   Reason: Collection exists but has 0 documents")
            return {'status': False, 'reason': 'Collection is empty (0 documents)'}

        # Test query
        try:
            results = collection.query(
                query_texts=[store_info['test_query']],
                n_results=1
            )
            if results and results['documents'] and results['documents'][0]:
                print("✅ Query test passed")
                return {'status': True, 'reason': None}

            print("⚠️  Query returned no results")
            print("   Reason: Query executed but found no matching documents")
            return {'status': False, 'reason': 'Query returned no results'}
        except Exception as e:
            # Any query failure is reported as a failed check, not raised.
            print(f"⚠️  Query test failed: {e}")
            print(f"   Reason: {e}")
            return {'status': False, 'reason': f'Query failed: {e}'}

    except ImportError:
        print("⚠️  ChromaDB not installed")
        print("   Reason: pip install chromadb")
        return {'status': False, 'reason': 'ChromaDB package not installed'}
    except Exception as e:
        # Covers failures while opening the store or listing/counting.
        print(f"⚠️  Error: {e}")
        print(f"   Reason: {e}")
        return {'status': False, 'reason': f'Error loading database: {e}'}

def check_rag_status():
    """Run the check on every registered vector store and print a report.

    Returns:
        True when every store in VECTOR_STORES passes its check; False when
        the base directory is missing or at least one store fails.
    """
    rule = "=" * 60

    print(rule)
    print("🔍 RAG System Status Check")
    print(rule)

    # Without the shared parent directory there is nothing to inspect.
    base_path = Path('rag/vector_store')
    if not base_path.exists():
        print("\n❌ Vector store directory not found!")
        print(f"   Expected: {base_path}")
        print("\n💡 Solution:")
        print("   bash scripts/setup_rag.sh")
        return False

    print(f"\n✅ Base directory exists: {base_path}")

    # Stores are checked in registry order, so the per-store output keeps
    # the same sequence as the registry.
    results = {
        store_id: check_vector_store(store_info)
        for store_id, store_info in VECTOR_STORES.items()
    }

    print("\n" + rule)
    print("📊 Summary")
    print(rule)

    failures = [
        (store_id, outcome)
        for store_id, outcome in results.items()
        if not outcome['status']
    ]
    total = len(results)
    passed = total - len(failures)

    # One line per store, plus the failure reason when there is one.
    for store_id, outcome in results.items():
        label = VECTOR_STORES[store_id]['name']
        if outcome['status']:
            print(f"✅ {label}")
            continue
        print(f"❌ {label}")
        if outcome['reason']:
            print(f"   └─ {outcome['reason']}")

    print("\n" + rule)
    print(f"Result: {passed}/{total} databases OK")

    if not failures:
        print("\n🎉 All vector stores are ready!")
        print("\nNext steps:")
        print("   python app.py")
        print("   Open http://localhost:7860")
        print(rule)
        return True

    print("\n⚠️  Some databases are missing or have issues")
    print("\n💡 Solutions:")
    print("\n1️⃣  Quick fix (rebuild all):")
    print("   bash scripts/setup_rag.sh")

    print("\n2️⃣  Rebuild specific databases:")

    # Command printed as the suggested fix for each store id.
    rebuild_commands = {
        'medical_diseases': 'python data_mining/mining_vimedical.py',
        'mental_health': 'python data_mining/mining_mentalchat.py',
        'nutrition': 'python data_mining/mining_nutrition.py',
        'vietnamese_nutrition': 'python data_mining/mining_vietnamese_food.py',
        'fitness': 'python data_mining/mining_fitness.py',
        'symptom_qa': 'python data_mining/mining_medical_qa.py',
        'general_health_qa': 'python data_mining/mining_medical_qa.py'
    }

    for store_id, outcome in failures:
        label = VECTOR_STORES[store_id]['name']
        command = rebuild_commands.get(store_id, 'Unknown')
        print(f"\n   ❌ {label}:")
        print(f"      Reason: {outcome['reason']}")
        print(f"      Fix: {command}")

    print("\n" + rule)
    return False


if __name__ == '__main__':
    # Exit code 0 when every vector store passed, 1 otherwise.
    # sys.exit is used rather than the exit() builtin: exit() is injected by
    # the site module for interactive use and is missing under `python -S`
    # or in frozen/embedded interpreters.
    success = check_rag_status()
    sys.exit(0 if success else 1)