Commit cd8c2bb
Parent(s): f6c853c
Push existing cognitive tutor project

Files changed:
- README.md +79 -12
- __pycache__/cognitive_llm.cpython-313.pyc +0 -0
- cog_tutor/__init__.py +3 -0
- cog_tutor/__pycache__/__init__.cpython-313.pyc +0 -0
- cog_tutor/__pycache__/adaptive_tutor.cpython-313.pyc +0 -0
- cog_tutor/__pycache__/cache.cpython-313.pyc +0 -0
- cog_tutor/__pycache__/inference.cpython-313.pyc +0 -0
- cog_tutor/__pycache__/knowledge_tracing.cpython-313.pyc +0 -0
- cog_tutor/__pycache__/prompts.cpython-313.pyc +0 -0
- cog_tutor/__pycache__/schemas.cpython-313.pyc +0 -0
- cog_tutor/__pycache__/validation.cpython-313.pyc +0 -0
- cog_tutor/adapters/__init__.py +2 -0
- cog_tutor/adapters/__pycache__/__init__.cpython-313.pyc +0 -0
- cog_tutor/adapters/__pycache__/qwen_adapter.cpython-313.pyc +0 -0
- cog_tutor/adapters/qwen_adapter.py +46 -0
- cog_tutor/adaptive_tutor.py +316 -0
- cog_tutor/cache.py +25 -0
- cog_tutor/inference.py +144 -0
- cog_tutor/knowledge_tracing.py +311 -0
- cog_tutor/prompts.py +60 -0
- cog_tutor/rag/__init__.py +5 -0
- cog_tutor/rag/__pycache__/__init__.cpython-313.pyc +0 -0
- cog_tutor/rag/__pycache__/knowledge_base.cpython-313.pyc +0 -0
- cog_tutor/rag/__pycache__/rag_prompts.cpython-313.pyc +0 -0
- cog_tutor/rag/__pycache__/retriever.cpython-313.pyc +0 -0
- cog_tutor/rag/knowledge_base.py +173 -0
- cog_tutor/rag/rag_prompts.py +88 -0
- cog_tutor/rag/retriever.py +118 -0
- cog_tutor/schemas.py +107 -0
- cog_tutor/validation.py +10 -0
- cognitive_llm.py +117 -0
- knowledge_base.sqlite +0 -0
- knowledge_tracing.sqlite +0 -0
- requirements.txt +6 -0
- research_output.json +89 -0
- test_cog_tutor.py +11 -0
- test_rag_tutor.py +191 -0
README.md
CHANGED
@@ -1,12 +1,79 @@

# Cognitive LLM with Qwen3

A simple implementation of a cognitive language model using Qwen3-7B-Instruct from Hugging Face.

## Features

- Easy-to-use Python interface for Qwen3-7B-Instruct
- Optimized for both CUDA and CPU
- 4-bit quantization for reduced memory usage
- Interactive command-line interface
- Configurable generation parameters

## Prerequisites

- Python 3.8 or higher
- PyTorch (installed via requirements.txt)
- CUDA-compatible GPU (recommended) or CPU

## Installation

1. Clone this repository
2. Install the required packages:

```bash
pip install -r requirements.txt
```

## Usage

1. Run the interactive CLI:

```bash
python cognitive_llm.py
```

2. Enter your prompt at the `>>` prompt and press Enter
3. Type `quit` or `exit` to leave the program

### Example Usage

```python
from cognitive_llm import CognitiveLLM

# Initialize the LLM
llm = CognitiveLLM()

# Generate text
response = llm.generate(
    "Explain quantum computing in simple terms.",
    max_new_tokens=256,
    temperature=0.7
)
print(response)
```

## Configuration

You can customize the model and generation parameters:

```python
llm = CognitiveLLM(
    model_name="Qwen/Qwen3-7B-Instruct",  # Model name or path
    device="cuda"  # 'cuda', 'mps', or 'cpu'
)

# Generate with custom parameters
response = llm.generate(
    "Your prompt here",
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    do_sample=True
)
```

## Notes

- The first run will download the model weights (several GB)
- A CUDA-compatible GPU is recommended for reasonable performance
- Ensure you have sufficient disk space for the model weights
- An internet connection is required for the initial download
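The README lists 4-bit quantization as a feature, but the diff for `cognitive_llm.py` itself is not shown on this page. Below is a minimal, hypothetical sketch of how 4-bit loading is commonly wired up with `transformers` and `bitsandbytes`; the actual `CognitiveLLM` implementation in this commit may differ.

```python
# Hypothetical sketch only; cognitive_llm.py is not shown in this diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Qwen/Qwen3-7B-Instruct"  # model id taken from the README

# Store weights in 4 bits, compute in fp16, to reduce memory usage.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # places layers on GPU if available, else CPU
)
```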
__pycache__/cognitive_llm.cpython-313.pyc
ADDED
Binary file (4.35 kB)
cog_tutor/__init__.py
ADDED
@@ -0,0 +1,3 @@

from .inference import run_prompt

__all__ = ['run_prompt']
cog_tutor/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (242 Bytes)

cog_tutor/__pycache__/adaptive_tutor.cpython-313.pyc
ADDED
Binary file (12.9 kB)

cog_tutor/__pycache__/cache.cpython-313.pyc
ADDED
Binary file (2.11 kB)

cog_tutor/__pycache__/inference.cpython-313.pyc
ADDED
Binary file (5.51 kB)

cog_tutor/__pycache__/knowledge_tracing.cpython-313.pyc
ADDED
Binary file (14.6 kB)

cog_tutor/__pycache__/prompts.cpython-313.pyc
ADDED
Binary file (3.45 kB)

cog_tutor/__pycache__/schemas.cpython-313.pyc
ADDED
Binary file (7.26 kB)

cog_tutor/__pycache__/validation.cpython-313.pyc
ADDED
Binary file (735 Bytes)
cog_tutor/adapters/__init__.py
ADDED
@@ -0,0 +1,2 @@

from .qwen_adapter import QwenAdapter
__all__ = ["QwenAdapter"]
cog_tutor/adapters/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (255 Bytes)

cog_tutor/adapters/__pycache__/qwen_adapter.cpython-313.pyc
ADDED
Binary file (2.1 kB)
cog_tutor/adapters/qwen_adapter.py
ADDED
@@ -0,0 +1,46 @@

from typing import Optional, List
from cognitive_llm import CognitiveLLM

class QwenAdapter:
    def __init__(self, model_name: str = "Qwen/Qwen3-7B-Instruct"):
        # Store model name for lazy initialization
        self.model_name = model_name
        self.client = None

    def _initialize_client(self):
        # Lazy initialization of the CognitiveLLM client
        if self.client is None:
            self.client = CognitiveLLM(model_name=self.model_name)

    def generate(
        self,
        system: str,
        user: str,
        *,
        temperature: float = 0.0,
        max_tokens: int = 512,
        stop: Optional[List[str]] = None,
        seed: Optional[int] = None,
    ) -> str:
        # Initialize client if not already done
        self._initialize_client()

        # Compose a strict prompt: JSON only, no commentary
        prompt = f"System: {system}\nReturn JSON only. No commentary.\nInput: {user}"

        # Use the existing generate method with appropriate parameters
        text = self.client.generate(
            prompt,
            max_new_tokens=max_tokens,
            temperature=max(0.1, temperature),
            top_p=0.9,
            do_sample=temperature > 0.3
        )

        if stop:
            for s in stop:
                i = text.find(s)
                if i != -1:
                    text = text[:i]
                    break
        return text.strip()
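A short usage sketch of the adapter defined above; the prompt strings and stop sequence are illustrative, and the first call loads the underlying model lazily.

```python
from cog_tutor.adapters.qwen_adapter import QwenAdapter

adapter = QwenAdapter()  # CognitiveLLM is not created until the first generate()
text = adapter.generate(
    system="You estimate mastery of a single skill. Return JSON only.",
    user='{"responses": [{"correct": true, "hints": 0}]}',
    temperature=0.2,
    max_tokens=128,
    stop=["\n\n"],  # output is truncated at the first stop string, if present
)
print(text)
```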
cog_tutor/adaptive_tutor.py
ADDED
@@ -0,0 +1,316 @@

import json
import numpy as np
from typing import Dict, List, Any, Optional
from datetime import datetime
from .knowledge_tracing import KnowledgeTracer, ItemResponse, SkillMastery
from .rag.knowledge_base import KnowledgeBase
from .rag.retriever import KnowledgeRetriever
from .rag.rag_prompts import RAGEnhancedPrompts
from .inference import run_prompt

class AdaptiveTutor:
    """RAG-enhanced adaptive tutoring system with knowledge tracing."""

    def __init__(self, user_id: str = "default"):
        self.user_id = user_id
        self.knowledge_tracer = KnowledgeTracer()
        self.knowledge_base = KnowledgeBase()
        self.retriever = KnowledgeRetriever(self.knowledge_base)
        self.rag_prompts = RAGEnhancedPrompts()

        # Session tracking
        self.session_start = datetime.now()
        self.session_responses = []

    def process_student_response(self, item_id: str, skill: str, question: str,
                                 user_answer: str, correct_answer: str,
                                 response_time: float, hints_used: int = 0) -> Dict[str, Any]:
        """Process a student response and update knowledge tracing."""

        # Determine correctness
        is_correct = self._evaluate_answer(user_answer, correct_answer)

        # Create item response
        difficulty = self._estimate_item_difficulty(skill, question)
        response = ItemResponse(
            item_id=item_id,
            skill=skill,
            correct=is_correct,
            response_time=response_time,
            hints_used=hints_used,
            difficulty=difficulty,
            timestamp=datetime.now()
        )

        # Update knowledge tracing
        new_theta = self.knowledge_tracer.update_mastery(response)
        mastery_prob = self.knowledge_tracer.get_mastery_probability(skill)

        # Generate RAG-enhanced explanation
        explanation = self.generate_rag_explanation(question, user_answer, correct_answer)

        # Track session
        self.session_responses.append(response)

        return {
            "correct": is_correct,
            "mastery_theta": new_theta,
            "mastery_probability": mastery_prob,
            "explanation": explanation,
            "next_recommendations": self.get_next_items(skill)
        }

    def generate_rag_explanation(self, question: str, user_answer: str,
                                 correct_answer: str) -> Dict[str, Any]:
        """Generate explanation with knowledge grounding."""

        # Retrieve relevant knowledge
        knowledge_context = self.retriever.get_explanation_with_citations(
            question, user_answer, correct_answer
        )

        # Prepare input for RAG-enhanced prompt
        prompt_input = {
            "question": question,
            "user_answer": user_answer,
            "solution": correct_answer,
            "facts": knowledge_context["facts"],
            "sources": knowledge_context["sources"]
        }

        # Generate explanation using RAG prompt
        try:
            explanation = run_prompt(
                "item_explanation_with_rag",
                prompt_input,
                model_id="Qwen/Qwen3-7B-Instruct"
            )

            # Add citations from knowledge base
            explanation["knowledge_citations"] = knowledge_context["citations"]
            explanation["fact_sources"] = knowledge_context["facts"]

        except Exception as e:
            # Fallback to basic explanation
            explanation = {
                "hint": "Review the problem steps carefully.",
                "guided": "Compare your answer with the correct solution.",
                "full": f"The correct answer is {correct_answer}. Please review the method.",
                "knowledge_citations": [],
                "fact_sources": []
            }

        return explanation

    def generate_adaptive_hints(self, question: str, hint_level: int = 1) -> List[str]:
        """Generate contextual hints using RAG."""
        return self.retriever.get_contextual_hints(question, hint_level)

    def generate_adaptive_question(self, skill: str, difficulty: Optional[float] = None) -> Dict[str, Any]:
        """Generate an adaptive question based on current mastery."""

        if difficulty is None:
            mastery = self.knowledge_tracer.get_mastery_probability(skill)
            difficulty = 1.0 - mastery  # Inverse relationship

        # Retrieve relevant knowledge for the skill
        knowledge_items = self.knowledge_base.retrieve_by_skill(skill, limit=3)

        # Prepare input for question generation
        prompt_input = {
            "skill": skill,
            "mastery_level": 1.0 - difficulty,
            "knowledge_content": [item["content"] for item in knowledge_items],
            "difficulty": difficulty
        }

        try:
            question = run_prompt(
                "adaptive_question_generation",
                prompt_input,
                model_id="Qwen/Qwen3-7B-Instruct"
            )

            # Add knowledge citations
            question["knowledge_sources"] = [item["id"] for item in knowledge_items]

        except Exception as e:
            # Fallback question template
            question = {
                "question": f"Practice problem for {skill} at difficulty {difficulty:.2f}",
                "answer": "Answer to be determined",
                "explanation": "Explanation to be provided",
                "difficulty": difficulty,
                "skill": skill,
                "knowledge_sources": []
            }

        return question

    def get_next_items(self, current_skill: str = None, max_items: int = 5) -> List[Dict[str, Any]]:
        """Get next item recommendations using entropy-based scheduling."""

        # Generate candidate items
        candidates = []
        skills = ["algebra_simplification", "linear_equations", "fraction_operations", "ratios"]

        for skill in skills:
            for i in range(3):  # 3 items per skill
                mastery = self.knowledge_tracer.get_mastery_probability(skill)
                difficulty = 1.0 - mastery + np.random.normal(0, 0.1)  # Add noise

                candidates.append({
                    "item_id": f"{skill}_{i}",
                    "skill": skill,
                    "difficulty": np.clip(difficulty, 0.1, 0.9),
                    "type": "practice"
                })

        # Get recommendations from knowledge tracer
        recommendations = self.knowledge_tracer.get_next_item_recommendations(
            candidates, max_items
        )

        # Add adaptive questions for top recommendations
        for rec in recommendations:
            if rec["type"] == "practice":
                adaptive_q = self.generate_adaptive_question(rec["skill"], rec["difficulty"])
                rec.update(adaptive_q)

        return recommendations

    def evaluate_mastery_with_irt(self, skill: str) -> Dict[str, Any]:
        """Evaluate mastery using IRT parameters."""

        # Get recent responses for the skill
        skill_responses = [r for r in self.session_responses if r.skill == skill]

        if not skill_responses:
            # Get from database if no session responses
            mastery_prob = self.knowledge_tracer.get_mastery_probability(skill)
            return {
                "theta": 0.0,
                "sem": 1.0,
                "mastery": mastery_prob,
                "confidence_interval": [-1.96, 1.96]
            }

        # Prepare input for IRT evaluation
        responses_data = []
        for r in skill_responses[-10:]:  # Last 10 responses
            responses_data.append({
                "correct": r.correct,
                "difficulty": r.difficulty,
                "response_time": r.response_time,
                "hints": r.hints_used
            })

        prompt_input = {
            "skill": skill,
            "responses": responses_data
        }

        try:
            irt_result = run_prompt(
                "mastery_diagnostic_with_irt",
                prompt_input,
                model_id="Qwen/Qwen3-7B-Instruct"
            )
            return irt_result
        except:
            # Fallback to knowledge tracer estimates
            if skill in self.knowledge_tracer.skill_masteries:
                mastery = self.knowledge_tracer.skill_masteries[skill]
                return {
                    "theta": mastery.theta,
                    "sem": mastery.sem,
                    "mastery": self.knowledge_tracer.get_mastery_probability(skill),
                    "confidence_interval": [
                        mastery.theta - 1.96 * mastery.sem,
                        mastery.theta + 1.96 * mastery.sem
                    ]
                }
            else:
                return {
                    "theta": 0.0,
                    "sem": 1.0,
                    "mastery": 0.5,
                    "confidence_interval": [-1.96, 1.96]
                }

    def get_research_metrics(self) -> Dict[str, Any]:
        """Get comprehensive research metrics for evaluation."""

        # Basic session metrics
        session_duration = (datetime.now() - self.session_start).total_seconds()
        total_responses = len(self.session_responses)
        correct_responses = sum(1 for r in self.session_responses if r.correct)

        # Get detailed metrics from knowledge tracer
        tracer_metrics = self.knowledge_tracer.get_research_metrics()

        # Calculate additional session-based metrics
        if total_responses > 0:
            session_accuracy = correct_responses / total_responses
            avg_session_time = np.mean([r.response_time for r in self.session_responses])
            hints_per_response = np.mean([r.hints_used for r in self.session_responses])
        else:
            session_accuracy = 0.0
            avg_session_time = 0.0
            hints_per_response = 0.0

        # Learning gain calculation
        if len(self.session_responses) >= 10:
            early_accuracy = sum(1 for r in self.session_responses[:5] if r.correct) / 5
            late_accuracy = sum(1 for r in self.session_responses[-5:] if r.correct) / 5
            session_learning_gain = late_accuracy - early_accuracy
        else:
            session_learning_gain = 0.0

        # Combine all metrics
        research_metrics = {
            "session_metrics": {
                "duration_seconds": session_duration,
                "total_responses": total_responses,
                "accuracy": session_accuracy,
                "avg_response_time": avg_session_time,
                "hints_per_response": hints_per_response,
                "learning_gain": session_learning_gain
            },
            "cumulative_metrics": tracer_metrics,
            "knowledge_tracing": {
                "tracked_skills": len(self.knowledge_tracer.skill_masteries),
                "skill_masteries": {
                    skill: {
                        "theta": mastery.theta,
                        "mastery_prob": self.knowledge_tracer.get_mastery_probability(skill),
                        "practice_count": mastery.practice_count
                    }
                    for skill, mastery in self.knowledge_tracer.skill_masteries.items()
                }
            }
        }

        return research_metrics

    def _evaluate_answer(self, user_answer: str, correct_answer: str) -> bool:
        """Evaluate if user answer is correct."""
        # Simple string comparison - can be enhanced with semantic matching
        return user_answer.strip().lower() == correct_answer.strip().lower()

    def _estimate_item_difficulty(self, skill: str, question: str) -> float:
        """Estimate item difficulty based on skill and question complexity."""
        # Base difficulty on skill type
        skill_difficulties = {
            "algebra_simplification": 0.3,
            "linear_equations": 0.5,
            "fraction_operations": 0.6,
            "ratios": 0.5
        }

        base_difficulty = skill_difficulties.get(skill, 0.5)

        # Adjust based on question length (proxy for complexity)
        length_factor = min(len(question) / 100.0, 0.3)

        return np.clip(base_difficulty + length_factor, 0.1, 0.9)
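A hedged sketch of one turn of the tutoring loop built on `process_student_response`; the item and answer values are illustrative, and the call will invoke the underlying model (falling back to the templated responses above if generation fails).

```python
from cog_tutor.adaptive_tutor import AdaptiveTutor

tutor = AdaptiveTutor(user_id="demo")
result = tutor.process_student_response(
    item_id="algebra_simplification_0",
    skill="algebra_simplification",
    question="Simplify 3x + 2x.",
    user_answer="5x",
    correct_answer="5x",
    response_time=12.5,  # seconds
    hints_used=0,
)
print(result["correct"], round(result["mastery_probability"], 2))
for rec in result["next_recommendations"]:
    print(rec["item_id"], round(rec["score"], 3))
```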
cog_tutor/cache.py
ADDED
@@ -0,0 +1,25 @@

import hashlib
import sqlite3
from pathlib import Path
from typing import Optional

_DB = Path(__file__).with_name('cog_cache.sqlite')

def _conn():
    con = sqlite3.connect(str(_DB))
    con.execute('CREATE TABLE IF NOT EXISTS kv (k TEXT PRIMARY KEY, v TEXT)')
    return con

def make_key(*parts) -> str:
    raw = '\u241f'.join(str(p) for p in parts)
    return hashlib.sha256(raw.encode('utf-8')).hexdigest()

def get(key: str) -> Optional[str]:
    with _conn() as con:
        cur = con.execute('SELECT v FROM kv WHERE k=?', (key,))
        row = cur.fetchone()
        return row[0] if row else None

def set(key: str, value: str) -> None:
    with _conn() as con:
        con.execute('REPLACE INTO kv (k, v) VALUES (?, ?)', (key, value))
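A minimal sketch of the cache's intended call pattern (this is how `inference.py` below uses it):

```python
from cog_tutor import cache

# Keys are SHA-256 digests over the joined key parts.
key = cache.make_key("item_explanation", '{"question": "..."}', "Qwen/Qwen3-7B-Instruct", 0.2)
if cache.get(key) is None:
    cache.set(key, '{"hint": "...", "guided": "...", "full": "..."}')
print(cache.get(key))
```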
cog_tutor/inference.py
ADDED
@@ -0,0 +1,144 @@

import json
from typing import Dict, Any
from . import prompts
from .schemas import (
    ItemExplanationInput, ItemExplanationOutput,
    MasteryDiagnosticInput, MasteryDiagnosticOutput,
    NextItemSelectorInput, NextItemSelectorOutput,
    SkillFeedbackInput, SkillFeedbackOutput,
    HintGenerationInput, HintGenerationOutput,
    ReflectionInput, ReflectionOutput,
    InstructorInsightInput, InstructorInsightRow,
    ExplanationCompressionInput, ExplanationCompressionOutput,
    QuestionAuthoringInput, QuestionAuthoringOutput,
    ToneNormalizerInput, ToneNormalizerOutput,
)
from .validation import parse_and_validate
from .cache import make_key, get as cache_get, set as cache_set
from .adapters.qwen_adapter import QwenAdapter

PRESETS = {
    'item_explanation': dict(temperature=0.2, max_tokens=256),
    'mastery_diagnostic': dict(temperature=0.2, max_tokens=128),
    'next_item_selector': dict(temperature=0.2, max_tokens=128),
    'skill_feedback': dict(temperature=0.3, max_tokens=256),
    'hint_generation': dict(temperature=0.6, max_tokens=200),
    'reflection': dict(temperature=0.3, max_tokens=120),
    'instructor_insight': dict(temperature=0.2, max_tokens=160),
    'explanation_compression': dict(temperature=0.2, max_tokens=80),
    'question_authoring': dict(temperature=0.6, max_tokens=400),
    'tone_normalizer': dict(temperature=0.2, max_tokens=60),
}

SYSTEMS = {
    'item_explanation': prompts.item_explanation,
    'mastery_diagnostic': prompts.mastery_diagnostic,
    'next_item_selector': prompts.next_item_selector,
    'skill_feedback': prompts.skill_feedback,
    'hint_generation': prompts.hint_generation,
    'reflection': prompts.reflection,
    'instructor_insight': prompts.instructor_insight,
    'explanation_compression': prompts.explanation_compression,
    'question_authoring': prompts.question_authoring,
    'tone_normalizer': prompts.tone_normalizer,
}

INPUT_MODELS = {
    'item_explanation': ItemExplanationInput,
    'mastery_diagnostic': MasteryDiagnosticInput,
    'next_item_selector': NextItemSelectorInput,
    'skill_feedback': SkillFeedbackInput,
    'hint_generation': HintGenerationInput,
    'reflection': ReflectionInput,
    'instructor_insight': InstructorInsightInput,
    'explanation_compression': ExplanationCompressionInput,
    'question_authoring': QuestionAuthoringInput,
    'tone_normalizer': ToneNormalizerInput,
}

OUTPUT_MODELS = {
    'item_explanation': ItemExplanationOutput,
    'mastery_diagnostic': MasteryDiagnosticOutput,
    'next_item_selector': NextItemSelectorOutput,
    'skill_feedback': SkillFeedbackOutput,
    'hint_generation': HintGenerationOutput,
    'reflection': ReflectionOutput,
    'instructor_insight': InstructorInsightRow,  # list validated separately
    'explanation_compression': ExplanationCompressionOutput,
    'question_authoring': QuestionAuthoringOutput,
    'tone_normalizer': ToneNormalizerOutput,
}

_adapter = None
SPECIAL_CACHE_KEYS = {'item_explanation', 'hint_generation'}


def _get_adapter(model_id: str) -> QwenAdapter:
    global _adapter
    if _adapter is None:
        _adapter = QwenAdapter(model_name=model_id)
    return _adapter

def _cache_key(prompt_name: str, input_data: Dict[str, Any], model_id: str, temperature: float) -> str:
    special = None
    if prompt_name in SPECIAL_CACHE_KEYS:
        if prompt_name == 'item_explanation':
            q = input_data.get('question', '')
            ua = input_data.get('user_answer', '')
            special = f"{q}\u241f{ua}"
        elif prompt_name == 'hint_generation':
            q = input_data.get('question', '')
            special = q
    base = json.dumps(input_data, sort_keys=True)
    parts = [prompt_name, base, model_id, temperature, special or '-']
    return make_key(*parts)


def run_prompt(prompt_name: str, input_payload: Dict[str, Any], *, model_id: str = 'Qwen/Qwen3-7B-Instruct', seed: int = 42) -> Any:
    if prompt_name not in PRESETS:
        raise ValueError(f'Unknown prompt: {prompt_name}')

    input_model = INPUT_MODELS[prompt_name]
    parsed_input = input_model.parse_obj(input_payload)

    preset = PRESETS[prompt_name]
    ckey = _cache_key(prompt_name, parsed_input.dict(by_alias=True), model_id, preset['temperature'])
    cached = cache_get(ckey)
    if cached is not None:
        return json.loads(cached)

    # Get adapter with lazy initialization
    adapter = _get_adapter(model_id)

    system = SYSTEMS[prompt_name]()
    user = json.dumps(parsed_input.dict(by_alias=True), ensure_ascii=False)

    text = adapter.generate(
        system=system,
        user=f"Return JSON only. No commentary.\nInput: {user}",
        temperature=preset['temperature'],
        max_tokens=preset['max_tokens'],
        stop=None,
        seed=seed,
    )

    if prompt_name == 'instructor_insight':
        data = json.loads(text)
        if not isinstance(data, list):
            raise ValueError('Expected a JSON array')
        from .schemas import InstructorInsightRow
        validated = [InstructorInsightRow.parse_obj(x).dict() for x in data]
        out_obj = validated
    else:
        out_model = OUTPUT_MODELS[prompt_name]
        out_obj = parse_and_validate(out_model, text)
        # Handle RootModel (Pydantic v2)
        if hasattr(out_obj, 'root'):
            out_obj = out_obj.root
        elif hasattr(out_obj, 'dict'):
            out_obj = out_obj.dict(by_alias=True)
        elif hasattr(out_obj, '__root__'):
            out_obj = out_obj.__root__

    cache_set(ckey, json.dumps(out_obj, ensure_ascii=False))
    return out_obj
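A hedged example of calling `run_prompt` for one of the registered prompts. The payload keys must match the Pydantic models in `schemas.py`, which is not shown on this page, so the field names below are assumptions.

```python
from cog_tutor import run_prompt

# Field names are illustrative; the authoritative schema lives in cog_tutor/schemas.py.
result = run_prompt(
    "item_explanation",
    {
        "question": "Simplify (3x + 2x) / 5.",
        "user_answer": "x + 1",
        "solution": "x",
    },
    model_id="Qwen/Qwen3-7B-Instruct",
)
print(result)  # validated dict, also written to the SQLite cache
```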
cog_tutor/knowledge_tracing.py
ADDED
@@ -0,0 +1,311 @@

import numpy as np
import json
from typing import Dict, List, Any, Tuple
from dataclasses import dataclass
from datetime import datetime, timedelta
import sqlite3

@dataclass
class SkillMastery:
    skill: str
    theta: float  # IRT ability parameter (-3 to +3)
    sem: float  # Standard error of measurement
    last_practiced: datetime
    practice_count: int
    success_rate: float

@dataclass
class ItemResponse:
    item_id: str
    skill: str
    correct: bool
    response_time: float
    hints_used: int
    difficulty: float
    timestamp: datetime

class KnowledgeTracer:
    """Knowledge tracing system using Item Response Theory and Bayesian updating."""

    def __init__(self, db_path: str = "knowledge_tracing.sqlite"):
        self.db_path = db_path
        self._init_database()
        self.skill_masteries: Dict[str, SkillMastery] = {}
        self.response_history: List[ItemResponse] = []

    def _init_database(self):
        """Initialize database for storing tracing data."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS skill_mastery (
                    skill TEXT PRIMARY KEY,
                    theta REAL DEFAULT 0.0,
                    sem REAL DEFAULT 1.0,
                    last_practiced TIMESTAMP,
                    practice_count INTEGER DEFAULT 0,
                    success_rate REAL DEFAULT 0.0
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS item_responses (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    item_id TEXT,
                    skill TEXT,
                    correct BOOLEAN,
                    response_time REAL,
                    hints_used INTEGER,
                    difficulty REAL,
                    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_skill_responses ON item_responses(skill)
            """)

    def update_mastery(self, response: ItemResponse) -> float:
        """Update skill mastery using Bayesian updating with IRT."""
        skill = response.skill

        # Load current mastery if exists
        if skill not in self.skill_masteries:
            self._load_skill_mastery(skill)

        current = self.skill_masteries.get(skill, SkillMastery(
            skill=skill, theta=0.0, sem=1.0,
            last_practiced=datetime.now(),
            practice_count=0, success_rate=0.0
        ))

        # IRT 2-parameter model update
        # P(correct) = 1 / (1 + exp(-a*(theta - b)))
        # where a = discrimination (fixed at 1.0), b = difficulty

        # Calculate likelihood of response given current theta
        logit = current.theta - response.difficulty
        p_correct = 1.0 / (1.0 + np.exp(-logit))

        # Bayesian update using response as evidence
        # Posterior precision = prior precision + information
        prior_precision = 1.0 / (current.sem ** 2)

        # Information function for 2PL IRT
        information = p_correct * (1 - p_correct)

        posterior_precision = prior_precision + information
        posterior_sem = np.sqrt(1.0 / posterior_precision)

        # Update theta based on response
        if response.correct:
            # Correct response increases theta
            theta_update = (current.theta / (current.sem ** 2) +
                            information * response.difficulty) / posterior_precision
        else:
            # Incorrect response decreases theta
            theta_update = (current.theta / (current.sem ** 2) -
                            information * (1 - response.difficulty)) / posterior_precision

        # Apply forgetting factor for time since last practice
        days_since_practice = (response.timestamp - current.last_practiced).days
        forgetting_factor = np.exp(-0.05 * days_since_practice)  # 5% decay per day

        theta_update *= forgetting_factor

        # Update mastery
        updated = SkillMastery(
            skill=skill,
            theta=np.clip(theta_update, -3.0, 3.0),
            sem=posterior_sem,
            last_practiced=response.timestamp,
            practice_count=current.practice_count + 1,
            success_rate=self._update_success_rate(current.success_rate, current.practice_count, response.correct)
        )

        self.skill_masteries[skill] = updated
        self.response_history.append(response)

        # Save to database
        self._save_skill_mastery(updated)
        self._save_response(response)

        return updated.theta

    def _update_success_rate(self, current_rate: float, count: int, correct: bool) -> float:
        """Update exponential moving average of success rate."""
        alpha = 0.1  # Learning rate for EMA
        if count == 0:
            return 1.0 if correct else 0.0
        return alpha * (1.0 if correct else 0.0) + (1 - alpha) * current_rate

    def get_mastery_probability(self, skill: str) -> float:
        """Convert theta to mastery probability (0-1 scale)."""
        if skill not in self.skill_masteries:
            self._load_skill_mastery(skill)

        # Use default theta if skill not found
        theta = self.skill_masteries.get(skill, SkillMastery(
            skill=skill, theta=0.0, sem=1.0,
            last_practiced=datetime.now(),
            practice_count=0, success_rate=0.0
        )).theta

        # Logistic transformation: theta=0 -> 0.5, theta=+2 -> 0.88, theta=-2 -> 0.12
        return 1.0 / (1.0 + np.exp(-theta))

    def calculate_information_gain(self, skill: str, difficulty: float) -> float:
        """Calculate expected information gain for an item."""
        if skill not in self.skill_masteries:
            self._load_skill_mastery(skill)

        # Use default theta if skill not found
        theta = self.skill_masteries.get(skill, SkillMastery(
            skill=skill, theta=0.0, sem=1.0,
            last_practiced=datetime.now(),
            practice_count=0, success_rate=0.0
        )).theta

        # Expected information = I(theta) where I is Fisher information
        logit = theta - difficulty
        p_correct = 1.0 / (1.0 + np.exp(-logit))
        information = p_correct * (1 - p_correct)

        return information

    def get_next_item_recommendations(self, candidate_items: List[Dict[str, Any]],
                                      max_items: int = 5) -> List[Dict[str, Any]]:
        """Recommend next items based on information gain and spacing."""
        scored_items = []

        for item in candidate_items:
            skill = item['skill']
            difficulty = item['difficulty']

            # Calculate information gain
            info_gain = self.calculate_information_gain(skill, difficulty)

            # Calculate spacing benefit (higher for items not practiced recently)
            if skill in self.skill_masteries:
                days_since = (datetime.now() - self.skill_masteries[skill].last_practiced).days
                spacing_bonus = min(days_since / 7.0, 1.0)  # Max bonus after 1 week
            else:
                spacing_bonus = 1.0  # New skill gets max bonus

            # Calculate mastery urgency (higher for lower mastery)
            mastery = self.get_mastery_probability(skill)
            urgency = 1.0 - mastery

            # Combined score
            score = 0.4 * info_gain + 0.3 * spacing_bonus + 0.3 * urgency

            scored_items.append({
                **item,
                'score': score,
                'information_gain': info_gain,
                'spacing_bonus': spacing_bonus,
                'urgency': urgency,
                'current_mastery': mastery
            })

        # Sort by score and return top items
        scored_items.sort(key=lambda x: x['score'], reverse=True)
        return scored_items[:max_items]

    def get_research_metrics(self, skill: str = None) -> Dict[str, Any]:
        """Calculate research metrics for evaluation."""
        if skill:
            responses = [r for r in self.response_history if r.skill == skill]
        else:
            responses = self.response_history

        if not responses:
            return {}

        # Basic metrics
        total_responses = len(responses)
        correct_responses = sum(1 for r in responses if r.correct)
        accuracy = correct_responses / total_responses

        # Time metrics
        avg_response_time = np.mean([r.response_time for r in responses])

        # Hint metrics
        hints_per_response = np.mean([r.hints_used for r in responses])

        # Learning gain (compare first vs last 10 responses)
        if len(responses) >= 20:
            early_responses = responses[:10]
            late_responses = responses[-10:]

            early_accuracy = sum(1 for r in early_responses if r.correct) / len(early_responses)
            late_accuracy = sum(1 for r in late_responses if r.correct) / len(late_responses)
            learning_gain = late_accuracy - early_accuracy
        else:
            learning_gain = 0.0

        # Retention (performance on items practiced > 3 days ago)
        retention_items = [r for r in responses
                           if (datetime.now() - r.timestamp).days > 3]
        if retention_items:
            retention_rate = sum(1 for r in retention_items if r.correct) / len(retention_items)
        else:
            retention_rate = None

        return {
            'total_responses': total_responses,
            'accuracy': accuracy,
            'avg_response_time': avg_response_time,
            'hints_per_response': hints_per_response,
            'learning_gain': learning_gain,
            'retention_rate': retention_rate,
            'skill_masteries': len(self.skill_masteries)
        }

    def _load_skill_mastery(self, skill: str):
        """Load skill mastery from database."""
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                "SELECT * FROM skill_mastery WHERE skill = ?", (skill,)
            )
            row = cursor.fetchone()
            if row:
                self.skill_masteries[skill] = SkillMastery(
                    skill=row['skill'],
                    theta=row['theta'],
                    sem=row['sem'],
                    last_practiced=datetime.fromisoformat(row['last_practiced']),
                    practice_count=row['practice_count'],
                    success_rate=row['success_rate']
                )

    def _save_skill_mastery(self, mastery: SkillMastery):
        """Save skill mastery to database."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO skill_mastery
                (skill, theta, sem, last_practiced, practice_count, success_rate)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (
                mastery.skill,
                mastery.theta,
                mastery.sem,
                mastery.last_practiced.isoformat(),
                mastery.practice_count,
                mastery.success_rate
            ))

    def _save_response(self, response: ItemResponse):
        """Save item response to database."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT INTO item_responses
                (item_id, skill, correct, response_time, hints_used, difficulty, timestamp)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                response.item_id,
                response.skill,
                response.correct,
                response.response_time,
                response.hints_used,
                response.difficulty,
                response.timestamp.isoformat()
            ))
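A minimal sketch of exercising the tracer on its own, showing how one response updates theta and the derived mastery probability (values illustrative):

```python
from datetime import datetime
from cog_tutor.knowledge_tracing import KnowledgeTracer, ItemResponse

tracer = KnowledgeTracer()  # creates/opens knowledge_tracing.sqlite
resp = ItemResponse(
    item_id="linear_equations_0",
    skill="linear_equations",
    correct=True,
    response_time=20.0,
    hints_used=1,
    difficulty=0.5,
    timestamp=datetime.now(),
)
theta = tracer.update_mastery(resp)                      # posterior ability estimate
p = tracer.get_mastery_probability("linear_equations")   # sigmoid(theta), in [0, 1]
print(round(theta, 3), round(p, 3))
```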
cog_tutor/prompts.py
ADDED
@@ -0,0 +1,60 @@

def item_explanation() -> str:
    return (
        "You are a tutoring engine for short-form questions. Given a question, user answer, and correct solution, "
        "explain the reasoning step-by-step in plain language. Output three tiers: Hint, Guided reasoning, Full explanation. "
        "Never invent new facts beyond the item’s text. Return JSON with keys hint, guided, full."
    )

def mastery_diagnostic() -> str:
    return (
        "You estimate mastery of a single skill from the last 10 responses. "
        "Consider correctness, response time, and hint usage. Output a float 0–1 and one-sentence rationale. "
        "Return JSON with keys mastery, comment."
    )

def next_item_selector() -> str:
    return (
        "You are a learning planner. Choose the next item that maximizes expected learning gain. "
        "Prioritize skills with low mastery and overdue reviews. Return item_id and reason as JSON."
    )

def skill_feedback() -> str:
    return (
        "Summarize a learner’s progress across all skills. Highlight top 3 strengths and 3 weaknesses. "
        "Give one actionable tip per weakness. Output concise JSON with strengths and weaknesses."
    )

def hint_generation() -> str:
    return (
        "Provide a tiered hint sequence for a given question. Level 1: conceptual nudge. Level 2: procedural cue. "
        "Level 3: near-solution scaffold. Do not reveal the final answer. Return JSON keys '1','2','3'."
    )

def reflection() -> str:
    return (
        "After each session, guide the learner to reflect. Ask one self-evaluation question and one improvement question. "
        "Keep tone neutral and constructive. Return JSON with reflection and improvement."
    )

def instructor_insight() -> str:
    return (
        "You analyze cohort data and surface anomalies for teachers. Detect items with low discrimination or poor fit. "
        "Suggest review actions. Return JSON array of objects with item_id and flag."
    )

def explanation_compression() -> str:
    return (
        "Convert a long explanation into a single 2-line recap focused on rule application. "
        "Keep syntax minimal, grade-appropriate. Return JSON with recap."
    )

def question_authoring() -> str:
    return (
        "Generate 5 original practice items for a given skill. Each must include the correct answer and a short rationale. "
        "Output JSON array with objects having q, a, why."
    )

def tone_normalizer() -> str:
    return (
        "Rewrite AI feedback to be neutral, factual, and non-emotional. Keep it under 20 words. Return JSON with normalized."
    )
cog_tutor/rag/__init__.py
ADDED
@@ -0,0 +1,5 @@

from .retriever import KnowledgeRetriever
from .knowledge_base import KnowledgeBase
from .rag_prompts import RAGEnhancedPrompts

__all__ = ["KnowledgeRetriever", "KnowledgeBase", "RAGEnhancedPrompts"]
cog_tutor/rag/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (374 Bytes)

cog_tutor/rag/__pycache__/knowledge_base.cpython-313.pyc
ADDED
Binary file (8.42 kB)

cog_tutor/rag/__pycache__/rag_prompts.cpython-313.pyc
ADDED
Binary file (4.42 kB)

cog_tutor/rag/__pycache__/retriever.cpython-313.pyc
ADDED
Binary file (6.25 kB)
cog_tutor/rag/knowledge_base.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import hashlib
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import sqlite3
|
| 6 |
+
|
| 7 |
+
class KnowledgeBase:
|
| 8 |
+
"""Knowledge base for educational content with fact-grounded explanations."""
|
| 9 |
+
|
| 10 |
+
def __init__(self, db_path: str = "knowledge_base.sqlite"):
|
| 11 |
+
self.db_path = db_path
|
| 12 |
+
self._init_database()
|
| 13 |
+
self._load_sample_content()
|
| 14 |
+
|
| 15 |
+
def _init_database(self):
|
| 16 |
+
"""Initialize SQLite database for knowledge storage."""
|
| 17 |
+
with sqlite3.connect(self.db_path) as conn:
|
| 18 |
+
conn.execute("""
|
| 19 |
+
CREATE TABLE IF NOT EXISTS knowledge_items (
|
| 20 |
+
id TEXT PRIMARY KEY,
|
| 21 |
+
skill TEXT NOT NULL,
|
| 22 |
+
content TEXT NOT NULL,
|
| 23 |
+
facts TEXT NOT NULL,
|
| 24 |
+
difficulty REAL DEFAULT 0.5,
|
| 25 |
+
prerequisite_skills TEXT,
|
| 26 |
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
| 27 |
+
)
|
| 28 |
+
""")
|
| 29 |
+
conn.execute("""
|
| 30 |
+
CREATE INDEX IF NOT EXISTS idx_skill ON knowledge_items(skill)
|
| 31 |
+
""")
|
| 32 |
+
|
| 33 |
+
def _load_sample_content(self):
|
| 34 |
+
"""Load sample educational content for testing."""
|
| 35 |
+
sample_items = [
|
| 36 |
+
{
|
| 37 |
+
"id": "algebra_simplify_001",
|
| 38 |
+
"skill": "algebra_simplification",
|
| 39 |
+
"content": "To simplify algebraic expressions, combine like terms by adding or subtracting coefficients of the same variable. For example, 3x + 2x = 5x.",
|
| 40 |
+
"facts": [
|
| 41 |
+
"Like terms have the same variable raised to the same power",
|
| 42 |
+
"Coefficients of like terms can be combined through addition",
|
| 43 |
+
"The variable part remains unchanged when combining like terms"
|
| 44 |
+
],
|
| 45 |
+
"difficulty": 0.3,
|
| 46 |
+
"prerequisite_skills": ["basic_arithmetic", "variables"]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"id": "algebra_simplify_002",
|
| 50 |
+
"skill": "algebra_simplification",
|
| 51 |
+
"content": "When simplifying expressions with division, first combine like terms in the numerator, then divide by the denominator. Example: (3x + 2x) / 5 = 5x / 5 = x.",
|
| 52 |
+
"facts": [
|
| 53 |
+
"Division applies to the entire expression",
|
| 54 |
+
"Simplify numerator before dividing",
|
| 55 |
+
"A term divided by itself equals 1"
|
| 56 |
+
],
|
| 57 |
+
"difficulty": 0.5,
|
| 58 |
+
"prerequisite_skills": ["algebra_simplification", "division"]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"id": "linear_eq_001",
|
| 62 |
+
"skill": "linear_equations",
|
| 63 |
+
"content": "To solve linear equations, isolate the variable by performing inverse operations. Add/subtract to isolate the variable term, then multiply/divide to solve for the variable.",
|
| 64 |
+
"facts": [
|
| 65 |
+
"Inverse operations undo each other (addition ↔ subtraction, multiplication ↔ division)",
|
| 66 |
+
"Apply the same operation to both sides to maintain equality",
|
| 67 |
+
"Goal is to isolate the variable on one side"
|
| 68 |
+
],
|
| 69 |
+
"difficulty": 0.4,
|
| 70 |
+
"prerequisite_skills": ["algebra_simplification"]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"id": "fraction_div_001",
|
| 74 |
+
"skill": "fraction_operations",
|
| 75 |
+
"content": "To divide fractions, multiply by the reciprocal of the second fraction. The reciprocal of a/b is b/a.",
|
| 76 |
+
"facts": [
|
| 77 |
+
"Division is equivalent to multiplication by the reciprocal",
|
| 78 |
+
"Reciprocal flips numerator and denominator",
|
| 79 |
+
"Multiply numerators together and denominators together"
|
| 80 |
+
],
|
| 81 |
+
"difficulty": 0.6,
|
| 82 |
+
"prerequisite_skills": ["fraction_multiplication"]
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"id": "ratio_001",
|
| 86 |
+
"skill": "ratios",
|
| 87 |
+
"content": "Ratios compare quantities. To solve ratio problems, set up proportions and cross-multiply. a:b = c:d means a×d = b×c.",
|
| 88 |
+
"facts": [
|
| 89 |
+
"Ratios show relative sizes of quantities",
|
| 90 |
+
"Equivalent ratios have the same value when simplified",
|
| 91 |
+
"Cross-multiplication solves proportion equations"
|
| 92 |
+
],
|
| 93 |
+
"difficulty": 0.5,
|
| 94 |
+
"prerequisite_skills": ["proportions"]
|
| 95 |
+
}
|
| 96 |
+
]
|
| 97 |
+
|
| 98 |
+
with sqlite3.connect(self.db_path) as conn:
|
| 99 |
+
for item in sample_items:
|
| 100 |
+
conn.execute("""
|
| 101 |
+
INSERT OR REPLACE INTO knowledge_items
|
| 102 |
+
(id, skill, content, facts, difficulty, prerequisite_skills)
|
| 103 |
+
VALUES (?, ?, ?, ?, ?, ?)
|
| 104 |
+
""", (
|
| 105 |
+
item["id"],
|
| 106 |
+
item["skill"],
|
| 107 |
+
item["content"],
|
| 108 |
+
json.dumps(item["facts"]),
|
| 109 |
+
item["difficulty"],
|
| 110 |
+
json.dumps(item["prerequisite_skills"])
|
| 111 |
+
))
|
| 112 |
+
|
| 113 |
+
def retrieve_by_skill(self, skill: str, limit: int = 3) -> List[Dict[str, Any]]:
|
| 114 |
+
"""Retrieve knowledge items for a specific skill."""
|
| 115 |
+
with sqlite3.connect(self.db_path) as conn:
|
| 116 |
+
conn.row_factory = sqlite3.Row
|
| 117 |
+
cursor = conn.execute("""
|
| 118 |
+
SELECT * FROM knowledge_items
|
| 119 |
+
WHERE skill = ? OR skill LIKE ?
|
| 120 |
+
ORDER BY difficulty ASC
|
| 121 |
+
LIMIT ?
|
| 122 |
+
""", (skill, f"%{skill}%", limit))
|
| 123 |
+
|
| 124 |
+
results = []
|
| 125 |
+
for row in cursor.fetchall():
|
| 126 |
+
results.append({
|
| 127 |
+
"id": row["id"],
|
| 128 |
+
"skill": row["skill"],
|
| 129 |
+
"content": row["content"],
|
| 130 |
+
"facts": json.loads(row["facts"]),
|
| 131 |
+
"difficulty": row["difficulty"],
|
| 132 |
+
"prerequisite_skills": json.loads(row["prerequisite_skills"])
|
| 133 |
+
})
|
| 134 |
+
return results
|
| 135 |
+
|
| 136 |
+
def retrieve_by_query(self, query: str, limit: int = 3) -> List[Dict[str, Any]]:
|
| 137 |
+
"""Retrieve knowledge items based on text search."""
|
| 138 |
+
with sqlite3.connect(self.db_path) as conn:
|
| 139 |
+
conn.row_factory = sqlite3.Row
|
| 140 |
+
cursor = conn.execute("""
|
| 141 |
+
SELECT * FROM knowledge_items
|
| 142 |
+
WHERE content LIKE ? OR skill LIKE ?
|
| 143 |
+
ORDER BY difficulty ASC
|
| 144 |
+
LIMIT ?
|
| 145 |
+
""", (f"%{query}%", f"%{query}%", limit))
|
| 146 |
+
|
| 147 |
+
results = []
|
| 148 |
+
for row in cursor.fetchall():
|
| 149 |
+
results.append({
|
| 150 |
+
"id": row["id"],
|
| 151 |
+
"skill": row["skill"],
|
| 152 |
+
"content": row["content"],
|
| 153 |
+
"facts": json.loads(row["facts"]),
|
| 154 |
+
"difficulty": row["difficulty"],
|
| 155 |
+
"prerequisite_skills": json.loads(row["prerequisite_skills"])
|
| 156 |
+
})
|
| 157 |
+
return results
|
| 158 |
+
|
| 159 |
+
def add_knowledge_item(self, item: Dict[str, Any]):
|
| 160 |
+
"""Add a new knowledge item to the database."""
|
| 161 |
+
with sqlite3.connect(self.db_path) as conn:
|
| 162 |
+
conn.execute("""
|
| 163 |
+
INSERT OR REPLACE INTO knowledge_items
|
| 164 |
+
(id, skill, content, facts, difficulty, prerequisite_skills)
|
| 165 |
+
VALUES (?, ?, ?, ?, ?, ?)
|
| 166 |
+
""", (
|
| 167 |
+
item["id"],
|
| 168 |
+
item["skill"],
|
| 169 |
+
item["content"],
|
| 170 |
+
json.dumps(item["facts"]),
|
| 171 |
+
item["difficulty"],
|
| 172 |
+
json.dumps(item.get("prerequisite_skills", []))
|
| 173 |
+
))
|
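
A minimal usage sketch for the retrieval helpers above (illustrative, not part of the commit). It assumes `KnowledgeBase()` can be constructed without arguments; the actual constructor sits earlier in `knowledge_base.py` and is not shown in this hunk.

```python
from cog_tutor.rag.knowledge_base import KnowledgeBase

kb = KnowledgeBase()  # assumption: a default db_path is used when no argument is given

# Easiest items for a skill first (retrieve_by_skill orders by difficulty ASC)
for item in kb.retrieve_by_skill("algebra_simplification", limit=2):
    print(item["id"], item["difficulty"], item["facts"][0])

# Free-text lookup against the content and skill columns
for item in kb.retrieve_by_query("reciprocal", limit=1):
    print(item["id"], item["content"])
```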
cog_tutor/rag/rag_prompts.py
ADDED
@@ -0,0 +1,88 @@
from typing import Dict, Any, List

class RAGEnhancedPrompts:
    """RAG-enhanced prompts with knowledge grounding and citations."""

    @staticmethod
    def item_explanation_with_rag() -> str:
        return """You are a tutoring engine for short-form questions with access to educational knowledge.

Given a question, user answer, correct solution, and relevant facts from the knowledge base, explain the reasoning step-by-step in plain language.

IMPORTANT:
- Use ONLY the provided facts to build your explanation
- Cite the knowledge sources using [Source X] notation
- Output three tiers: Hint, Guided reasoning, Full explanation
- Never invent new facts beyond the provided knowledge

Output JSON with keys: hint, guided, full, citations

Example citation format: "Combine like terms by adding coefficients [Source 1]." """

    @staticmethod
    def hint_generation_with_rag() -> str:
        return """You are generating hints using educational knowledge.

Given a question and relevant facts, provide a tiered hint sequence:
- Level 1: conceptual nudge using the facts
- Level 2: procedural cue based on the knowledge
- Level 3: near-solution scaffold

IMPORTANT:
- Use ONLY the provided facts
- Do not reveal the final answer
- Cite sources using [Source X]

Return JSON with keys '1','2','3' and include citations in each hint."""

    @staticmethod
    def adaptive_question_generation() -> str:
        return """You are generating adaptive practice questions based on student performance.

Given a skill, mastery level, and knowledge content, create a question that:
- Matches the student's current mastery (difficulty = 1 - mastery)
- Uses concepts from the provided knowledge
- Includes the correct answer and explanation

Output JSON with keys: question, answer, explanation, difficulty, skill"""

    @staticmethod
    def next_item_selector_with_entropy() -> str:
        return """You are a learning planner using entropy-based scheduling.

Given candidate items, student mastery, and recent performance, select the next item that:
- Maximizes expected learning gain (high information gain for uncertain skills)
- Balances review and new content
- Considers prerequisite relationships

Return JSON with keys: item_id, reason, expected_gain, information_gain"""

    @staticmethod
    def mastery_diagnostic_with_irt() -> str:
        return """You are estimating mastery using Item Response Theory (IRT).

Given skill performance data including:
- Response accuracy
- Item difficulty
- Response time
- Hint usage

Estimate:
- Theta (ability parameter): -3 to +3 scale
- Standard error of measurement
- Mastery probability (0-1)

Return JSON with keys: theta, sem, mastery, confidence_interval"""

    @staticmethod
    def research_metrics() -> str:
        return """You are calculating research metrics for learning analytics.

Given session data, compute:
- Learning gain (pre/post mastery difference)
- Retention rate (accuracy on review items)
- Hint efficiency (hints per correct answer)
- Time on task
- Knowledge transfer (cross-skill performance)

Return JSON with all metrics and statistical significance where applicable."""
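
A sketch of how one of these templates can be paired with retrieved facts (illustrative only; how the messages are actually routed to the model through the package's adapter layer is not shown in this file):

```python
from cog_tutor.rag.rag_prompts import RAGEnhancedPrompts

facts = [
    "Combine like terms by adding their coefficients",
    "Division applies to the entire expression",
]
# Number the facts so the model can cite them as [Source X]
numbered = "\n".join(f"[Source {i}] {fact}" for i, fact in enumerate(facts, 1))

messages = [
    {"role": "system", "content": RAGEnhancedPrompts.item_explanation_with_rag()},
    {
        "role": "user",
        "content": (
            "Question: Simplify (3x + 2x) / 5\n"
            "User answer: 5x\n"
            "Correct solution: x\n"
            f"Relevant facts:\n{numbered}"
        ),
    },
]
```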
cog_tutor/rag/retriever.py
ADDED
@@ -0,0 +1,118 @@
import json
import sqlite3
from typing import List, Dict, Any, Tuple
from .knowledge_base import KnowledgeBase
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class KnowledgeRetriever:
    """Retrieval-augmented generation system for educational content."""

    def __init__(self, knowledge_base: KnowledgeBase):
        self.kb = knowledge_base
        self.vectorizer = TfidfVectorizer(
            stop_words='english',
            ngram_range=(1, 2),
            max_features=1000
        )
        self._build_index()

    def _build_index(self):
        """Build TF-IDF index for semantic search."""
        # Get all knowledge items
        all_items = []
        with sqlite3.connect(self.kb.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute("SELECT * FROM knowledge_items")
            for row in cursor.fetchall():
                all_items.append({
                    "id": row["id"],
                    "skill": row["skill"],
                    "content": row["content"],
                    # Facts are stored as JSON by KnowledgeBase, so parse with json.loads
                    "facts": json.loads(row["facts"]),
                    "difficulty": row["difficulty"]
                })

        self.all_items = all_items

        # Build corpus for vectorization
        corpus = []
        for item in self.all_items:
            text = f"{item['skill']} {item['content']} {' '.join(item['facts'])}"
            corpus.append(text)

        # Fit vectorizer
        self.tfidf_matrix = self.vectorizer.fit_transform(corpus)

    def retrieve_relevant_knowledge(self, query: str, skill: str = None, top_k: int = 3) -> List[Dict[str, Any]]:
        """Retrieve relevant knowledge items for a query."""
        # If skill is specified, prioritize skill-specific items
        if skill:
            skill_items = self.kb.retrieve_by_skill(skill, limit=top_k)
            if len(skill_items) >= top_k:
                return skill_items[:top_k]

        # Use semantic search
        query_vec = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vec, self.tfidf_matrix).flatten()

        # Get top-k most similar items
        top_indices = np.argsort(similarities)[-top_k:][::-1]

        results = []
        for idx in top_indices:
            if similarities[idx] > 0.1:  # Threshold for relevance
                item = self.all_items[idx].copy()
                item["relevance_score"] = float(similarities[idx])
                results.append(item)

        return results

    def get_facts_for_explanation(self, question: str, user_answer: str, solution: str) -> List[str]:
        """Extract relevant facts for explaining a problem."""
        query = f"{question} {solution}"
        relevant_items = self.retrieve_relevant_knowledge(query, top_k=5)

        # Collect and deduplicate facts
        all_facts = []
        seen_facts = set()

        for item in relevant_items:
            for fact in item["facts"]:
                if fact not in seen_facts:
                    all_facts.append(fact)
                    seen_facts.add(fact)

        return all_facts[:5]  # Return top 5 most relevant facts

    def get_contextual_hints(self, question: str, hint_level: int = 1) -> List[str]:
        """Generate contextual hints based on retrieved knowledge."""
        relevant_items = self.retrieve_relevant_knowledge(question, top_k=3)

        if hint_level == 1:
            # Conceptual nudge
            hints = [item["content"].split('.')[0] + "." for item in relevant_items]
        elif hint_level == 2:
            # Procedural cue
            hints = [item["content"] for item in relevant_items]
        else:
            # Near-solution scaffold
            hints = []
            for item in relevant_items:
                for fact in item["facts"]:
                    if "step" in fact.lower() or "method" in fact.lower():
                        hints.append(fact)

        return hints[:3]

    def get_explanation_with_citations(self, question: str, user_answer: str, solution: str) -> Dict[str, Any]:
        """Generate explanation with knowledge citations."""
        facts = self.get_facts_for_explanation(question, user_answer, solution)
        relevant_items = self.retrieve_relevant_knowledge(f"{question} {solution}", top_k=3)

        return {
            "facts": facts,
            "citations": [{"id": item["id"], "skill": item["skill"]} for item in relevant_items],
            "sources": [item["content"] for item in relevant_items]
        }
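
A short usage sketch for the retriever (illustrative; as above, the no-argument `KnowledgeBase()` construction is an assumption):

```python
from cog_tutor.rag.knowledge_base import KnowledgeBase
from cog_tutor.rag.retriever import KnowledgeRetriever

retriever = KnowledgeRetriever(KnowledgeBase())  # builds the TF-IDF index on init

# With no skill given, retrieval falls back to TF-IDF cosine similarity
for item in retriever.retrieve_relevant_knowledge("divide fractions", top_k=2):
    print(item["id"], item.get("relevance_score"))

# Deduplicated facts to ground an explanation of a specific mistake
print(retriever.get_facts_for_explanation("Simplify (3x + 2x) / 5", "5x", "x"))
```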
cog_tutor/schemas.py
ADDED
@@ -0,0 +1,107 @@
from typing import List, Dict, Any
from pydantic import BaseModel, Field, confloat, conlist, RootModel

class ItemExplanationInput(BaseModel):
    question: str
    user_answer: str
    solution: str

class ItemExplanationOutput(BaseModel):
    hint: str
    guided: str
    full: str

class MasteryDiagEvent(BaseModel):
    correct: bool
    rt: int
    hints: int

class MasteryDiagnosticInput(BaseModel):
    skill: str
    history: conlist(MasteryDiagEvent, min_length=1, max_length=50)

class MasteryDiagnosticOutput(BaseModel):
    mastery: confloat(ge=0.0, le=1.0)
    comment: str

class NextItemCandidate(BaseModel):
    item_id: str
    skill: str
    p_correct: confloat(ge=0.0, le=1.0)
    due: bool

class NextItemSelectorInput(BaseModel):
    user_id: str
    candidates: conlist(NextItemCandidate, min_length=1)

class NextItemSelectorOutput(BaseModel):
    item_id: str
    reason: str

class SkillMastery(BaseModel):
    name: str
    mastery: confloat(ge=0.0, le=1.0)

class SkillFeedbackInput(BaseModel):
    skills: conlist(SkillMastery, min_length=1)

class SkillWeakness(BaseModel):
    skill: str
    tip: str

class SkillFeedbackOutput(BaseModel):
    strengths: List[str]
    weaknesses: List[SkillWeakness]

class HintGenerationInput(BaseModel):
    question: str

class HintGenerationOutput(BaseModel):
    field_1: str = Field(alias='1')
    field_2: str = Field(alias='2')
    field_3: str = Field(alias='3')

    class Config:
        populate_by_name = True

class ReflectionInput(BaseModel):
    session: Dict[str, Any]

class ReflectionOutput(BaseModel):
    reflection: str
    improvement: str

class InstructorItem(BaseModel):
    id: str
    discrimination: float
    accuracy: confloat(ge=0.0, le=1.0)

class InstructorInsightInput(BaseModel):
    items: conlist(InstructorItem, min_length=1)

class InstructorInsightRow(BaseModel):
    item_id: str
    flag: str

class ExplanationCompressionInput(BaseModel):
    explanation: str

class ExplanationCompressionOutput(BaseModel):
    recap: str

class QuestionAuthoringInput(BaseModel):
    skill: str
    difficulty: str

class QAItem(BaseModel):
    q: str
    a: str
    why: str

class QuestionAuthoringOutput(RootModel[List[QAItem]]):
    pass

class ToneNormalizerInput(BaseModel):
    raw: str

class ToneNormalizerOutput(BaseModel):
    normalized: str
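
The hint schema maps the model's `'1'`/`'2'`/`'3'` JSON keys onto `field_1`–`field_3` through `Field` aliases. A small sketch of validating data against these models (pydantic v2, as pinned in requirements.txt):

```python
from cog_tutor.schemas import HintGenerationOutput, MasteryDiagnosticOutput

# Aliased keys are accepted directly during validation
hints = HintGenerationOutput.model_validate({
    "1": "Think about like terms.",
    "2": "Add the coefficients in the numerator first.",
    "3": "5x / 5 leaves just x.",
})
print(hints.field_1)

# Constrained fields reject out-of-range values (mastery must lie in [0, 1])
diag = MasteryDiagnosticOutput(mastery=0.72, comment="On track")
print(diag.model_dump())
```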
cog_tutor/validation.py
ADDED
@@ -0,0 +1,10 @@
import json
from typing import Type, Any
from pydantic import BaseModel

def parse_and_validate(model: Type[BaseModel], text: str) -> Any:
    data = json.loads(text)
    # Support pydantic v1 and v2
    if hasattr(model, 'model_validate'):
        return model.model_validate(data)
    return model.parse_obj(data)
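
A usage sketch for `parse_and_validate`; the raw string below simply stands in for a model response:

```python
from cog_tutor.validation import parse_and_validate
from cog_tutor.schemas import ItemExplanationOutput

raw = (
    '{"hint": "Combine like terms first.", '
    '"guided": "3x + 2x = 5x, then divide by 5.", '
    '"full": "(3x + 2x) / 5 = 5x / 5 = x."}'
)
parsed = parse_and_validate(ItemExplanationOutput, raw)
print(parsed.full)
```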
cognitive_llm.py
ADDED
@@ -0,0 +1,117 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from typing import Optional, Dict, Any

class CognitiveLLM:
    def __init__(self, model_name: str = "Qwen/Qwen3-7B-Instruct", device: str = None):
        """
        Initialize the Cognitive LLM with the specified model.

        Args:
            model_name: Name of the model to use (default: Qwen/Qwen3-7B-Instruct)
            device: Device to run the model on ('cuda', 'mps', or 'cpu'). Auto-detects if None.
        """
        self.model_name = model_name
        self.device = device if device else 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

        print(f"Loading {model_name} on {self.device}...")

        # Load tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        # Load model with 4-bit quantization for efficiency
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            attn_implementation="flash_attention_2" if self.device.startswith('cuda') else None,
            load_in_4bit=True
        )

        # Create text generation pipeline
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device_map="auto"
        )

        print(f"Model {model_name} loaded successfully on {self.device}")

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        **generation_kwargs
    ) -> str:
        """
        Generate text from a prompt using the loaded model.

        Args:
            prompt: Input text prompt
            max_new_tokens: Maximum number of tokens to generate
            temperature: Sampling temperature (lower = more focused, higher = more creative)
            top_p: Nucleus sampling parameter
            **generation_kwargs: Additional generation parameters

        Returns:
            Generated text
        """
        # Format the prompt for Qwen3 chat
        messages = [
            {"role": "user", "content": prompt}
        ]

        # Generate response
        response = self.pipe(
            messages,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            **generation_kwargs
        )

        # Extract and return the generated text
        return response[0]["generated_text"][-1]["content"]


def main():
    # Initialize the cognitive LLM
    llm = CognitiveLLM()

    print("\nCognitive LLM initialized. Type 'quit' to exit.")
    print("Enter your prompt:")

    # Interactive loop
    while True:
        try:
            user_input = input(">> ")
            if user_input.lower() in ['quit', 'exit', 'q']:
                break

            if user_input.strip() == '':
                continue

            # Generate response
            response = llm.generate(user_input)
            print("\nResponse:")
            print(response)
            print("\n---\nEnter another prompt or 'quit' to exit:")

        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            print(f"\nError: {str(e)}")
            continue


if __name__ == "__main__":
    main()
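
Recent transformers releases prefer passing a `BitsAndBytesConfig` instead of `load_in_4bit=True` directly to `from_pretrained`; an equivalent sketch of the 4-bit setup used above:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Same 4-bit quantization expressed through the explicit config object
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-7B-Instruct",
    device_map="auto",
    trust_remote_code=True,
    quantization_config=quant_config,
)
```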
knowledge_base.sqlite
ADDED
Binary file (16.4 kB).
knowledge_tracing.sqlite
ADDED
Binary file (24.6 kB).
requirements.txt
ADDED
@@ -0,0 +1,8 @@
transformers>=4.36.0
torch>=2.0.0
sentencepiece
accelerate
bitsandbytes
pydantic>=2.5.0
numpy
scikit-learn
research_output.json
ADDED
@@ -0,0 +1,89 @@
{
  "user_id": "sunny_test",
  "session_start": "2025-11-01T22:59:20.445840",
  "metrics": {
    "session_metrics": {
      "duration_seconds": 2.298408,
      "total_responses": 6,
      "accuracy": 0.5,
      "avg_response_time": 2.3333723147710166,
      "hints_per_response": 0.6666666666666666,
      "learning_gain": 0.0
    },
    "cumulative_metrics": {
      "total_responses": 6,
      "accuracy": 0.5,
      "avg_response_time": 2.3333723147710166,
      "hints_per_response": 0.6666666666666666,
      "learning_gain": 0.0,
      "retention_rate": null,
      "skill_masteries": 1
    },
    "knowledge_tracing": {
      "tracked_skills": 1,
      "skill_masteries": {
        "algebra_simplification": {
          "theta": 0.014428777179973144,
          "mastery_prob": 0.503607131714598,
          "practice_count": 7
        }
      }
    }
  },
  "session_responses": [
    {
      "item_id": "test_001",
      "skill": "algebra_simplification",
      "correct": false,
      "response_time": 2.0002338886260986,
      "hints_used": 1,
      "difficulty": 0.6,
      "timestamp": "2025-11-01T22:59:22.449109"
    },
    {
      "item_id": "test_002",
      "skill": "algebra_simplification",
      "correct": false,
      "response_time": 3.0,
      "hints_used": 2,
      "difficulty": 0.6,
      "timestamp": "2025-11-01T22:59:22.518361"
    },
    {
      "item_id": "test_003",
      "skill": "algebra_simplification",
      "correct": false,
      "response_time": 2.7,
      "hints_used": 1,
      "difficulty": 0.6,
      "timestamp": "2025-11-01T22:59:22.562224"
    },
    {
      "item_id": "test_004",
      "skill": "algebra_simplification",
      "correct": true,
      "response_time": 2.4,
      "hints_used": 0,
      "difficulty": 0.6,
      "timestamp": "2025-11-01T22:59:22.603972"
    },
    {
      "item_id": "test_005",
      "skill": "algebra_simplification",
      "correct": true,
      "response_time": 2.1,
      "hints_used": 0,
      "difficulty": 0.6,
      "timestamp": "2025-11-01T22:59:22.648160"
    },
    {
      "item_id": "test_006",
      "skill": "algebra_simplification",
      "correct": true,
      "response_time": 1.8,
      "hints_used": 0,
      "difficulty": 0.6,
      "timestamp": "2025-11-01T22:59:22.693063"
    }
  ]
}
test_cog_tutor.py
ADDED
@@ -0,0 +1,11 @@
# Test the cog_tutor package imports correctly
try:
    from cog_tutor import run_prompt
    print("SUCCESS: cog_tutor package imported correctly")
    print("The package is ready to use with your Qwen model.")
    print("\nTo test with a specific model, run:")
    print("  from cog_tutor import run_prompt")
    print("  output = run_prompt('item_explanation', {...}, model_id='your-model-id')")
    print("\nMake sure you have proper Hugging Face authentication if using gated models.")
except Exception as e:
    print(f"ERROR: Failed to import cog_tutor: {e}")
test_rag_tutor.py
ADDED
@@ -0,0 +1,191 @@
#!/usr/bin/env python3
"""
Test script for RAG-enhanced Cognitive Tutor system.
Demonstrates adaptive questioning, knowledge tracing, and research metrics.
"""

from cog_tutor.adaptive_tutor import AdaptiveTutor
import json
import time

def test_rag_enhanced_tutor():
    """Test the complete RAG-enhanced tutoring system."""

    print("=== RAG-Enhanced Cognitive Tutor Test ===\n")

    # Initialize tutor
    tutor = AdaptiveTutor(user_id="sunny_test")

    # Test 1: Generate adaptive question
    print("1. Testing Adaptive Question Generation")
    print("-" * 40)

    question = tutor.generate_adaptive_question("algebra_simplification")
    print(f"Generated Question: {question['question']}")
    print(f"Expected Difficulty: {question['difficulty']:.2f}")
    print(f"Knowledge Sources: {question['knowledge_sources']}")
    print()

    # Test 2: Process student response
    print("2. Testing Student Response Processing")
    print("-" * 40)

    # Simulate student response
    start_time = time.time()
    time.sleep(2)  # Simulate thinking time
    response_time = time.time() - start_time

    result = tutor.process_student_response(
        item_id="test_001",
        skill="algebra_simplification",
        question=question['question'],
        user_answer="5x",  # Incorrect answer
        correct_answer="x",
        response_time=response_time,
        hints_used=1
    )

    print(f"Response Correct: {result['correct']}")
    print(f"Mastery Theta: {result['mastery_theta']:.3f}")
    print(f"Mastery Probability: {result['mastery_probability']:.3f}")
    print(f"Explanation Hint: {result['explanation']['hint']}")
    print()

    # Test 3: Generate contextual hints
    print("3. Testing Contextual Hint Generation")
    print("-" * 40)

    hints = tutor.generate_adaptive_hints(question['question'], hint_level=2)
    for i, hint in enumerate(hints, 1):
        print(f"Hint {i}: {hint}")
    print()

    # Test 4: Get next item recommendations
    print("4. Testing Next Item Recommendations")
    print("-" * 40)

    recommendations = tutor.get_next_items("algebra_simplification", max_items=3)
    for i, rec in enumerate(recommendations, 1):
        print(f"Recommendation {i}:")
        print(f"  Item: {rec['item_id']}")
        print(f"  Skill: {rec['skill']}")
        print(f"  Score: {rec['score']:.3f}")
        print(f"  Information Gain: {rec['information_gain']:.3f}")
        print(f"  Current Mastery: {rec['current_mastery']:.3f}")
    print()

    # Test 5: Evaluate mastery with IRT
    print("5. Testing IRT Mastery Evaluation")
    print("-" * 40)

    irt_evaluation = tutor.evaluate_mastery_with_irt("algebra_simplification")
    print(f"Theta (Ability): {irt_evaluation['theta']:.3f}")
    print(f"Standard Error: {irt_evaluation['sem']:.3f}")
    print(f"Mastery Probability: {irt_evaluation['mastery']:.3f}")
    print(f"95% CI: [{irt_evaluation['confidence_interval'][0]:.2f}, {irt_evaluation['confidence_interval'][1]:.2f}]")
    print()

    # Test 6: Simulate multiple responses for learning metrics
    print("6. Testing Learning Progress Simulation")
    print("-" * 40)

    # Simulate 5 more responses
    for i in range(5):
        # Generate question
        q = tutor.generate_adaptive_question("algebra_simplification")

        # Simulate improving performance
        correct = i >= 2  # Get correct after 3 attempts

        result = tutor.process_student_response(
            item_id=f"test_{i+2:03d}",
            skill="algebra_simplification",
            question=q['question'],
            user_answer="x" if correct else "5x",
            correct_answer="x",
            response_time=3.0 - i * 0.3,  # Get faster
            hints_used=max(0, 2 - i)  # Use fewer hints
        )

        print(f"Response {i+1}: Correct={result['correct']}, Mastery={result['mastery_probability']:.3f}")

    print()

    # Test 7: Get comprehensive research metrics
    print("7. Testing Research Metrics")
    print("-" * 40)

    metrics = tutor.get_research_metrics()

    print("Session Metrics:")
    session = metrics['session_metrics']
    print(f"  Duration: {session['duration_seconds']:.1f}s")
    print(f"  Total Responses: {session['total_responses']}")
    print(f"  Accuracy: {session['accuracy']:.3f}")
    print(f"  Learning Gain: {session['learning_gain']:.3f}")

    print("\nCumulative Metrics:")
    cumulative = metrics['cumulative_metrics']
    if cumulative:
        print(f"  Total Responses: {cumulative.get('total_responses', 0)}")
        print(f"  Accuracy: {cumulative.get('accuracy', 0):.3f}")
        print(f"  Retention Rate: {cumulative.get('retention_rate', 'N/A')}")

    print("\nKnowledge Tracing:")
    kt = metrics['knowledge_tracing']
    print(f"  Tracked Skills: {kt['tracked_skills']}")
    for skill, data in kt['skill_masteries'].items():
        print(f"  {skill}: θ={data['theta']:.2f}, mastery={data['mastery_prob']:.3f}")

    print()

    # Test 8: Test RAG knowledge retrieval
    print("8. Testing Knowledge Retrieval")
    print("-" * 40)

    # Test fact retrieval
    facts = tutor.retriever.get_facts_for_explanation(
        "Simplify (3x + 2x) / 5",
        "5x",
        "x"
    )

    print("Retrieved Facts for Explanation:")
    for i, fact in enumerate(facts, 1):
        print(f"  {i}. {fact}")

    print()

    # Test 9: Save metrics to file for research analysis
    print("9. Saving Research Data")
    print("-" * 40)

    research_data = {
        "user_id": tutor.user_id,
        "session_start": tutor.session_start.isoformat(),
        "metrics": metrics,
        "session_responses": [
            {
                "item_id": r.item_id,
                "skill": r.skill,
                "correct": r.correct,
                "response_time": r.response_time,
                "hints_used": r.hints_used,
                "difficulty": r.difficulty,
                "timestamp": r.timestamp.isoformat()
            }
            for r in tutor.session_responses
        ]
    }

    with open("research_output.json", "w") as f:
        json.dump(research_data, f, indent=2)

    print("Research data saved to 'research_output.json'")
    print()

    print("=== Test Complete ===")
    print("RAG-enhanced Cognitive Tutor is ready for deployment!")

if __name__ == "__main__":
    test_rag_enhanced_tutor()
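
The test above reports theta, standard error, and mastery probability from the IRT evaluation. The actual tracing logic lives in `cog_tutor/knowledge_tracing.py`, which is not shown in this hunk; for reference, a standard two-parameter logistic (2PL) IRT response model, in sketch form, looks like this (illustrative only, and it may differ from the repository's implementation):

```python
import math

def p_correct_2pl(theta: float, difficulty: float, discrimination: float = 1.0) -> float:
    """Two-parameter logistic IRT: probability of answering an item correctly."""
    return 1.0 / (1.0 + math.exp(-discrimination * (theta - difficulty)))

# A student slightly above the item's difficulty succeeds a bit more often than not
print(round(p_correct_2pl(theta=0.5, difficulty=0.0), 3))  # ~0.622
```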