import logging
from pathlib import Path
from typing import List, Tuple

from .llm import LLMClient
from .config import GEN_MODEL

logger = logging.getLogger(__name__)


def _truncate(text: str, max_chars: int) -> str:
    """
    Hard truncate long strings to keep the prompt under the LLM TPM limit.
    """
    if len(text) <= max_chars:
        return text
    # leave a small suffix to show that it was truncated
    return text[: max_chars - 32] + "... [TRUNCATED]"


def format_docs_with_keys(
    documents_sentences: List[List[Tuple[str, str]]]
) -> str:
    """
    Format retrieved documents for the generator prompt.

    documents_sentences: list over documents; each document is a list of
    (key, sentence) pairs.
    """
    blocks: List[str] = []
    for doc in documents_sentences:
        for key, sentence in doc:
            blocks.append(f"{key}: {sentence}")
        # blank line between documents
        blocks.append("")
    return "\n".join(blocks).strip()


class RAGGenerator:
    """
    RAG answer generator.

    - Builds a prompt using a template + retrieved documents
    - Truncates context so the request fits Groq's TPM limit
    - Calls the LLM via LLMClient
    - On any LLM error (including 413 / rate_limit_exceeded), returns a safe
      fallback answer string instead of raising.
    """

    def __init__(self, client: LLMClient | None = None) -> None:
        self.client = client or LLMClient(GEN_MODEL)

        # Try to load a custom generator prompt; fall back to a default
        project_root = Path(__file__).resolve().parents[1]
        prompt_path = project_root / "prompts" / "ragbench_generator_prompt.txt"
        if prompt_path.exists():
            self.prompt_template = prompt_path.read_text(encoding="utf-8")
        else:
            logger.warning(
                "Generator prompt file %s not found. Using default template.",
                prompt_path,
            )
            self.prompt_template = (
                "You are a helpful assistant in a retrieval-augmented "
                "question-answering system.\n\n"
                "You must answer ONLY using the information in the context "
                "below. If the context does not contain the answer, say you "
                "do not know.\n\n"
                "Context:\n{documents}\n\n"
                "Question: {question}\n\n"
                "Answer:"
            )

        # Hard character limits to stay well under the ~6000-token TPM limit.
        # 6000 chars ≈ 1500 tokens (roughly), which is safe for that limit.
        self.max_docs_chars = 6000
        self.max_prompt_chars = 9000
        self.max_answer_tokens = 512

    def build_prompt(
        self,
        question: str,
        documents_sentences: List[List[Tuple[str, str]]],
    ) -> str:
        """
        Build and truncate the full LLM prompt string.
        """
        docs_block = format_docs_with_keys(documents_sentences)
        docs_block = _truncate(docs_block, self.max_docs_chars)

        prompt = self.prompt_template.format(
            documents=docs_block,
            question=question,
        )
        prompt = _truncate(prompt, self.max_prompt_chars)
        return prompt

    def generate(
        self,
        question: str,
        documents_sentences: List[List[Tuple[str, str]]],
    ) -> str:
        """
        Generate an answer for a single question + retrieved documents.

        Returns a plain text answer string. Never raises LLM errors: instead,
        returns an explanatory fallback answer if the Groq call fails
        (e.g., 413 / TPM limit).
        """
        prompt = self.build_prompt(question, documents_sentences)

        messages = [
            {
                "role": "system",
                "content": (
                    "You are a retrieval-augmented QA assistant. "
                    "Use only the given context to answer."
                ),
            },
            {"role": "user", "content": prompt},
        ]

        try:
            return self.client.chat(
                messages,
                max_tokens=self.max_answer_tokens,
            )
        except Exception as e:
            # Log the full error for debugging, but return a safe text answer
            msg = str(e)
            logger.exception("Generator LLM call failed: %s", msg)

            # Handle Groq 413 / token limit errors more clearly
            if (
                "rate_limit_exceeded" in msg
                or "tokens per minute" in msg
                or "Request too large" in msg
            ):
                return (
                    "The generator could not answer this question because the "
                    "combined question and retrieved context are too large for "
                    "the current LLM token limit. Please reduce the domain "
                    "size or the number of retrieved documents and run again."
                )

            # Generic fallback for any other LLM failure
            return (
                "The generator failed due to an internal LLM error. "
                "Short description: " + msg[:400]
            )