"""Application-wide constants and configuration."""

import os
from typing import Final, List


# Model configuration
MODEL_NAME: Final[str] = "DragonLLM/Qwen-Open-Finance-R-8B"

# Cache directory - respect HF_HOME if set, otherwise use default
CACHE_DIR: Final[str] = os.getenv("HF_HOME", "/tmp/huggingface")

# Hugging Face token environment variable priority order
HF_TOKEN_VARS: Final[List[str]] = [
    "HF_TOKEN_LC2",
    "HF_TOKEN_LC",
    "HF_TOKEN",
    "HUGGING_FACE_HUB_TOKEN"
]
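
# Illustrative sketch (the helper name below is not part of the original
# module): the variables above form a priority order, so a caller would
# typically take the first one that is actually set in the environment.
def resolve_hf_token():
    """Return the first Hugging Face token found in HF_TOKEN_VARS, or None."""
    for var in HF_TOKEN_VARS:
        token = os.getenv(var)
        if token:
            return token
    return None
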

# French language detection patterns
FRENCH_PHRASES: Final[List[str]] = [
    "en français",
    "répondez en français",
    "réponse française",
    "répondez uniquement en français",
    "expliquez en français",
]

FRENCH_CHARS: Final[List[str]] = [
    "é", "è", "ê", "à", "ç", "ù", "ô", "î", "â", "û", "ë", "ï"
]

FRENCH_PATTERNS: Final[List[str]] = [
    "qu'est-ce",
    "qu'est",
    "expliquez",
    "comment",
    "pourquoi",
    "combien",
    "quel",
    "quelle",
    "quels",
    "quelles",
    "où",
    "quand",
    "définissez",
]

FRENCH_SYSTEM_PROMPT: Final[str] = (
    "Vous êtes un assistant financier expert. "
    "Répondez TOUJOURS en français. "
    "Soyez concis et précis dans vos explications. "
    "Fournissez des réponses claires et complètes sans développements excessifs."
)
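
# Illustrative sketch (hypothetical helper, not defined in the original
# module): one way the phrase, character, and question-word lists above
# could be combined into a lightweight French-detection heuristic. When it
# fires, FRENCH_SYSTEM_PROMPT would be used as the system message.
def looks_french(text):
    """Heuristically guess whether a prompt is written in French."""
    lowered = text.lower()
    # Explicit requests such as "répondez en français" are the strongest signal.
    if any(phrase in lowered for phrase in FRENCH_PHRASES):
        return True
    # Otherwise require both accented characters and a common French question word.
    tokens = lowered.split()
    has_accents = any(char in lowered for char in FRENCH_CHARS)
    has_keywords = any(word in tokens for word in FRENCH_PATTERNS)
    return has_accents and has_keywords
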

# Qwen3 EOS tokens
EOS_TOKENS: Final[List[int]] = [151645, 151643]  # [<|im_end|>, <|endoftext|>]
PAD_TOKEN_ID: Final[int] = 151643  # <|endoftext|>

# Generation defaults
DEFAULT_MAX_TOKENS: Final[int] = 1000  # Increased for complete answers with concise reasoning
DEFAULT_TEMPERATURE: Final[float] = 0.7
DEFAULT_TOP_P: Final[float] = 1.0
DEFAULT_TOP_K: Final[int] = 20
REPETITION_PENALTY: Final[float] = 1.05
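
# Illustrative sketch (argument names assume a Hugging Face
# transformers-style ``model.generate`` call; the calling code is not part
# of this file): how the EOS/pad token IDs and generation defaults above
# might be bundled into keyword arguments.
def default_generation_kwargs():
    """Return the generation defaults above as ``generate``-style kwargs."""
    return {
        "max_new_tokens": DEFAULT_MAX_TOKENS,
        "temperature": DEFAULT_TEMPERATURE,
        "top_p": DEFAULT_TOP_P,
        "top_k": DEFAULT_TOP_K,
        "repetition_penalty": REPETITION_PENALTY,
        "eos_token_id": EOS_TOKENS,
        "pad_token_id": PAD_TOKEN_ID,
    }
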

# Model initialization constants
MODEL_INIT_TIMEOUT_SECONDS: Final[int] = 300  # 5-minute timeout for model initialization
MODEL_INIT_WAIT_INTERVAL_SECONDS: Final[int] = 1  # Poll interval while waiting for initialization
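
# Illustrative sketch (the readiness callback is hypothetical): the timeout
# and poll-interval constants above suggest a simple wait loop used while
# the model finishes loading in the background.
def wait_for_model_ready(is_ready):
    """Poll ``is_ready()`` until it returns True or the timeout elapses."""
    import time  # local import keeps this sketch self-contained

    deadline = time.monotonic() + MODEL_INIT_TIMEOUT_SECONDS
    while time.monotonic() < deadline:
        if is_ready():
            return True
        time.sleep(MODEL_INIT_WAIT_INTERVAL_SECONDS)
    return False
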

# Rate limiting constants (for demo/single user)
RATE_LIMIT_REQUESTS_PER_MINUTE: Final[int] = 30  # Generous allowance for a single demo user
RATE_LIMIT_REQUESTS_PER_HOUR: Final[int] = 500
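
# Illustrative sketch (hypothetical in-memory limiter, single process only):
# one way the per-minute and per-hour limits above could be enforced with a
# simple timestamp window.
class _RequestWindow:
    """Track request timestamps and enforce the configured rate limits."""

    def __init__(self):
        self._timestamps = []

    def allow(self, now):
        """Record a request at time ``now`` (seconds) if both limits permit it."""
        # Keep only the last hour of history, the longest window we check.
        self._timestamps = [t for t in self._timestamps if now - t < 3600]
        in_last_minute = sum(1 for t in self._timestamps if now - t < 60)
        if in_last_minute >= RATE_LIMIT_REQUESTS_PER_MINUTE:
            return False
        if len(self._timestamps) >= RATE_LIMIT_REQUESTS_PER_HOUR:
            return False
        self._timestamps.append(now)
        return True
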

# Confidence calculation constants
MIN_ANSWER_LENGTH_FOR_HIGH_CONFIDENCE: Final[int] = 50  # Minimum answer length for a high confidence score