Spaces:
Sleeping
Sleeping
| import logging | |
| from typing import Union, Optional | |
| from fastapi import FastAPI | |
| from llama_cpp import Llama | |
| app = FastAPI() | |
| CHAT_TEMPLATE = '<|system|> {system_prompt}<|end|><|user|> {prompt}<|end|><|assistant|>'.strip() | |
| SYSTEM_PROMPT = '' | |
| logging.basicConfig( | |
| format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', | |
| level=logging.INFO | |
| ) | |
| logger = logging.getLogger(__name__) | |
| logger.info("Запускаемся... 🥳🥳🥳") | |
| REPO_ID = "Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF" | |
| FILE_NAME = "Vikhr-Qwen-2.5-1.5b-Instruct-Q8_0.gguf", | |
| # Инициализация модели | |
| try: | |
| logger.info(f"Загрузка модели {FILE_NAME}...") | |
| # загрузка модели для локального хранилища | |
| # llm = Llama( | |
| # model_path=f"./models/{model_name}.gguf", | |
| # verbose=False, | |
| # n_gpu_layers=-1, | |
| # n_ctx=1512, | |
| # temperature=0.3, | |
| # num_return_sequences=1, | |
| # no_repeat_ngram_size=2, | |
| # top_k=50, | |
| # top_p=0.95, | |
| # ) | |
| # if not llm: | |
| LLM = Llama.from_pretrained( | |
| repo_id=REPO_ID, | |
| filename='Vikhr-Qwen-2.5-1.5b-Instruct-Q8_0.gguf', | |
| n_gpu_layers=-1, | |
| n_threads=2, | |
| n_ctx=4096, | |
| temperature=0.3, | |
| num_return_sequences=1, | |
| no_repeat_ngram_size=2, | |
| top_k=50, | |
| top_p=0.95, | |
| ) | |
| except Exception as e: | |
| logger.error(f"Ошибка загрузки модели: {str(e)}") | |
| raise | |
| # составление промта для модели | |
| def create_prompt(text: str) -> Union[str, None]: | |
| try: | |
| user_input = text | |
| logger.info(f"Получено сообщение: {user_input}") | |
| # Генерация шаблона | |
| return CHAT_TEMPLATE.format( | |
| system_prompt=SYSTEM_PROMPT or 'Ответ должен быть точным, кратким и с юмором.', | |
| prompt=user_input, | |
| ) | |
| except Exception as e: | |
| logger.error(e) | |
| def generate_response(prompt: str) -> Optional[str]: | |
| try: | |
| # Обработка текстового сообщения | |
| output = LLM( | |
| prompt, | |
| max_tokens=64, | |
| stop=["<|end|>"], | |
| ) | |
| logger.info('Output:') | |
| logger.info(output) | |
| response = output['choices'][0]['text'] | |
| # Отправка ответа | |
| if response: | |
| return response | |
| return 'Произошла ошибка при обработке запроса' | |
| except Exception as e: | |
| logger.error(f"Ошибка обработки сообщения: {str(e)}") | |
| def greet_json(): | |
| return {"Hello": "World!"} | |
| async def set_system_prompt(text: str): | |
| logger.info('post/system-prompt') | |
| global SYSTEM_PROMPT | |
| SYSTEM_PROMPT = text | |
| async def predict(text: str): | |
| # Генерация ответа с помощью модели | |
| logger.info('post/predict') | |
| prompt = create_prompt(text) | |
| response = generate_response(prompt) | |
| return {"response": response} |