Update app.py
app.py CHANGED
@@ -498,7 +498,7 @@ def generate_text (prompt, chatbot, history, rag_option, model_option, openai_ap
         print("HF request.......................")
         model_kwargs = {"temperature": 0.5, "max_length": 512, "num_return_sequences": 1, "top_k": top_k, "top_p": top_p, "repetition_penalty": repetition_penalty}
         llm = HuggingFaceHub(repo_id=repo_id, model_kwargs=model_kwargs)
-
+        llm = HuggingFaceChain(model=MODEL_NAME_HF, model_kwargs={"temperature": 0.5, "max_length": 128})
         #llm = HuggingFaceHub(url_??? = "https://wdgsjd6zf201mufn.us-east-1.aws.endpoints.huggingface.cloud", model_kwargs={"temperature": 0.5, "max_length": 64})
         #llm = HuggingFaceTextGenInference(inference_server_url="http://localhost:8010/", max_new_tokens=max_new_tokens, top_k=10, top_p=top_p, typical_p=0.95, temperature=temperature, repetition_penalty=repetition_penalty,)
         #llm via HuggingChat
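Note on the new line 501: HuggingFaceChain is not a class exported by the classic langchain package (the file's other constructors, HuggingFaceHub and HuggingFaceTextGenInference, are), so this line will raise a NameError unless HuggingFaceChain is imported or defined elsewhere in app.py; as written it also overwrites the llm built on line 500, so only the second constructor takes effect. A minimal sketch of the same intent using the HuggingFaceHub wrapper the surrounding lines already use; the MODEL_NAME_HF value and the token lookup are placeholder assumptions, not values from this commit:

# Hedged sketch, not the commit's code: build the 128-token LLM with the
# HuggingFaceHub wrapper instead of the undefined HuggingFaceChain.
import os
from langchain.llms import HuggingFaceHub

MODEL_NAME_HF = "tiiuae/falcon-7b-instruct"  # placeholder repo id, an assumption

llm = HuggingFaceHub(
    repo_id=MODEL_NAME_HF,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    model_kwargs={"temperature": 0.5, "max_length": 128},
)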
@@ -522,9 +522,10 @@ def generate_text (prompt, chatbot, history, rag_option, model_option, openai_ap
     else:
         #splittet = False
         print("Calling LLM without RAG: ...........")
-
-
+        resulti = llm_chain(llm, history_text_und_prompt)
+        result = resulti.strip()
         #Alternatively with API_URL - but the model needs 93 B of space!!!
+        """
         data = {
             "inputs": prompt,
             "parameters": {"temperature": 0.2, "max_length": 64},
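The llm_chain helper called on the new line 525 is defined elsewhere in app.py and is not shown in this diff. For orientation, one plausible shape for such a helper with classic LangChain, matching the two-argument call above; the pass-through template and the function body are assumptions, not the repo's actual implementation:

# Assumed shape of the llm_chain helper; the real one lives outside this diff.
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

def llm_chain(llm, prompt_text):
    # Pass the accumulated history-plus-prompt text straight to the model.
    template = PromptTemplate(input_variables=["text"], template="{text}")
    chain = LLMChain(llm=llm, prompt=template)
    return chain.run(text=prompt_text)

With that shape, llm_chain(llm, history_text_und_prompt) returns plain text, which the next line strips to produce result.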
@@ -538,7 +539,7 @@ def generate_text (prompt, chatbot, history, rag_option, model_option, openai_ap
         print("Error:", response.text)
         result = response.json()

-
+        """
         chatbot_response = result[0]['generated_text']
         print("total token count of the answer:------------------")
         print(len(chatbot_response.split()))
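If the line added at 542 closes the triple-quoted string opened at 528 (the closing quotes are not visible in this rendering, so that is an inference), everything from data = { through result = response.json() becomes inert, and result then holds the stripped string from line 526, so result[0]['generated_text'] on line 543 would fail on the next run. For reference, a sketch of what the disabled HTTP path does when active; API_URL, the token, and the function wrapper are placeholders, not values from this commit:

# Sketch of the disabled raw Inference API path. API_URL and HF_TOKEN are
# placeholder assumptions; text-generation endpoints return a list of dicts.
import requests

API_URL = "https://api-inference.huggingface.co/models/<repo_id>"  # placeholder
HF_TOKEN = "<your token>"  # placeholder

def query_inference_api(prompt):
    data = {"inputs": prompt, "parameters": {"temperature": 0.2, "max_length": 64}}
    response = requests.post(API_URL, headers={"Authorization": "Bearer " + HF_TOKEN}, json=data)
    if response.status_code != 200:
        print("Error:", response.text)
    result = response.json()
    return result[0]["generated_text"]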
|