Update app.py
Browse files
app.py
CHANGED
|
@@ -428,7 +428,7 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
|
|
| 428 |
state.llm_model = LLM(
|
| 429 |
model=model_path,
|
| 430 |
tensor_parallel_size=tp_size,
|
| 431 |
-
gpu_memory_utilization=0.
|
| 432 |
max_model_len=15000,
|
| 433 |
)
|
| 434 |
state.llm_tokenizer = state.llm_model.get_tokenizer()
|
|
|
|
| 428 |
state.llm_model = LLM(
|
| 429 |
model=model_path,
|
| 430 |
tensor_parallel_size=tp_size,
|
| 431 |
+
gpu_memory_utilization=0.80,
|
| 432 |
max_model_len=15000,
|
| 433 |
)
|
| 434 |
state.llm_tokenizer = state.llm_model.get_tokenizer()
|