Support for Nvidia TensorRT

This commit is contained in:
imartinez 2024-02-29 19:41:58 +01:00
parent c3fe36e070
commit a7b18058b5
7 changed files with 141 additions and 8 deletions

View file

@@ -81,7 +81,7 @@ class DataSettings(BaseModel):
class LLMSettings(BaseModel):
mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama"]
mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama", "tensorrt"]
max_new_tokens: int = Field(
256,
description="The maximum number of token that the LLM is authorized to generate in one completion.",
@@ -120,6 +120,22 @@ class LlamaCPPSettings(BaseModel):
)
class TensorRTSettings(BaseModel):
    """Settings for the Nvidia TensorRT-LLM backend (``llm.mode == "tensorrt"``)."""

    # Filesystem path to the directory holding the compiled TensorRT model.
    model_path: str
    # Name of the serialized engine file to load from `model_path`.
    engine_name: str
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
        "llama2",
        description=(
            "The prompt style to use for the chat engine. "
            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
            # Fixed: closed the backtick around `mistral`, corrected "shoudl",
            # and added the missing "\n" separator so this line does not run
            # into the next sentence when the fragments are concatenated.
            "If `mistral` - use the `mistral` prompt style. It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]\n"
            # `chatml` is an accepted Literal value but was undocumented; add it.
            "If `chatml` - use the `chatml` prompt style.\n"
            "`llama2` is the historic behaviour. `default` might work better with your custom models."
        ),
    )
class HuggingFaceSettings(BaseModel):
embedding_hf_model_name: str = Field(
description="Name of the HuggingFace model to use for embeddings"
@@ -296,6 +312,7 @@ class Settings(BaseModel):
llm: LLMSettings
embedding: EmbeddingSettings
llamacpp: LlamaCPPSettings
tensorrt: TensorRTSettings
huggingface: HuggingFaceSettings
sagemaker: SagemakerSettings
openai: OpenAISettings