mirror of
https://github.com/zylon-ai/private-gpt.git
synced 2025-12-22 17:05:41 +01:00
Support for Nvidia TensorRT
This commit is contained in:
parent
c3fe36e070
commit
a7b18058b5
7 changed files with 141 additions and 8 deletions
|
|
@ -81,7 +81,7 @@ class DataSettings(BaseModel):
|
|||
|
||||
|
||||
class LLMSettings(BaseModel):
|
||||
mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama"]
|
||||
mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama", "tensorrt"]
|
||||
max_new_tokens: int = Field(
|
||||
256,
|
||||
description="The maximum number of token that the LLM is authorized to generate in one completion.",
|
||||
|
|
@ -120,6 +120,22 @@ class LlamaCPPSettings(BaseModel):
|
|||
)
|
||||
|
||||
|
||||
class TensorRTSettings(BaseModel):
|
||||
model_path: str
|
||||
engine_name: str
|
||||
prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
|
||||
"llama2",
|
||||
description=(
|
||||
"The prompt style to use for the chat engine. "
|
||||
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
|
||||
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
|
||||
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
|
||||
"If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
|
||||
"`llama2` is the historic behaviour. `default` might work better with your custom models."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class HuggingFaceSettings(BaseModel):
|
||||
embedding_hf_model_name: str = Field(
|
||||
description="Name of the HuggingFace model to use for embeddings"
|
||||
|
|
@ -296,6 +312,7 @@ class Settings(BaseModel):
|
|||
llm: LLMSettings
|
||||
embedding: EmbeddingSettings
|
||||
llamacpp: LlamaCPPSettings
|
||||
tensorrt: TensorRTSettings
|
||||
huggingface: HuggingFaceSettings
|
||||
sagemaker: SagemakerSettings
|
||||
openai: OpenAISettings
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue