feat(llm): adds serveral settings for llamacpp and ollama (#1703)

This commit is contained in:
icsy7867 2024-03-11 17:51:05 -04:00 committed by GitHub
parent 410bf7a71f
commit 02dc83e8e9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 91 additions and 8 deletions

View file

@ -98,6 +98,10 @@ class LLMSettings(BaseModel):
"like `HuggingFaceH4/zephyr-7b-beta`. If not set, will load a tokenizer matching "
"gpt-3.5-turbo LLM.",
)
temperature: float = Field(
0.1,
description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
)
class VectorstoreSettings(BaseModel):
@ -119,6 +123,23 @@ class LlamaCPPSettings(BaseModel):
),
)
tfs_z: float = Field(
1.0,
description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
)
top_k: int = Field(
40,
description="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)",
)
top_p: float = Field(
0.9,
description="Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)",
)
repeat_penalty: float = Field(
1.1,
description="Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
)
class HuggingFaceSettings(BaseModel):
embedding_hf_model_name: str = Field(
@ -184,6 +205,30 @@ class OllamaSettings(BaseModel):
None,
description="Model to use. Example: 'nomic-embed-text'.",
)
tfs_z: float = Field(
1.0,
description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
)
num_predict: int = Field(
None,
description="Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)",
)
top_k: int = Field(
40,
description="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)",
)
top_p: float = Field(
0.9,
description="Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)",
)
repeat_last_n: int = Field(
64,
description="Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)",
)
repeat_penalty: float = Field(
1.1,
description="Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
)
class UISettings(BaseModel):