feat: Upgrade LlamaIndex to 0.10 (#1663)

* Extract optional dependencies
* Separate local mode into llms-llama-cpp and embeddings-huggingface for clarity
* Support Ollama embeddings
* Upgrade to LlamaIndex 0.10.14. Remove legacy use of ServiceContext in ContextChatEngine
* Fix vector retriever filters
2025-12-22 20:12:55 +01:00 · 2024-03-06 17:51:30 +01:00 · 2024-03-06 17:51:30 +01:00 · 45f05711eb
commit 45f05711eb
parent 12f3a39e8a
43 changed files with 1474 additions and 1396 deletions
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):


 class LLMSettings(BaseModel):
-    mode: Literal["local", "openai", "openailike", "sagemaker", "mock", "ollama"]
+    mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama"]
    max_new_tokens: int = Field(
        256,
        description="The maximum number of token that the LLM is authorized to generate in one completion.",
@@ -104,12 +104,9 @@ class VectorstoreSettings(BaseModel):
    database: Literal["chroma", "qdrant", "pgvector"]


-class LocalSettings(BaseModel):
+class LlamaCPPSettings(BaseModel):
    llm_hf_repo_id: str
    llm_hf_model_file: str
-    embedding_hf_model_name: str = Field(
-        description="Name of the HuggingFace model to use for embeddings"
-    )
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
        "llama2",
        description=(
@@ -123,8 +120,14 @@ class LocalSettings(BaseModel):
    )


+class HuggingFaceSettings(BaseModel):
+    embedding_hf_model_name: str = Field(
+        description="Name of the HuggingFace model to use for embeddings"
+    )
+
+
 class EmbeddingSettings(BaseModel):
-    mode: Literal["local", "openai", "sagemaker", "mock"]
+    mode: Literal["huggingface", "openai", "sagemaker", "ollama", "mock"]
    ingest_mode: Literal["simple", "batch", "parallel"] = Field(
        "simple",
        description=(
@@ -173,10 +176,14 @@ class OllamaSettings(BaseModel):
        "http://localhost:11434",
        description="Base URL of Ollama API. Example: 'https://localhost:11434'.",
    )
-    model: str = Field(
+    llm_model: str = Field(
        None,
        description="Model to use. Example: 'llama2-uncensored'.",
    )
+    embedding_model: str = Field(
+        None,
+        description="Model to use. Example: 'nomic-embed-text'.",
+    )


 class UISettings(BaseModel):
@@ -292,7 +299,8 @@ class Settings(BaseModel):
    ui: UISettings
    llm: LLMSettings
    embedding: EmbeddingSettings
-    local: LocalSettings
+    llamacpp: LlamaCPPSettings
+    huggingface: HuggingFaceSettings
    sagemaker: SagemakerSettings
    openai: OpenAISettings
    ollama: OllamaSettings