Mirror of https://github.com/zylon-ai/private-gpt.git (synced 2025-12-22 07:40:12 +01:00)
feat: update llama-index + dependencies (#2092)
Some checks failed
release-please / release-please (push) Has been cancelled
tests / setup (push) Has been cancelled
tests / ${{ matrix.quality-command }} (black) (push) Has been cancelled
tests / ${{ matrix.quality-command }} (mypy) (push) Has been cancelled
tests / ${{ matrix.quality-command }} (ruff) (push) Has been cancelled
tests / test (push) Has been cancelled
tests / all_checks_passed (push) Has been cancelled
* chore: update libraries
* fix: mypy
* chore: more updates
* fix: mypy/black
* chore: fix docker warnings
* fix: mypy
* fix: black
This commit is contained in:
parent 5fbb402477
commit 5851b02378

16 changed files with 2773 additions and 2420 deletions. Selected hunks from the source changes:
@@ -403,7 +403,7 @@ class PipelineIngestComponent(BaseIngestComponentWithIndex):
                     self.transformations,
                     show_progress=self.show_progress,
                 )
-                self.node_q.put(("process", file_name, documents, nodes))
+                self.node_q.put(("process", file_name, documents, list(nodes)))
             finally:
                 self.doc_semaphore.release()
                 self.doc_q.task_done()  # unblock Q joins

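The only functional change here is wrapping `nodes` in `list(...)` before the tuple is pushed onto the worker queue, presumably because the node-producing call whose trailing arguments appear as context above is now typed as returning a `Sequence[BaseNode]` rather than a plain list in the updated llama-index. A minimal sketch of the idea, with a hypothetical `make_nodes` producer standing in for the real transformation step:

# Sketch only: normalize a Sequence to a concrete list before queueing it.
from collections.abc import Sequence
from queue import Queue


def make_nodes() -> Sequence[str]:
    # Hypothetical producer; it may return any Sequence type, not just list.
    return tuple(f"node-{i}" for i in range(3))


node_q: Queue[tuple[str, str, list[str]]] = Queue()
node_q.put(("process", "file.txt", list(make_nodes())))
print(node_q.get())  # ('process', 'file.txt', ['node-0', 'node-1', 'node-2'])
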
@@ -120,7 +120,6 @@ class LLMComponent:
                 api_version="",
                 temperature=settings.llm.temperature,
                 context_window=settings.llm.context_window,
-                max_new_tokens=settings.llm.max_new_tokens,
                 messages_to_prompt=prompt_style.messages_to_prompt,
                 completion_to_prompt=prompt_style.completion_to_prompt,
                 tokenizer=settings.llm.tokenizer,

@@ -184,10 +183,10 @@ class LLMComponent:

                 return wrapper

-            Ollama.chat = add_keep_alive(Ollama.chat)
-            Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
-            Ollama.complete = add_keep_alive(Ollama.complete)
-            Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
+            Ollama.chat = add_keep_alive(Ollama.chat)  # type: ignore
+            Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)  # type: ignore
+            Ollama.complete = add_keep_alive(Ollama.complete)  # type: ignore
+            Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)  # type: ignore

             self.llm = llm

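This hunk only appends `# type: ignore` to the existing monkey-patches: after the dependency bump, mypy objects to assigning a plain wrapper function over the library's typed methods. The body of `add_keep_alive` sits outside the hunk; a minimal sketch of the pattern its name and the surrounding `return wrapper` suggest, using a stand-in class rather than the real llama-index Ollama and a hard-coded keep-alive value:

# Sketch of the monkey-patch pattern: wrap selected methods of a class so every
# call carries an extra keyword argument. `FakeOllama` and KEEP_ALIVE are
# stand-ins, not private-gpt or llama-index code.
from collections.abc import Callable
from typing import Any

KEEP_ALIVE = "5m"  # in private-gpt this would come from the Ollama settings


class FakeOllama:
    def chat(self, message: str, **kwargs: Any) -> str:
        return f"chat({message}, keep_alive={kwargs.get('keep_alive')})"


def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]:
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        kwargs.setdefault("keep_alive", KEEP_ALIVE)
        return func(*args, **kwargs)

    return wrapper


# mypy dislikes reassigning a method with a differently-typed callable,
# hence the `# type: ignore` comments in the hunk above.
FakeOllama.chat = add_keep_alive(FakeOllama.chat)  # type: ignore[method-assign]

print(FakeOllama().chat("hi"))  # chat(hi, keep_alive=5m)
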
@@ -40,7 +40,8 @@ class AbstractPromptStyle(abc.ABC):
         logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
         return prompt

-    def completion_to_prompt(self, completion: str) -> str:
+    def completion_to_prompt(self, prompt: str) -> str:
+        completion = prompt  # Fix: Llama-index parameter has to be named as prompt
         prompt = self._completion_to_prompt(completion)
         logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
         return prompt

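The signature change is driven by how the updated llama-index invokes this hook: the diff's own comment notes the parameter has to be named `prompt`, which implies a keyword call such as `completion_to_prompt(prompt=...)`. Renaming the parameter and immediately aliasing it back to `completion` keeps the method body untouched. A tiny standalone illustration of the failure mode (plain functions, not the real prompt-style classes):

# Plain functions showing why the parameter name matters for a keyword call.
def old_completion_to_prompt(completion: str) -> str:
    return f"<s>{completion}</s>"


def new_completion_to_prompt(prompt: str) -> str:
    completion = prompt  # keep the old local name so the body stays unchanged
    return f"<s>{completion}</s>"


try:
    old_completion_to_prompt(prompt="hello")  # keyword call, as described above
except TypeError as exc:
    print("old signature rejected:", exc)

print(new_completion_to_prompt(prompt="hello"))  # <s>hello</s>
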
@@ -285,8 +286,9 @@ class ChatMLPromptStyle(AbstractPromptStyle):


 def get_prompt_style(
-    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"]
-    | None
+    prompt_style: (
+        Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
+    )
 ) -> AbstractPromptStyle:
     """Get the prompt style to use from the given string.

@@ -38,10 +38,10 @@ class NodeStoreComponent:

             case "postgres":
                 try:
-                    from llama_index.core.storage.docstore.postgres_docstore import (
+                    from llama_index.storage.docstore.postgres import (  # type: ignore
                         PostgresDocumentStore,
                     )
-                    from llama_index.core.storage.index_store.postgres_index_store import (
+                    from llama_index.storage.index_store.postgres import (  # type: ignore
                         PostgresIndexStore,
                     )
                 except ImportError:

@@ -55,6 +55,7 @@ class NodeStoreComponent:
                 self.index_store = PostgresIndexStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
+
                 self.doc_store = PostgresDocumentStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )

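Both Postgres stores now come from the split integration packages instead of llama_index.core, while the `from_params(**settings.postgres.model_dump(exclude_none=True))` construction stays the same. A hedged usage sketch, assuming the llama-index-storage-docstore-postgres and llama-index-storage-index-store-postgres packages are installed; the connection values below are placeholders, not private-gpt defaults:

# Sketch only: constructing the Postgres-backed stores from keyword parameters.
# The import paths and from_params() come from the llama-index integrations;
# the connection values here are placeholders.
from llama_index.storage.docstore.postgres import PostgresDocumentStore
from llama_index.storage.index_store.postgres import PostgresIndexStore

pg_params = {
    "host": "localhost",
    "port": "5432",
    "database": "postgres",
    "user": "postgres",
    "password": "secret",
    "schema_name": "private_gpt",
}

doc_store = PostgresDocumentStore.from_params(**pg_params)
index_store = PostgresIndexStore.from_params(**pg_params)
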
@@ -1,14 +1,17 @@
-from collections.abc import Generator
-from typing import Any
+from collections.abc import Generator, Sequence
+from typing import TYPE_CHECKING, Any

 from llama_index.core.schema import BaseNode, MetadataMode
 from llama_index.core.vector_stores.utils import node_to_metadata_dict
 from llama_index.vector_stores.chroma import ChromaVectorStore  # type: ignore

+if TYPE_CHECKING:
+    from collections.abc import Mapping
+

 def chunk_list(
-    lst: list[BaseNode], max_chunk_size: int
-) -> Generator[list[BaseNode], None, None]:
+    lst: Sequence[BaseNode], max_chunk_size: int
+) -> Generator[Sequence[BaseNode], None, None]:
     """Yield successive max_chunk_size-sized chunks from lst.

     Args:

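chunk_list now accepts and yields Sequence[BaseNode] instead of list[BaseNode], matching what the surrounding llama-index APIs are typed to provide; the behaviour is unchanged. A quick usage sketch with plain strings standing in for BaseNode objects (the function body below is an assumed implementation matching the docstring, not a verbatim copy):

# Usage sketch of the chunk_list generator (strings stand in for BaseNode).
from collections.abc import Generator, Sequence


def chunk_list(
    lst: Sequence[str], max_chunk_size: int
) -> Generator[Sequence[str], None, None]:
    """Yield successive max_chunk_size-sized chunks from lst."""
    for i in range(0, len(lst), max_chunk_size):
        yield lst[i : i + max_chunk_size]


nodes = [f"node-{i}" for i in range(5)]
print([list(chunk) for chunk in chunk_list(nodes, max_chunk_size=2)])
# [['node-0', 'node-1'], ['node-2', 'node-3'], ['node-4']]
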
@@ -60,7 +63,7 @@ class BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore
         )
         self.chroma_client = chroma_client

-    def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
+    def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
         """Add nodes to index, batching the insertion to avoid issues.

         Args:

@@ -78,8 +81,8 @@ class BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore

         all_ids = []
         for node_chunk in node_chunks:
-            embeddings = []
-            metadatas = []
+            embeddings: list[Sequence[float]] = []
+            metadatas: list[Mapping[str, Any]] = []
             ids = []
             documents = []
             for node in node_chunk:

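Annotating the empty accumulators is a mypy fix rather than a behaviour change: the broader Sequence/Mapping element types accept whatever concrete containers the per-node calls happen to return. A small sketch of why the explicit annotation helps; the example values are hypothetical stand-ins for what is produced while iterating llama-index nodes:

# Sketch: why the empty accumulators get explicit element types under mypy.
from collections.abc import Mapping, Sequence
from typing import Any

first_embedding: list[float] = [0.1, 0.2]
second_embedding: tuple[float, ...] = (0.3, 0.4)

# With a bare `embeddings = []`, mypy would infer list[list[float]] from the
# first append and then reject the tuple. Annotating with the broader protocol
# type accepts both concrete containers.
embeddings: list[Sequence[float]] = []
embeddings.append(first_embedding)
embeddings.append(second_embedding)

metadatas: list[Mapping[str, Any]] = []
metadatas.append({"doc_id": "a", "length": 2})

print(len(embeddings), len(metadatas))
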
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import TYPE_CHECKING

 from injector import inject, singleton
 from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine

@@ -26,6 +27,9 @@ from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chunks.chunks_service import Chunk
 from private_gpt.settings.settings import Settings

+if TYPE_CHECKING:
+    from llama_index.core.postprocessor.types import BaseNodePostprocessor
+

 class Completion(BaseModel):
     response: str

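The new TYPE_CHECKING block makes BaseNodePostprocessor visible to mypy for the annotation added further down without importing it at runtime, which keeps startup imports lean. The general pattern, with Decimal as an arbitrary stand-in for a type used only in annotations:

# General pattern: import a name for type checking only, then use it in
# annotations that are never evaluated at runtime.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by mypy and IDEs; never imported at runtime.
    from decimal import Decimal  # stand-in for BaseNodePostprocessor


def build() -> list:
    # Local variable annotations are not evaluated at runtime (PEP 526),
    # so the TYPE_CHECKING-only name can be used here without quoting it.
    items: list[Decimal] = []
    return items


print(build())  # []
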
@@ -114,12 +118,15 @@ class ChatService:
                 context_filter=context_filter,
                 similarity_top_k=self.settings.rag.similarity_top_k,
             )
-            node_postprocessors = [
+            node_postprocessors: list[BaseNodePostprocessor] = [
                 MetadataReplacementPostProcessor(target_metadata_key="window"),
-                SimilarityPostprocessor(
-                    similarity_cutoff=settings.rag.similarity_value
-                ),
             ]
+            if settings.rag.similarity_value:
+                node_postprocessors.append(
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    )
+                )

             if settings.rag.rerank.enabled:
                 rerank_postprocessor = SentenceTransformerRerank(

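Two things change here: the similarity-cutoff postprocessor is now only attached when settings.rag.similarity_value is configured, and the list gets an explicit list[BaseNodePostprocessor] annotation so that appending a differently-typed postprocessor still type-checks. Without the annotation, mypy infers the element type from the single literal entry and rejects the later append. A condensed illustration with plain stand-in classes instead of the llama-index postprocessors:

# Annotate a list with a common base class so differently-typed elements can
# be appended later. Plain classes stand in for the llama-index postprocessors.
class BasePostprocessor:
    pass


class MetadataReplacement(BasePostprocessor):
    pass


class SimilarityCutoff(BasePostprocessor):
    def __init__(self, cutoff: float) -> None:
        self.cutoff = cutoff


similarity_value: float | None = 0.45  # placeholder for settings.rag.similarity_value

# Annotated with the base class, so the conditional append below type-checks.
postprocessors: list[BasePostprocessor] = [MetadataReplacement()]
if similarity_value:
    postprocessors.append(SimilarityCutoff(cutoff=similarity_value))

print([type(p).__name__ for p in postprocessors])
# ['MetadataReplacement', 'SimilarityCutoff']
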
@@ -90,9 +90,9 @@ class SummarizeService:
         # Add context documents to summarize
         if use_context:
             # 1. Recover all ref docs
-            ref_docs: dict[
-                str, RefDocInfo
-            ] | None = self.storage_context.docstore.get_all_ref_doc_info()
+            ref_docs: dict[str, RefDocInfo] | None = (
+                self.storage_context.docstore.get_all_ref_doc_info()
+            )
             if ref_docs is None:
                 raise ValueError("No documents have been ingested yet.")

@@ -136,19 +136,19 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
-    prompt_style: Literal[
-        "default", "llama2", "llama3", "tag", "mistral", "chatml"
-    ] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `llama3` - use the llama3 prompt style from the llama_index."
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
+        Field(
+            "llama2",
+            description=(
+                "The prompt style to use for the chat engine. "
+                "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+                "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+                "If `llama3` - use the llama3 prompt style from the llama_index."
+                "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
+                "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
+                "`llama2` is the historic behaviour. `default` might work better with your custom models."
+            ),
+        )
     )

@@ -1,4 +1,5 @@
 """This file should be imported if and only if you want to run the UI locally."""
+
 import base64
 import logging
 import time