Mirror of https://github.com/zylon-ai/private-gpt.git (synced 2025-12-22 07:40:12 +01:00)
feat: update llama-index + dependencies (#2092)
Some checks failed
release-please / release-please (push) Has been cancelled
tests / setup (push) Has been cancelled
tests / ${{ matrix.quality-command }} (black) (push) Has been cancelled
tests / ${{ matrix.quality-command }} (mypy) (push) Has been cancelled
tests / ${{ matrix.quality-command }} (ruff) (push) Has been cancelled
tests / test (push) Has been cancelled
tests / all_checks_passed (push) Has been cancelled
* chore: update libraries
* fix: mypy
* chore: more updates
* fix: mypy/black
* chore: fix docker warnings
* fix: mypy
* fix: black
This commit is contained in:
parent 5fbb402477
commit 5851b02378

16 changed files with 2773 additions and 2420 deletions. Selected hunks from the source changes:
@@ -403,7 +403,7 @@ class PipelineIngestComponent(BaseIngestComponentWithIndex):
                     self.transformations,
                     show_progress=self.show_progress,
                 )
-                self.node_q.put(("process", file_name, documents, nodes))
+                self.node_q.put(("process", file_name, documents, list(nodes)))
             finally:
                 self.doc_semaphore.release()
                 self.doc_q.task_done()  # unblock Q joins

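The only functional change here is wrapping `nodes` in `list(...)` before the tuple is pushed onto the worker queue, presumably because the node-producing call whose trailing arguments appear as context above is now typed as returning a `Sequence[BaseNode]` rather than a plain list in the updated llama-index. A minimal sketch of the idea, with a hypothetical `make_nodes` producer standing in for the real transformation step:

# Sketch only: normalize a Sequence to a concrete list before queueing it.
from collections.abc import Sequence
from queue import Queue


def make_nodes() -> Sequence[str]:
    # Hypothetical producer; it may return any Sequence type, not just list.
    return tuple(f"node-{i}" for i in range(3))


node_q: Queue[tuple[str, str, list[str]]] = Queue()
node_q.put(("process", "file.txt", list(make_nodes())))
print(node_q.get())  # ('process', 'file.txt', ['node-0', 'node-1', 'node-2'])
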
@@ -120,7 +120,6 @@ class LLMComponent:
                 api_version="",
                 temperature=settings.llm.temperature,
                 context_window=settings.llm.context_window,
-                max_new_tokens=settings.llm.max_new_tokens,
                 messages_to_prompt=prompt_style.messages_to_prompt,
                 completion_to_prompt=prompt_style.completion_to_prompt,
                 tokenizer=settings.llm.tokenizer,

@@ -184,10 +183,10 @@ class LLMComponent:

                 return wrapper

-            Ollama.chat = add_keep_alive(Ollama.chat)
-            Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
-            Ollama.complete = add_keep_alive(Ollama.complete)
-            Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
+            Ollama.chat = add_keep_alive(Ollama.chat)  # type: ignore
+            Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)  # type: ignore
+            Ollama.complete = add_keep_alive(Ollama.complete)  # type: ignore
+            Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)  # type: ignore

             self.llm = llm

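This hunk only appends `# type: ignore` to the existing monkey-patches: after the dependency bump, mypy objects to assigning a plain wrapper function over the library's typed methods. The body of `add_keep_alive` sits outside the hunk; a minimal sketch of the pattern its name and the surrounding `return wrapper` suggest, using a stand-in class rather than the real llama-index Ollama and a hard-coded keep-alive value:

# Sketch of the monkey-patch pattern: wrap selected methods of a class so every
# call carries an extra keyword argument. `FakeOllama` and KEEP_ALIVE are
# stand-ins, not private-gpt or llama-index code.
from collections.abc import Callable
from typing import Any

KEEP_ALIVE = "5m"  # in private-gpt this would come from the Ollama settings


class FakeOllama:
    def chat(self, message: str, **kwargs: Any) -> str:
        return f"chat({message}, keep_alive={kwargs.get('keep_alive')})"


def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]:
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        kwargs.setdefault("keep_alive", KEEP_ALIVE)
        return func(*args, **kwargs)

    return wrapper


# mypy dislikes reassigning a method with a differently-typed callable,
# hence the `# type: ignore` comments in the hunk above.
FakeOllama.chat = add_keep_alive(FakeOllama.chat)  # type: ignore[method-assign]

print(FakeOllama().chat("hi"))  # chat(hi, keep_alive=5m)
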
@@ -40,7 +40,8 @@ class AbstractPromptStyle(abc.ABC):
         logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
         return prompt

-    def completion_to_prompt(self, completion: str) -> str:
+    def completion_to_prompt(self, prompt: str) -> str:
+        completion = prompt  # Fix: Llama-index parameter has to be named as prompt
         prompt = self._completion_to_prompt(completion)
         logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
         return prompt

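The signature change is driven by how the updated llama-index invokes this hook: the diff's own comment notes the parameter has to be named `prompt`, which implies a keyword call such as `completion_to_prompt(prompt=...)`. Renaming the parameter and immediately aliasing it back to `completion` keeps the method body untouched. A tiny standalone illustration of the failure mode (plain functions, not the real prompt-style classes):

# Plain functions showing why the parameter name matters for a keyword call.
def old_completion_to_prompt(completion: str) -> str:
    return f"<s>{completion}</s>"


def new_completion_to_prompt(prompt: str) -> str:
    completion = prompt  # keep the old local name so the body stays unchanged
    return f"<s>{completion}</s>"


try:
    old_completion_to_prompt(prompt="hello")  # keyword call, as described above
except TypeError as exc:
    print("old signature rejected:", exc)

print(new_completion_to_prompt(prompt="hello"))  # <s>hello</s>
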
@@ -285,8 +286,9 @@ class ChatMLPromptStyle(AbstractPromptStyle):


 def get_prompt_style(
-    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"]
-    | None
+    prompt_style: (
+        Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
+    )
 ) -> AbstractPromptStyle:
     """Get the prompt style to use from the given string.

@@ -38,10 +38,10 @@ class NodeStoreComponent:

             case "postgres":
                 try:
-                    from llama_index.core.storage.docstore.postgres_docstore import (
+                    from llama_index.storage.docstore.postgres import (  # type: ignore
                         PostgresDocumentStore,
                     )
-                    from llama_index.core.storage.index_store.postgres_index_store import (
+                    from llama_index.storage.index_store.postgres import (  # type: ignore
                         PostgresIndexStore,
                     )
                 except ImportError:

@@ -55,6 +55,7 @@ class NodeStoreComponent:
                 self.index_store = PostgresIndexStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
+
                 self.doc_store = PostgresDocumentStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )

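Both Postgres stores now come from the split integration packages instead of llama_index.core, while the `from_params(**settings.postgres.model_dump(exclude_none=True))` construction stays the same. A hedged usage sketch, assuming the llama-index-storage-docstore-postgres and llama-index-storage-index-store-postgres packages are installed; the connection values below are placeholders, not private-gpt defaults:

# Sketch only: constructing the Postgres-backed stores from keyword parameters.
# The import paths and from_params() come from the llama-index integrations;
# the connection values here are placeholders.
from llama_index.storage.docstore.postgres import PostgresDocumentStore
from llama_index.storage.index_store.postgres import PostgresIndexStore

pg_params = {
    "host": "localhost",
    "port": "5432",
    "database": "postgres",
    "user": "postgres",
    "password": "secret",
    "schema_name": "private_gpt",
}

doc_store = PostgresDocumentStore.from_params(**pg_params)
index_store = PostgresIndexStore.from_params(**pg_params)
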
@@ -1,14 +1,17 @@
-from collections.abc import Generator
-from typing import Any
+from collections.abc import Generator, Sequence
+from typing import TYPE_CHECKING, Any

 from llama_index.core.schema import BaseNode, MetadataMode
 from llama_index.core.vector_stores.utils import node_to_metadata_dict
 from llama_index.vector_stores.chroma import ChromaVectorStore  # type: ignore

+if TYPE_CHECKING:
+    from collections.abc import Mapping
+

 def chunk_list(
-    lst: list[BaseNode], max_chunk_size: int
-) -> Generator[list[BaseNode], None, None]:
+    lst: Sequence[BaseNode], max_chunk_size: int
+) -> Generator[Sequence[BaseNode], None, None]:
     """Yield successive max_chunk_size-sized chunks from lst.

     Args:

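chunk_list now accepts and yields Sequence[BaseNode] instead of list[BaseNode], matching what the surrounding llama-index APIs are typed to provide; the behaviour is unchanged. A quick usage sketch with plain strings standing in for BaseNode objects (the function body below is an assumed implementation matching the docstring, not a verbatim copy):

# Usage sketch of the chunk_list generator (strings stand in for BaseNode).
from collections.abc import Generator, Sequence


def chunk_list(
    lst: Sequence[str], max_chunk_size: int
) -> Generator[Sequence[str], None, None]:
    """Yield successive max_chunk_size-sized chunks from lst."""
    for i in range(0, len(lst), max_chunk_size):
        yield lst[i : i + max_chunk_size]


nodes = [f"node-{i}" for i in range(5)]
print([list(chunk) for chunk in chunk_list(nodes, max_chunk_size=2)])
# [['node-0', 'node-1'], ['node-2', 'node-3'], ['node-4']]
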
@@ -60,7 +63,7 @@ class BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore
         )
         self.chroma_client = chroma_client

-    def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
+    def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
         """Add nodes to index, batching the insertion to avoid issues.

         Args:

@@ -78,8 +81,8 @@ class BatchedChromaVectorStore(ChromaVectorStore):  # type: ignore

         all_ids = []
         for node_chunk in node_chunks:
-            embeddings = []
-            metadatas = []
+            embeddings: list[Sequence[float]] = []
+            metadatas: list[Mapping[str, Any]] = []
             ids = []
             documents = []
             for node in node_chunk:

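Annotating the empty accumulators is a mypy fix rather than a behaviour change: the broader Sequence/Mapping element types accept whatever concrete containers the per-node calls happen to return. A small sketch of why the explicit annotation helps; the example values are hypothetical stand-ins for what is produced while iterating llama-index nodes:

# Sketch: why the empty accumulators get explicit element types under mypy.
from collections.abc import Mapping, Sequence
from typing import Any

first_embedding: list[float] = [0.1, 0.2]
second_embedding: tuple[float, ...] = (0.3, 0.4)

# With a bare `embeddings = []`, mypy would infer list[list[float]] from the
# first append and then reject the tuple. Annotating with the broader protocol
# type accepts both concrete containers.
embeddings: list[Sequence[float]] = []
embeddings.append(first_embedding)
embeddings.append(second_embedding)

metadatas: list[Mapping[str, Any]] = []
metadatas.append({"doc_id": "a", "length": 2})

print(len(embeddings), len(metadatas))
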
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import TYPE_CHECKING

 from injector import inject, singleton
 from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine

@@ -26,6 +27,9 @@ from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chunks.chunks_service import Chunk
 from private_gpt.settings.settings import Settings

+if TYPE_CHECKING:
+    from llama_index.core.postprocessor.types import BaseNodePostprocessor
+

 class Completion(BaseModel):
     response: str

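The new TYPE_CHECKING block makes BaseNodePostprocessor visible to mypy for the annotation added further down without importing it at runtime, which keeps startup imports lean. The general pattern, with Decimal as an arbitrary stand-in for a type used only in annotations:

# General pattern: import a name for type checking only, then use it in
# annotations that are never evaluated at runtime.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by mypy and IDEs; never imported at runtime.
    from decimal import Decimal  # stand-in for BaseNodePostprocessor


def build() -> list:
    # Local variable annotations are not evaluated at runtime (PEP 526),
    # so the TYPE_CHECKING-only name can be used here without quoting it.
    items: list[Decimal] = []
    return items


print(build())  # []
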
@@ -114,12 +118,15 @@ class ChatService:
                 context_filter=context_filter,
                 similarity_top_k=self.settings.rag.similarity_top_k,
             )
-            node_postprocessors = [
+            node_postprocessors: list[BaseNodePostprocessor] = [
                 MetadataReplacementPostProcessor(target_metadata_key="window"),
-                SimilarityPostprocessor(
-                    similarity_cutoff=settings.rag.similarity_value
-                ),
             ]
+            if settings.rag.similarity_value:
+                node_postprocessors.append(
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    )
+                )

             if settings.rag.rerank.enabled:
                 rerank_postprocessor = SentenceTransformerRerank(

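Two things change here: the similarity-cutoff postprocessor is now only attached when settings.rag.similarity_value is configured, and the list gets an explicit list[BaseNodePostprocessor] annotation so that appending a differently-typed postprocessor still type-checks. Without the annotation, mypy infers the element type from the single literal entry and rejects the later append. A condensed illustration with plain stand-in classes instead of the llama-index postprocessors:

# Annotate a list with a common base class so differently-typed elements can
# be appended later. Plain classes stand in for the llama-index postprocessors.
class BasePostprocessor:
    pass


class MetadataReplacement(BasePostprocessor):
    pass


class SimilarityCutoff(BasePostprocessor):
    def __init__(self, cutoff: float) -> None:
        self.cutoff = cutoff


similarity_value: float | None = 0.45  # placeholder for settings.rag.similarity_value

# Annotated with the base class, so the conditional append below type-checks.
postprocessors: list[BasePostprocessor] = [MetadataReplacement()]
if similarity_value:
    postprocessors.append(SimilarityCutoff(cutoff=similarity_value))

print([type(p).__name__ for p in postprocessors])
# ['MetadataReplacement', 'SimilarityCutoff']
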
@@ -90,9 +90,9 @@ class SummarizeService:
         # Add context documents to summarize
         if use_context:
             # 1. Recover all ref docs
-            ref_docs: dict[
-                str, RefDocInfo
-            ] | None = self.storage_context.docstore.get_all_ref_doc_info()
+            ref_docs: dict[str, RefDocInfo] | None = (
+                self.storage_context.docstore.get_all_ref_doc_info()
+            )
             if ref_docs is None:
                 raise ValueError("No documents have been ingested yet.")

@@ -136,19 +136,19 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
-    prompt_style: Literal[
-        "default", "llama2", "llama3", "tag", "mistral", "chatml"
-    ] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `llama3` - use the llama3 prompt style from the llama_index."
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
+        Field(
+            "llama2",
+            description=(
+                "The prompt style to use for the chat engine. "
+                "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+                "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+                "If `llama3` - use the llama3 prompt style from the llama_index."
+                "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
+                "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
+                "`llama2` is the historic behaviour. `default` might work better with your custom models."
+            ),
+        )
     )

@@ -1,4 +1,5 @@
 """This file should be imported if and only if you want to run the UI locally."""
+
 import base64
 import logging
 import time