mirror of
https://github.com/zylon-ai/private-gpt.git
synced 2025-12-22 10:45:42 +01:00
feat: Qdrant support (#1228)
* feat: Qdrant support * Update private_gpt/components/vector_store/vector_store_component.py
This commit is contained in:
parent
86fc4781d8
commit
03d1ae6d70
6 changed files with 320 additions and 18 deletions
|
|
@@ -1,7 +1,8 @@
|
|||
import logging
|
||||
import typing
|
||||
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
from chromadb.config import Settings as ChromaSettings
|
||||
from injector import inject, singleton
|
||||
from llama_index import VectorStoreIndex
|
||||
from llama_index.indices.vector_store import VectorIndexRetriever
|
||||
|
|
@@ -10,6 +11,9 @@ from llama_index.vector_stores.types import VectorStore
|
|||
from private_gpt.components.vector_store.batched_chroma import BatchedChromaVectorStore
|
||||
from private_gpt.open_ai.extensions.context_filter import ContextFilter
|
||||
from private_gpt.paths import local_data_path
|
||||
from private_gpt.settings.settings import Settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@typing.no_type_check
|
||||
|
|
@@ -36,22 +40,58 @@ class VectorStoreComponent:
|
|||
vector_store: VectorStore
|
||||
|
||||
@inject
|
||||
def __init__(self) -> None:
|
||||
chroma_settings = Settings(anonymized_telemetry=False)
|
||||
chroma_client = chromadb.PersistentClient(
|
||||
path=str((local_data_path / "chroma_db").absolute()),
|
||||
settings=chroma_settings,
|
||||
)
|
||||
chroma_collection = chroma_client.get_or_create_collection(
|
||||
"make_this_parameterizable_per_api_call"
|
||||
) # TODO
|
||||
def __init__(self, settings: Settings) -> None:
|
||||
match settings.vectorstore.database:
|
||||
case "chroma":
|
||||
chroma_settings = ChromaSettings(anonymized_telemetry=False)
|
||||
chroma_client = chromadb.PersistentClient(
|
||||
path=str((local_data_path / "chroma_db").absolute()),
|
||||
settings=chroma_settings,
|
||||
)
|
||||
chroma_collection = chroma_client.get_or_create_collection(
|
||||
"make_this_parameterizable_per_api_call"
|
||||
) # TODO
|
||||
|
||||
self.vector_store = typing.cast(
|
||||
VectorStore,
|
||||
BatchedChromaVectorStore(
|
||||
chroma_client=chroma_client, chroma_collection=chroma_collection
|
||||
),
|
||||
)
|
||||
self.vector_store = typing.cast(
|
||||
VectorStore,
|
||||
BatchedChromaVectorStore(
|
||||
chroma_client=chroma_client, chroma_collection=chroma_collection
|
||||
),
|
||||
)
|
||||
|
||||
case "qdrant":
|
||||
try:
|
||||
from llama_index.vector_stores.qdrant import QdrantVectorStore
|
||||
from qdrant_client import QdrantClient # type: ignore
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"'qdrant_client' is not installed."
|
||||
"To use PrivateGPT with Qdrant, install the 'qdrant' extra."
|
||||
"`poetry install --extras qdrant`"
|
||||
) from e
|
||||
if settings.qdrant is None:
|
||||
logger.info(
|
||||
"Qdrant config not found. Using default settings."
|
||||
"Trying to connect to Qdrant at localhost:6333."
|
||||
)
|
||||
client = QdrantClient()
|
||||
else:
|
||||
client = QdrantClient(
|
||||
**settings.qdrant.model_dump(exclude_none=True)
|
||||
)
|
||||
self.vector_store = typing.cast(
|
||||
VectorStore,
|
||||
QdrantVectorStore(
|
||||
client=client,
|
||||
collection_name="make_this_parameterizable_per_api_call",
|
||||
), # TODO
|
||||
)
|
||||
case _:
|
||||
# Should be unreachable
|
||||
# The settings validator should have caught this
|
||||
raise ValueError(
|
||||
f"Vectorstore database {settings.vectorstore.database} not supported"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_retriever(
|
||||
|
|
|
|||
|
|
@@ -84,6 +84,10 @@ class LLMSettings(BaseModel):
|
|||
mode: Literal["local", "openai", "sagemaker", "mock"]
|
||||
|
||||
|
||||
class VectorstoreSettings(BaseModel):
    """Selects which vector database backend stores the document embeddings."""

    # Restricted to the backends VectorStoreComponent knows how to build.
    database: Literal["chroma", "qdrant"]
|
||||
|
||||
|
||||
class LocalSettings(BaseModel):
|
||||
llm_hf_repo_id: str
|
||||
llm_hf_model_file: str
|
||||
|
|
@@ -104,6 +108,53 @@ class UISettings(BaseModel):
|
|||
path: str
|
||||
|
||||
|
||||
class QdrantSettings(BaseModel):
    """Connection settings for a Qdrant vector store.

    All fields are optional and mirror the constructor parameters of
    ``qdrant_client.QdrantClient``; the component passes them through with
    ``model_dump(exclude_none=True)``, so unset fields fall back to the
    client's own defaults.
    """

    location: str | None = Field(
        None,
        description=(
            "If `:memory:` - use in-memory Qdrant instance.\n"
            "If `str` - use it as a `url` parameter.\n"
        ),
    )
    url: str | None = Field(
        None,
        description=(
            "Either host or str of 'Optional[scheme], host, Optional[port], Optional[prefix]'."
        ),
    )
    port: int | None = Field(6333, description="Port of the REST API interface.")
    grpc_port: int | None = Field(6334, description="Port of the gRPC interface.")
    prefer_grpc: bool | None = Field(
        False,
        description="If `true` - use gRPC interface whenever possible in custom methods.",
    )
    https: bool | None = Field(
        None,
        description="If `true` - use HTTPS(SSL) protocol.",
    )
    api_key: str | None = Field(
        None,
        description="API key for authentication in Qdrant Cloud.",
    )
    prefix: str | None = Field(
        None,
        description=(
            # BUGFIX: the original concatenation lacked a space after "path.",
            # rendering as "...REST URL path.Example: ...".
            "Prefix to add to the REST URL path. "
            "Example: `service/v1` will result in "
            "'http://localhost:6333/service/v1/{qdrant-endpoint}' for REST API."
        ),
    )
    timeout: float | None = Field(
        None,
        description="Timeout for REST and gRPC API requests.",
    )
    host: str | None = Field(
        None,
        description="Host name of Qdrant service. If url and host are None, set to 'localhost'.",
    )
    path: str | None = Field(None, description="Persistence path for QdrantLocal.")
|
||||
|
||||
|
||||
class Settings(BaseModel):
|
||||
server: ServerSettings
|
||||
data: DataSettings
|
||||
|
|
@@ -112,6 +163,8 @@ class Settings(BaseModel):
|
|||
local: LocalSettings
|
||||
sagemaker: SagemakerSettings
|
||||
openai: OpenAISettings
|
||||
vectorstore: VectorstoreSettings
|
||||
qdrant: QdrantSettings | None = None
|
||||
|
||||
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue