mirror of
https://github.com/zylon-ai/private-gpt.git
synced 2025-12-22 17:05:41 +01:00
Added window_size setting (`vectorstore.inject_win_size`) for ingestion
This commit is contained in:
parent
6af9fb8b42
commit
def2b7f71f
3 changed files with 8 additions and 2 deletions
|
|
@ -39,13 +39,14 @@ class IngestService:
|
|||
docstore=node_store_component.doc_store,
|
||||
index_store=node_store_component.index_store,
|
||||
)
|
||||
node_parser = SentenceWindowNodeParser.from_defaults()
|
||||
self._settings = settings()
|
||||
node_parser = SentenceWindowNodeParser.from_defaults(window_size=self._settings.vectorstore.inject_win_size)
|
||||
|
||||
self.ingest_component = get_ingestion_component(
|
||||
self.storage_context,
|
||||
embed_model=embedding_component.embedding_model,
|
||||
transformations=[node_parser, embedding_component.embedding_model],
|
||||
settings=settings(),
|
||||
settings=self._settings,
|
||||
)
|
||||
|
||||
def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
|
||||
|
|
|
|||
|
|
@ -120,6 +120,10 @@ class LLMSettings(BaseModel):
|
|||
|
||||
class VectorstoreSettings(BaseModel):
|
||||
database: Literal["chroma", "qdrant", "postgres"]
|
||||
inject_win_size: int = Field(
|
||||
3,
|
||||
description="How many sentences on either side to capture, when parsing files",
|
||||
)
|
||||
|
||||
|
||||
class NodeStoreSettings(BaseModel):
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ huggingface:
|
|||
|
||||
vectorstore:
|
||||
database: qdrant
|
||||
inject_win_size: 2
|
||||
|
||||
nodestore:
|
||||
database: simple
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue