Added a window_size setting (vectorstore.inject_win_size) for ingestion

This commit is contained in:
Robert Hirsch 2024-06-06 21:07:07 +02:00
parent 6af9fb8b42
commit def2b7f71f
No known key found for this signature in database
GPG key ID: A9D9D1205DBED12C
3 changed files with 8 additions and 2 deletions

View file

@ -39,13 +39,14 @@ class IngestService:
docstore=node_store_component.doc_store,
index_store=node_store_component.index_store,
)
node_parser = SentenceWindowNodeParser.from_defaults()
self._settings = settings()
node_parser = SentenceWindowNodeParser.from_defaults(window_size=self._settings.vectorstore.inject_win_size)
self.ingest_component = get_ingestion_component(
self.storage_context,
embed_model=embedding_component.embedding_model,
transformations=[node_parser, embedding_component.embedding_model],
settings=settings(),
settings=self._settings,
)
def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:

View file

@ -120,6 +120,10 @@ class LLMSettings(BaseModel):
class VectorstoreSettings(BaseModel):
    # Settings for the vector store, plus the sentence-window size applied
    # when documents are parsed for ingestion.

    # Vector store backend selector; only these three values are accepted.
    database: Literal["chroma", "qdrant", "postgres"]
    # Window size handed to SentenceWindowNodeParser.from_defaults(window_size=...)
    # by IngestService. Falls back to 3 when the key is absent from the
    # settings file.
    inject_win_size: int = Field(
        3,
        description="How many sentences on either side to capture, when parsing files",
        # A zero or negative window is meaningless for a sentence window;
        # reject it when settings are loaded instead of letting it surface
        # later while the node parser is being built.
        gt=0,
    )
class NodeStoreSettings(BaseModel):

View file

@ -74,6 +74,7 @@ huggingface:
vectorstore:
database: qdrant
inject_win_size: 2
nodestore:
database: simple