Add configurable window_size setting (vectorstore.inject_win_size) for ingestion

This commit is contained in:
Robert Hirsch 2024-06-06 21:07:07 +02:00
parent 6af9fb8b42
commit def2b7f71f
No known key found for this signature in database
GPG key ID: A9D9D1205DBED12C
3 changed files with 8 additions and 2 deletions

View file

@ -39,13 +39,14 @@ class IngestService:
docstore=node_store_component.doc_store, docstore=node_store_component.doc_store,
index_store=node_store_component.index_store, index_store=node_store_component.index_store,
) )
node_parser = SentenceWindowNodeParser.from_defaults() self._settings = settings()
node_parser = SentenceWindowNodeParser.from_defaults(window_size=self._settings.vectorstore.inject_win_size)
self.ingest_component = get_ingestion_component( self.ingest_component = get_ingestion_component(
self.storage_context, self.storage_context,
embed_model=embedding_component.embedding_model, embed_model=embedding_component.embedding_model,
transformations=[node_parser, embedding_component.embedding_model], transformations=[node_parser, embedding_component.embedding_model],
settings=settings(), settings=self._settings,
) )
def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]: def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:

View file

@ -120,6 +120,10 @@ class LLMSettings(BaseModel):
class VectorstoreSettings(BaseModel): class VectorstoreSettings(BaseModel):
database: Literal["chroma", "qdrant", "postgres"] database: Literal["chroma", "qdrant", "postgres"]
inject_win_size: int = Field(
3,
description="How many sentences on either side to capture, when parsing files",
)
class NodeStoreSettings(BaseModel): class NodeStoreSettings(BaseModel):

View file

@ -74,6 +74,7 @@ huggingface:
vectorstore: vectorstore:
database: qdrant database: qdrant
inject_win_size: 2
nodestore: nodestore:
database: simple database: simple