This commit is contained in:
Nathan Lenas 2024-11-28 15:02:58 +01:00 committed by GitHub
commit 494bb9eea9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 136 additions and 24 deletions

View file

@ -1,5 +1,6 @@
import logging
from pathlib import Path
from typing import Any
from llama_index.core.readers import StringIterableReader
from llama_index.core.readers.base import BaseReader
@ -69,11 +70,13 @@ class IngestionHelper:
# Load one file into Document objects and stamp each document with metadata.
#
# NOTE(review): this text is a rendered diff hunk, not a plain source file. The
# two consecutive parameter lines below look like the before/after versions of
# the same signature line (the second adds `file_metadata`), and the original
# indentation has been lost -- confirm against the real file.
#
# Args:
#   file_name: passed to the loader and stored under the "file_name" metadata key.
#   file_data: path passed to the loader.
#   file_metadata: optional extra metadata merged into every document's
#       metadata; a None (or otherwise falsy) value is treated as an empty dict.
# Returns:
#   The list returned by IngestionHelper._load_file_to_documents, after the
#   per-document metadata updates and the _exclude_metadata call below.
@staticmethod
def transform_file_into_documents(
file_name: str, file_data: Path
file_name: str, file_data: Path, file_metadata: dict[str, Any] | None = None
) -> list[Document]:
documents = IngestionHelper._load_file_to_documents(file_name, file_data)
for document in documents:
# Merge the caller-supplied metadata first ...
document.metadata.update(file_metadata or {})
# ... then set "file_name" afterwards, so it overrides any "file_name" key
# that was present in file_metadata.
document.metadata["file_name"] = file_name
# Helper is not visible in this view; presumably it marks some metadata keys as
# excluded for downstream consumers -- verify against its definition.
IngestionHelper._exclude_metadata(documents)
return documents