mirror of
https://github.com/zylon-ai/private-gpt.git
synced 2025-12-22 13:55:41 +01:00
Sanitize null bytes before ingestion
This commit is contained in:
parent
77461b96cf
commit
43a9dbe21b
1 changed file with 6 additions and 1 deletion
|
|
@@ -92,7 +92,12 @@ class IngestionHelper:
|
||||||
return string_reader.load_data([file_data.read_text()])
|
return string_reader.load_data([file_data.read_text()])
|
||||||
|
|
||||||
logger.debug("Specific reader found for extension=%s", extension)
|
logger.debug("Specific reader found for extension=%s", extension)
|
||||||
return reader_cls().load_data(file_data)
|
documents = reader_cls().load_data(file_data)
|
||||||
|
|
||||||
|
for i in range(len(documents)):
|
||||||
|
documents[i].text = documents[i].text.replace("\u0000", "")
|
||||||
|
|
||||||
|
return documents
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _exclude_metadata(documents: list[Document]) -> None:
|
def _exclude_metadata(documents: list[Document]) -> None:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue