mirror of
https://github.com/zylon-ai/private-gpt.git
synced 2025-12-22 20:12:55 +01:00
While ingesting, some files caused a crash due to encoding errors. Even though the files were nominally UTF-8, some characters still caused decoding to fail.
This commit is contained in:
parent
757a8c79fd
commit
6af9fb8b42
1 changed files with 8 additions and 2 deletions
|
|
@ -89,10 +89,16 @@ class IngestionHelper:
|
||||||
)
|
)
|
||||||
# Read as a plain text
|
# Read as a plain text
|
||||||
string_reader = StringIterableReader()
|
string_reader = StringIterableReader()
|
||||||
return string_reader.load_data([file_data.read_text()])
|
return string_reader.load_data([file_data.read_text(errors='replace')])
|
||||||
|
|
||||||
logger.debug("Specific reader found for extension=%s", extension)
|
logger.debug("Specific reader found for extension=%s", extension)
|
||||||
return reader_cls().load_data(file_data)
|
try:
|
||||||
|
res = reader_cls().load_data(file_data)
|
||||||
|
except:
|
||||||
|
string_reader = StringIterableReader()
|
||||||
|
res = string_reader.load_data([file_data.read_text(errors='replace')])
|
||||||
|
pass
|
||||||
|
return res
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _exclude_metadata(documents: list[Document]) -> None:
|
def _exclude_metadata(documents: list[Document]) -> None:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue