mirror of
https://github.com/zylon-ai/private-gpt.git
synced 2025-12-22 10:45:42 +01:00
Fix the parallel ingestion mode, and make it available through conf (#1336)
* Fix the parallel ingestion mode, and make it available through conf. Also updated the documentation to show how to configure the ingest mode. * PR feedback: redirect to documentation.
This commit is contained in:
parent
b7ca7d35a0
commit
56af625d71
5 changed files with 140 additions and 45 deletions
|
|
@ -121,6 +121,30 @@ class LocalSettings(BaseModel):
|
|||
|
||||
class EmbeddingSettings(BaseModel):
    """Settings for the embedding engine and for how files are ingested.

    `mode` selects the embedding backend. `ingest_mode` selects the ingestion
    strategy (`simple`, `batch` or `parallel`), and `count_workers` sets how
    many workers the non-`simple` strategies use.
    """

    # Embedding backend to use.
    mode: Literal["local", "openai", "sagemaker", "mock"]

    # Ingestion strategy; defaults to "simple", the historic sequential
    # behaviour.
    ingest_mode: Literal["simple", "batch", "parallel"] = Field(
        "simple",
        description=(
            "The ingest mode to use for the embedding engine:\n"
            "If `simple` - ingest files sequentially and one by one. It is the historic behaviour.\n"
            "If `batch` - if multiple files, parse all the files in parallel, "
            "and send them in batch to the embedding model.\n"
            "If `parallel` - parse the files in parallel using multiple cores, and embedd them in parallel.\n"
            "`parallel` is the fastest mode for local setup, as it parallelize IO RW in the index.\n"
            "For modes that leverage parallelization, you can specify the number of "
            "workers to use with `count_workers`.\n"
        ),
    )

    # Number of workers for the `batch` and `parallel` modes.
    # `ge=1` makes a zero or negative value fail at settings-validation time
    # instead of surfacing later when ingestion tries to use the workers.
    count_workers: int = Field(
        2,
        ge=1,
        description=(
            "The number of workers to use for file ingestion.\n"
            "In `batch` mode, this is the number of workers used to parse the files.\n"
            "In `parallel` mode, this is the number of workers used to parse the files and embed them.\n"
            "This is only used if `ingest_mode` is not `simple`.\n"
            "Do not go too high with this number, as it might cause memory issues. (especially in `parallel` mode)\n"
            "Do not set it higher than your number of threads of your CPU."
        ),
    )
|
||||
|
||||
|
||||
class SagemakerSettings(BaseModel):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue