Mirror of https://github.com/zylon-ai/private-gpt.git, synced 2025-12-22 10:45:42 +01:00
Optimize load_documents function with multiprocessing
Parent: ad64589c8f
Commit: 81b221bccb
1 changed file with 4 additions and 1 deletion
@@ -2,6 +2,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 
 from langchain.document_loaders import (
     CSVLoader,
@@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents
 
 
 def main():
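
For context, the following is a minimal, self-contained sketch of what load_documents looks like after this commit. Only the Pool / pool.map pattern is taken from the diff; the bodies of load_single_document and LOADER_MAPPING, the use of plain strings instead of langchain Document objects, and the example directory name are illustrative assumptions, not the repository's actual code.

import glob
import os
from multiprocessing import Pool
from typing import Callable, Dict, List


def load_single_document(file_path: str) -> str:
    # Hypothetical stand-in: the real function dispatches to a langchain
    # document loader chosen by file extension and returns a Document.
    with open(file_path, "r", errors="ignore") as f:
        return f.read()


# Hypothetical extension map; the real LOADER_MAPPING maps extensions to
# langchain loader classes and their constructor arguments.
LOADER_MAPPING: Dict[str, Callable[[str], str]] = {".txt": load_single_document}


def load_documents(source_dir: str) -> List[str]:
    # Collect every file under source_dir whose extension has a registered loader.
    all_files: List[str] = []
    for ext in LOADER_MAPPING:
        all_files.extend(
            glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
        )
    # Load documents in parallel, one worker per CPU core, instead of the
    # previous sequential list comprehension.
    with Pool(processes=os.cpu_count()) as pool:
        documents = pool.map(load_single_document, all_files)
    return documents


if __name__ == "__main__":
    # Example usage (hypothetical directory name).
    docs = load_documents("source_documents")
    print(f"Loaded {len(docs)} documents")

Note that Pool.map pickles each file path on the way in and each loaded document on the way out, so the speedup is largest when per-file parsing dominates over I/O and serialization overhead.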