mirror of
https://github.com/zylon-ai/private-gpt.git
synced 2025-12-22 07:40:12 +01:00
Ingestion Speedup Multiple strategy (#1309)
This commit is contained in:
parent
546ba33e6f
commit
bafdd3baf1
13 changed files with 515 additions and 195 deletions
32
private_gpt/server/ingest/model.py
Normal file
32
private_gpt/server/ingest/model.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
from typing import Any, Literal
|
||||
|
||||
from llama_index import Document
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class IngestedDoc(BaseModel):
    """Pydantic model describing a single ingested document.

    Fields:
        object: Constant discriminator, always ``"ingest.document"``.
        doc_id: Identifier of the document.
        doc_metadata: Metadata attached to the document, or ``None``.
    """

    object: Literal["ingest.document"]
    doc_id: str = Field(examples=["c202d5e6-7b69-4869-81cc-dd574ee8ee11"])
    doc_metadata: dict[str, Any] | None = Field(
        examples=[
            {
                "page_label": "2",
                "file_name": "Sales Report Q3 2023.pdf",
            }
        ]
    )

    @staticmethod
    def curate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of *metadata* without the unwanted keys.

        The keys ``"doc_id"``, ``"window"`` and ``"original_text"`` are
        dropped; every other entry is kept in its original order.

        The input mapping is NOT modified: a new dict is returned so that
        curating metadata for a response never strips keys from the
        caller's own dict (e.g. a document's live ``metadata``).
        """
        unwanted_keys = ("doc_id", "window", "original_text")
        return {
            key: value for key, value in metadata.items() if key not in unwanted_keys
        }

    @staticmethod
    def from_document(document: Document) -> "IngestedDoc":
        """Build an ``IngestedDoc`` from *document*.

        Uses ``document.doc_id`` as the id and a curated copy of
        ``document.metadata`` (see ``curate_metadata``) as the metadata;
        ``document.metadata`` itself is left untouched.
        """
        return IngestedDoc(
            object="ingest.document",
            doc_id=document.doc_id,
            doc_metadata=IngestedDoc.curate_metadata(document.metadata),
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue