Added routes for PDF OCR

This commit is contained in:
Saurab-Shrestha 2024-02-15 17:39:07 +05:45
parent 91ebce47d4
commit d849ee76f4
9 changed files with 1519 additions and 541 deletions

View file

@ -178,6 +178,60 @@ def ingest_file(
)) -> IngestResponse:
"""Ingests and processes a file, storing its chunks to be used as context."""
service = request.state.injector.get(IngestService)
print("-------------------------------------->",file)
try:
file_ingested = crud.documents.get_by_filename(db, file_name=file.filename)
if file_ingested:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="File already exists. Choose a different file.",
)
if file.filename is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="No file name provided",
)
try:
docs_in = schemas.DocumentCreate(filename=file.filename, uploaded_by=current_user.id)
crud.documents.create(db=db, obj_in=docs_in)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Unable to upload file.",
)
upload_path = Path(f"{UPLOAD_DIR}/{file.filename}")
with open(upload_path, "wb") as f:
f.write(file.file.read())
with open(upload_path, "rb") as f:
ingested_documents = service.ingest_bin_data(file.filename, f)
logger.info(f"{file.filename} is uploaded by the {current_user.fullname}.")
return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
except HTTPException:
raise
except Exception as e:
logger.error(f"There was an error uploading the file(s): {str(e)}")
print("ERROR: ", e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Internal Server Error: Unable to ingest file.",
)
def ingest_pdf_file(
request: Request,
db: Session = Depends(deps.get_db),
file: UploadFile = File(...),
) -> IngestResponse:
"""Ingests and processes a file, storing its chunks to be used as context."""
service = request.state.injector.get(IngestService)
try:
file_ingested = crud.documents.get_by_filename(db, file_name=file.filename)