feat: Upgrade to LlamaIndex to 0.10 (#1663)

* Extract optional dependencies

* Separate local mode into llms-llama-cpp and embeddings-huggingface for clarity

* Support Ollama embeddings

* Upgrade to llamaindex 0.10.14. Remove legacy use of ServiceContext in ContextChatEngine

* Fix vector retriever filters
This commit is contained in:
Iván Martínez 2024-03-06 17:51:30 +01:00 committed by GitHub
parent 12f3a39e8a
commit 45f05711eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
43 changed files with 1474 additions and 1396 deletions

View file

@ -1,14 +1,58 @@
import logging
from pathlib import Path
from llama_index import Document
from llama_index.readers import JSONReader, StringIterableReader
from llama_index.readers.file.base import DEFAULT_FILE_READER_CLS
from llama_index.core.readers import StringIterableReader
from llama_index.core.readers.base import BaseReader
from llama_index.core.readers.json import JSONReader
from llama_index.core.schema import Document
logger = logging.getLogger(__name__)
# Inspired by the `llama_index.core.readers.file.base` module
def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
try:
from llama_index.readers.file.docs import ( # type: ignore
DocxReader,
HWPReader,
PDFReader,
)
from llama_index.readers.file.epub import EpubReader # type: ignore
from llama_index.readers.file.image import ImageReader # type: ignore
from llama_index.readers.file.ipynb import IPYNBReader # type: ignore
from llama_index.readers.file.markdown import MarkdownReader # type: ignore
from llama_index.readers.file.mbox import MboxReader # type: ignore
from llama_index.readers.file.slides import PptxReader # type: ignore
from llama_index.readers.file.tabular import PandasCSVReader # type: ignore
from llama_index.readers.file.video_audio import ( # type: ignore
VideoAudioReader,
)
except ImportError as e:
raise ImportError("`llama-index-readers-file` package not found") from e
default_file_reader_cls: dict[str, type[BaseReader]] = {
".hwp": HWPReader,
".pdf": PDFReader,
".docx": DocxReader,
".pptx": PptxReader,
".ppt": PptxReader,
".pptm": PptxReader,
".jpg": ImageReader,
".png": ImageReader,
".jpeg": ImageReader,
".mp3": VideoAudioReader,
".mp4": VideoAudioReader,
".csv": PandasCSVReader,
".epub": EpubReader,
".md": MarkdownReader,
".mbox": MboxReader,
".ipynb": IPYNBReader,
}
return default_file_reader_cls
# Patching the default file reader to support other file types
FILE_READER_CLS = DEFAULT_FILE_READER_CLS.copy()
FILE_READER_CLS = _try_loading_included_file_formats()
FILE_READER_CLS.update(
{
".json": JSONReader,