commit 908b83e16e
Author: Robert Hirsch
Date:   2024-06-06 21:46:38 +02:00 (committed via GitHub)
8 changed files with 2620 additions and 2472 deletions

poetry.lock (generated): diff suppressed because it is too large (4980 changed lines).


@@ -31,6 +31,7 @@ class EmbeddingComponent:
                 self.embedding_model = HuggingFaceEmbedding(
                     model_name=settings.huggingface.embedding_hf_model_name,
                     cache_folder=str(models_cache_path),
+                    max_length=settings.huggingface.embedding_hf_max_length,
                 )
             case "sagemaker":
                 try:
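
The new max_length argument caps tokenized input so that models with a fixed limit do not crash on long chunks. A minimal standalone sketch, assuming the llama-index HuggingFace embedding API and the model configured in the settings.yaml change later in this diff:

# Minimal sketch (assumes llama-index-embeddings-huggingface is installed;
# the model name is taken from the settings.yaml change below).
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embedding_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
    max_length=512,  # inputs beyond the model's token limit get truncated
)
vector = embedding_model.get_text_embedding("hello world")
print(len(vector))  # 384, the embedding dimension of bge-small-en-v1.5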


@@ -89,10 +89,16 @@ class IngestionHelper:
             )
             # Read as plain text
             string_reader = StringIterableReader()
-            return string_reader.load_data([file_data.read_text()])
+            return string_reader.load_data([file_data.read_text(errors="replace")])

         logger.debug("Specific reader found for extension=%s", extension)
-        return reader_cls().load_data(file_data)
+        try:
+            res = reader_cls().load_data(file_data)
+        except Exception:
+            # Fall back to a plain-text read when the specific reader fails.
+            string_reader = StringIterableReader()
+            res = string_reader.load_data([file_data.read_text(errors="replace")])
+        return res

     @staticmethod
     def _exclude_metadata(documents: list[Document]) -> None:
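
Both fallback paths rely on errors="replace", which swaps undecodable bytes for U+FFFD instead of raising UnicodeDecodeError. A self-contained illustration:

# Standalone illustration of the errors="replace" behaviour used above
# (assumes a UTF-8 default encoding, where bytes 0xFF and 0xFE are invalid).
from pathlib import Path

path = Path("sample.txt")
path.write_bytes(b"valid text \xff\xfe more text")
print(path.read_text(errors="replace"))  # prints "valid text �� more text"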


@@ -138,6 +138,76 @@ class Llama2PromptStyle(AbstractPromptStyle):
         )


+class Llama3PromptStyle(AbstractPromptStyle):
+    """
+    Template:
+    {% set loop_messages = messages %}
+    {% for message in loop_messages %}
+        {% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' %}
+        {% if loop.index0 == 0 %}
+            {% set content = bos_token + content %}
+        {% endif %}
+        {{ content }}
+    {% endfor %}
+    {% if add_generation_prompt %}
+        {{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+    {% endif %}
+    """
+
+    BOS, EOS = "<|begin_of_text|>", "<|end_of_text|>"
+    B_INST, E_INST = "<|start_header_id|>user<|end_header_id|>", "<|eot_id|>"
+    B_SYS, E_SYS = "<|start_header_id|>system<|end_header_id|> ", "<|eot_id|>"
+    ASSISTANT_INST = "<|start_header_id|>assistant<|end_header_id|>"
+    DEFAULT_SYSTEM_PROMPT = """\
+    You are a helpful, respectful and honest assistant. \
+    Always answer as helpfully as possible and follow ALL given instructions. \
+    Do not speculate or make up information. \
+    Do not reference any given instructions or context. \
+    """
+
+    def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
+        string_messages: list[str] = []
+        if messages[0].role == MessageRole.SYSTEM:
+            system_message_str = messages[0].content or ""
+            messages = messages[1:]
+        else:
+            system_message_str = self.DEFAULT_SYSTEM_PROMPT
+
+        system_message_str = f"{self.B_SYS} {system_message_str.strip()} {self.E_SYS}"
+
+        for i in range(0, len(messages), 2):
+            user_message = messages[i]
+            assert user_message.role == MessageRole.USER
+
+            if i == 0:
+                str_message = f"{system_message_str} {self.BOS} {self.B_INST} "
+            else:
+                # end the previous user-assistant interaction
+                string_messages[-1] += f" {self.EOS}"
+                # no need to include the system prompt again
+                str_message = f"{self.BOS} {self.B_INST} "
+
+            str_message += f"{user_message.content} {self.E_INST} {self.ASSISTANT_INST}"
+
+            if len(messages) > (i + 1):
+                assistant_message = messages[i + 1]
+                assert assistant_message.role == MessageRole.ASSISTANT
+                str_message += f" {assistant_message.content} {self.E_SYS} {self.B_INST}"
+
+            string_messages.append(str_message)
+
+        return "".join(string_messages)
+
+    def _completion_to_prompt(self, completion: str) -> str:
+        system_prompt_str = self.DEFAULT_SYSTEM_PROMPT
+
+        return (
+            f"{self.B_SYS} {system_prompt_str.strip()} {self.E_SYS} "
+            f"{completion.strip()} {self.E_SYS} "
+        )
+
 class TagPromptStyle(AbstractPromptStyle):
     """Tag prompt style (used by Vigogne) that uses the prompt style `<|ROLE|>`.
@@ -219,7 +289,7 @@ class ChatMLPromptStyle(AbstractPromptStyle):
 def get_prompt_style(
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] | None
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
 ) -> AbstractPromptStyle:
     """Get the prompt style to use from the given string.
@@ -230,6 +300,8 @@ def get_prompt_style(
         return DefaultPromptStyle()
     elif prompt_style == "llama2":
         return Llama2PromptStyle()
+    elif prompt_style == "llama3":
+        return Llama3PromptStyle()
     elif prompt_style == "tag":
         return TagPromptStyle()
     elif prompt_style == "mistral":
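
As a quick sanity check of the new style, a hypothetical usage sketch; messages_to_prompt is the public wrapper that AbstractPromptStyle defines around _messages_to_prompt, and the rendered output in the comment is paraphrased:

# Hypothetical usage sketch for the Llama3PromptStyle added above
# (assumes it runs in the module where the class is defined).
from llama_index.core.llms import ChatMessage, MessageRole

style = Llama3PromptStyle()
prompt = style.messages_to_prompt(
    [
        ChatMessage(role=MessageRole.SYSTEM, content="You are terse."),
        ChatMessage(role=MessageRole.USER, content="Hi"),
    ]
)
print(prompt)
# Roughly:
# <|start_header_id|>system<|end_header_id|> You are terse. <|eot_id|>
# <|begin_of_text|> <|start_header_id|>user<|end_header_id|> Hi <|eot_id|>
# <|start_header_id|>assistant<|end_header_id|>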


@@ -39,13 +39,14 @@ class IngestService:
             docstore=node_store_component.doc_store,
             index_store=node_store_component.index_store,
         )
-        node_parser = SentenceWindowNodeParser.from_defaults()
+        self._settings = settings()
+        node_parser = SentenceWindowNodeParser.from_defaults(window_size=self._settings.vectorstore.inject_win_size)

         self.ingest_component = get_ingestion_component(
             self.storage_context,
             embed_model=embedding_component.embedding_model,
             transformations=[node_parser, embedding_component.embedding_model],
-            settings=settings(),
+            settings=self._settings,
         )

     def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
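
The window size now comes from configuration rather than the parser default. A standalone sketch of what window_size controls, using the llama-index-core API:

# Sketch: window_size is the number of sentences captured on either side of
# each sentence and stored in the node's "window" metadata.
from llama_index.core import Document
from llama_index.core.node_parser import SentenceWindowNodeParser

parser = SentenceWindowNodeParser.from_defaults(window_size=2)
nodes = parser.get_nodes_from_documents(
    [Document(text="One. Two. Three. Four. Five.")]
)
print(nodes[2].metadata["window"])  # all five sentences: two on each side of "Three."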


@@ -104,12 +104,13 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = Field(
         "llama2",
         description=(
             "The prompt style to use for the chat engine. "
             "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
             "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `llama3` - use the llama3 prompt style from the llama_index.\n"
             "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
             "If `mistral` - use the `mistral` prompt style. It should look like `<s>[INST] {System Prompt} [/INST]</s>[INST] {UserInstructions} [/INST]`.\n"
             "`llama2` is the historic behaviour. `default` might work better with your custom models."
@@ -119,6 +120,10 @@ class LLMSettings(BaseModel):

 class VectorstoreSettings(BaseModel):
     database: Literal["chroma", "qdrant", "postgres"]
+    inject_win_size: int = Field(
+        3,
+        description="How many sentences on either side to capture when parsing files",
+    )


 class NodeStoreSettings(BaseModel):
@@ -150,6 +155,10 @@ class HuggingFaceSettings(BaseModel):
     embedding_hf_model_name: str = Field(
         description="Name of the HuggingFace model to use for embeddings"
     )
+    embedding_hf_max_length: int = Field(
+        None,
+        description="Some embedding models have a maximum input length; set it here to avoid crashes",
+    )
     access_token: str = Field(
         None,
         description="Huggingface access token, required to download some models",


@@ -15,6 +15,7 @@ watchdog = "^4.0.0"
 transformers = "^4.38.2"
 docx2txt = "^0.8"
 cryptography = "^3.1"
+sentencepiece = "^0.2.0"
 # LlamaIndex core libs
 llama-index-core = "^0.10.14"
 llama-index-readers-file = "^0.1.6"
@@ -25,7 +26,7 @@ llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
 llama-index-llms-ollama = {version ="^0.1.2", optional = true}
 llama-index-llms-azure-openai = {version ="^0.1.5", optional = true}
 llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
-llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true}
+llama-index-embeddings-huggingface = {version ="^0.2.0", optional = true}
 llama-index-embeddings-openai = {version ="^0.1.6", optional = true}
 llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true}
 llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
@@ -42,7 +43,7 @@ boto3 = {version ="^1.34.51", optional = true}
 # Optional Reranker dependencies
 torch = {version ="^2.1.2", optional = true}
-sentence-transformers = {version ="^2.6.1", optional = true}
+sentence-transformers = {version ="^2.7.0", optional = true}
 # Optional UI
 gradio = {version ="^4.19.2", optional = true}


@@ -69,10 +69,12 @@ embedding:

 huggingface:
   embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  embedding_hf_max_length: 512 # some models have a maximum length for input
   access_token: ${HUGGINGFACE_TOKEN:}

 vectorstore:
   database: qdrant
+  inject_win_size: 2

 nodestore:
   database: simple