fix(config): make tokenizer optional and include a troubleshooting doc (#1998)

* docs: add troubleshooting

* fix: pass HF token to setup script and prevent downloading the tokenizer when it is empty

* fix: improve logging and disable the model-specific tokenizer by default

* chore: change HF_TOKEN environment variable to align with the default config

* fix: mypy
Javier Martinez 2024-07-17 10:06:27 +02:00 committed by GitHub
parent 15f73dbc48
commit 01b7ccd064
6 changed files with 65 additions and 12 deletions


@@ -40,7 +40,8 @@ llm:
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
-  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  # Select your tokenizer. Llama-index tokenizer is the default.
+  # tokenizer: mistralai/Mistral-7B-Instruct-v0.2
   temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 rag:
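
After this change the llama-index default tokenizer is used unless the user opts back in. A user who wants the Mistral tokenizer would uncomment the line; a sketch of the resulting `settings.yaml` fragment (assuming the surrounding keys shown in the hunk above):

```yaml
llm:
  max_new_tokens: 512
  context_window: 3900
  # Uncomment to use a model-specific tokenizer instead of the default:
  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
  temperature: 0.1
```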
@@ -76,7 +77,7 @@ embedding:
   huggingface:
     embedding_hf_model_name: BAAI/bge-small-en-v1.5
-    access_token: ${HUGGINGFACE_TOKEN:}
+    access_token: ${HF_TOKEN:}
 vectorstore:
   database: qdrant
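
The `${HF_TOKEN:}` value uses a `${VAR:default}` substitution syntax, so an unset variable resolves to the empty string rather than an error. A minimal sketch of how such a resolver might work (hypothetical implementation, not the project's actual loader):

```python
import os
import re

# Matches ${VAR:default}; group 1 is the variable name, group 2 the default.
_ENV_PATTERN = re.compile(r"\$\{(\w+):([^}]*)\}")


def expand_env(value: str) -> str:
    """Replace each ${VAR:default} with the env var's value, or the default if unset."""
    return _ENV_PATTERN.sub(
        lambda m: os.environ.get(m.group(1), m.group(2)), value
    )
```

With `HF_TOKEN` unset, `expand_env("access_token: ${HF_TOKEN:}")` yields `access_token: ` (an empty token), which is what makes the "skip download when the token is empty" guard above reachable.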