mirror of https://github.com/zylon-ai/private-gpt.git (synced 2025-12-22 17:05:41 +01:00)

commit 3cd0580f67

Merge branch 'main' into feat/recipe-summary

# Conflicts:
#	private_gpt/launcher.py

20 changed files with 2429 additions and 2450 deletions
@@ -8,7 +8,7 @@ inputs:
   poetry_version:
     required: true
     type: string
-    default: "1.5.1"
+    default: "1.8.3"
 
 runs:
   using: composite
@@ -3,7 +3,7 @@ FROM python:3.11.6-slim-bookworm as base
 # Install poetry
 RUN pip install pipx
 RUN python3 -m pipx ensurepath
-RUN pipx install poetry
+RUN pipx install poetry==1.8.3
 ENV PATH="/root/.local/bin:$PATH"
 ENV PATH=".venv/bin/:$PATH"
@@ -14,27 +14,38 @@ FROM base as dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
-RUN poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-ollama"
+ARG POETRY_EXTRAS="ui vector-stores-qdrant llms-ollama embeddings-ollama"
+RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
 
 FROM base as app
 
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 ENV APP_ENV=prod
+ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
 EXPOSE 8080
 
 # Prepare a non-root user
-RUN adduser --system worker
+# More info about how to configure UIDs and GIDs in Docker:
+# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md
+
+# Define the User ID (UID) for the non-root user
+# UID 100 is chosen to avoid conflicts with existing system users
+ARG UID=100
+
+# Define the Group ID (GID) for the non-root user
+# GID 65534 is often used for the 'nogroup' or 'nobody' group
+ARG GID=65534
+
+RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
 WORKDIR /home/worker/app
 
-RUN mkdir local_data; chown worker local_data
-RUN mkdir models; chown worker models
+RUN chown worker /home/worker/app
+RUN mkdir local_data && chown worker local_data
+RUN mkdir models && chown worker models
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
-COPY --chown=worker fern/ fern
-COPY --chown=worker *.yaml *.md ./
+COPY --chown=worker *.yaml .
 COPY --chown=worker scripts/ scripts
 
-ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
-
 USER worker
 ENTRYPOINT python -m private_gpt
@@ -5,7 +5,7 @@ FROM python:3.11.6-slim-bookworm as base
 # Install poetry
 RUN pip install pipx
 RUN python3 -m pipx ensurepath
-RUN pipx install poetry
+RUN pipx install poetry==1.8.3
 ENV PATH="/root/.local/bin:$PATH"
 ENV PATH=".venv/bin/:$PATH"
@@ -24,28 +24,39 @@ FROM base as dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
-RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
+ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
+RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
 
 FROM base as app
 
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 ENV APP_ENV=prod
+ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
 EXPOSE 8080
 
 # Prepare a non-root user
-RUN adduser --group worker
-RUN adduser --system --ingroup worker worker
+# More info about how to configure UIDs and GIDs in Docker:
+# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md
+
+# Define the User ID (UID) for the non-root user
+# UID 100 is chosen to avoid conflicts with existing system users
+ARG UID=100
+
+# Define the Group ID (GID) for the non-root user
+# GID 65534 is often used for the 'nogroup' or 'nobody' group
+ARG GID=65534
+
+RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
 WORKDIR /home/worker/app
 
-RUN mkdir local_data; chown worker local_data
-RUN mkdir models; chown worker models
+RUN chown worker /home/worker/app
+RUN mkdir local_data && chown worker local_data
+RUN mkdir models && chown worker models
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
-COPY --chown=worker fern/ fern
-COPY --chown=worker *.yaml *.md ./
+COPY --chown=worker *.yaml ./
 COPY --chown=worker scripts/ scripts
 
-ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
-
 USER worker
 ENTRYPOINT python -m private_gpt
@@ -5,12 +5,15 @@ services:
     volumes:
       - ./local_data/:/home/worker/app/local_data
     ports:
-      - 8001:8080
+      - 8001:8001
     environment:
-      PORT: 8080
+      PORT: 8001
       PGPT_PROFILES: docker
       PGPT_MODE: ollama
+      PGPT_EMBED_MODE: ollama
   ollama:
     image: ollama/ollama:latest
+    ports:
+      - 11434:11434
     volumes:
       - ./models:/root/.ollama
@@ -28,6 +28,11 @@ pyenv local 3.11
 Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management:
 Follow the instructions on the official Poetry website to install it.
 
+<Callout intent="warning">
+A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend upgrading to a tested version.
+To upgrade Poetry to latest tested version, run `poetry self update 1.8.3` after installing it.
+</Callout>
+
 ### 4. Optional: Install `make`
 To run various scripts, you need to install `make`. Follow the instructions for your operating system:
 #### macOS
@@ -135,14 +140,14 @@ Now, start Ollama service (it will start a local inference server, serving both
 ollama serve
 ```
 
-Install the models to be used, the default settings-ollama.yaml is configured to user mistral 7b LLM (~4GB) and nomic-embed-text Embeddings (~275MB)
+Install the models to be used, the default settings-ollama.yaml is configured to user llama3.1 8b LLM (~4GB) and nomic-embed-text Embeddings (~275MB)
 
 By default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.
 
 In any case, if you want to manually pull models, run the following commands:
 
 ```bash
-ollama pull mistral
+ollama pull llama3.1
 ollama pull nomic-embed-text
 ```
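For context on the `ollama.autopull_models` property mentioned in the docs above, here is a minimal sketch of the corresponding Ollama settings, assembled from the settings files changed later in this diff (illustrative only, not a complete settings file):

```yaml
# Sketch only: values taken from the settings-ollama.yaml / settings-docker.yaml hunks in this diff
ollama:
  llm_model: llama3.1               # new default (was mistral)
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
  autopull_models: true             # set to false if you prefer to pull models manually with `ollama pull`
```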
@@ -24,8 +24,26 @@ PrivateGPT uses the `AutoTokenizer` library to tokenize input text accurately. I
 In your `settings.yaml` file, specify the model you want to use:
 ```yaml
 llm:
-  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
 ```
 2. **Set Access Token for Gated Models:**
 If you are using a gated model, ensure the `access_token` is set as mentioned in the previous section.
 This configuration ensures that PrivateGPT can download and use the correct tokenizer for the model you are working with.
+
+# Embedding dimensions mismatch
+If you encounter an error message like `Embedding dimensions mismatch`, it is likely due to the embedding model and
+current vector dimension mismatch. To resolve this issue, ensure that the model and the input data have the same vector dimensions.
+
+By default, PrivateGPT uses `nomic-embed-text` embeddings, which have a vector dimension of 768.
+If you are using a different embedding model, ensure that the vector dimensions match the model's output.
+
+<Callout intent = "warning">
+In versions below to 0.6.0, the default embedding model was `BAAI/bge-small-en-v1.5` in `huggingface` setup.
+If you plan to reuse the old generated embeddings, you need to update the `settings.yaml` file to use the correct embedding model:
+```yaml
+huggingface:
+  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+embedding:
+  embed_dim: 384
+```
+</Callout>
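As a counterpart to the callout above, the new defaults introduced in this diff pair `nomic-ai/nomic-embed-text-v1.5` with a 768-dimension vector store. A minimal sketch of the matching configuration (illustrative, taken from the settings.yaml hunk further down):

```yaml
# Sketch only: the embedding model and embed_dim must agree
embedding:
  embed_dim: 768  # 768 is for nomic-ai/nomic-embed-text-v1.5
huggingface:
  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
```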
@@ -8,6 +8,14 @@ The ingestion of documents can be done in different ways:
 
 ## Bulk Local Ingestion
 
+You will need to activate `data.local_ingestion.enabled` in your setting file to use this feature. Additionally,
+it is probably a good idea to set `data.local_ingestion.allow_ingest_from` to specify which folders are allowed to be ingested.
+
+<Callout intent = "warning">
+Be careful enabling this feature in a production environment, as it can be a security risk, as it allows users to
+ingest any local file with permissions.
+</Callout>
+
 When you are running PrivateGPT in a fully local setup, you can ingest a complete folder for convenience (containing
 pdf, text files, etc.)
 and optionally watch changes on it with the command:
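For reference, a minimal sketch of the settings block these docs refer to, based on the new `IngestionSettings` model and the settings-test.yaml hunk elsewhere in this diff (values are illustrative):

```yaml
# Sketch only: enable bulk local ingestion for scripts/ingest_folder.py
data:
  local_ingestion:
    enabled: true              # disabled by default
    allow_ingest_from: ["*"]   # "*" allows any folder; list specific paths to restrict
  local_data_folder: local_data/private_gpt
```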
poetry.lock (generated, 4581 lines changed)
File diff suppressed because it is too large
@@ -169,7 +169,7 @@ class Llama3PromptStyle(AbstractPromptStyle):
     """
 
     def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
-        prompt = self.BOS
+        prompt = ""
         has_system_message = False
 
         for i, message in enumerate(messages):
@@ -189,8 +189,7 @@ class Llama3PromptStyle(AbstractPromptStyle):
         # Add default system prompt if no system message was provided
         if not has_system_message:
             prompt = (
-                f"{self.BOS}{self.B_SYS}\n\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}"
-                + prompt[len(self.BOS) :]
+                f"{self.B_SYS}\n\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}" + prompt
             )
 
         # TODO: Implement tool handling logic
@@ -199,7 +198,7 @@ class Llama3PromptStyle(AbstractPromptStyle):
 
     def _completion_to_prompt(self, completion: str) -> str:
         return (
-            f"{self.BOS}{self.B_SYS}\n\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}"
+            f"{self.B_SYS}\n\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}"
             f"{self.B_INST}user{self.E_INST}\n\n{completion.strip()}{self.EOT}"
             f"{self.ASSISTANT_INST}\n\n"
         )
@@ -59,6 +59,27 @@ class AuthSettings(BaseModel):
     )
 
 
+class IngestionSettings(BaseModel):
+    """Ingestion configuration.
+
+    This configuration is used to control the ingestion of data into the system
+    using non-server methods. This is useful for local development and testing;
+    or to ingest in bulk from a folder.
+
+    Please note that this configuration is not secure and should be used in
+    a controlled environment only (setting right permissions, etc.).
+    """
+
+    enabled: bool = Field(
+        description="Flag indicating if local ingestion is enabled or not.",
+        default=False,
+    )
+    allow_ingest_from: list[str] = Field(
+        description="A list of folders that should be permitted to make ingest requests.",
+        default=[],
+    )
+
+
 class ServerSettings(BaseModel):
     env_name: str = Field(
         description="Name of the environment (prod, staging, local...)"
@@ -74,6 +95,10 @@ class ServerSettings(BaseModel):
 
 
 class DataSettings(BaseModel):
+    local_ingestion: IngestionSettings = Field(
+        description="Ingestion configuration",
+        default_factory=lambda: IngestionSettings(allow_ingest_from=["*"]),
+    )
     local_data_folder: str = Field(
         description="Path to local storage."
        "It will be treated as an absolute path if it starts with /"
@@ -384,7 +384,7 @@ class PrivateGptUi:
             "hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }"
             ".avatar-image { background-color: antiquewhite; border-radius: 2px; }"
             ".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }"
-            ".footer-zylon-link { display:flex; margin-left: 5px; text-decoration: auto; color: #fff; }"
+            ".footer-zylon-link { display:flex; margin-left: 5px; text-decoration: auto; color: var(--body-text-color); }"
             ".footer-zylon-link:hover { color: #C7BAFF; }"
             ".footer-zylon-ico { height: 20px; margin-left: 5px; background-color: antiquewhite; border-radius: 2px; }",
         ) as blocks:
@@ -7,12 +7,13 @@ from pathlib import Path
 from private_gpt.di import global_injector
 from private_gpt.server.ingest.ingest_service import IngestService
 from private_gpt.server.ingest.ingest_watcher import IngestWatcher
+from private_gpt.settings.settings import Settings
 
 logger = logging.getLogger(__name__)
 
 
 class LocalIngestWorker:
-    def __init__(self, ingest_service: IngestService) -> None:
+    def __init__(self, ingest_service: IngestService, setting: Settings) -> None:
         self.ingest_service = ingest_service
 
         self.total_documents = 0
@@ -20,6 +21,24 @@ class LocalIngestWorker:
 
         self._files_under_root_folder: list[Path] = []
 
+        self.is_local_ingestion_enabled = setting.data.local_ingestion.enabled
+        self.allowed_local_folders = setting.data.local_ingestion.allow_ingest_from
+
+    def _validate_folder(self, folder_path: Path) -> None:
+        if not self.is_local_ingestion_enabled:
+            raise ValueError(
+                "Local ingestion is disabled."
+                "You can enable it in settings `ingestion.enabled`"
+            )
+
+        # Allow all folders if wildcard is present
+        if "*" in self.allowed_local_folders:
+            return
+
+        for allowed_folder in self.allowed_local_folders:
+            if not folder_path.is_relative_to(allowed_folder):
+                raise ValueError(f"Folder {folder_path} is not allowed for ingestion")
+
     def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None:
         """Search all files under the root folder recursively.
 
@@ -28,6 +47,7 @@ class LocalIngestWorker:
         for file_path in root_path.iterdir():
             if file_path.is_file() and file_path.name not in ignored:
                 self.total_documents += 1
+                self._validate_folder(file_path)
                 self._files_under_root_folder.append(file_path)
             elif file_path.is_dir() and file_path.name not in ignored:
                 self._find_all_files_in_folder(file_path, ignored)
@@ -92,13 +112,13 @@ if args.log_file:
     logger.addHandler(file_handler)
 
 if __name__ == "__main__":
 
     root_path = Path(args.folder)
     if not root_path.exists():
         raise ValueError(f"Path {args.folder} does not exist")
 
     ingest_service = global_injector.get(IngestService)
-    worker = LocalIngestWorker(ingest_service)
+    settings = global_injector.get(Settings)
+    worker = LocalIngestWorker(ingest_service, settings)
     worker.ingest_folder(root_path, args.ignored)
 
     if args.ignored:
@@ -6,21 +6,21 @@ llm:
   mode: ${PGPT_MODE:mock}
 
 embedding:
-  mode: ${PGPT_MODE:sagemaker}
+  mode: ${PGPT_EMBED_MODE:mock}
 
 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf}
 
 huggingface:
-  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:nomic-ai/nomic-embed-text-v1.5}
 
 sagemaker:
   llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
   embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
 
 ollama:
-  llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
+  llm_model: ${PGPT_OLLAMA_LLM_MODEL:llama3.1}
   embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
   api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
   embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
@@ -30,6 +30,7 @@ ollama:
   repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
   repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
   request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
+  autopull_models: ${PGPT_OLLAMA_AUTOPULL_MODELS:true}
 
 ui:
   enabled: true
@@ -7,18 +7,18 @@ llm:
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
-  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
-  prompt_style: "mistral"
+  tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
+  prompt_style: "llama3"
 
 llamacpp:
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
+  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
+  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
 
 embedding:
   mode: huggingface
 
 huggingface:
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
 
 vectorstore:
   database: qdrant
@@ -14,7 +14,7 @@ embedding:
   embed_dim: 768
 
 ollama:
-  llm_model: mistral
+  llm_model: llama3.1
   embedding_model: nomic-embed-text
   api_base: http://localhost:11434
 
@@ -11,7 +11,7 @@ embedding:
   mode: ollama
 
 ollama:
-  llm_model: mistral
+  llm_model: llama3.1
   embedding_model: nomic-embed-text
   api_base: http://localhost:11434
   embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
@@ -4,7 +4,7 @@ server:
 llm:
   mode: openailike
   max_new_tokens: 512
-  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
   temperature: 0.1
 
 embedding:
@@ -12,7 +12,7 @@ embedding:
   ingest_mode: simple
 
 huggingface:
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
 
 openai:
   api_base: http://localhost:8000/v1
@@ -17,6 +17,9 @@ server:
   secret: "Basic c2VjcmV0OmtleQ=="
 
 data:
+  local_ingestion:
+    enabled: ${LOCAL_INGESTION_ENABLED:false}
+    allow_ingest_from: ["*"]
   local_data_folder: local_data/private_gpt
 
 ui:
@@ -43,12 +46,12 @@ ui:
 
 llm:
   mode: llamacpp
-  prompt_style: "mistral"
+  prompt_style: "llama3"
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
   # Select your tokenizer. Llama-index tokenizer is the default.
-  # tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
   temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
 rag:
@@ -72,8 +75,8 @@ clickhouse:
   database: embeddings
 
 llamacpp:
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
+  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
+  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
   top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
   top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
@@ -83,10 +86,10 @@ embedding:
   # Should be matching the value above in most cases
   mode: huggingface
   ingest_mode: simple
-  embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
+  embed_dim: 768 # 768 is for nomic-ai/nomic-embed-text-v1.5
 
 huggingface:
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
   access_token: ${HF_TOKEN:}
 
 vectorstore:
@@ -121,7 +124,7 @@ openai:
   embedding_api_key: ${OPENAI_API_KEY:}
 
 ollama:
-  llm_model: llama2
+  llm_model: llama3.1
   embedding_model: nomic-embed-text
   api_base: http://localhost:11434
   embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
tests/server/ingest/test_local_ingest.py (new file, 74 lines)
@@ -0,0 +1,74 @@
+import os
+import subprocess
+from pathlib import Path
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+@pytest.fixture()
+def file_path() -> str:
+    return "test.txt"
+
+
+def create_test_file(file_path: str) -> None:
+    with open(file_path, "w") as f:
+        f.write("test")
+
+
+def clear_log_file(log_file_path: str) -> None:
+    if Path(log_file_path).exists():
+        os.remove(log_file_path)
+
+
+def read_log_file(log_file_path: str) -> str:
+    with open(log_file_path) as f:
+        return f.read()
+
+
+def init_structure(folder: str, file_path: str) -> None:
+    clear_log_file(file_path)
+    os.makedirs(folder, exist_ok=True)
+    create_test_file(f"{folder}/${file_path}")
+
+
+def test_ingest_one_file_in_allowed_folder(
+    file_path: str, test_client: TestClient
+) -> None:
+    allowed_folder = "local_data/tests/allowed_folder"
+    init_structure(allowed_folder, file_path)
+
+    test_env = os.environ.copy()
+    test_env["PGPT_PROFILES"] = "test"
+    test_env["LOCAL_INGESTION_ENABLED"] = "True"
+
+    result = subprocess.run(
+        ["python", "scripts/ingest_folder.py", allowed_folder],
+        capture_output=True,
+        text=True,
+        env=test_env,
+    )
+
+    assert result.returncode == 0, f"Script failed with error: {result.stderr}"
+    response_after = test_client.get("/v1/ingest/list")
+
+    count_ingest_after = len(response_after.json()["data"])
+    assert count_ingest_after > 0, "No documents were ingested"
+
+
+def test_ingest_disabled(file_path: str) -> None:
+    allowed_folder = "local_data/tests/allowed_folder"
+    init_structure(allowed_folder, file_path)
+
+    test_env = os.environ.copy()
+    test_env["PGPT_PROFILES"] = "test"
+    test_env["LOCAL_INGESTION_ENABLED"] = "False"
+
+    result = subprocess.run(
+        ["python", "scripts/ingest_folder.py", allowed_folder],
+        capture_output=True,
+        text=True,
+        env=test_env,
+    )
+
+    assert result.returncode != 0, f"Script failed with error: {result.stderr}"
@@ -150,7 +150,7 @@ def test_llama3_prompt_style_format():
     ]
 
     expected_prompt = (
-        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
+        "<|start_header_id|>system<|end_header_id|>\n\n"
         "You are a helpful assistant<|eot_id|>"
         "<|start_header_id|>user<|end_header_id|>\n\n"
         "Hello, how are you doing?<|eot_id|>"
@@ -166,7 +166,7 @@ def test_llama3_prompt_style_with_default_system():
         ChatMessage(content="Hello!", role=MessageRole.USER),
     ]
     expected = (
-        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
+        "<|start_header_id|>system<|end_header_id|>\n\n"
         f"{prompt_style.DEFAULT_SYSTEM_PROMPT}<|eot_id|>"
         "<|start_header_id|>user<|end_header_id|>\n\nHello!<|eot_id|>"
         "<|start_header_id|>assistant<|end_header_id|>\n\n"
@@ -185,7 +185,7 @@ def test_llama3_prompt_style_with_assistant_response():
     ]
 
     expected_prompt = (
-        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
+        "<|start_header_id|>system<|end_header_id|>\n\n"
        "You are a helpful assistant<|eot_id|>"
         "<|start_header_id|>user<|end_header_id|>\n\n"
         "What is the capital of France?<|eot_id|>"