commit 0f36f484ba
Author: Egor4ik888
Date: 2024-11-10 02:37:05 +02:00 (committed by GitHub)
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
79 changed files with 6877 additions and 3776 deletions

.docker/router.yml (Normal file, 16 lines added)

@ -0,0 +1,16 @@
http:
services:
ollama:
loadBalancer:
healthCheck:
interval: 5s
path: /
servers:
- url: http://ollama-cpu:11434
- url: http://ollama-cuda:11434
- url: http://host.docker.internal:11434
routers:
ollama-router:
rule: "PathPrefix(`/`)"
service: ollama
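As a quick sanity check of the routing above (a sketch that assumes the default compose setup, where an Ollama backend is also published on the host's port 11434):

```bash
# Ollama replies "Ollama is running" on its root path, the same "/" path the
# health check above probes every 5 seconds.
curl -f http://localhost:11434/
```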

.github/ISSUE_TEMPLATE/bug.yml (vendored Normal file, 105 lines added)

@ -0,0 +1,105 @@
name: Bug Report
description: Report a bug or issue with the project.
title: "[BUG] "
labels: ["bug"]
body:
- type: markdown
attributes:
value: |
**Please describe the bug you encountered.**
- type: checkboxes
id: pre-check
attributes:
label: Pre-check
description: Please confirm that you have searched for duplicate issues before creating this one.
options:
- label: I have searched the existing issues and none cover this bug.
required: true
- type: textarea
id: description
attributes:
label: Description
description: Provide a detailed description of the bug.
placeholder: "Detailed description of the bug"
validations:
required: true
- type: textarea
id: steps
attributes:
label: Steps to Reproduce
description: Provide the steps to reproduce the bug.
placeholder: "1. Step one\n2. Step two\n3. Step three"
validations:
required: true
- type: input
id: expected
attributes:
label: Expected Behavior
description: Describe what you expected to happen.
placeholder: "Expected behavior"
validations:
required: true
- type: input
id: actual
attributes:
label: Actual Behavior
description: Describe what actually happened.
placeholder: "Actual behavior"
validations:
required: true
- type: input
id: environment
attributes:
label: Environment
description: Provide details about your environment (e.g., OS, GPU, profile, etc.).
placeholder: "Environment details"
validations:
required: true
- type: input
id: additional
attributes:
label: Additional Information
description: Provide any additional information that may be relevant (e.g., logs, screenshots).
placeholder: "Any additional information that may be relevant"
- type: input
id: version
attributes:
label: Version
description: Provide the version of the project where you encountered the bug.
placeholder: "Version number"
- type: markdown
attributes:
value: |
**Please ensure the following setup checklist has been reviewed before submitting the bug report.**
- type: checkboxes
id: general-setup-checklist
attributes:
label: Setup Checklist
description: Verify the following general aspects of your setup.
options:
- label: Confirm that you have followed the installation instructions in the project's documentation.
- label: Check that you are using the latest version of the project.
- label: Verify disk space availability for model storage and data processing.
- label: Ensure that you have the necessary permissions to run the project.
- type: checkboxes
id: nvidia-setup-checklist
attributes:
label: NVIDIA GPU Setup Checklist
description: Verify the following aspects of your NVIDIA GPU setup.
options:
- label: Check that all CUDA dependencies are installed and are compatible with your GPU (refer to [CUDA's documentation](https://docs.nvidia.com/deploy/cuda-compatibility/#frequently-asked-questions))
- label: Ensure an NVIDIA GPU is installed and recognized by the system (run `nvidia-smi` to verify).
- label: Ensure proper permissions are set for accessing GPU resources.
- label: Docker users - Verify that the NVIDIA Container Toolkit is configured correctly (e.g. run `sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi`)
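For reference, the two verification commands referenced in this checklist can be run back to back (a sketch; the CUDA image tag is just the example from the item above):

```bash
# Confirm the driver sees the GPU on the host
nvidia-smi

# Confirm the NVIDIA Container Toolkit exposes the GPU inside containers
sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base-ubuntu20.04 nvidia-smi
```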

.github/ISSUE_TEMPLATE/config.yml (vendored Normal file, 8 lines added)

@ -0,0 +1,8 @@
blank_issues_enabled: false
contact_links:
- name: Documentation
url: https://docs.privategpt.dev
about: Please refer to our documentation for more details and guidance.
- name: Discord
url: https://discord.gg/bK6mRVpErU
about: Join our Discord community to ask questions and get help.

.github/ISSUE_TEMPLATE/docs.yml (vendored Normal file, 19 lines added)

@ -0,0 +1,19 @@
name: Documentation
description: Suggest a change or addition to the documentation.
title: "[DOCS] "
labels: ["documentation"]
body:
- type: markdown
attributes:
value: |
**Please describe the documentation change or addition you would like to suggest.**
- type: textarea
id: description
attributes:
label: Description
description: Provide a detailed description of the documentation change.
placeholder: "Detailed description of the documentation change"
validations:
required: true

.github/ISSUE_TEMPLATE/feature.yml (vendored Normal file, 37 lines added)

@ -0,0 +1,37 @@
name: Enhancement
description: Suggest an enhancement or improvement to the project.
title: "[FEATURE] "
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
**Please describe the enhancement or improvement you would like to suggest.**
- type: textarea
id: feature_description
attributes:
label: Feature Description
description: Provide a detailed description of the enhancement.
placeholder: "Detailed description of the enhancement"
validations:
required: true
- type: textarea
id: reason
attributes:
label: Reason
description: Explain the reason for this enhancement.
placeholder: "Reason for the enhancement"
validations:
required: true
- type: textarea
id: value
attributes:
label: Value of Feature
description: Describe the value or benefits this feature will bring.
placeholder: "Value or benefits of the feature"
validations:
required: true

.github/ISSUE_TEMPLATE/question.yml (vendored Normal file, 19 lines added)

@ -0,0 +1,19 @@
name: Question
description: Ask a question about the project.
title: "[QUESTION] "
labels: ["question"]
body:
- type: markdown
attributes:
value: |
**Please describe your question in detail.**
- type: textarea
id: question
attributes:
label: Question
description: Provide a detailed description of your question.
placeholder: "Detailed description of the question"
validations:
required: true

.github/pull_request_template.md (vendored Normal file, 37 lines added)

@ -0,0 +1,37 @@
# Description
Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
## Type of Change
Please delete options that are not relevant.
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update
## How Has This Been Tested?
Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration
- [ ] Added new unit/integration tests
- [ ] I stared at the code and made sure it makes sense
**Test Configuration**:
* Firmware version:
* Hardware:
* Toolchain:
* SDK:
## Checklist:
- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged and published in downstream modules
- [ ] I ran `make check; make test` to ensure mypy and tests pass


@ -0,0 +1,19 @@
{
"$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
"release-type": "simple",
"version-file": "version.txt",
"extra-files": [
{
"type": "toml",
"path": "pyproject.toml",
"jsonpath": "$.tool.poetry.version"
},
{
"type": "generic",
"path": "docker-compose.yaml"
}
],
"packages": {
".": {}
}
}


@ -0,0 +1,3 @@
{
".": "0.7.0"
}


@ -8,7 +8,7 @@ inputs:
poetry_version:
required: true
type: string
default: "1.5.1"
default: "1.8.3"
runs:
using: composite


@ -1,45 +0,0 @@
name: docker
on:
release:
types: [ published ]
workflow_dispatch:
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=sha
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: .
file: Dockerfile.external
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

.github/workflows/generate-release.yml (vendored Normal file, 83 lines added)

@ -0,0 +1,83 @@
name: generate-release
on:
release:
types: [ published ]
workflow_dispatch:
env:
REGISTRY: docker.io
IMAGE_NAME: zylonai/private-gpt
platforms: linux/amd64,linux/arm64
DEFAULT_TYPE: "ollama"
jobs:
build-and-push-image:
runs-on: ubuntu-latest
strategy:
matrix:
type: [ llamacpp-cpu, ollama ]
permissions:
contents: read
packages: write
outputs:
version: ${{ steps.version.outputs.version }}
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: false
swap-storage: true
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=semver,pattern={{version}},enable=${{ matrix.type == env.DEFAULT_TYPE }}
type=semver,pattern={{version}}-${{ matrix.type }}
type=semver,pattern={{major}}.{{minor}},enable=${{ matrix.type == env.DEFAULT_TYPE }}
type=semver,pattern={{major}}.{{minor}}-${{ matrix.type }}
type=raw,value=latest,enable=${{ matrix.type == env.DEFAULT_TYPE }}
type=sha
flavor: |
latest=false
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
file: Dockerfile.${{ matrix.type }}
platforms: ${{ env.platforms }}
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Version output
id: version
run: echo "version=${{ steps.meta.outputs.version }}" >> "$GITHUB_OUTPUT"


@ -11,6 +11,10 @@ jobs:
preview-docs:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
@ -37,14 +41,14 @@ jobs:
# Set the output for the step
echo "::set-output name=preview_url::$preview_url"
- name: Comment PR with URL using github-actions bot
uses: actions/github-script@v4
uses: actions/github-script@v7
if: ${{ steps.generate_docs.outputs.preview_url }}
with:
script: |
const preview_url = '${{ steps.generate_docs.outputs.preview_url }}';
const issue_number = context.issue.number;
github.issues.createComment({
...context.repo,
issue_number: issue_number,
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: `Published docs preview URL: ${preview_url}`
})


@ -13,7 +13,8 @@ jobs:
release-please:
runs-on: ubuntu-latest
steps:
- uses: google-github-actions/release-please-action@v3
- uses: google-github-actions/release-please-action@v4
id: release
with:
release-type: simple
version-file: version.txt
config-file: .github/release_please/.release-please-config.json
manifest-file: .github/release_please/.release-please-manifest.json


@ -14,7 +14,7 @@ jobs:
setup:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: ./.github/workflows/actions/install_dependencies
checks:
@ -28,7 +28,7 @@ jobs:
- ruff
- mypy
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: ./.github/workflows/actions/install_dependencies
- name: run ${{ matrix.quality-command }}
run: make ${{ matrix.quality-command }}
@ -38,7 +38,7 @@ jobs:
runs-on: ubuntu-latest
name: test
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: ./.github/workflows/actions/install_dependencies
- name: run test
run: make test-coverage


@ -1,5 +1,82 @@
# Changelog
## [0.7.0](https://github.com/zylon-ai/private-gpt/compare/v0.6.2...v0.7.0) (2024-10-17)
### Features
* add retry connection to ollama ([#2084](https://github.com/zylon-ai/private-gpt/issues/2084)) ([77461b9](https://github.com/zylon-ai/private-gpt/commit/77461b96cf2e18b88b592fff441206a49826db97))
* Adding MistralAI mode ([#2065](https://github.com/zylon-ai/private-gpt/issues/2065)) ([f9182b3](https://github.com/zylon-ai/private-gpt/commit/f9182b3a86d88af7c699b41b3a5f21401117acfc))
* update llama-index + dependencies ([#2092](https://github.com/zylon-ai/private-gpt/issues/2092)) ([5851b02](https://github.com/zylon-ai/private-gpt/commit/5851b02378313f3dba315e0251cfd421af79dae6))
### Bug Fixes
* 503 when private gpt gets ollama service ([#2104](https://github.com/zylon-ai/private-gpt/issues/2104)) ([940bdd4](https://github.com/zylon-ai/private-gpt/commit/940bdd49af14d9c1e7fd4af54f12648b5fc1f9c0))
* Add default mode option to settings ([#2078](https://github.com/zylon-ai/private-gpt/issues/2078)) ([fa3c306](https://github.com/zylon-ai/private-gpt/commit/fa3c30661d2ab04634361e20e7819365e3dd351a))
* docker permissions ([#2059](https://github.com/zylon-ai/private-gpt/issues/2059)) ([8c12c68](https://github.com/zylon-ai/private-gpt/commit/8c12c6830b37851cccb3fea75faa820fce49284a))
* naming image and ollama-cpu ([#2056](https://github.com/zylon-ai/private-gpt/issues/2056)) ([89477ea](https://github.com/zylon-ai/private-gpt/commit/89477ea9d3a83181b0222b732a81c71db9edf142))
* Rectify ffmpy poetry config; update version from 0.3.2 to 0.4.0 ([#2062](https://github.com/zylon-ai/private-gpt/issues/2062)) ([7603b36](https://github.com/zylon-ai/private-gpt/commit/7603b3627d91aed1cce2e1ae407fec11ca1ad132))
* Sanitize null bytes before ingestion ([#2090](https://github.com/zylon-ai/private-gpt/issues/2090)) ([5fbb402](https://github.com/zylon-ai/private-gpt/commit/5fbb402477c41e09f56a3e5adc32f316341772bf))
## [0.6.2](https://github.com/zylon-ai/private-gpt/compare/v0.6.1...v0.6.2) (2024-08-08)
### Bug Fixes
* add numpy issue to troubleshooting ([#2048](https://github.com/zylon-ai/private-gpt/issues/2048)) ([4ca6d0c](https://github.com/zylon-ai/private-gpt/commit/4ca6d0cb556be7a598f7d3e3b00d2a29214ee1e8))
* auto-update version ([#2052](https://github.com/zylon-ai/private-gpt/issues/2052)) ([7fefe40](https://github.com/zylon-ai/private-gpt/commit/7fefe408b4267684c6e3c1a43c5dc2b73ec61fe4))
* publish image name ([#2043](https://github.com/zylon-ai/private-gpt/issues/2043)) ([b1acf9d](https://github.com/zylon-ai/private-gpt/commit/b1acf9dc2cbca2047cd0087f13254ff5cda6e570))
* update matplotlib to 3.9.1-post1 to fix win install ([b16abbe](https://github.com/zylon-ai/private-gpt/commit/b16abbefe49527ac038d235659854b98345d5387))
## [0.6.1](https://github.com/zylon-ai/private-gpt/compare/v0.6.0...v0.6.1) (2024-08-05)
### Bug Fixes
* add built image from DockerHub ([#2042](https://github.com/zylon-ai/private-gpt/issues/2042)) ([f09f6dd](https://github.com/zylon-ai/private-gpt/commit/f09f6dd2553077d4566dbe6b48a450e05c2f049e))
* Adding azopenai to model list ([#2035](https://github.com/zylon-ai/private-gpt/issues/2035)) ([1c665f7](https://github.com/zylon-ai/private-gpt/commit/1c665f7900658144f62814b51f6e3434a6d7377f))
* **deploy:** generate docker release when new version is released ([#2038](https://github.com/zylon-ai/private-gpt/issues/2038)) ([1d4c14d](https://github.com/zylon-ai/private-gpt/commit/1d4c14d7a3c383c874b323d934be01afbaca899e))
* **deploy:** improve Docker-Compose and quickstart on Docker ([#2037](https://github.com/zylon-ai/private-gpt/issues/2037)) ([dae0727](https://github.com/zylon-ai/private-gpt/commit/dae0727a1b4abd35d2b0851fe30e0a4ed67e0fbb))
## [0.6.0](https://github.com/zylon-ai/private-gpt/compare/v0.5.0...v0.6.0) (2024-08-02)
### Features
* bump dependencies ([#1987](https://github.com/zylon-ai/private-gpt/issues/1987)) ([b687dc8](https://github.com/zylon-ai/private-gpt/commit/b687dc852413404c52d26dcb94536351a63b169d))
* **docs:** add privategpt-ts sdk ([#1924](https://github.com/zylon-ai/private-gpt/issues/1924)) ([d13029a](https://github.com/zylon-ai/private-gpt/commit/d13029a046f6e19e8ee65bef3acd96365c738df2))
* **docs:** Fix setup docu ([#1926](https://github.com/zylon-ai/private-gpt/issues/1926)) ([067a5f1](https://github.com/zylon-ai/private-gpt/commit/067a5f144ca6e605c99d7dbe9ca7d8207ac8808d))
* **docs:** update doc for ipex-llm ([#1968](https://github.com/zylon-ai/private-gpt/issues/1968)) ([19a7c06](https://github.com/zylon-ai/private-gpt/commit/19a7c065ef7f42b37f289dd28ac945f7afc0e73a))
* **docs:** update documentation and fix preview-docs ([#2000](https://github.com/zylon-ai/private-gpt/issues/2000)) ([4523a30](https://github.com/zylon-ai/private-gpt/commit/4523a30c8f004aac7a7ae224671e2c45ec0cb973))
* **llm:** add progress bar when ollama is pulling models ([#2031](https://github.com/zylon-ai/private-gpt/issues/2031)) ([cf61bf7](https://github.com/zylon-ai/private-gpt/commit/cf61bf780f8d122e4057d002abf03563bb45614a))
* **llm:** autopull ollama models ([#2019](https://github.com/zylon-ai/private-gpt/issues/2019)) ([20bad17](https://github.com/zylon-ai/private-gpt/commit/20bad17c9857809158e689e9671402136c1e3d84))
* **llm:** Support for Google Gemini LLMs and Embeddings ([#1965](https://github.com/zylon-ai/private-gpt/issues/1965)) ([fc13368](https://github.com/zylon-ai/private-gpt/commit/fc13368bc72d1f4c27644677431420ed77731c03))
* make llama3.1 as default ([#2022](https://github.com/zylon-ai/private-gpt/issues/2022)) ([9027d69](https://github.com/zylon-ai/private-gpt/commit/9027d695c11fbb01e62424b855665de71d513417))
* prompt_style applied to all LLMs + extra LLM params. ([#1835](https://github.com/zylon-ai/private-gpt/issues/1835)) ([e21bf20](https://github.com/zylon-ai/private-gpt/commit/e21bf20c10938b24711d9f2c765997f44d7e02a9))
* **recipe:** add our first recipe `Summarize` ([#2028](https://github.com/zylon-ai/private-gpt/issues/2028)) ([8119842](https://github.com/zylon-ai/private-gpt/commit/8119842ae6f1f5ecfaf42b06fa0d1ffec675def4))
* **vectordb:** Milvus vector db Integration ([#1996](https://github.com/zylon-ai/private-gpt/issues/1996)) ([43cc31f](https://github.com/zylon-ai/private-gpt/commit/43cc31f74015f8d8fcbf7a8ea7d7d9ecc66cf8c9))
* **vectorstore:** Add clickhouse support as vectore store ([#1883](https://github.com/zylon-ai/private-gpt/issues/1883)) ([2612928](https://github.com/zylon-ai/private-gpt/commit/26129288394c7483e6fc0496a11dc35679528cc1))
### Bug Fixes
* "no such group" error in Dockerfile, added docx2txt and cryptography deps ([#1841](https://github.com/zylon-ai/private-gpt/issues/1841)) ([947e737](https://github.com/zylon-ai/private-gpt/commit/947e737f300adf621d2261d527192f36f3387f8e))
* **config:** make tokenizer optional and include a troubleshooting doc ([#1998](https://github.com/zylon-ai/private-gpt/issues/1998)) ([01b7ccd](https://github.com/zylon-ai/private-gpt/commit/01b7ccd0648be032846647c9a184925d3682f612))
* **docs:** Fix concepts.mdx referencing to installation page ([#1779](https://github.com/zylon-ai/private-gpt/issues/1779)) ([dde0224](https://github.com/zylon-ai/private-gpt/commit/dde02245bcd51a7ede7b6789c82ae217cac53d92))
* **docs:** Update installation.mdx ([#1866](https://github.com/zylon-ai/private-gpt/issues/1866)) ([c1802e7](https://github.com/zylon-ai/private-gpt/commit/c1802e7cf0e56a2603213ec3b6a4af8fadb8a17a))
* ffmpy dependency ([#2020](https://github.com/zylon-ai/private-gpt/issues/2020)) ([dabf556](https://github.com/zylon-ai/private-gpt/commit/dabf556dae9cb00fe0262270e5138d982585682e))
* light mode ([#2025](https://github.com/zylon-ai/private-gpt/issues/2025)) ([1020cd5](https://github.com/zylon-ai/private-gpt/commit/1020cd53288af71a17882781f392512568f1b846))
* **LLM:** mistral ignoring assistant messages ([#1954](https://github.com/zylon-ai/private-gpt/issues/1954)) ([c7212ac](https://github.com/zylon-ai/private-gpt/commit/c7212ac7cc891f9e3c713cc206ae9807c5dfdeb6))
* **llm:** special tokens and leading space ([#1831](https://github.com/zylon-ai/private-gpt/issues/1831)) ([347be64](https://github.com/zylon-ai/private-gpt/commit/347be643f7929c56382a77c3f45f0867605e0e0a))
* make embedding_api_base match api_base when on docker ([#1859](https://github.com/zylon-ai/private-gpt/issues/1859)) ([2a432bf](https://github.com/zylon-ai/private-gpt/commit/2a432bf9c5582a94eb4052b1e80cabdb118d298e))
* nomic embeddings ([#2030](https://github.com/zylon-ai/private-gpt/issues/2030)) ([5465958](https://github.com/zylon-ai/private-gpt/commit/54659588b5b109a3dd17cca835e275240464d275))
* prevent to ingest local files (by default) ([#2010](https://github.com/zylon-ai/private-gpt/issues/2010)) ([e54a8fe](https://github.com/zylon-ai/private-gpt/commit/e54a8fe0433252808d0a60f6a08a43c9f5a42f3b))
* Replacing unsafe `eval()` with `json.loads()` ([#1890](https://github.com/zylon-ai/private-gpt/issues/1890)) ([9d0d614](https://github.com/zylon-ai/private-gpt/commit/9d0d614706581a8bfa57db45f62f84ab23d26f15))
* **settings:** enable cors by default so it will work when using ts sdk (spa) ([#1925](https://github.com/zylon-ai/private-gpt/issues/1925)) ([966af47](https://github.com/zylon-ai/private-gpt/commit/966af4771dbe5cf3fdf554b5fdf8f732407859c4))
* **ui:** gradio bug fixes ([#2021](https://github.com/zylon-ai/private-gpt/issues/2021)) ([d4375d0](https://github.com/zylon-ai/private-gpt/commit/d4375d078f18ba53562fd71651159f997fff865f))
* unify embedding models ([#2027](https://github.com/zylon-ai/private-gpt/issues/2027)) ([40638a1](https://github.com/zylon-ai/private-gpt/commit/40638a18a5713d60fec8fe52796dcce66d88258c))
## [0.5.0](https://github.com/zylon-ai/private-gpt/compare/v0.4.0...v0.5.0) (2024-04-02)


@ -8,18 +8,9 @@ message: >-
metadata from this file.
type: software
authors:
- given-names: Iván
family-names: Martínez Toro
email: ivanmartit@gmail.com
orcid: 'https://orcid.org/0009-0004-5065-2311'
- family-names: Gallego Vico
given-names: Daniel
email: danielgallegovico@gmail.com
orcid: 'https://orcid.org/0009-0006-8582-4384'
- given-names: Pablo
family-names: Orgaz
email: pabloogc+gh@gmail.com
orcid: 'https://orcid.org/0009-0008-0080-1437'
repository-code: 'https://github.com/imartinez/privateGPT'
- name: Zylon by PrivateGPT
address: hello@zylon.ai
website: 'https://www.zylon.ai/'
repository-code: 'https://github.com/zylon-ai/private-gpt'
license: Apache-2.0
date-released: '2023-05-02'


@ -1,40 +0,0 @@
FROM python:3.11.6-slim-bookworm as base
# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
FROM base as dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./
RUN poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-ollama"
FROM base as app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080
# Prepare a non-root user
RUN adduser --system worker
WORKDIR /home/worker/app
RUN mkdir local_data; chown worker local_data
RUN mkdir models; chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
USER worker
ENTRYPOINT python -m private_gpt

Dockerfile.llamacpp-cpu (Normal file, 62 lines added)

@ -0,0 +1,62 @@
### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
### You will run into a segfault on macOS
FROM python:3.11.6-slim-bookworm AS base
# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry==1.8.3
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"
# Dependencies to build llama-cpp
RUN apt update && apt install -y \
libopenblas-dev\
ninja-build\
build-essential\
pkg-config\
wget
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
FROM base AS dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./
ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
FROM base AS app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
ENV APP_ENV=prod
ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
EXPOSE 8080
# Prepare a non-root user
# More info about how to configure UIDs and GIDs in Docker:
# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md
# Define the User ID (UID) for the non-root user
# UID 100 is chosen to avoid conflicts with existing system users
ARG UID=100
# Define the Group ID (GID) for the non-root user
# GID 65534 is often used for the 'nogroup' or 'nobody' group
ARG GID=65534
RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
WORKDIR /home/worker/app
RUN chown worker /home/worker/app
RUN mkdir local_data && chown worker local_data
RUN mkdir models && chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker *.yaml ./
COPY --chown=worker scripts/ scripts
USER worker
ENTRYPOINT python -m private_gpt
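A stand-alone build of this image might look like the following (a sketch; the tag is arbitrary, and `POETRY_EXTRAS`, `UID`, and `GID` are the build arguments declared above, shown here with their defaults):

```bash
docker build \
  -f Dockerfile.llamacpp-cpu \
  --build-arg POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant" \
  --build-arg UID=100 \
  --build-arg GID=65534 \
  -t private-gpt:llamacpp-cpu .
```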


@ -1,50 +0,0 @@
### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
### You will run into a segfault in mac
FROM python:3.11.6-slim-bookworm as base
# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"
# Dependencies to build llama-cpp
RUN apt update && apt install -y \
libopenblas-dev\
ninja-build\
build-essential\
pkg-config\
wget
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
FROM base as dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./
RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
FROM base as app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080
# Prepare a non-root user
RUN adduser --system worker
WORKDIR /home/worker/app
RUN mkdir local_data; chown worker local_data
RUN mkdir models; chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
USER worker
ENTRYPOINT python -m private_gpt

Dockerfile.ollama (Normal file, 51 lines added)

@ -0,0 +1,51 @@
FROM python:3.11.6-slim-bookworm AS base
# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry==1.8.3
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
FROM base AS dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./
ARG POETRY_EXTRAS="ui vector-stores-qdrant llms-ollama embeddings-ollama"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
FROM base AS app
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
ENV APP_ENV=prod
ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
EXPOSE 8080
# Prepare a non-root user
# More info about how to configure UIDs and GIDs in Docker:
# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md
# Define the User ID (UID) for the non-root user
# UID 100 is chosen to avoid conflicts with existing system users
ARG UID=100
# Define the Group ID (GID) for the non-root user
# GID 65534 is often used for the 'nogroup' or 'nobody' group
ARG GID=65534
RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
WORKDIR /home/worker/app
RUN chown worker /home/worker/app
RUN mkdir local_data && chown worker local_data
RUN mkdir models && chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker *.yaml .
COPY --chown=worker scripts/ scripts
USER worker
ENTRYPOINT python -m private_gpt
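Outside of docker-compose, the resulting image could be run along these lines (a sketch; the image tag is arbitrary and the environment variables mirror the ones used by the project's compose setup, assuming an Ollama server is reachable from the container):

```bash
docker run --rm -p 8080:8080 \
  --add-host=host.docker.internal:host-gateway \
  -e PGPT_PROFILES=docker \
  -e PGPT_MODE=ollama \
  -e PGPT_EMBED_MODE=ollama \
  -e PGPT_OLLAMA_API_BASE=http://host.docker.internal:11434 \
  private-gpt:ollama
```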


@ -1,22 +1,22 @@
# 🔒 PrivateGPT 📑
[![Tests](https://github.com/imartinez/privateGPT/actions/workflows/tests.yml/badge.svg)](https://github.com/imartinez/privateGPT/actions/workflows/tests.yml?query=branch%3Amain)
[![Tests](https://github.com/zylon-ai/private-gpt/actions/workflows/tests.yml/badge.svg)](https://github.com/zylon-ai/private-gpt/actions/workflows/tests.yml?query=branch%3Amain)
[![Website](https://img.shields.io/website?up_message=check%20it&down_message=down&url=https%3A%2F%2Fdocs.privategpt.dev%2F&label=Documentation)](https://docs.privategpt.dev/)
[![Discord](https://img.shields.io/discord/1164200432894234644?logo=discord&label=PrivateGPT)](https://discord.gg/bK6mRVpErU)
[![X (formerly Twitter) Follow](https://img.shields.io/twitter/follow/ZylonPrivateGPT)](https://twitter.com/ZylonPrivateGPT)
> Install & usage docs: https://docs.privategpt.dev/
>
> Join the community: [Twitter](https://twitter.com/PrivateGPT_AI) & [Discord](https://discord.gg/bK6mRVpErU)
![Gradio UI](/fern/docs/assets/ui.png?raw=true)
PrivateGPT is a production-ready AI project that allows you to ask questions about your documents using the power
of Large Language Models (LLMs), even in scenarios without an Internet connection. 100% private, no data leaves your
execution environment at any point.
>[!TIP]
> If you are looking for an **enterprise-ready, fully private AI workspace**
> check out [Zylon's website](https://zylon.ai) or [request a demo](https://cal.com/zylon/demo?source=pgpt-readme).
> Crafted by the team behind PrivateGPT, Zylon is a best-in-class AI collaborative
> workspace that can be easily deployed on-premise (data center, bare metal...) or in your private cloud (AWS, GCP, Azure...).
The project provides an API offering all the primitives required to build private, context-aware AI applications.
It follows and extends the [OpenAI API standard](https://openai.com/blog/openai-api),
and supports both normal and streaming responses.
@ -38,13 +38,10 @@ In addition to this, a working [Gradio UI](https://www.gradio.app/)
client is provided to test the API, together with a set of useful tools such as bulk model
download script, ingestion script, documents folder watch, etc.
> 👂 **Need help applying PrivateGPT to your specific use case?**
> [Let us know more about it](https://forms.gle/4cSDmH13RZBHV9at7)
> and we'll try to help! We are refining PrivateGPT through your feedback.
## 🎞️ Overview
DISCLAIMER: This README is not updated as frequently as the [documentation](https://docs.privategpt.dev/).
Please check it out for the latest updates!
>[!WARNING]
> This README is not updated as frequently as the [documentation](https://docs.privategpt.dev/).
> Please check it out for the latest updates!
### Motivation behind PrivateGPT
Generative AI is a game changer for our society, but adoption in companies of all sizes and data-sensitive
@ -62,7 +59,7 @@ thus a simpler and more educational implementation to understand the basic conce
to build a fully local -and therefore, private- chatGPT-like tool.
If you want to keep experimenting with it, we have saved it in the
[primordial branch](https://github.com/imartinez/privateGPT/tree/primordial) of the project.
[primordial branch](https://github.com/zylon-ai/private-gpt/tree/primordial) of the project.
> It is strongly recommended to do a clean clone and install of this new version of
PrivateGPT if you come from the previous, primordial version.
@ -73,7 +70,7 @@ completions, document ingestion, RAG pipelines and other low-level building bloc
We want to make it easier for any developer to build AI applications and experiences, as well as provide
a suitable extensive architecture for the community to keep contributing.
Stay tuned to our [releases](https://github.com/imartinez/privateGPT/releases) to check out all the new features and changes included.
Stay tuned to our [releases](https://github.com/zylon-ai/private-gpt/releases) to check out all the new features and changes included.
## 📄 Documentation
Full documentation on installation, dependencies, configuration, running the server, deployment options,
@ -132,19 +129,19 @@ Here are a couple of examples:
#### BibTeX
```bibtex
@software{Martinez_Toro_PrivateGPT_2023,
author = {Martínez Toro, Iván and Gallego Vico, Daniel and Orgaz, Pablo},
@software{Zylon_PrivateGPT_2023,
author = {Zylon by PrivateGPT},
license = {Apache-2.0},
month = may,
title = {{PrivateGPT}},
url = {https://github.com/imartinez/privateGPT},
url = {https://github.com/zylon-ai/private-gpt},
year = {2023}
}
```
#### APA
```
Martínez Toro, I., Gallego Vico, D., & Orgaz, P. (2023). PrivateGPT [Computer software]. https://github.com/imartinez/privateGPT
Zylon by PrivateGPT (2023). PrivateGPT [Computer software]. https://github.com/zylon-ai/private-gpt
```
## 🤗 Partners & Supporters


@ -1,16 +1,116 @@
services:
private-gpt:
#-----------------------------------
#---- Private-GPT services ---------
#-----------------------------------
# Private-GPT service for the Ollama CPU and GPU modes
# This service builds from an external Dockerfile and runs the Ollama mode.
private-gpt-ollama:
image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.7.0}-ollama # x-release-please-version
user: root
build:
dockerfile: Dockerfile.external
context: .
dockerfile: Dockerfile.ollama
volumes:
- ./local_data/:/home/worker/app/local_data
- ./local_data:/home/worker/app/local_data
ports:
- 8001:8080
- "8001:8001"
environment:
PORT: 8080
PORT: 8001
PGPT_PROFILES: docker
PGPT_MODE: ollama
PGPT_EMBED_MODE: ollama
PGPT_OLLAMA_API_BASE: http://ollama:11434
HF_TOKEN: ${HF_TOKEN:-}
profiles:
- ""
- ollama-cpu
- ollama-cuda
- ollama-api
depends_on:
ollama:
condition: service_healthy
# Private-GPT service for the local mode
# This service builds from a local Dockerfile and runs the application in local mode.
private-gpt-llamacpp-cpu:
image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.7.0}-llamacpp-cpu # x-release-please-version
user: root
build:
context: .
dockerfile: Dockerfile.llamacpp-cpu
volumes:
- ./local_data/:/home/worker/app/local_data
- ./models/:/home/worker/app/models
entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
ports:
- "8001:8001"
environment:
PORT: 8001
PGPT_PROFILES: local
HF_TOKEN: ${HF_TOKEN:-}
profiles:
- llamacpp-cpu
#-----------------------------------
#---- Ollama services --------------
#-----------------------------------
# Traefik reverse proxy for the Ollama service
# This will route requests to the Ollama service based on the profile.
ollama:
image: traefik:v2.10
healthcheck:
test: ["CMD", "sh", "-c", "wget -q --spider http://ollama:11434 || exit 1"]
interval: 10s
retries: 3
start_period: 5s
timeout: 5s
ports:
- "8080:8080"
command:
- "--providers.file.filename=/etc/router.yml"
- "--log.level=ERROR"
- "--api.insecure=true"
- "--providers.docker=true"
- "--providers.docker.exposedbydefault=false"
- "--entrypoints.web.address=:11434"
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
- ./.docker/router.yml:/etc/router.yml:ro
extra_hosts:
- "host.docker.internal:host-gateway"
profiles:
- ""
- ollama-cpu
- ollama-cuda
- ollama-api
# Ollama service for the CPU mode
ollama-cpu:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
profiles:
- ""
- ollama-cpu
# Ollama service for the CUDA mode
ollama-cuda:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
profiles:
- ollama-cuda
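The profiles above let a single compose file cover every deployment mode; usage sketch (profile names are the ones declared above, exact behaviour depends on your Docker Compose version):

```bash
# Default services (tagged with the empty profile): API + Traefik + CPU Ollama
docker compose up -d

# GPU-backed Ollama instead of the CPU container
docker compose --profile ollama-cuda up -d

# Reuse an Ollama server already running on the host
docker compose --profile ollama-api up -d

# Fully local llama.cpp image, with no Ollama services
docker compose --profile llamacpp-cpu up -d
```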


@ -1,4 +1,4 @@
# Documentation of privateGPT
# Documentation of PrivateGPT
The documentation of this project is being rendered thanks to [fern](https://github.com/fern-api/fern).


@ -10,6 +10,9 @@ tabs:
overview:
display-name: Overview
icon: "fa-solid fa-home"
quickstart:
display-name: Quickstart
icon: "fa-solid fa-rocket"
installation:
display-name: Installation
icon: "fa-solid fa-download"
@ -32,7 +35,13 @@ navigation:
contents:
- page: Introduction
path: ./docs/pages/overview/welcome.mdx
# How to install privateGPT, with FAQ and troubleshooting
- tab: quickstart
layout:
- section: Getting started
contents:
- page: Quickstart
path: ./docs/pages/quickstart/quickstart.mdx
# How to install PrivateGPT, with FAQ and troubleshooting
- tab: installation
layout:
- section: Getting started
@ -41,7 +50,9 @@ navigation:
path: ./docs/pages/installation/concepts.mdx
- page: Installation
path: ./docs/pages/installation/installation.mdx
# Manual of privateGPT: how to use it and configure it
- page: Troubleshooting
path: ./docs/pages/installation/troubleshooting.mdx
# Manual of PrivateGPT: how to use it and configure it
- tab: manual
layout:
- section: General configuration
@ -68,17 +79,21 @@ navigation:
path: ./docs/pages/manual/reranker.mdx
- section: User Interface
contents:
- page: User interface (Gradio) Manual
path: ./docs/pages/manual/ui.mdx
# Small code snippet or example of usage to help users
- page: Gradio Manual
path: ./docs/pages/ui/gradio.mdx
- page: Alternatives
path: ./docs/pages/ui/alternatives.mdx
- tab: recipes
layout:
- section: Choice of LLM
- section: Getting started
contents:
# TODO: add recipes
- page: List of LLMs
path: ./docs/pages/recipes/list-llm.mdx
# More advanced usage of privateGPT, by API
- page: Quickstart
path: ./docs/pages/recipes/quickstart.mdx
- section: General use cases
contents:
- page: Summarize
path: ./docs/pages/recipes/summarize.mdx
# More advanced usage of PrivateGPT, by API
- tab: api-reference
layout:
- section: Overview
@ -92,12 +107,11 @@ navigation:
# Definition of the navbar, will be displayed in the top right corner.
# `type:primary` is always displayed at the most right side of the navbar
navbar-links:
- type: secondary
text: GitHub
url: "https://github.com/imartinez/privateGPT"
- type: secondary
text: Contact us
url: "mailto:hello@zylon.ai"
- type: github
value: "https://github.com/zylon-ai/private-gpt"
- type: primary
text: Join the Discord
url: https://discord.com/invite/bK6mRVpErU

Binary image file changed (not shown): 212 KiB before, 154 KiB after.


@ -8,14 +8,14 @@ The clients are kept up to date automatically, so we encourage you to use the la
<Cards>
<Card
title="Node.js/TypeScript - WIP"
title="TypeScript"
icon="fa-brands fa-node"
href="https://github.com/imartinez/privateGPT-typescript"
href="https://github.com/zylon-ai/privategpt-ts"
/>
<Card
title="Python - Ready!"
title="Python"
icon="fa-brands fa-python"
href="https://github.com/imartinez/pgpt_python"
href="https://github.com/zylon-ai/pgpt-python"
/>
<br />
</Cards>
@ -26,12 +26,12 @@ The clients are kept up to date automatically, so we encourage you to use the la
<Card
title="Java - WIP"
icon="fa-brands fa-java"
href="https://github.com/imartinez/privateGPT-java"
href="https://github.com/zylon-ai/private-gpt-java"
/>
<Card
title="Go - WIP"
icon="fa-brands fa-golang"
href="https://github.com/imartinez/privateGPT-go"
href="https://github.com/zylon-ai/private-gpt-go"
/>
</Cards>


@ -8,20 +8,27 @@ It supports a variety of LLM providers, embeddings providers, and vector stores,
## Setup configurations available
You get to decide the setup for these 3 main components:
- LLM: the large language model provider used for inference. It can be local, or remote, or even OpenAI.
- Embeddings: the embeddings provider used to encode the input, the documents and the users' queries. Same as the LLM, it can be local, or remote, or even OpenAI.
- Vector store: the store used to index and retrieve the documents.
- **LLM**: the large language model provider used for inference. It can be local, or remote, or even OpenAI.
- **Embeddings**: the embeddings provider used to encode the input, the documents and the users' queries. Same as the LLM, it can be local, or remote, or even OpenAI.
- **Vector store**: the store used to index and retrieve the documents.
There is an extra component that can be enabled or disabled: the UI. It is a Gradio UI that allows you to interact with the API in a more user-friendly way.
<Callout intent = "warning">
A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk
model download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.
</Callout>
### Setups and Dependencies
Your setup will be the combination of the different options available. You'll find recommended setups in the [installation](/installation) section.
Your setup will be the combination of the different options available. You'll find recommended setups in the [installation](./installation) section.
PrivateGPT uses poetry to manage its dependencies. You can install the dependencies for the different setups by running `poetry install --extras "<extra1> <extra2>..."`.
Extras are the different options available for each component. For example, to install the dependencies for a a local setup with UI and qdrant as vector database, Ollama as LLM and HuggingFace as local embeddings, you would run
Extras are the different options available for each component. For example, to install the dependencies for a local setup with the UI, Qdrant as the vector database, Ollama as the LLM, and local embeddings, you would run:
`poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-huggingface"`.
```bash
poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-ollama"
```
Refer to the [installation](/installation) section for more details.
Refer to the [installation](./installation) section for more details.
### Setups and Configuration
PrivateGPT uses yaml to define its configuration in files named `settings-<profile>.yaml`.
@ -37,17 +44,6 @@ will load the configuration from `settings.yaml` and `settings-ollama.yaml`.
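For example (a sketch of the pattern used elsewhere in these docs), selecting the `ollama` profile through the environment:

```bash
# Loads settings.yaml first, then applies the overrides from settings-ollama.yaml
PGPT_PROFILES=ollama make run
```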
## About Fully Local Setups
In order to run PrivateGPT in a fully local setup, you will need to run the LLM, Embeddings and Vector Store locally.
### Vector stores
The vector stores supported (Qdrant, ChromaDB and Postgres) run locally by default.
### Embeddings
For local Embeddings there are two options:
* (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama simplifies a lot the installation of local LLMs.
* You can use the 'embeddings-huggingface' option in PrivateGPT, which will use HuggingFace.
In order for HuggingFace LLM to work (the second option), you need to download the embeddings model to the `models` folder. You can do so by running the `setup` script:
```bash
poetry run python scripts/setup
```
### LLM
For local LLM there are two options:
@ -58,3 +54,14 @@ In order for LlamaCPP powered LLM to work (the second option), you need to downl
```bash
poetry run python scripts/setup
```
### Embeddings
For local Embeddings there are two options:
* (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama greatly simplifies the installation of local LLMs.
* You can use the 'embeddings-huggingface' option in PrivateGPT, which will use HuggingFace.
In order for the HuggingFace embeddings (the second option) to work, you need to download the embeddings model to the `models` folder. You can do so by running the `setup` script:
```bash
poetry run python scripts/setup
```
### Vector stores
The vector stores supported (Qdrant, Milvus, ChromaDB and Postgres) run locally by default.


@ -1,63 +1,107 @@
It is important that you review the Main Concepts before you start the installation process.
It is important that you review the [Main Concepts](../concepts) section to understand the different components of PrivateGPT and how they interact with each other.
## Base requirements to run PrivateGPT
* Clone PrivateGPT repository, and navigate to it:
### 1. Clone the PrivateGPT Repository
Clone the repository and navigate to it:
```bash
git clone https://github.com/imartinez/privateGPT
cd privateGPT
git clone https://github.com/zylon-ai/private-gpt
cd private-gpt
```
* Install Python `3.11` (*if you do not have it already*). Ideally through a python version manager like `pyenv`.
Earlier python versions are not supported.
* osx/linux: [pyenv](https://github.com/pyenv/pyenv)
* windows: [pyenv-win](https://github.com/pyenv-win/pyenv-win)
### 2. Install Python 3.11
If you do not have Python 3.11 installed, install it using a Python version manager like `pyenv`. Earlier Python versions are not supported.
#### macOS/Linux
Install and set Python 3.11 using [pyenv](https://github.com/pyenv/pyenv):
```bash
pyenv install 3.11
pyenv local 3.11
```
#### Windows
Install and set Python 3.11 using [pyenv-win](https://github.com/pyenv-win/pyenv-win):
```bash
pyenv install 3.11
pyenv local 3.11
```
* Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management:
### 3. Install `Poetry`
Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management:
Follow the instructions on the official Poetry website to install it.
* Install `make` to be able to run the different scripts:
* osx: (Using homebrew): `brew install make`
* windows: (Using chocolatey) `choco install make`
<Callout intent="warning">
A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend upgrading to a tested version.
To upgrade Poetry to the latest tested version, run `poetry self update 1.8.3` after installing it.
</Callout>
## Install and run your desired setup
### 4. Optional: Install `make`
To run various scripts, you need to install `make`. Follow the instructions for your operating system:
#### macOS
(Using Homebrew):
```bash
brew install make
```
#### Windows
(Using Chocolatey):
```bash
choco install make
```
PrivateGPT allows to customize the setup -from fully local to cloud based- by deciding the modules to use.
Here are the different options available:
## Install and Run Your Desired Setup
- LLM: "llama-cpp", "ollama", "sagemaker", "openai", "openailike", "azopenai"
- Embeddings: "huggingface", "openai", "sagemaker", "azopenai"
- Vector stores: "qdrant", "chroma", "postgres"
- UI: whether or not to enable UI (Gradio) or just go with the API
In order to only install the required dependencies, PrivateGPT offers different `extras` that can be combined during the installation process:
PrivateGPT allows customization of the setup, from fully local to cloud-based, by deciding the modules to use. To install only the required dependencies, PrivateGPT offers different `extras` that can be combined during the installation process:
```bash
poetry install --extras "<extra1> <extra2>..."
```
Where `<extra>` can be any of the following options described below.
Where `<extra>` can be any of the following:
### Available Modules
- ui: adds support for UI using Gradio
- llms-ollama: adds support for Ollama LLM, the easiest way to get a local LLM running, requires Ollama running locally
- llms-llama-cpp: adds support for local LLM using LlamaCPP - expect a messy installation process on some platforms
- llms-sagemaker: adds support for Amazon Sagemaker LLM, requires Sagemaker inference endpoints
- llms-openai: adds support for OpenAI LLM, requires OpenAI API key
- llms-openai-like: adds support for 3rd party LLM providers that are compatible with OpenAI's API
- llms-azopenai: adds support for Azure OpenAI LLM, requires Azure OpenAI inference endpoints
- embeddings-ollama: adds support for Ollama Embeddings, requires Ollama running locally
- embeddings-huggingface: adds support for local Embeddings using HuggingFace
- embeddings-sagemaker: adds support for Amazon Sagemaker Embeddings, requires Sagemaker inference endpoints
- embeddings-openai = adds support for OpenAI Embeddings, requires OpenAI API key
- embeddings-azopenai = adds support for Azure OpenAI Embeddings, requires Azure OpenAI inference endpoints
- vector-stores-qdrant: adds support for Qdrant vector store
- vector-stores-chroma: adds support for Chroma DB vector store
- vector-stores-postgres: adds support for Postgres vector store
You need to choose one option per category (LLM, Embeddings, Vector Stores, UI). Below are the tables listing the available options for each category.
#### LLM
| **Option** | **Description** | **Extra** |
|--------------|------------------------------------------------------------------------|---------------------|
| **ollama** | Adds support for Ollama LLM, requires Ollama running locally | llms-ollama |
| llama-cpp | Adds support for local LLM using LlamaCPP | llms-llama-cpp |
| sagemaker | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints | llms-sagemaker |
| openai | Adds support for OpenAI LLM, requires OpenAI API key | llms-openai |
| openailike | Adds support for 3rd party LLM providers compatible with OpenAI's API | llms-openai-like |
| azopenai | Adds support for Azure OpenAI LLM, requires Azure endpoints | llms-azopenai |
| gemini | Adds support for Gemini LLM, requires Gemini API key | llms-gemini |
#### Embeddings
| **Option** | **Description** | **Extra** |
|------------------|--------------------------------------------------------------------------------|-------------------------|
| **ollama** | Adds support for Ollama Embeddings, requires Ollama running locally | embeddings-ollama |
| huggingface | Adds support for local Embeddings using HuggingFace | embeddings-huggingface |
| openai | Adds support for OpenAI Embeddings, requires OpenAI API key | embeddings-openai |
| sagemaker | Adds support for Amazon Sagemaker Embeddings, requires Sagemaker endpoints | embeddings-sagemaker |
| azopenai | Adds support for Azure OpenAI Embeddings, requires Azure endpoints | embeddings-azopenai |
| gemini | Adds support for Gemini Embeddings, requires Gemini API key | embeddings-gemini |
#### Vector Stores
| **Option** | **Description** | **Extra** |
|------------------|-----------------------------------------|-------------------------|
| **qdrant** | Adds support for Qdrant vector store | vector-stores-qdrant |
| milvus | Adds support for Milvus vector store | vector-stores-milvus |
| chroma | Adds support for Chroma DB vector store | vector-stores-chroma |
| postgres | Adds support for Postgres vector store | vector-stores-postgres |
| clickhouse | Adds support for Clickhouse vector store| vector-stores-clickhouse|
#### UI
| **Option** | **Description** | **Extra** |
|--------------|------------------------------------------|-----------|
| Gradio | Adds support for UI using Gradio | ui |
<Callout intent = "warning">
A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk
model download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.
</Callout>
## Recommended Setups
@ -81,6 +125,8 @@ set PGPT_PROFILES=ollama
make run
```
Refer to the [troubleshooting](./troubleshooting) section for specific issues you might encounter.
### Local, Ollama-powered setup - RECOMMENDED
**The easiest way to run PrivateGPT fully locally** is to depend on Ollama for the LLM. Ollama makes local LLMs and embeddings very easy to install and use, abstracting away the complexity of GPU support. It's the recommended setup for local development.
@ -89,18 +135,22 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Oll
After the installation, make sure the Ollama desktop app is closed.
Install the models to be used, the default settings-ollama.yaml is configured to user `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB). Therefore:
```bash
ollama pull mistral
ollama pull nomic-embed-text
```
Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
```bash
ollama serve
```
Install the models to be used; the default settings-ollama.yaml is configured to use the llama3.1 8b LLM (~4GB) and nomic-embed-text embeddings (~275MB).
By default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.
In any case, if you want to manually pull models, run the following commands:
```bash
ollama pull llama3.1
ollama pull nomic-embed-text
```
Once done, on a different terminal, you can install PrivateGPT with the following command:
```bash
poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
@ -257,11 +307,12 @@ If you have all required dependencies properly configured running the
following powershell command should succeed.
```powershell
$env:CMAKE_ARGS='-DLLAMA_CUBLAS=on'; poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
$env:CMAKE_ARGS='-DLLAMA_CUBLAS=on'; poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python numpy==1.26.0
```
If your installation was correct, you should see a message similar to the following next
time you start the server `BLAS = 1`.
time you start the server `BLAS = 1`. If there is some issue, please refer to the
[troubleshooting](/installation/getting-started/troubleshooting#building-llama-cpp-with-nvidia-gpu-support) section.
```console
llama_new_context_with_model: total VRAM used: 4857.93 MB (model: 4095.05 MB, context: 762.87 MB)
@ -289,11 +340,12 @@ Some tips:
After that running the following command in the repository will install llama.cpp with GPU support:
```bash
CMAKE_ARGS='-DLLAMA_CUBLAS=on' poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
CMAKE_ARGS='-DLLAMA_CUBLAS=on' poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python numpy==1.26.0
```
If your installation was correct, you should see a message similar to the following next
time you start the server `BLAS = 1`.
time you start the server `BLAS = 1`. If there is some issue, please refer to the
[troubleshooting](/installation/getting-started/troubleshooting#building-llama-cpp-with-nvidia-gpu-support) section.
```
llama_new_context_with_model: total VRAM used: 4857.93 MB (model: 4095.05 MB, context: 762.87 MB)


@ -0,0 +1,64 @@
# Downloading Gated and Private Models
Many models are gated or private, requiring special access to use them. Follow these steps to gain access and set up your environment for using these models.
## Accessing Gated Models
1. **Request Access:**
Follow the instructions provided [here](https://huggingface.co/docs/hub/en/models-gated) to request access to the gated model.
2. **Generate a Token:**
Once you have access, generate a token by following the instructions [here](https://huggingface.co/docs/hub/en/security-tokens).
3. **Set the Token:**
Add the generated token to your `settings.yaml` file:
```yaml
huggingface:
access_token: <your-token>
```
Alternatively, set the `HF_TOKEN` environment variable:
```bash
export HF_TOKEN=<your-token>
```
# Tokenizer Setup
PrivateGPT uses HuggingFace's `AutoTokenizer` to tokenize input text accurately. It connects to HuggingFace's API to download the appropriate tokenizer for the specified model.
## Configuring the Tokenizer
1. **Specify the Model:**
In your `settings.yaml` file, specify the model you want to use:
```yaml
llm:
tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
```
2. **Set Access Token for Gated Models:**
If you are using a gated model, ensure the `access_token` is set as mentioned in the previous section.
This configuration ensures that PrivateGPT can download and use the correct tokenizer for the model you are working with.
# Embedding dimensions mismatch
If you encounter an error message like `Embedding dimensions mismatch`, it is likely due to a mismatch between the embedding model and
the current vector dimension. To resolve this issue, ensure that the model and the input data have the same vector dimensions.
By default, PrivateGPT uses `nomic-embed-text` embeddings, which have a vector dimension of 768.
If you are using a different embedding model, ensure that the vector dimensions match the model's output.
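If you are unsure which dimension your embedding model produces, a quick manual check can help. The following is a minimal sketch assuming the `embeddings-ollama` extra is installed and an Ollama server is running locally; swap in your own embedding class if you use a different setup:
```python
# Minimal sketch: print the dimension produced by the configured embedding model.
# Assumes the `embeddings-ollama` extra is installed and Ollama runs on localhost:11434.
from llama_index.embeddings.ollama import OllamaEmbedding

embedding_model = OllamaEmbedding(
    model_name="nomic-embed-text",
    base_url="http://localhost:11434",
)
vector = embedding_model.get_text_embedding("dimension check")
print(len(vector))  # should match `embedding.embed_dim` (768 for nomic-embed-text)
```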
<Callout intent = "warning">
In versions prior to 0.6.0, the default embedding model in the `huggingface` setup was `BAAI/bge-small-en-v1.5`.
If you plan to reuse the old generated embeddings, you need to update the `settings.yaml` file to use the correct embedding model:
```yaml
huggingface:
embedding_hf_model_name: BAAI/bge-small-en-v1.5
embedding:
embed_dim: 384
```
</Callout>
# Building Llama-cpp with NVIDIA GPU support
## Out-of-memory error
If you encounter an out-of-memory error while running `llama-cpp` with CUDA, you can try the following steps to resolve the issue:
1. **Set the following environment variable:**
```bash
TOKENIZERS_PARALLELISM=true
```
2. **Run PrivateGPT:**
```bash
poetry run python -m private_gpt
```
Thanks to [MarioRossiGithub](https://github.com/MarioRossiGithub) for providing this solution.

View file

@ -8,6 +8,14 @@ The ingestion of documents can be done in different ways:
## Bulk Local Ingestion
You will need to activate `data.local_ingestion.enabled` in your setting file to use this feature. Additionally,
it is probably a good idea to set `data.local_ingestion.allow_ingest_from` to specify which folders are allowed to be ingested.
<Callout intent = "warning">
Be careful when enabling this feature in a production environment, as it can be a security risk: it allows users to
ingest any local file they have permission to read.
</Callout>
When you are running PrivateGPT in a fully local setup, you can ingest a complete folder for convenience (containing
PDF files, text files, etc.) and optionally watch it for changes with the command:
@ -93,7 +101,7 @@ time PGPT_PROFILES=mock python ./scripts/ingest_folder.py ~/my-dir/to-ingest/
## Supported file formats
privateGPT by default supports all the file formats that contains clear text (for example, `.txt` files, `.html`, etc.).
PrivateGPT by default supports all file formats that contain clear text (for example, `.txt` files, `.html`, etc.).
However, these text-based file formats are only treated as plain text files and are not pre-processed in any other way.
It also supports the following file formats:
@ -115,11 +123,15 @@ It also supports the following file formats:
* `.ipynb`
* `.json`
**Please note the following nuance**: while `privateGPT` supports these file formats, it **might** require additional
<Callout intent = "info">
While `PrivateGPT` supports these file formats, it **might** require additional
dependencies to be installed in your Python virtual environment.
For example, if you try to ingest `.epub` files, `privateGPT` might fail to do it, and will instead display an
For example, if you try to ingest `.epub` files, `PrivateGPT` might fail to do so and will instead display an
explanatory error asking you to install the dependencies needed to support that file format.
</Callout>
<Callout intent = "info">
**Other file formats might work**, but they will be considered as plain text
files (in other words, they will be ingested as `.txt` files).
</Callout>

View file

@ -193,3 +193,42 @@ or
When the server is started it will print a log *Application startup complete*.
Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
### Using IPEX-LLM
For a fully private setup on Intel GPUs (such as a local PC with an iGPU, or discrete GPUs like Arc, Flex, and Max), you can use [IPEX-LLM](https://github.com/intel-analytics/ipex-llm).
To deploy Ollama and pull models using IPEX-LLM, please refer to [this guide](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html). Then, follow the same steps outlined in the [Using Ollama](#using-ollama) section to create a `settings-ollama.yaml` profile and run the PrivateGPT server.
### Using Gemini
If you cannot run a local model (because you don't have a GPU, for example) or for testing purposes, you may
decide to run PrivateGPT using Gemini as the LLM and embeddings model. In addition, you will benefit from
multimodal inputs, such as text and images, in a very large context window.
In order to do so, create a profile `settings-gemini.yaml` with the following contents:
```yaml
llm:
mode: gemini
embedding:
mode: gemini
gemini:
api_key: <your_gemini_api_key> # You could skip this configuration and use the GEMINI_API_KEY env var instead
  model: <gemini_model_to_use> # Optional model to use. Default is "models/gemini-pro"
embedding_model: <gemini_embeddings_to_use> # Optional model to use. Default is "models/embedding-001"
```
And run PrivateGPT loading that profile you just created:
`PGPT_PROFILES=gemini make run`
or
`PGPT_PROFILES=gemini poetry run python -m private_gpt`
When the server is started it will print a log *Application startup complete*.
Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.

View file

@ -3,8 +3,8 @@
The configuration of your private GPT server is done thanks to `settings` files (more precisely `settings.yaml`).
These text files are written using the [YAML](https://en.wikipedia.org/wiki/YAML) syntax.
While privateGPT is distributing safe and universal configuration files, you might want to quickly customize your
privateGPT, and this can be done using the `settings` files.
While PrivateGPT is distributing safe and universal configuration files, you might want to quickly customize your
PrivateGPT, and this can be done using the `settings` files.
This project is defining the concept of **profiles** (or configuration profiles).
This mechanism, using your environment variables, is giving you the ability to easily switch between
@ -30,15 +30,20 @@ For example, on **linux and macOS**, this gives:
export PGPT_PROFILES=my_profile_name_here
```
Windows Powershell(s) have a different syntax, one of them being:
Windows Command Prompt (cmd) has a different syntax:
```shell
set PGPT_PROFILES=my_profile_name_here
```
Windows Powershell has a different syntax:
```shell
$env:PGPT_PROFILES="my_profile_name_here"
```
If the above does not work, you might want to try other ways to set an env variable in your Windows terminal.
---
Once you've set this environment variable to the desired profile, you can simply launch your privateGPT,
Once you've set this environment variable to the desired profile, you can simply launch your PrivateGPT,
and it will run using your profile on top of the default configuration.
## Reference

View file

@ -1,7 +1,7 @@
## Vectorstores
PrivateGPT supports [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) and [PGVector](https://github.com/pgvector/pgvector) as vectorstore providers. Qdrant being the default.
PrivateGPT supports [Qdrant](https://qdrant.tech/), [Milvus](https://milvus.io/), [Chroma](https://www.trychroma.com/), [PGVector](https://github.com/pgvector/pgvector) and [ClickHouse](https://github.com/ClickHouse/ClickHouse) as vectorstore providers, with Qdrant being the default.
In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `chroma` or `postgres`.
In order to select one or the other, set the `vectorstore.database` property in the `settings.yaml` file to `qdrant`, `milvus`, `chroma`, `postgres` or `clickhouse`.
```yaml
vectorstore:
@ -39,6 +39,24 @@ qdrant:
path: local_data/private_gpt/qdrant
```
### Milvus configuration
To enable Milvus, set the `vectorstore.database` property in the `settings.yaml` file to `milvus` and install the `milvus` extra.
```bash
poetry install --extras vector-stores-milvus
```
The available configuration options are:
| Field | Description |
|--------------|-------------|
| uri | Defaults to "local_data/private_gpt/milvus/milvus_local.db" (a local file). You can also point it at a more performant Milvus server running on Docker or Kubernetes, e.g. http://localhost:19530. To use Zilliz Cloud, set uri and token to your Zilliz Cloud endpoint and API key. |
| token | Token for a Milvus server running on Docker or Kubernetes, or the Zilliz Cloud API key. |
| collection_name | The name of the collection. Defaults to "milvus_db". |
| overwrite | Whether to overwrite the data in the collection if it already exists. Defaults to True. |
For a local, disk-based setup that does not require a running Milvus server, keep the default `uri` value in `settings.yaml`, which stores data in `local_data/private_gpt/milvus/milvus_local.db`.
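For reference, the sketch below shows roughly how these settings map onto llama-index's `MilvusVectorStore` inside PrivateGPT; the values are illustrative and assume the `vector-stores-milvus` extra is installed:
```python
# Minimal sketch: how the Milvus settings translate into a MilvusVectorStore instance.
# Assumes `poetry install --extras vector-stores-milvus` has been run; values are examples.
from llama_index.vector_stores.milvus import MilvusVectorStore

vector_store = MilvusVectorStore(
    dim=768,  # must match `embedding.embed_dim`
    uri="local_data/private_gpt/milvus/milvus_local.db",  # local file, no server required
    collection_name="milvus_db",
    overwrite=True,
)
```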
### Chroma configuration
To enable Chroma, set the `vectorstore.database` property in the `settings.yaml` file to `chroma` and install the `chroma` extra.
@ -101,3 +119,69 @@ Indexes:
postgres=#
```
The dimensions of the embeddings columns will be set based on the `embedding.embed_dim` value. If the embedding model changes this table may need to be dropped and recreated to avoid a dimension mismatch.
### ClickHouse
To utilize ClickHouse as the vector store, a [ClickHouse](https://github.com/ClickHouse/ClickHouse) database must be employed.
To enable ClickHouse, set the `vectorstore.database` property in the `settings.yaml` file to `clickhouse` and install the `vector-stores-clickhouse` extra.
```bash
poetry install --extras vector-stores-clickhouse
```
ClickHouse settings can be configured by setting values to the `clickhouse` property in the `settings.yaml` file.
The available configuration options are:
| Field | Description |
|----------------------|----------------------------------------------------------------|
| **host** | The server hosting the ClickHouse database. Default is `localhost` |
| **port** | The port on which the ClickHouse database is accessible. Default is `8123` |
| **username** | The username for database access. Default is `default` |
| **password** | The password for database access. (Optional) |
| **database** | The specific database to connect to. Default is `__default__` |
| **secure** | Use https/TLS for secure connection to the server. Default is `false` |
| **interface** | The protocol used for the connection, either 'http' or 'https'. (Optional) |
| **settings** | Specific ClickHouse server settings to be used with the session. (Optional) |
| **connect_timeout** | Timeout in seconds for establishing a connection. (Optional) |
| **send_receive_timeout** | Read timeout in seconds for http connection. (Optional) |
| **verify** | Verify the server certificate in secure/https mode. (Optional) |
| **ca_cert** | Path to Certificate Authority root certificate (.pem format). (Optional) |
| **client_cert** | Path to TLS Client certificate (.pem format). (Optional) |
| **client_cert_key** | Path to the private key for the TLS Client certificate. (Optional) |
| **http_proxy** | HTTP proxy address. (Optional) |
| **https_proxy** | HTTPS proxy address. (Optional) |
| **server_host_name** | Server host name to be checked against the TLS certificate. (Optional) |
For example:
```yaml
vectorstore:
database: clickhouse
clickhouse:
host: localhost
port: 8443
username: admin
password: <PASSWORD>
database: embeddings
secure: false
```
The following table will be created in the database:
```
clickhouse-client
:) \d embeddings.llama_index
Table "llama_index"
№ | name | type | default_type | default_expression | comment | codec_expression | ttl_expression
----|-----------|----------------------------------------------|--------------|--------------------|---------|------------------|---------------
1 | id | String | | | | |
2 | doc_id | String | | | | |
3 | text | String | | | | |
4 | vector | Array(Float32) | | | | |
5 | node_info | Tuple(start Nullable(UInt64), end Nullable(UInt64)) | | | | |
6 | metadata | String | | | | |
clickhouse-client
```
The dimensions of the embeddings columns will be set based on the `embedding.embed_dim` value. If the embedding model changes, this table may need to be dropped and recreated to avoid a dimension mismatch.

View file

@ -1,8 +1,16 @@
PrivateGPT provides an **API** containing all the building blocks required to
build **private, context-aware AI applications**.
<Callout intent = "tip">
If you are looking for an **enterprise-ready, fully private AI workspace**
check out [Zylon's website](https://zylon.ai) or [request a demo](https://cal.com/zylon/demo?source=pgpt-docs).
Crafted by the team behind PrivateGPT, Zylon is a best-in-class AI collaborative
workspace that can be easily deployed on-premise (data center, bare metal...) or in your private cloud (AWS, GCP, Azure...).
</Callout>
The API follows and extends the OpenAI API standard, and supports both normal and streaming responses.
That means that, if you can use the OpenAI API in one of your tools, you can use your own PrivateGPT API instead,
with no code changes, **and for free** if you are running privateGPT in a `local` setup.
with no code changes, **and for free** if you are running PrivateGPT in a `local` setup.
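For example, the official `openai` Python client can be pointed at a locally running PrivateGPT instance. The following is a minimal sketch that assumes the server is listening on the default http://localhost:8001 and that no API key is enforced in your local setup:
```python
# Minimal sketch: reuse the OpenAI Python client against the PrivateGPT API.
# Assumes PrivateGPT is running locally on port 8001; the API key is a placeholder.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8001/v1", api_key="not-needed-locally")
response = client.chat.completions.create(
    model="private-gpt",
    messages=[{"role": "user", "content": "What is PrivateGPT?"}],
)
print(response.choices[0].message.content)
```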
Get started by understanding the [Main Concepts and Installation](/installation) and then dive into the [API Reference](/api-reference).
@ -32,9 +40,3 @@ Get started by understanding the [Main Concepts and Installation](/installation)
</Cards>
<br />
<Callout intent = "info">
A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk
model download script, ingestion script, documents folder watch, etc.
</Callout>

View file

@ -0,0 +1,105 @@
This guide provides a quick start for running different profiles of PrivateGPT using Docker Compose.
The profiles cater to various environments, including Ollama setups (CPU, CUDA, MacOS), and a fully local setup.
By default, Docker Compose will download pre-built images from a remote registry when starting the services. However, you have the option to build the images locally if needed. Details on building the Docker images locally are provided at the end of this guide.
If you want to run PrivateGPT locally without Docker, refer to the [Local Installation Guide](/installation).
## Prerequisites
- **Docker and Docker Compose:** Ensure both are installed on your system.
[Installation Guide for Docker](https://docs.docker.com/get-docker/), [Installation Guide for Docker Compose](https://docs.docker.com/compose/install/).
- **Clone PrivateGPT Repository:** Clone the PrivateGPT repository to your machine and navigate to the directory:
```sh
git clone https://github.com/zylon-ai/private-gpt.git
cd private-gpt
```
## Setups
### Ollama Setups (Recommended)
#### 1. Default/Ollama CPU
**Description:**
This profile runs the Ollama service using CPU resources. It is the standard configuration for running Ollama-based PrivateGPT services without GPU acceleration.
**Run:**
To start the services using pre-built images, run:
```sh
docker-compose up
```
or with a specific profile:
```sh
docker-compose --profile ollama-cpu up
```
#### 2. Ollama Nvidia CUDA
**Description:**
This profile leverages GPU acceleration with CUDA support, suitable for computationally intensive tasks that benefit from GPU resources.
**Requirements:**
Ensure that your system has compatible GPU hardware and the necessary NVIDIA drivers installed. The installation process is detailed [here](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
**Run:**
To start the services with CUDA support using pre-built images, run:
```sh
docker-compose --profile ollama-cuda up
```
#### 3. Ollama External API
**Description:**
This profile is designed for running PrivateGPT using Ollama installed on the host machine. This setup is particularly useful for MacOS users, as Docker does not yet support Metal GPU.
**Requirements:**
Install Ollama on your machine by following the instructions at [ollama.ai](https://ollama.ai/).
**Run:**
To start the Ollama service, use:
```sh
OLLAMA_HOST=0.0.0.0 ollama serve
```
To start the services with the host configuration using pre-built images, run:
```sh
docker-compose --profile ollama-api up
```
### Fully Local Setups
#### 1. LlamaCPP CPU
**Description:**
This profile runs the PrivateGPT services locally using `llama-cpp` and Hugging Face models.
**Requirements:**
A **Hugging Face Token (HF_TOKEN)** is required for accessing Hugging Face models. Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).
**Run:**
Start the services with your Hugging Face token using pre-built images:
```sh
HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cpu up
```
Replace `<your_hf_token>` with your actual Hugging Face token.
## Building Locally
If you prefer to build Docker images locally, which is useful when making changes to the codebase or the Dockerfiles, follow these steps:
### Building the Images
To build the Docker images locally, navigate to the cloned repository directory and run:
```sh
docker-compose build
```
This command compiles the necessary Docker images based on the current codebase and Dockerfile configurations.
### Forcing a Rebuild with --build
If you have made changes and need to ensure these changes are reflected in the Docker images, you can force a rebuild before starting the services:
```sh
docker-compose up --build
```
or with a specific profile:
```sh
docker-compose --profile <profile_name> up --build
```
Replace `<profile_name>` with the desired profile.

View file

@ -1,121 +0,0 @@
# List of working LLM
**Do you have any working combination of LLM and embeddings?**
Please open a PR to add it to the list, and come on our Discord to tell us about it!
## Prompt style
LLMs might have been trained with different prompt styles.
The prompt style is the way the prompt is written, and how the system message is injected in the prompt.
For example, `llama2` looks like this:
```text
<s>[INST] <<SYS>>
{{ system_prompt }}
<</SYS>>
{{ user_message }} [/INST]
```
While `default` (the `llama_index` default) looks like this:
```text
system: {{ system_prompt }}
user: {{ user_message }}
assistant: {{ assistant_message }}
```
The "`tag`" style looks like this:
```text
<|system|>: {{ system_prompt }}
<|user|>: {{ user_message }}
<|assistant|>: {{ assistant_message }}
```
The "`mistral`" style looks like this:
```text
<s>[INST] You are an AI assistant. [/INST]</s>[INST] Hello, how are you doing? [/INST]
```
The "`chatml`" style looks like this:
```text
<|im_start|>system
{{ system_prompt }}<|im_end|>
<|im_start|>user"
{{ user_message }}<|im_end|>
<|im_start|>assistant
{{ assistant_message }}
```
Some LLMs will not understand these prompt styles, and will not work (returning nothing).
You can try to change the prompt style to `default` (or `tag`) in the settings, and it will
change the way the messages are formatted to be passed to the LLM.
## Example of configuration
You might want to change the prompt depending on the language and model you are using.
### English, with instructions
`settings-en.yaml`:
```yml
local:
llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
embedding_hf_model_name: BAAI/bge-small-en-v1.5
prompt_style: "llama2"
```
### French, with instructions
`settings-fr.yaml`:
```yml
local:
llm_hf_repo_id: TheBloke/Vigogne-2-7B-Instruct-GGUF
llm_hf_model_file: vigogne-2-7b-instruct.Q4_K_M.gguf
embedding_hf_model_name: dangvantuan/sentence-camembert-base
prompt_style: "default"
# prompt_style: "tag" # also works
# The default system prompt is injected only when the `prompt_style` != default, and there are no system message in the discussion
# default_system_prompt: Vous êtes un assistant IA qui répond à la question posée à la fin en utilisant le contexte suivant. Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas, n'essayez pas d'inventer une réponse. Veuillez répondre exclusivement en français.
```
You might want to change the prompt as the one above might not directly answer your question.
You can read online about how to write a good prompt, but in a nutshell, make it (extremely) directive.
You can try and troubleshoot your prompt by writing multiline requests in the UI, while
writing your interaction with the model, for example:
```text
Tu es un programmeur senior qui programme en python et utilise le framework fastapi. Ecrit moi un serveur qui retourne "hello world".
```
Another example:
```text
Context: None
Situation: tu es au milieu d'un champ.
Tache: va a la rivière, en bas du champ.
Décrit comment aller a la rivière.
```
### Optimised Models
GodziLLa2-70B LLM (English, rank 2 on HuggingFace OpenLLM Leaderboard), bge large Embedding Model (rank 1 on HuggingFace MTEB Leaderboard)
`settings-optimised.yaml`:
```yml
local:
llm_hf_repo_id: TheBloke/GodziLLa2-70B-GGUF
llm_hf_model_file: godzilla2-70b.Q4_K_M.gguf
embedding_hf_model_name: BAAI/bge-large-en
prompt_style: "llama2"
```
### German speaking model
`settings-de.yaml`:
```yml
local:
llm_hf_repo_id: TheBloke/em_german_leo_mistral-GGUF
llm_hf_model_file: em_german_leo_mistral.Q4_K_M.gguf
embedding_hf_model_name: T-Systems-onsite/german-roberta-sentence-transformer-v2
#llama, default or tag
prompt_style: "default"
```

View file

@ -0,0 +1,23 @@
# Recipes
Recipes are predefined use cases that help users solve very specific tasks using PrivateGPT.
They provide a streamlined approach to achieve common goals with the platform, offering both a starting point and inspiration for further exploration.
The main goal of Recipes is to empower the community to create and share solutions, expanding the capabilities of PrivateGPT.
## How to Create a New Recipe
1. **Identify the Task**: Define a specific task or problem that the Recipe will address.
2. **Develop the Solution**: Create a clear and concise guide, including any necessary code snippets or configurations.
3. **Submit a PR**: Fork the PrivateGPT repository, add your Recipe to the appropriate section, and submit a PR for review.
We encourage you to be creative and think outside the box! Your contributions help shape the future of PrivateGPT.
## Available Recipes
<Cards>
<Card
title="Summarize"
icon="fa-solid fa-file-alt"
href="/recipes/general-use-cases/summarize"
/>
</Cards>

View file

@ -0,0 +1,20 @@
The Summarize Recipe provides a method to extract concise summaries from ingested documents or texts using PrivateGPT.
This tool is particularly useful for quickly understanding large volumes of information by distilling key points and main ideas.
## Use Case
The primary use case for the `Summarize` tool is to automate the summarization of lengthy documents,
making it easier for users to grasp the essential information without reading through entire texts.
This can be applied in various scenarios, such as summarizing research papers, news articles, or business reports.
## Key Features
1. **Ingestion-compatible**: The user provides the text to be summarized. The text can be directly inputted or retrieved from ingested documents within the system.
2. **Customization**: The summary generation can be influenced by providing specific `instructions` or a `prompt`. These inputs guide the model on how to frame the summary, allowing for customization according to user needs.
3. **Streaming Support**: The tool supports streaming, allowing for real-time summary generation, which can be particularly useful for handling large texts or providing immediate feedback.
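As a quick illustration of these features, the `/v1/summarize` endpoint can be called directly over HTTP. This is a minimal sketch assuming PrivateGPT is running locally on the default port 8001 and the `requests` package is available:
```python
# Minimal sketch: request a summary from a locally running PrivateGPT instance.
# Assumes the server listens on http://localhost:8001; adjust the URL if needed.
import requests

payload = {
    "text": (
        "PrivateGPT is a production-ready AI project that lets you ask questions "
        "about your documents using LLMs, even without an internet connection."
    ),
    "instructions": "Summarize in a single sentence.",
    "use_context": False,  # set to True to summarize ingested documents instead
    "stream": False,
}
response = requests.post("http://localhost:8001/v1/summarize", json=payload, timeout=120)
response.raise_for_status()
print(response.json()["summary"])
```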
## Contributing
If you have ideas for improving the Summarize recipe or want to add new features, feel free to contribute!
You can submit your enhancements via a pull request on our [GitHub repository](https://github.com/zylon-ai/private-gpt).

View file

@ -0,0 +1,21 @@
This page aims to present different user interface (UI) alternatives for integrating and using PrivateGPT. These alternatives range from demo applications to fully customizable UI setups that can be adapted to your specific needs.
**Do you have any working demo project using PrivateGPT?**
Please open a PR to add it to the list, and come on our Discord to tell us about it!
<Callout intent = "note">
WIP: This page provides an overview of one of the UI alternatives available for PrivateGPT. More alternatives will be added to this page as they become available.
</Callout>
## [PrivateGPT SDK Demo App](https://github.com/frgarciames/privategpt-react)
The PrivateGPT SDK demo app is a robust starting point for developers looking to integrate and customize PrivateGPT in their applications. Leveraging modern technologies like Tailwind, shadcn/ui, and Biomejs, it provides a smooth development experience and a highly customizable user interface. Refer to the [repository](https://github.com/frgarciames/privategpt-react) for more details and to get started.
**Tech Stack:**
- **Tailwind:** A utility-first CSS framework for rapid UI development.
- **shadcn/ui:** A set of high-quality, customizable UI components.
- **PrivateGPT Web SDK:** The core SDK for interacting with PrivateGPT.
- **Biomejs formatter/linter:** A tool for maintaining code quality and consistency.

View file

@ -2,7 +2,12 @@
Gradio UI is a ready to use way of testing most of PrivateGPT API functionalities.
![Gradio PrivateGPT](https://lh3.googleusercontent.com/drive-viewer/AK7aPaD_Hc-A8A9ooMe-hPgm_eImgsbxAjb__8nFYj8b_WwzvL1Gy90oAnp1DfhPaN6yGiEHCOXs0r77W1bYHtPzlVwbV7fMsA=s1600)
![Gradio PrivateGPT](https://github.com/zylon-ai/private-gpt/raw/main/fern/docs/assets/ui.png?raw=true)
<Callout intent = "warning">
A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as a bulk
model download script, an ingestion script, a documents folder watcher, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives.
</Callout>
### Execution Modes

View file

@ -1,4 +1,4 @@
{
"organization": "privategpt",
"version": "0.19.10"
"version": "0.31.17"
}

View file

@ -339,6 +339,48 @@
}
}
},
"/v1/summarize": {
"post": {
"tags": [
"Recipes"
],
"summary": "Summarize",
"description": "Given a text, the model will return a summary.\n\nOptionally include `instructions` to influence the way the summary is generated.\n\nIf `use_context`\nis set to `true`, the model will also use the content coming from the ingested\ndocuments in the summary. The documents being used can\nbe filtered by their metadata using the `context_filter`.\nIngested documents metadata can be found using `/ingest/list` endpoint.\nIf you want all ingested documents to be used, remove `context_filter` altogether.\n\nIf `prompt` is set, it will be used as the prompt for the summarization,\notherwise the default prompt will be used.\n\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n```\n{\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n\"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n\"finish_reason\":null}]}\n```",
"operationId": "summarize_v1_summarize_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SummarizeBody"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SummarizeResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
}
}
},
"/v1/embeddings": {
"post": {
"tags": [
@ -500,6 +542,10 @@
"Chunk": {
"properties": {
"object": {
"type": "string",
"enum": [
"context.chunk"
],
"const": "context.chunk",
"title": "Object"
},
@ -612,10 +658,18 @@
"ChunksResponse": {
"properties": {
"object": {
"type": "string",
"enum": [
"list"
],
"const": "list",
"title": "Object"
},
"model": {
"type": "string",
"enum": [
"private-gpt"
],
"const": "private-gpt",
"title": "Model"
},
@ -728,6 +782,10 @@
"title": "Index"
},
"object": {
"type": "string",
"enum": [
"embedding"
],
"const": "embedding",
"title": "Object"
},
@ -779,10 +837,18 @@
"EmbeddingsResponse": {
"properties": {
"object": {
"type": "string",
"enum": [
"list"
],
"const": "list",
"title": "Object"
},
"model": {
"type": "string",
"enum": [
"private-gpt"
],
"const": "private-gpt",
"title": "Model"
},
@ -818,6 +884,10 @@
"HealthResponse": {
"properties": {
"status": {
"type": "string",
"enum": [
"ok"
],
"const": "ok",
"title": "Status",
"default": "ok"
@ -829,10 +899,18 @@
"IngestResponse": {
"properties": {
"object": {
"type": "string",
"enum": [
"list"
],
"const": "list",
"title": "Object"
},
"model": {
"type": "string",
"enum": [
"private-gpt"
],
"const": "private-gpt",
"title": "Model"
},
@ -879,6 +957,10 @@
"IngestedDoc": {
"properties": {
"object": {
"type": "string",
"enum": [
"ingest.document"
],
"const": "ingest.document",
"title": "Object"
},
@ -1001,6 +1083,10 @@
]
},
"model": {
"type": "string",
"enum": [
"private-gpt"
],
"const": "private-gpt",
"title": "Model"
},
@ -1074,6 +1160,78 @@
"title": "OpenAIMessage",
"description": "Inference result, with the source of the message.\n\nRole could be the assistant or system\n(providing a default response, not AI generated)."
},
"SummarizeBody": {
"properties": {
"text": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Text"
},
"use_context": {
"type": "boolean",
"title": "Use Context",
"default": false
},
"context_filter": {
"anyOf": [
{
"$ref": "#/components/schemas/ContextFilter"
},
{
"type": "null"
}
]
},
"prompt": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Prompt"
},
"instructions": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Instructions"
},
"stream": {
"type": "boolean",
"title": "Stream",
"default": false
}
},
"type": "object",
"title": "SummarizeBody"
},
"SummarizeResponse": {
"properties": {
"summary": {
"type": "string",
"title": "Summary"
}
},
"type": "object",
"required": [
"summary"
],
"title": "SummarizeResponse"
},
"ValidationError": {
"properties": {
"loc": {

7101
poetry.lock generated

File diff suppressed because it is too large

View file

@ -31,6 +31,7 @@ class EmbeddingComponent:
self.embedding_model = HuggingFaceEmbedding(
model_name=settings.huggingface.embedding_hf_model_name,
cache_folder=str(models_cache_path),
trust_remote_code=settings.huggingface.trust_remote_code,
)
case "sagemaker":
try:
@ -55,23 +56,62 @@ class EmbeddingComponent:
"OpenAI dependencies not found, install with `poetry install --extras embeddings-openai`"
) from e
openai_settings = settings.openai.api_key
self.embedding_model = OpenAIEmbedding(api_key=openai_settings)
api_base = (
settings.openai.embedding_api_base or settings.openai.api_base
)
api_key = settings.openai.embedding_api_key or settings.openai.api_key
model = settings.openai.embedding_model
self.embedding_model = OpenAIEmbedding(
api_base=api_base,
api_key=api_key,
model=model,
)
case "ollama":
try:
from llama_index.embeddings.ollama import ( # type: ignore
OllamaEmbedding,
)
from ollama import Client # type: ignore
except ImportError as e:
raise ImportError(
"Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
) from e
ollama_settings = settings.ollama
# Calculate the embedding model name. If no tag is provided, default to ':latest'.
model_name = (
ollama_settings.embedding_model + ":latest"
if ":" not in ollama_settings.embedding_model
else ollama_settings.embedding_model
)
self.embedding_model = OllamaEmbedding(
model_name=ollama_settings.embedding_model,
model_name=model_name,
base_url=ollama_settings.embedding_api_base,
)
if ollama_settings.autopull_models:
if ollama_settings.autopull_models:
from private_gpt.utils.ollama import (
check_connection,
pull_model,
)
# TODO: Reuse llama-index client when llama-index is updated
client = Client(
host=ollama_settings.embedding_api_base,
timeout=ollama_settings.request_timeout,
)
if not check_connection(client):
raise ValueError(
f"Failed to connect to Ollama, "
f"check if Ollama server is running on {ollama_settings.api_base}"
)
pull_model(client, model_name)
case "azopenai":
try:
from llama_index.embeddings.azure_openai import ( # type: ignore
@ -90,6 +130,37 @@ class EmbeddingComponent:
azure_endpoint=azopenai_settings.azure_endpoint,
api_version=azopenai_settings.api_version,
)
case "gemini":
try:
from llama_index.embeddings.gemini import ( # type: ignore
GeminiEmbedding,
)
except ImportError as e:
raise ImportError(
"Gemini dependencies not found, install with `poetry install --extras embeddings-gemini`"
) from e
self.embedding_model = GeminiEmbedding(
api_key=settings.gemini.api_key,
model_name=settings.gemini.embedding_model,
)
case "mistralai":
try:
from llama_index.embeddings.mistralai import ( # type: ignore
MistralAIEmbedding,
)
except ImportError as e:
raise ImportError(
"Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`"
) from e
api_key = settings.openai.api_key
model = settings.openai.embedding_model
self.embedding_model = MistralAIEmbedding(
api_key=api_key,
model=model,
)
case "mock":
# Not a random number, is the dimensionality used by
# the default embedding model

View file

@ -403,7 +403,7 @@ class PipelineIngestComponent(BaseIngestComponentWithIndex):
self.transformations,
show_progress=self.show_progress,
)
self.node_q.put(("process", file_name, documents, nodes))
self.node_q.put(("process", file_name, documents, list(nodes)))
finally:
self.doc_semaphore.release()
self.doc_q.task_done() # unblock Q joins

View file

@ -92,7 +92,13 @@ class IngestionHelper:
return string_reader.load_data([file_data.read_text()])
logger.debug("Specific reader found for extension=%s", extension)
return reader_cls().load_data(file_data)
documents = reader_cls().load_data(file_data)
# Sanitize NUL bytes in text which can't be stored in Postgres
for i in range(len(documents)):
documents[i].text = documents[i].text.replace("\u0000", "")
return documents
@staticmethod
def _exclude_metadata(documents: list[Document]) -> None:

View file

@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM):
response_body = resp["Body"]
response_str = response_body.read().decode("utf-8")
response_dict = eval(response_str)
response_dict = json.loads(response_str)
return CompletionResponse(
text=response_dict[0]["generated_text"][len(prompt) :], raw=resp

View file

@ -22,13 +22,24 @@ class LLMComponent:
@inject
def __init__(self, settings: Settings) -> None:
llm_mode = settings.llm.mode
if settings.llm.tokenizer:
if settings.llm.tokenizer and settings.llm.mode != "mock":
# Try to download the tokenizer. If it fails, the LLM will still work
# using the default one, which is less accurate.
try:
set_global_tokenizer(
AutoTokenizer.from_pretrained(
pretrained_model_name_or_path=settings.llm.tokenizer,
cache_dir=str(models_cache_path),
token=settings.huggingface.access_token,
)
)
except Exception as e:
logger.warning(
f"Failed to download tokenizer {settings.llm.tokenizer}: {e!s}"
f"Please follow the instructions in the documentation to download it if needed: "
f"https://docs.privategpt.dev/installation/getting-started/troubleshooting#tokenizer-setup."
f"Falling back to default tokenizer."
)
logger.info("Initializing the LLM in mode=%s", llm_mode)
match settings.llm.mode:
@ -40,7 +51,7 @@ class LLMComponent:
"Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
) from e
prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
prompt_style = get_prompt_style(settings.llm.prompt_style)
settings_kwargs = {
"tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp
"top_k": settings.llamacpp.top_k, # ollama and llama-cpp
@ -98,15 +109,22 @@ class LLMComponent:
raise ImportError(
"OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
) from e
prompt_style = get_prompt_style(settings.llm.prompt_style)
openai_settings = settings.openai
self.llm = OpenAILike(
api_base=openai_settings.api_base,
api_key=openai_settings.api_key,
model=openai_settings.model,
is_chat_model=True,
max_tokens=None,
max_tokens=settings.llm.max_new_tokens,
api_version="",
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
messages_to_prompt=prompt_style.messages_to_prompt,
completion_to_prompt=prompt_style.completion_to_prompt,
tokenizer=settings.llm.tokenizer,
timeout=openai_settings.request_timeout,
reuse_client=False,
)
case "ollama":
try:
@ -127,8 +145,15 @@ class LLMComponent:
"repeat_penalty": ollama_settings.repeat_penalty, # ollama llama-cpp
}
self.llm = Ollama(
model=ollama_settings.llm_model,
# Calculate the LLM model name. If no tag is provided, default to ':latest'.
model_name = (
ollama_settings.llm_model + ":latest"
if ":" not in ollama_settings.llm_model
else ollama_settings.llm_model
)
llm = Ollama(
model=model_name,
base_url=ollama_settings.api_base,
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
@ -136,6 +161,16 @@ class LLMComponent:
request_timeout=ollama_settings.request_timeout,
)
if ollama_settings.autopull_models:
from private_gpt.utils.ollama import check_connection, pull_model
if not check_connection(llm.client):
raise ValueError(
f"Failed to connect to Ollama, "
f"check if Ollama server is running on {ollama_settings.api_base}"
)
pull_model(llm.client, model_name)
if (
ollama_settings.keep_alive
!= ollama_settings.model_fields["keep_alive"].default
@ -148,10 +183,12 @@ class LLMComponent:
return wrapper
Ollama.chat = add_keep_alive(Ollama.chat)
Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
Ollama.complete = add_keep_alive(Ollama.complete)
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
Ollama.chat = add_keep_alive(Ollama.chat) # type: ignore
Ollama.stream_chat = add_keep_alive(Ollama.stream_chat) # type: ignore
Ollama.complete = add_keep_alive(Ollama.complete) # type: ignore
Ollama.stream_complete = add_keep_alive(Ollama.stream_complete) # type: ignore
self.llm = llm
case "azopenai":
try:
@ -171,5 +208,18 @@ class LLMComponent:
azure_endpoint=azopenai_settings.azure_endpoint,
api_version=azopenai_settings.api_version,
)
case "gemini":
try:
from llama_index.llms.gemini import ( # type: ignore
Gemini,
)
except ImportError as e:
raise ImportError(
"Google Gemini dependencies not found, install with `poetry install --extras llms-gemini`"
) from e
gemini_settings = settings.gemini
self.llm = Gemini(
model_name=gemini_settings.model, api_key=gemini_settings.api_key
)
case "mock":
self.llm = MockLLM()

View file

@ -40,7 +40,8 @@ class AbstractPromptStyle(abc.ABC):
logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
return prompt
def completion_to_prompt(self, completion: str) -> str:
def completion_to_prompt(self, prompt: str) -> str:
completion = prompt # Fix: Llama-index parameter has to be named as prompt
prompt = self._completion_to_prompt(completion)
logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
return prompt
@ -138,6 +139,72 @@ class Llama2PromptStyle(AbstractPromptStyle):
)
class Llama3PromptStyle(AbstractPromptStyle):
r"""Template for Meta's Llama 3.1.
The format follows this structure:
<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
[System message content]<|eot_id|>
<|start_header_id|>user<|end_header_id|>
[User message content]<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
[Assistant message content]<|eot_id|>
...
(Repeat for each message, including possible 'ipython' role)
"""
BOS, EOS = "<|begin_of_text|>", "<|end_of_text|>"
B_INST, E_INST = "<|start_header_id|>", "<|end_header_id|>"
EOT = "<|eot_id|>"
B_SYS, E_SYS = "<|start_header_id|>system<|end_header_id|>", "<|eot_id|>"
ASSISTANT_INST = "<|start_header_id|>assistant<|end_header_id|>"
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. \
Always answer as helpfully as possible and follow ALL given instructions. \
Do not speculate or make up information. \
Do not reference any given instructions or context. \
"""
def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
prompt = ""
has_system_message = False
for i, message in enumerate(messages):
if not message or message.content is None:
continue
if message.role == MessageRole.SYSTEM:
prompt += f"{self.B_SYS}\n\n{message.content.strip()}{self.E_SYS}"
has_system_message = True
else:
role_header = f"{self.B_INST}{message.role.value}{self.E_INST}"
prompt += f"{role_header}\n\n{message.content.strip()}{self.EOT}"
# Add assistant header if the last message is not from the assistant
if i == len(messages) - 1 and message.role != MessageRole.ASSISTANT:
prompt += f"{self.ASSISTANT_INST}\n\n"
# Add default system prompt if no system message was provided
if not has_system_message:
prompt = (
f"{self.B_SYS}\n\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}" + prompt
)
# TODO: Implement tool handling logic
return prompt
def _completion_to_prompt(self, completion: str) -> str:
return (
f"{self.B_SYS}\n\n{self.DEFAULT_SYSTEM_PROMPT}{self.E_SYS}"
f"{self.B_INST}user{self.E_INST}\n\n{completion.strip()}{self.EOT}"
f"{self.ASSISTANT_INST}\n\n"
)
class TagPromptStyle(AbstractPromptStyle):
"""Tag prompt style (used by Vigogne) that uses the prompt style `<|ROLE|>`.
@ -173,18 +240,22 @@ class TagPromptStyle(AbstractPromptStyle):
class MistralPromptStyle(AbstractPromptStyle):
def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
prompt = "<s>"
inst_buffer = []
text = ""
for message in messages:
role = message.role
content = message.content or ""
if role.lower() == "system":
message_from_user = f"[INST] {content.strip()} [/INST]"
prompt += message_from_user
elif role.lower() == "user":
prompt += "</s>"
message_from_user = f"[INST] {content.strip()} [/INST]"
prompt += message_from_user
return prompt
if message.role == MessageRole.SYSTEM or message.role == MessageRole.USER:
inst_buffer.append(str(message.content).strip())
elif message.role == MessageRole.ASSISTANT:
text += "<s>[INST] " + "\n".join(inst_buffer) + " [/INST]"
text += " " + str(message.content).strip() + "</s>"
inst_buffer.clear()
else:
raise ValueError(f"Unknown message role {message.role}")
if len(inst_buffer) > 0:
text += "<s>[INST] " + "\n".join(inst_buffer) + " [/INST]"
return text
def _completion_to_prompt(self, completion: str) -> str:
return self._messages_to_prompt(
@ -215,7 +286,9 @@ class ChatMLPromptStyle(AbstractPromptStyle):
def get_prompt_style(
prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] | None
prompt_style: (
Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
)
) -> AbstractPromptStyle:
"""Get the prompt style to use from the given string.
@ -226,6 +299,8 @@ def get_prompt_style(
return DefaultPromptStyle()
elif prompt_style == "llama2":
return Llama2PromptStyle()
elif prompt_style == "llama3":
return Llama3PromptStyle()
elif prompt_style == "tag":
return TagPromptStyle()
elif prompt_style == "mistral":

View file

@ -38,10 +38,10 @@ class NodeStoreComponent:
case "postgres":
try:
from llama_index.core.storage.docstore.postgres_docstore import (
from llama_index.storage.docstore.postgres import ( # type: ignore
PostgresDocumentStore,
)
from llama_index.core.storage.index_store.postgres_index_store import (
from llama_index.storage.index_store.postgres import ( # type: ignore
PostgresIndexStore,
)
except ImportError:
@ -55,6 +55,7 @@ class NodeStoreComponent:
self.index_store = PostgresIndexStore.from_params(
**settings.postgres.model_dump(exclude_none=True)
)
self.doc_store = PostgresDocumentStore.from_params(
**settings.postgres.model_dump(exclude_none=True)
)

View file

@ -1,14 +1,17 @@
from collections.abc import Generator
from typing import Any
from collections.abc import Generator, Sequence
from typing import TYPE_CHECKING, Any
from llama_index.core.schema import BaseNode, MetadataMode
from llama_index.core.vector_stores.utils import node_to_metadata_dict
from llama_index.vector_stores.chroma import ChromaVectorStore # type: ignore
if TYPE_CHECKING:
from collections.abc import Mapping
def chunk_list(
lst: list[BaseNode], max_chunk_size: int
) -> Generator[list[BaseNode], None, None]:
lst: Sequence[BaseNode], max_chunk_size: int
) -> Generator[Sequence[BaseNode], None, None]:
"""Yield successive max_chunk_size-sized chunks from lst.
Args:
@ -60,7 +63,7 @@ class BatchedChromaVectorStore(ChromaVectorStore): # type: ignore
)
self.chroma_client = chroma_client
def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
"""Add nodes to index, batching the insertion to avoid issues.
Args:
@ -78,8 +81,8 @@ class BatchedChromaVectorStore(ChromaVectorStore): # type: ignore
all_ids = []
for node_chunk in node_chunks:
embeddings = []
metadatas = []
embeddings: list[Sequence[float]] = []
metadatas: list[Mapping[str, Any]] = []
ids = []
documents = []
for node in node_chunk:

View file

@ -4,10 +4,10 @@ import typing
from injector import inject, singleton
from llama_index.core.indices.vector_store import VectorIndexRetriever, VectorStoreIndex
from llama_index.core.vector_stores.types import (
BasePydanticVectorStore,
FilterCondition,
MetadataFilter,
MetadataFilters,
VectorStore,
)
from private_gpt.open_ai.extensions.context_filter import ContextFilter
@ -32,7 +32,7 @@ def _doc_id_metadata_filter(
@singleton
class VectorStoreComponent:
settings: Settings
vector_store: VectorStore
vector_store: BasePydanticVectorStore
@inject
def __init__(self, settings: Settings) -> None:
@ -54,7 +54,7 @@ class VectorStoreComponent:
)
self.vector_store = typing.cast(
VectorStore,
BasePydanticVectorStore,
PGVectorStore.from_params(
**settings.postgres.model_dump(exclude_none=True),
table_name="embeddings",
@ -87,7 +87,7 @@ class VectorStoreComponent:
) # TODO
self.vector_store = typing.cast(
VectorStore,
BasePydanticVectorStore,
BatchedChromaVectorStore(
chroma_client=chroma_client, chroma_collection=chroma_collection
),
@ -115,12 +115,78 @@ class VectorStoreComponent:
**settings.qdrant.model_dump(exclude_none=True)
)
self.vector_store = typing.cast(
VectorStore,
BasePydanticVectorStore,
QdrantVectorStore(
client=client,
collection_name="make_this_parameterizable_per_api_call",
), # TODO
)
case "milvus":
try:
from llama_index.vector_stores.milvus import ( # type: ignore
MilvusVectorStore,
)
except ImportError as e:
raise ImportError(
"Milvus dependencies not found, install with `poetry install --extras vector-stores-milvus`"
) from e
if settings.milvus is None:
logger.info(
"Milvus config not found. Using default settings.\n"
"Trying to connect to Milvus at local_data/private_gpt/milvus/milvus_local.db "
"with collection 'make_this_parameterizable_per_api_call'."
)
self.vector_store = typing.cast(
BasePydanticVectorStore,
MilvusVectorStore(
dim=settings.embedding.embed_dim,
collection_name="make_this_parameterizable_per_api_call",
overwrite=True,
),
)
else:
self.vector_store = typing.cast(
BasePydanticVectorStore,
MilvusVectorStore(
dim=settings.embedding.embed_dim,
uri=settings.milvus.uri,
token=settings.milvus.token,
collection_name=settings.milvus.collection_name,
overwrite=settings.milvus.overwrite,
),
)
case "clickhouse":
try:
from clickhouse_connect import ( # type: ignore
get_client,
)
from llama_index.vector_stores.clickhouse import ( # type: ignore
ClickHouseVectorStore,
)
except ImportError as e:
raise ImportError(
"ClickHouse dependencies not found, install with `poetry install --extras vector-stores-clickhouse`"
) from e
if settings.clickhouse is None:
raise ValueError(
"ClickHouse settings not found. Please provide settings."
)
clickhouse_client = get_client(
host=settings.clickhouse.host,
port=settings.clickhouse.port,
username=settings.clickhouse.username,
password=settings.clickhouse.password,
)
self.vector_store = ClickHouseVectorStore(
clickhouse_client=clickhouse_client
)
case _:
# Should be unreachable
# The settings validator should have caught this

View file

@ -15,6 +15,7 @@ from private_gpt.server.completions.completions_router import completions_router
from private_gpt.server.embeddings.embeddings_router import embeddings_router
from private_gpt.server.health.health_router import health_router
from private_gpt.server.ingest.ingest_router import ingest_router
from private_gpt.server.recipes.summarize.summarize_router import summarize_router
from private_gpt.settings.settings import Settings
logger = logging.getLogger(__name__)
@ -32,11 +33,13 @@ def create_app(root_injector: Injector) -> FastAPI:
app.include_router(chat_router)
app.include_router(chunks_router)
app.include_router(ingest_router)
app.include_router(summarize_router)
app.include_router(embeddings_router)
app.include_router(health_router)
# Add LlamaIndex simple observability
global_handler = create_global_handler("simple")
if global_handler:
LlamaIndexSettings.callback_manager = CallbackManager([global_handler])
settings = root_injector.get(Settings)

View file

@ -1,4 +1,5 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING
from injector import inject, singleton
from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
@ -26,6 +27,9 @@ from private_gpt.open_ai.extensions.context_filter import ContextFilter
from private_gpt.server.chunks.chunks_service import Chunk
from private_gpt.settings.settings import Settings
if TYPE_CHECKING:
from llama_index.core.postprocessor.types import BaseNodePostprocessor
class Completion(BaseModel):
response: str
@ -114,12 +118,15 @@ class ChatService:
context_filter=context_filter,
similarity_top_k=self.settings.rag.similarity_top_k,
)
node_postprocessors = [
node_postprocessors: list[BaseNodePostprocessor] = [
MetadataReplacementPostProcessor(target_metadata_key="window"),
]
if settings.rag.similarity_value:
node_postprocessors.append(
SimilarityPostprocessor(
similarity_cutoff=settings.rag.similarity_value
),
]
)
)
if settings.rag.rerank.enabled:
rerank_postprocessor = SentenceTransformerRerank(

View file

@ -0,0 +1,86 @@
from fastapi import APIRouter, Depends, Request
from pydantic import BaseModel
from starlette.responses import StreamingResponse
from private_gpt.open_ai.extensions.context_filter import ContextFilter
from private_gpt.open_ai.openai_models import (
to_openai_sse_stream,
)
from private_gpt.server.recipes.summarize.summarize_service import SummarizeService
from private_gpt.server.utils.auth import authenticated
summarize_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
class SummarizeBody(BaseModel):
text: str | None = None
use_context: bool = False
context_filter: ContextFilter | None = None
prompt: str | None = None
instructions: str | None = None
stream: bool = False
class SummarizeResponse(BaseModel):
summary: str
@summarize_router.post(
"/summarize",
response_model=None,
summary="Summarize",
responses={200: {"model": SummarizeResponse}},
tags=["Recipes"],
)
def summarize(
request: Request, body: SummarizeBody
) -> SummarizeResponse | StreamingResponse:
"""Given a text, the model will return a summary.
Optionally include `instructions` to influence the way the summary is generated.
If `use_context`
is set to `true`, the model will also use the content coming from the ingested
documents in the summary. The documents being used can
be filtered by their metadata using the `context_filter`.
Ingested documents metadata can be found using `/ingest/list` endpoint.
If you want all ingested documents to be used, remove `context_filter` altogether.
If `prompt` is set, it will be used as the prompt for the summarization,
otherwise the default prompt will be used.
When using `'stream': true`, the API will return data chunks following [OpenAI's
streaming model](https://platform.openai.com/docs/api-reference/chat/streaming):
```
{"id":"12345","object":"completion.chunk","created":1694268190,
"model":"private-gpt","choices":[{"index":0,"delta":{"content":"Hello"},
"finish_reason":null}]}
```
"""
service: SummarizeService = request.state.injector.get(SummarizeService)
if body.stream:
completion_gen = service.stream_summarize(
text=body.text,
instructions=body.instructions,
use_context=body.use_context,
context_filter=body.context_filter,
prompt=body.prompt,
)
return StreamingResponse(
to_openai_sse_stream(
response_generator=completion_gen,
),
media_type="text/event-stream",
)
else:
completion = service.summarize(
text=body.text,
instructions=body.instructions,
use_context=body.use_context,
context_filter=body.context_filter,
prompt=body.prompt,
)
return SummarizeResponse(
summary=completion,
)

View file

@ -0,0 +1,172 @@
from itertools import chain
from injector import inject, singleton
from llama_index.core import (
Document,
StorageContext,
SummaryIndex,
)
from llama_index.core.base.response.schema import Response, StreamingResponse
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.response_synthesizers import ResponseMode
from llama_index.core.storage.docstore.types import RefDocInfo
from llama_index.core.types import TokenGen
from private_gpt.components.embedding.embedding_component import EmbeddingComponent
from private_gpt.components.llm.llm_component import LLMComponent
from private_gpt.components.node_store.node_store_component import NodeStoreComponent
from private_gpt.components.vector_store.vector_store_component import (
VectorStoreComponent,
)
from private_gpt.open_ai.extensions.context_filter import ContextFilter
from private_gpt.settings.settings import Settings
DEFAULT_SUMMARIZE_PROMPT = (
"Provide a comprehensive summary of the provided context information. "
"The summary should cover all the key points and main ideas presented in "
"the original text, while also condensing the information into a concise "
"and easy-to-understand format. Please ensure that the summary includes "
"relevant details and examples that support the main ideas, while avoiding "
"any unnecessary information or repetition."
)
@singleton
class SummarizeService:
@inject
def __init__(
self,
settings: Settings,
llm_component: LLMComponent,
node_store_component: NodeStoreComponent,
vector_store_component: VectorStoreComponent,
embedding_component: EmbeddingComponent,
) -> None:
self.settings = settings
self.llm_component = llm_component
self.node_store_component = node_store_component
self.vector_store_component = vector_store_component
self.embedding_component = embedding_component
self.storage_context = StorageContext.from_defaults(
vector_store=vector_store_component.vector_store,
docstore=node_store_component.doc_store,
index_store=node_store_component.index_store,
)
@staticmethod
def _filter_ref_docs(
ref_docs: dict[str, RefDocInfo], context_filter: ContextFilter | None
) -> list[RefDocInfo]:
if context_filter is None or not context_filter.docs_ids:
return list(ref_docs.values())
return [
ref_doc
for doc_id, ref_doc in ref_docs.items()
if doc_id in context_filter.docs_ids
]
def _summarize(
self,
use_context: bool = False,
stream: bool = False,
text: str | None = None,
instructions: str | None = None,
context_filter: ContextFilter | None = None,
prompt: str | None = None,
) -> str | TokenGen:
nodes_to_summarize = []
# Add text to summarize
if text:
text_documents = [Document(text=text)]
nodes_to_summarize += (
SentenceSplitter.from_defaults().get_nodes_from_documents(
text_documents
)
)
# Add context documents to summarize
if use_context:
# 1. Recover all ref docs
ref_docs: dict[str, RefDocInfo] | None = (
self.storage_context.docstore.get_all_ref_doc_info()
)
if ref_docs is None:
raise ValueError("No documents have been ingested yet.")
# 2. Filter documents based on context_filter (if provided)
filtered_ref_docs = self._filter_ref_docs(ref_docs, context_filter)
# 3. Get all nodes from the filtered documents
filtered_node_ids = chain.from_iterable(
[ref_doc.node_ids for ref_doc in filtered_ref_docs]
)
filtered_nodes = self.storage_context.docstore.get_nodes(
node_ids=list(filtered_node_ids),
)
nodes_to_summarize += filtered_nodes
# Create a SummaryIndex to summarize the nodes
summary_index = SummaryIndex(
nodes=nodes_to_summarize,
storage_context=StorageContext.from_defaults(), # In memory SummaryIndex
show_progress=True,
)
# Make a tree summarization query
# above the set of all candidate nodes
query_engine = summary_index.as_query_engine(
llm=self.llm_component.llm,
response_mode=ResponseMode.TREE_SUMMARIZE,
streaming=stream,
use_async=self.settings.summarize.use_async,
)
prompt = prompt or DEFAULT_SUMMARIZE_PROMPT
summarize_query = prompt + "\n" + (instructions or "")
response = query_engine.query(summarize_query)
if isinstance(response, Response):
return response.response or ""
elif isinstance(response, StreamingResponse):
return response.response_gen
else:
raise TypeError(f"The result is not of a supported type: {type(response)}")
def summarize(
self,
use_context: bool = False,
text: str | None = None,
instructions: str | None = None,
context_filter: ContextFilter | None = None,
prompt: str | None = None,
) -> str:
return self._summarize(
use_context=use_context,
stream=False,
text=text,
instructions=instructions,
context_filter=context_filter,
prompt=prompt,
) # type: ignore
def stream_summarize(
self,
use_context: bool = False,
text: str | None = None,
instructions: str | None = None,
context_filter: ContextFilter | None = None,
prompt: str | None = None,
) -> TokenGen:
return self._summarize(
use_context=use_context,
stream=True,
text=text,
instructions=instructions,
context_filter=context_filter,
prompt=prompt,
) # type: ignore
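A minimal usage sketch of the service above, assuming the application's dependency-injection container is already set up (the sample text, instructions, and printed output are illustrative only, not part of this commit):

from private_gpt.di import global_injector
from private_gpt.server.recipes.summarize.summarize_service import SummarizeService

summarize_service = global_injector.get(SummarizeService)

# One-shot summary of ad-hoc text (no ingested documents required).
summary = summarize_service.summarize(
    text="Some long text to condense...",
    instructions="Keep the summary under three sentences.",
)
print(summary)

# Streaming summary over everything ingested so far.
for token in summarize_service.stream_summarize(use_context=True):
    print(token, end="", flush=True)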

View file

@ -1,4 +1,4 @@
from typing import Literal
from typing import Any, Literal
from pydantic import BaseModel, Field
@ -59,6 +59,27 @@ class AuthSettings(BaseModel):
)
class IngestionSettings(BaseModel):
"""Ingestion configuration.
This configuration is used to control the ingestion of data into the system
using non-server methods. This is useful for local development and testing,
or to ingest in bulk from a folder.
Please note that this configuration is not secure and should be used in
a controlled environment only (setting the right permissions, etc.).
"""
enabled: bool = Field(
description="Flag indicating if local ingestion is enabled or not.",
default=False,
)
allow_ingest_from: list[str] = Field(
description="A list of folders from which local ingestion is allowed.",
default=[],
)
class ServerSettings(BaseModel):
env_name: str = Field(
description="Name of the environment (prod, staging, local...)"
@ -74,6 +95,10 @@ class ServerSettings(BaseModel):
class DataSettings(BaseModel):
local_ingestion: IngestionSettings = Field(
description="Ingestion configuration",
default_factory=lambda: IngestionSettings(allow_ingest_from=["*"]),
)
local_data_folder: str = Field(
description="Path to local storage."
"It will be treated as an absolute path if it starts with /"
@ -82,7 +107,14 @@ class DataSettings(BaseModel):
class LLMSettings(BaseModel):
mode: Literal[
"llamacpp", "openai", "openailike", "azopenai", "sagemaker", "mock", "ollama"
"llamacpp",
"openai",
"openailike",
"azopenai",
"sagemaker",
"mock",
"ollama",
"gemini",
]
max_new_tokens: int = Field(
256,
@ -104,10 +136,24 @@ class LLMSettings(BaseModel):
0.1,
description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
)
prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `llama3` - use the llama3 prompt style from the llama_index.\n"
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`.\n"
"If `mistral` - use the `mistral` prompt style. It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]\n"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
)
)
class VectorstoreSettings(BaseModel):
database: Literal["chroma", "qdrant", "postgres"]
database: Literal["chroma", "qdrant", "postgres", "clickhouse", "milvus"]
class NodeStoreSettings(BaseModel):
@ -117,18 +163,6 @@ class NodeStoreSettings(BaseModel):
class LlamaCPPSettings(BaseModel):
llm_hf_repo_id: str
llm_hf_model_file: str
prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
)
tfs_z: float = Field(
1.0,
description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
@ -151,10 +185,27 @@ class HuggingFaceSettings(BaseModel):
embedding_hf_model_name: str = Field(
description="Name of the HuggingFace model to use for embeddings"
)
access_token: str = Field(
None,
description="Huggingface access token, required to download some models",
)
trust_remote_code: bool = Field(
False,
description="If set to True, the code from the remote model will be trusted and executed.",
)
class EmbeddingSettings(BaseModel):
mode: Literal["huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock"]
mode: Literal[
"huggingface",
"openai",
"azopenai",
"sagemaker",
"ollama",
"mock",
"gemini",
"mistralai",
]
ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
"simple",
description=(
@ -202,6 +253,31 @@ class OpenAISettings(BaseModel):
"gpt-3.5-turbo",
description="OpenAI Model to use. Example: 'gpt-4'.",
)
request_timeout: float = Field(
120.0,
description="Time elapsed until the openailike server times out the request. Default is 120s. Format is float.",
)
embedding_api_base: str = Field(
None,
description="Base URL of OpenAI API. Example: 'https://api.openai.com/v1'.",
)
embedding_api_key: str
embedding_model: str = Field(
"text-embedding-ada-002",
description="OpenAI embedding Model to use. Example: 'text-embedding-3-large'.",
)
class GeminiSettings(BaseModel):
api_key: str
model: str = Field(
"models/gemini-pro",
description="Google Model to use. Example: 'models/gemini-pro'.",
)
embedding_model: str = Field(
"models/embedding-001",
description="Google Embedding Model to use. Example: 'models/embedding-001'.",
)
class OllamaSettings(BaseModel):
@ -253,6 +329,10 @@ class OllamaSettings(BaseModel):
120.0,
description="Time elapsed until ollama times out the request. Default is 120s. Format is float. ",
)
autopull_models: bool = Field(
False,
description="If set to True, Ollama will automatically pull the models from the API base.",
)
class AzureOpenAISettings(BaseModel):
@ -277,6 +357,10 @@ class AzureOpenAISettings(BaseModel):
class UISettings(BaseModel):
enabled: bool
path: str
default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
"RAG",
description="The default mode.",
)
default_chat_system_prompt: str = Field(
None,
description="The default system prompt to use for the chat mode.",
@ -284,6 +368,10 @@ class UISettings(BaseModel):
default_query_system_prompt: str = Field(
None, description="The default system prompt to use for the query mode."
)
default_summarization_system_prompt: str = Field(
None,
description="The default system prompt to use for the summarization mode.",
)
delete_file_button_enabled: bool = Field(
True, description="If the button to delete a file is enabled or not."
)
@ -319,6 +407,84 @@ class RagSettings(BaseModel):
rerank: RerankSettings
class SummarizeSettings(BaseModel):
use_async: bool = Field(
True,
description="If set to True, the summarization will be done asynchronously.",
)
class ClickHouseSettings(BaseModel):
host: str = Field(
"localhost",
description="The server hosting the ClickHouse database",
)
port: int = Field(
8443,
description="The port on which the ClickHouse database is accessible",
)
username: str = Field(
"default",
description="The username to use to connect to the ClickHouse database",
)
password: str = Field(
"",
description="The password to use to connect to the ClickHouse database",
)
database: str = Field(
"__default__",
description="The default database to use for connections",
)
secure: bool | str = Field(
False,
description="Use https/TLS for secure connection to the server",
)
interface: str | None = Field(
None,
description="Must be either 'http' or 'https'. Determines the protocol to use for the connection",
)
settings: dict[str, Any] | None = Field(
None,
description="Specific ClickHouse server settings to be used with the session",
)
connect_timeout: int | None = Field(
None,
description="Timeout in seconds for establishing a connection",
)
send_receive_timeout: int | None = Field(
None,
description="Read timeout in seconds for http connection",
)
verify: bool | None = Field(
None,
description="Verify the server certificate in secure/https mode",
)
ca_cert: str | None = Field(
None,
description="Path to Certificate Authority root certificate (.pem format)",
)
client_cert: str | None = Field(
None,
description="Path to TLS Client certificate (.pem format)",
)
client_cert_key: str | None = Field(
None,
description="Path to the private key for the TLS Client certificate",
)
http_proxy: str | None = Field(
None,
description="HTTP proxy address",
)
https_proxy: str | None = Field(
None,
description="HTTPS proxy address",
)
server_host_name: str | None = Field(
None,
description="Server host name to be checked against the TLS certificate",
)
class PostgresSettings(BaseModel):
host: str = Field(
"localhost",
@ -400,6 +566,27 @@ class QdrantSettings(BaseModel):
)
class MilvusSettings(BaseModel):
uri: str = Field(
"local_data/private_gpt/milvus/milvus_local.db",
description="The URI of the Milvus instance. For example: 'local_data/private_gpt/milvus/milvus_local.db' for Milvus Lite.",
)
token: str = Field(
"",
description=(
"A valid access token to access the specified Milvus instance. "
"This can be used as a recommended alternative to setting user and password separately. "
),
)
collection_name: str = Field(
"make_this_parameterizable_per_api_call",
description="The name of the collection in Milvus. Default is 'make_this_parameterizable_per_api_call'.",
)
overwrite: bool = Field(
True, description="Overwrite the previous collection schema if it exists."
)
class Settings(BaseModel):
server: ServerSettings
data: DataSettings
@ -410,13 +597,17 @@ class Settings(BaseModel):
huggingface: HuggingFaceSettings
sagemaker: SagemakerSettings
openai: OpenAISettings
gemini: GeminiSettings
ollama: OllamaSettings
azopenai: AzureOpenAISettings
vectorstore: VectorstoreSettings
nodestore: NodeStoreSettings
rag: RagSettings
summarize: SummarizeSettings
qdrant: QdrantSettings | None = None
postgres: PostgresSettings | None = None
clickhouse: ClickHouseSettings | None = None
milvus: MilvusSettings | None = None
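A hedged sketch of how the new optional sections and UI defaults can be read at runtime; the checks below are illustrative and not the project's actual wiring:

from private_gpt.settings.settings import settings

current = settings()
# ClickHouse and Milvus are optional: their sections are only populated when configured.
if current.vectorstore.database == "clickhouse" and current.clickhouse is not None:
    print(f"Vector store: ClickHouse at {current.clickhouse.host}:{current.clickhouse.port}")
# The UI can now boot directly into any of the four modes.
if current.ui.default_mode == "Summarize":
    print(current.ui.default_summarization_system_prompt)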
"""

View file

@ -1,9 +1,10 @@
"""This file should be imported if and only if you want to run the UI locally."""
import itertools
import base64
import logging
import time
from collections.abc import Iterable
from enum import Enum
from pathlib import Path
from typing import Any
@ -12,6 +13,7 @@ from fastapi import FastAPI
from gradio.themes.utils.colors import slate # type: ignore
from injector import inject, singleton
from llama_index.core.llms import ChatMessage, ChatResponse, MessageRole
from llama_index.core.types import TokenGen
from pydantic import BaseModel
from private_gpt.constants import PROJECT_ROOT_PATH
@ -20,6 +22,7 @@ from private_gpt.open_ai.extensions.context_filter import ContextFilter
from private_gpt.server.chat.chat_service import ChatService, CompletionGen
from private_gpt.server.chunks.chunks_service import Chunk, ChunksService
from private_gpt.server.ingest.ingest_service import IngestService
from private_gpt.server.recipes.summarize.summarize_service import SummarizeService
from private_gpt.settings.settings import settings
from private_gpt.ui.images import logo_svg
@ -31,9 +34,22 @@ AVATAR_BOT = THIS_DIRECTORY_RELATIVE / "avatar-bot.ico"
UI_TAB_TITLE = "My Private GPT"
SOURCES_SEPARATOR = "\n\n Sources: \n"
SOURCES_SEPARATOR = "<hr>Sources: \n"
MODES = ["Query Files", "Search Files", "LLM Chat (no context from files)"]
class Modes(str, Enum):
RAG_MODE = "RAG"
SEARCH_MODE = "Search"
BASIC_CHAT_MODE = "Basic"
SUMMARIZE_MODE = "Summarize"
MODES: list[Modes] = [
Modes.RAG_MODE,
Modes.SEARCH_MODE,
Modes.BASIC_CHAT_MODE,
Modes.SUMMARIZE_MODE,
]
class Source(BaseModel):
@ -71,10 +87,12 @@ class PrivateGptUi:
ingest_service: IngestService,
chat_service: ChatService,
chunks_service: ChunksService,
summarizeService: SummarizeService,
) -> None:
self._ingest_service = ingest_service
self._chat_service = chat_service
self._chunks_service = chunks_service
self._summarize_service = summarizeService
# Cache the UI blocks
self._ui_block = None
@ -82,10 +100,15 @@ class PrivateGptUi:
self._selected_filename = None
# Initialize system prompt based on default mode
self.mode = MODES[0]
self._system_prompt = self._get_default_system_prompt(self.mode)
default_mode_map = {mode.value: mode for mode in Modes}
self._default_mode = default_mode_map.get(
settings().ui.default_mode, Modes.RAG_MODE
)
self._system_prompt = self._get_default_system_prompt(self._default_mode)
def _chat(self, message: str, history: list[list[str]], mode: str, *_: Any) -> Any:
def _chat(
self, message: str, history: list[list[str]], mode: Modes, *_: Any
) -> Any:
def yield_deltas(completion_gen: CompletionGen) -> Iterable[str]:
full_response: str = ""
stream = completion_gen.response
@ -109,23 +132,29 @@ class PrivateGptUi:
+ f"{index}. {source.file} (page {source.page}) \n\n"
)
used_files.add(f"{source.file}-{source.page}")
sources_text += "<hr>\n\n"
full_response += sources_text
yield full_response
def yield_tokens(token_gen: TokenGen) -> Iterable[str]:
full_response: str = ""
for token in token_gen:
full_response += str(token)
yield full_response
def build_history() -> list[ChatMessage]:
history_messages: list[ChatMessage] = list(
itertools.chain(
*[
[
ChatMessage(content=interaction[0], role=MessageRole.USER),
history_messages: list[ChatMessage] = []
for interaction in history:
history_messages.append(
ChatMessage(content=interaction[0], role=MessageRole.USER)
)
if len(interaction) > 1 and interaction[1] is not None:
history_messages.append(
ChatMessage(
# Remove from history content the Sources information
content=interaction[1].split(SOURCES_SEPARATOR)[0],
role=MessageRole.ASSISTANT,
),
]
for interaction in history
]
)
)
@ -144,8 +173,7 @@ class PrivateGptUi:
),
)
match mode:
case "Query Files":
case Modes.RAG_MODE:
# Use only the selected file for the query
context_filter = None
if self._selected_filename is not None:
@ -164,14 +192,14 @@ class PrivateGptUi:
context_filter=context_filter,
)
yield from yield_deltas(query_stream)
case "LLM Chat (no context from files)":
case Modes.BASIC_CHAT_MODE:
llm_stream = self._chat_service.stream_chat(
messages=all_messages,
use_context=False,
)
yield from yield_deltas(llm_stream)
case "Search Files":
case Modes.SEARCH_MODE:
response = self._chunks_service.retrieve_relevant(
text=message, limit=4, prev_next_chunks=0
)
@ -184,37 +212,76 @@ class PrivateGptUi:
f"{source.text}"
for index, source in enumerate(sources, start=1)
)
case Modes.SUMMARIZE_MODE:
# Summarize the given message, optionally using selected files
context_filter = None
if self._selected_filename:
docs_ids = []
for ingested_document in self._ingest_service.list_ingested():
if (
ingested_document.doc_metadata["file_name"]
== self._selected_filename
):
docs_ids.append(ingested_document.doc_id)
context_filter = ContextFilter(docs_ids=docs_ids)
summary_stream = self._summarize_service.stream_summarize(
use_context=True,
context_filter=context_filter,
instructions=message,
)
yield from yield_tokens(summary_stream)
# On initialization and on mode change, this function set the system prompt
# to the default prompt based on the mode (and user settings).
@staticmethod
def _get_default_system_prompt(mode: str) -> str:
def _get_default_system_prompt(mode: Modes) -> str:
p = ""
match mode:
# For query chat mode, obtain default system prompt from settings
case "Query Files":
case Modes.RAG_MODE:
p = settings().ui.default_query_system_prompt
# For chat mode, obtain default system prompt from settings
case "LLM Chat (no context from files)":
case Modes.BASIC_CHAT_MODE:
p = settings().ui.default_chat_system_prompt
# For summarization mode, obtain default system prompt from settings
case Modes.SUMMARIZE_MODE:
p = settings().ui.default_summarization_system_prompt
# For any other mode, clear the system prompt
case _:
p = ""
return p
@staticmethod
def _get_default_mode_explanation(mode: Modes) -> str:
match mode:
case Modes.RAG_MODE:
return "Get contextualized answers from selected files."
case Modes.SEARCH_MODE:
return "Find relevant chunks of text in selected files."
case Modes.BASIC_CHAT_MODE:
return "Chat with the LLM using its training data. Files are ignored."
case Modes.SUMMARIZE_MODE:
return "Generate a summary of the selected files. Use the prompt to customize the result."
case _:
return ""
def _set_system_prompt(self, system_prompt_input: str) -> None:
logger.info(f"Setting system prompt to: {system_prompt_input}")
self._system_prompt = system_prompt_input
def _set_current_mode(self, mode: str) -> Any:
def _set_explanation_mode(self, explanation_mode: str) -> None:
self._explanation_mode = explanation_mode
def _set_current_mode(self, mode: Modes) -> Any:
self.mode = mode
self._set_system_prompt(self._get_default_system_prompt(mode))
# Update placeholder and allow interaction if default system prompt is set
if self._system_prompt:
return gr.update(placeholder=self._system_prompt, interactive=True)
# Update placeholder and disable interaction if no default system prompt is set
else:
return gr.update(placeholder=self._system_prompt, interactive=False)
self._set_explanation_mode(self._get_default_mode_explanation(mode))
interactive = self._system_prompt is not None
return [
gr.update(placeholder=self._system_prompt, interactive=interactive),
gr.update(value=self._explanation_mode),
]
def _list_ingested_files(self) -> list[list[str]]:
files = set()
@ -314,17 +381,30 @@ class PrivateGptUi:
".contain { display: flex !important; flex-direction: column !important; }"
"#component-0, #component-3, #component-10, #component-8 { height: 100% !important; }"
"#chatbot { flex-grow: 1 !important; overflow: auto !important;}"
"#col { height: calc(100vh - 112px - 16px) !important; }",
"#col { height: calc(100vh - 112px - 16px) !important; }"
"hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }"
".avatar-image { background-color: antiquewhite; border-radius: 2px; }"
".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }"
".footer-zylon-link { display:flex; margin-left: 5px; text-decoration: auto; color: var(--body-text-color); }"
".footer-zylon-link:hover { color: #C7BAFF; }"
".footer-zylon-ico { height: 20px; margin-left: 5px; background-color: antiquewhite; border-radius: 2px; }",
) as blocks:
with gr.Row():
gr.HTML(f"<div class='logo'/><img src={logo_svg} alt=PrivateGPT></div")
with gr.Row(equal_height=False):
with gr.Column(scale=3):
default_mode = self._default_mode
mode = gr.Radio(
MODES,
[mode.value for mode in MODES],
label="Mode",
value="Query Files",
value=default_mode,
)
explanation_mode = gr.Textbox(
placeholder=self._get_default_mode_explanation(default_mode),
show_label=False,
max_lines=3,
interactive=False,
)
upload_button = gr.components.UploadButton(
"Upload File(s)",
@ -408,9 +488,11 @@ class PrivateGptUi:
interactive=True,
render=False,
)
# When mode changes, set default system prompt
# When mode changes, set the default system prompt and the mode explanation
mode.change(
self._set_current_mode, inputs=mode, outputs=system_prompt_input
self._set_current_mode,
inputs=mode,
outputs=[system_prompt_input, explanation_mode],
)
# On blur, set system prompt to use in queries
system_prompt_input.blur(
@ -441,9 +523,11 @@ class PrivateGptUi:
"llamacpp": config_settings.llamacpp.llm_hf_model_file,
"openai": config_settings.openai.model,
"openailike": config_settings.openai.model,
"azopenai": config_settings.azopenai.llm_model,
"sagemaker": config_settings.sagemaker.llm_endpoint_name,
"mock": llm_mode,
"ollama": config_settings.ollama.llm_model,
"gemini": config_settings.gemini.model,
}
if llm_mode not in model_mapping:
@ -476,6 +560,14 @@ class PrivateGptUi:
),
additional_inputs=[mode, upload_button, system_prompt_input],
)
with gr.Row():
avatar_byte = AVATAR_BOT.read_bytes()
f_base64 = f"data:image/png;base64,{base64.b64encode(avatar_byte).decode('utf-8')}"
gr.HTML(
f"<div class='footer'><a class='footer-zylon-link' href='https://zylon.ai/'>Maintained by Zylon <img class='footer-zylon-ico' src='{f_base64}' alt=Zylon></a></div>"
)
return blocks
def get_ui_blocks(self) -> gr.Blocks:
@ -487,7 +579,7 @@ class PrivateGptUi:
blocks = self.get_ui_blocks()
blocks.queue()
logger.info("Mounting the gradio UI, at path=%s", path)
gr.mount_gradio_app(app, blocks, path=path)
gr.mount_gradio_app(app, blocks, path=path, favicon_path=AVATAR_BOT)
if __name__ == "__main__":

View file

@ -0,0 +1,95 @@
import logging
from collections import deque
from collections.abc import Iterator, Mapping
from typing import Any
from httpx import ConnectError
from tqdm import tqdm # type: ignore
from private_gpt.utils.retry import retry
try:
from ollama import Client, ResponseError # type: ignore
except ImportError as e:
raise ImportError(
"Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
) from e
logger = logging.getLogger(__name__)
_MAX_RETRIES = 5
_JITTER = (3.0, 10.0)
@retry(
is_async=False,
exceptions=(ConnectError, ResponseError),
tries=_MAX_RETRIES,
jitter=_JITTER,
logger=logger,
)
def check_connection(client: Client) -> bool:
try:
client.list()
return True
except (ConnectError, ResponseError) as e:
raise e
except Exception as e:
logger.error(f"Failed to connect to Ollama: {type(e).__name__}: {e!s}")
return False
def process_streaming(generator: Iterator[Mapping[str, Any]]) -> None:
progress_bars = {}
queue = deque() # type: ignore
def create_progress_bar(dgt: str, total: int) -> Any:
return tqdm(
total=total, desc=f"Pulling model {dgt[7:17]}...", unit="B", unit_scale=True
)
current_digest = None
for chunk in generator:
digest = chunk.get("digest")
completed_size = chunk.get("completed", 0)
total_size = chunk.get("total")
if digest and total_size is not None:
if digest not in progress_bars and completed_size > 0:
progress_bars[digest] = create_progress_bar(digest, total=total_size)
if current_digest is None:
current_digest = digest
else:
queue.append(digest)
if digest in progress_bars:
progress_bar = progress_bars[digest]
progress = completed_size - progress_bar.n
if completed_size > 0 and total_size >= progress != progress_bar.n:
if digest == current_digest:
progress_bar.update(progress)
if progress_bar.n >= total_size:
progress_bar.close()
current_digest = queue.popleft() if queue else None
else:
# Store progress for later update
progress_bars[digest].total = total_size
progress_bars[digest].n = completed_size
# Close any remaining progress bars at the end
for progress_bar in progress_bars.values():
progress_bar.close()
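A self-contained sketch of the chunk shape this parser expects from a streaming pull; the module path, digest, and byte counts are assumptions made up for illustration:

from private_gpt.utils.ollama import process_streaming  # assumed module path

fake_chunks = iter(
    [
        {"status": "pulling", "digest": "sha256:0123456789abcdef", "total": 1000, "completed": 250},
        {"status": "pulling", "digest": "sha256:0123456789abcdef", "total": 1000, "completed": 1000},
    ]
)
process_streaming(fake_chunks)  # draws a single tqdm bar and closes it when complete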
def pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:
try:
installed_models = [model["name"] for model in client.list().get("models", {})]
if model_name not in installed_models:
logger.info(f"Pulling model {model_name}. Please wait...")
process_streaming(client.pull(model_name, stream=True))
logger.info(f"Model {model_name} pulled successfully")
except Exception as e:
logger.error(f"Failed to pull model {model_name}: {e!s}")
if raise_error:
raise e
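A hedged sketch of wiring these helpers together at startup; the module path, host URL, and model name below are assumptions for illustration only:

from ollama import Client  # type: ignore

from private_gpt.utils.ollama import check_connection, pull_model  # assumed module path

client = Client(host="http://localhost:11434")
if not check_connection(client):
    raise RuntimeError("Could not reach the Ollama server")
# Pull the model only if it is not installed yet; log instead of crashing on failure.
pull_model(client, "llama3.1", raise_error=False)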

View file

@ -0,0 +1,31 @@
import logging
from collections.abc import Callable
from typing import Any
from retry_async import retry as retry_untyped # type: ignore
retry_logger = logging.getLogger(__name__)
def retry(
exceptions: Any = Exception,
*,
is_async: bool = False,
tries: int = -1,
delay: float = 0,
max_delay: float | None = None,
backoff: float = 1,
jitter: float | tuple[float, float] = 0,
logger: logging.Logger = retry_logger,
) -> Callable[..., Any]:
wrapped = retry_untyped(
exceptions=exceptions,
is_async=is_async,
tries=tries,
delay=delay,
max_delay=max_delay,
backoff=backoff,
jitter=jitter,
logger=logger,
)
return wrapped # type: ignore
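A hedged usage sketch of the helper above; the wrapped function, exception type, and retry values are illustrative only:

import logging

from private_gpt.utils.retry import retry

_logger = logging.getLogger(__name__)

@retry(
    exceptions=(ConnectionError,),
    is_async=False,
    tries=3,
    delay=1.0,
    jitter=(0.5, 2.0),
    logger=_logger,
)
def fetch_remote_status() -> str:
    # Hypothetical flaky operation; replace with a real call that may
    # intermittently raise ConnectionError.
    return "ok"

print(fetch_remote_status())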

View file

@ -1,78 +1,98 @@
[tool.poetry]
name = "private-gpt"
version = "0.5.0"
version = "0.7.0"
description = "Private GPT"
authors = ["Zylon <hi@zylon.ai>"]
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
# PrivateGPT
fastapi = { extras = ["all"], version = "^0.110.0" }
python-multipart = "^0.0.9"
injector = "^0.21.0"
pyyaml = "^6.0.1"
watchdog = "^4.0.0"
transformers = "^4.38.2"
fastapi = { extras = ["all"], version = "^0.115.0" }
python-multipart = "^0.0.10"
injector = "^0.22.0"
pyyaml = "^6.0.2"
watchdog = "^4.0.1"
transformers = "^4.44.2"
docx2txt = "^0.8"
cryptography = "^3.1"
# LlamaIndex core libs
llama-index-core = "^0.10.14"
llama-index-readers-file = "^0.1.6"
llama-index-core = ">=0.11.2,<0.12.0"
llama-index-readers-file = "*"
# Optional LlamaIndex integration libs
llama-index-llms-llama-cpp = {version = "^0.1.3", optional = true}
llama-index-llms-openai = {version = "^0.1.6", optional = true}
llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.1.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.5", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true}
llama-index-embeddings-openai = {version ="^0.1.6", optional = true}
llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true}
llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
llama-index-vector-stores-chroma = {version ="^0.1.4", optional = true}
llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.2", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.2", optional = true}
llama-index-llms-llama-cpp = {version = "*", optional = true}
llama-index-llms-openai = {version ="*", optional = true}
llama-index-llms-openai-like = {version ="*", optional = true}
llama-index-llms-ollama = {version ="*", optional = true}
llama-index-llms-azure-openai = {version ="*", optional = true}
llama-index-llms-gemini = {version ="*", optional = true}
llama-index-embeddings-ollama = {version ="*", optional = true}
llama-index-embeddings-huggingface = {version ="*", optional = true}
llama-index-embeddings-openai = {version ="*", optional = true}
llama-index-embeddings-azure-openai = {version ="*", optional = true}
llama-index-embeddings-gemini = {version ="*", optional = true}
llama-index-embeddings-mistralai = {version ="*", optional = true}
llama-index-vector-stores-qdrant = {version ="*", optional = true}
llama-index-vector-stores-milvus = {version ="*", optional = true}
llama-index-vector-stores-chroma = {version ="*", optional = true}
llama-index-vector-stores-postgres = {version ="*", optional = true}
llama-index-vector-stores-clickhouse = {version ="*", optional = true}
llama-index-storage-docstore-postgres = {version ="*", optional = true}
llama-index-storage-index-store-postgres = {version ="*", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}
# ClickHouse
clickhouse-connect = {version = "^0.7.19", optional = true}
# Optional Sagemaker dependency
boto3 = {version ="^1.34.51", optional = true}
boto3 = {version ="^1.35.26", optional = true}
# Optional Reranker dependencies
torch = {version ="^2.1.2", optional = true}
sentence-transformers = {version ="^2.6.1", optional = true}
torch = {version ="^2.4.1", optional = true}
sentence-transformers = {version ="^3.1.1", optional = true}
# Optional UI
gradio = {version ="^4.19.2", optional = true}
gradio = {version ="^4.44.0", optional = true}
ffmpy = {version ="^0.4.0", optional = true}
# Optional HF Transformers
einops = {version = "^0.8.0", optional = true}
retry-async = "^0.1.4"
[tool.poetry.extras]
ui = ["gradio"]
ui = ["gradio", "ffmpy"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-ollama = ["llama-index-llms-ollama"]
llms-sagemaker = ["boto3"]
llms-azopenai = ["llama-index-llms-azure-openai"]
llms-gemini = ["llama-index-llms-gemini"]
embeddings-ollama = ["llama-index-embeddings-ollama"]
embeddings-huggingface = ["llama-index-embeddings-huggingface"]
embeddings-huggingface = ["llama-index-embeddings-huggingface", "einops"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
embeddings-gemini = ["llama-index-embeddings-gemini"]
embeddings-mistral = ["llama-index-embeddings-mistralai"]
vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
vector-stores-milvus = ["llama-index-vector-stores-milvus"]
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
rerank-sentence-transformers = ["torch", "sentence-transformers"]
[tool.poetry.group.dev.dependencies]
black = "^22"
mypy = "^1.2"
pre-commit = "^2"
pytest = "^7"
pytest-cov = "^3"
black = "^24"
mypy = "^1.11"
pre-commit = "^3"
pytest = "^8"
pytest-cov = "^5"
ruff = "^0"
pytest-asyncio = "^0.21.1"
types-pyyaml = "^6.0.12.12"
pytest-asyncio = "^0.24.0"
types-pyyaml = "^6.0.12.20240917"
[build-system]
requires = ["poetry-core>=1.0.0"]
@ -100,7 +120,7 @@ target-version = ['py311']
target-version = 'py311'
# See all rules at https://beta.ruff.rs/docs/rules/
select = [
lint.select = [
"E", # pycodestyle
"W", # pycodestyle
"F", # Pyflakes
@ -117,7 +137,7 @@ select = [
"RUF", # Ruff-specific rules
]
ignore = [
lint.ignore = [
"E501", # "Line too long"
# -> line length already regulated by black
"PT011", # "pytest.raises() should specify expected exception"
@ -135,24 +155,24 @@ ignore = [
# -> "Missing docstring in public function too restrictive"
]
[tool.ruff.pydocstyle]
[tool.ruff.lint.pydocstyle]
# Automatically disable rules that are incompatible with Google docstring convention
convention = "google"
[tool.ruff.pycodestyle]
[tool.ruff.lint.pycodestyle]
max-doc-length = 88
[tool.ruff.flake8-tidy-imports]
[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "all"
[tool.ruff.flake8-type-checking]
[tool.ruff.lint.flake8-type-checking]
strict = true
runtime-evaluated-base-classes = ["pydantic.BaseModel"]
# Pydantic needs to be able to evaluate types at runtime
# see https://pypi.org/project/flake8-type-checking/ for flake8-type-checking documentation
# see https://beta.ruff.rs/docs/settings/#flake8-type-checking-runtime-evaluated-base-classes for ruff documentation
[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
# Allow missing docstrings for tests
"tests/**/*.py" = ["D1"]

View file

@ -7,12 +7,13 @@ from pathlib import Path
from private_gpt.di import global_injector
from private_gpt.server.ingest.ingest_service import IngestService
from private_gpt.server.ingest.ingest_watcher import IngestWatcher
from private_gpt.settings.settings import Settings
logger = logging.getLogger(__name__)
class LocalIngestWorker:
def __init__(self, ingest_service: IngestService) -> None:
def __init__(self, ingest_service: IngestService, setting: Settings) -> None:
self.ingest_service = ingest_service
self.total_documents = 0
@ -20,6 +21,24 @@ class LocalIngestWorker:
self._files_under_root_folder: list[Path] = []
self.is_local_ingestion_enabled = setting.data.local_ingestion.enabled
self.allowed_local_folders = setting.data.local_ingestion.allow_ingest_from
def _validate_folder(self, folder_path: Path) -> None:
if not self.is_local_ingestion_enabled:
raise ValueError(
"Local ingestion is disabled. "
"You can enable it in the settings (`data.local_ingestion.enabled`)."
)
# Allow all folders if wildcard is present
if "*" in self.allowed_local_folders:
return
if not any(
folder_path.is_relative_to(allowed_folder)
for allowed_folder in self.allowed_local_folders
):
raise ValueError(f"Folder {folder_path} is not allowed for ingestion")
def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None:
"""Search all files under the root folder recursively.
@ -28,6 +47,7 @@ class LocalIngestWorker:
for file_path in root_path.iterdir():
if file_path.is_file() and file_path.name not in ignored:
self.total_documents += 1
self._validate_folder(file_path)
self._files_under_root_folder.append(file_path)
elif file_path.is_dir() and file_path.name not in ignored:
self._find_all_files_in_folder(file_path, ignored)
@ -92,13 +112,13 @@ if args.log_file:
logger.addHandler(file_handler)
if __name__ == "__main__":
root_path = Path(args.folder)
if not root_path.exists():
raise ValueError(f"Path {args.folder} does not exist")
ingest_service = global_injector.get(IngestService)
worker = LocalIngestWorker(ingest_service)
settings = global_injector.get(Settings)
worker = LocalIngestWorker(ingest_service, settings)
worker.ingest_folder(root_path, args.ignored)
if args.ignored:

View file

@ -24,6 +24,7 @@ snapshot_download(
repo_id=settings().huggingface.embedding_hf_model_name,
cache_dir=models_cache_path,
local_dir=embedding_path,
token=settings().huggingface.access_token,
)
print("Embedding model downloaded!")
@ -35,15 +36,18 @@ hf_hub_download(
cache_dir=models_cache_path,
local_dir=models_path,
resume_download=resume_download,
token=settings().huggingface.access_token,
)
print("LLM model downloaded!")
# Download Tokenizer
print(f"Downloading tokenizer {settings().llm.tokenizer}")
AutoTokenizer.from_pretrained(
if settings().llm.tokenizer:
print(f"Downloading tokenizer {settings().llm.tokenizer}")
AutoTokenizer.from_pretrained(
pretrained_model_name_or_path=settings().llm.tokenizer,
cache_dir=models_cache_path,
)
print("Tokenizer downloaded!")
token=settings().huggingface.access_token,
)
print("Tokenizer downloaded!")
print("Setup done")

View file

@ -6,29 +6,31 @@ llm:
mode: ${PGPT_MODE:mock}
embedding:
mode: ${PGPT_MODE:sagemaker}
mode: ${PGPT_EMBED_MODE:mock}
llamacpp:
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
llm_hf_repo_id: ${PGPT_HF_REPO_ID:lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf}
huggingface:
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:nomic-ai/nomic-embed-text-v1.5}
sagemaker:
llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
ollama:
llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
llm_model: ${PGPT_OLLAMA_LLM_MODEL:llama3.1}
embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
top_k: ${PGPT_OLLAMA_TOP_K:40}
top_p: ${PGPT_OLLAMA_TOP_P:0.9}
repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
autopull_models: ${PGPT_OLLAMA_AUTOPULL_MODELS:true}
ui:
enabled: true

10
settings-gemini.yaml Normal file
View file

@ -0,0 +1,10 @@
llm:
mode: gemini
embedding:
mode: gemini
gemini:
api_key: ${GOOGLE_API_KEY:}
model: models/gemini-pro
embedding_model: models/embedding-001

View file

@ -7,18 +7,18 @@ llm:
# Should be matching the selected model
max_new_tokens: 512
context_window: 3900
tokenizer: mistralai/Mistral-7B-Instruct-v0.2
tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
prompt_style: "llama3"
llamacpp:
prompt_style: "mistral"
llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
embedding:
mode: huggingface
huggingface:
embedding_hf_model_name: BAAI/bge-small-en-v1.5
embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
vectorstore:
database: qdrant

View file

@ -14,7 +14,7 @@ embedding:
embed_dim: 768
ollama:
llm_model: mistral
llm_model: llama3.1
embedding_model: nomic-embed-text
api_base: http://localhost:11434

View file

@ -11,7 +11,7 @@ embedding:
mode: ollama
ollama:
llm_model: mistral
llm_model: llama3.1
embedding_model: nomic-embed-text
api_base: http://localhost:11434
embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama

View file

@ -3,15 +3,19 @@ server:
llm:
mode: openailike
max_new_tokens: 512
tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
temperature: 0.1
embedding:
mode: huggingface
ingest_mode: simple
huggingface:
embedding_hf_model_name: BAAI/bge-small-en-v1.5
embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
openai:
api_base: http://localhost:8000/v1
api_key: EMPTY
model: facebook/opt-125m
request_timeout: 600.0

View file

@ -5,7 +5,7 @@ server:
env_name: ${APP_ENV:prod}
port: ${PORT:8001}
cors:
enabled: false
enabled: true
allow_origins: ["*"]
allow_methods: ["*"]
allow_headers: ["*"]
@ -17,11 +17,16 @@ server:
secret: "Basic c2VjcmV0OmtleQ=="
data:
local_ingestion:
enabled: ${LOCAL_INGESTION_ENABLED:false}
allow_ingest_from: ["*"]
local_data_folder: local_data/private_gpt
ui:
enabled: true
path: /
# "RAG", "Search", "Basic", or "Summarize"
default_mode: "RAG"
default_chat_system_prompt: >
You are a helpful, respectful and honest assistant.
Always answer as helpfully as possible and follow ALL given instructions.
@ -31,15 +36,24 @@ ui:
You can only answer questions about the provided context.
If you know the answer but it is not based in the provided context, don't provide
the answer, just state the answer is not in the context provided.
default_summarization_system_prompt: >
Provide a comprehensive summary of the provided context information.
The summary should cover all the key points and main ideas presented in
the original text, while also condensing the information into a concise
and easy-to-understand format. Please ensure that the summary includes
relevant details and examples that support the main ideas, while avoiding
any unnecessary information or repetition.
delete_file_button_enabled: true
delete_all_files_button_enabled: true
llm:
mode: llamacpp
prompt_style: "llama3"
# Should be matching the selected model
max_new_tokens: 512
context_window: 3900
tokenizer: mistralai/Mistral-7B-Instruct-v0.2
# Select your tokenizer. Llama-index tokenizer is the default.
# tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
rag:
@ -52,10 +66,19 @@ rag:
model: cross-encoder/ms-marco-MiniLM-L-2-v2
top_n: 1
summarize:
use_async: true
clickhouse:
host: localhost
port: 8443
username: admin
password: clickhouse
database: embeddings
llamacpp:
prompt_style: "mistral"
llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
@ -65,10 +88,14 @@ embedding:
# Should be matching the value above in most cases
mode: huggingface
ingest_mode: simple
embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
embed_dim: 768 # 768 is for nomic-ai/nomic-embed-text-v1.5
huggingface:
embedding_hf_model_name: BAAI/bge-small-en-v1.5
embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
access_token: ${HF_TOKEN:}
# Warning: Enabling this option will allow the model to download and execute code from the internet.
# Nomic AI requires this option to be enabled to use the model, be aware if you are using a different model.
trust_remote_code: true
vectorstore:
database: qdrant
@ -76,6 +103,11 @@ vectorstore:
nodestore:
database: simple
milvus:
uri: local_data/private_gpt/milvus/milvus_local.db
collection_name: milvus_db
overwrite: false
qdrant:
path: local_data/private_gpt/qdrant
@ -94,14 +126,16 @@ sagemaker:
openai:
api_key: ${OPENAI_API_KEY:}
model: gpt-3.5-turbo
embedding_api_key: ${OPENAI_API_KEY:}
ollama:
llm_model: llama2
llm_model: llama3.1
embedding_model: nomic-embed-text
api_base: http://localhost:11434
embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
keep_alive: 5m
request_timeout: 120.0
autopull_models: true
azopenai:
api_key: ${AZ_OPENAI_API_KEY:}
@ -111,3 +145,8 @@ azopenai:
api_version: "2023-05-15"
embedding_model: text-embedding-ada-002
llm_model: gpt-35-turbo
gemini:
api_key: ${GOOGLE_API_KEY:}
model: models/gemini-pro
embedding_model: models/embedding-001

View file

@ -5,7 +5,7 @@ from private_gpt.launcher import create_app
from tests.fixtures.mock_injector import MockInjector
@pytest.fixture()
@pytest.fixture
def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:
if request is not None and hasattr(request, "param"):
injector.bind_settings(request.param or {})

View file

@ -19,6 +19,6 @@ class IngestHelper:
return ingest_result
@pytest.fixture()
@pytest.fixture
def ingest_helper(test_client: TestClient) -> IngestHelper:
return IngestHelper(test_client)

View file

@ -37,6 +37,6 @@ class MockInjector:
return self.test_injector.get(interface)
@pytest.fixture()
@pytest.fixture
def injector() -> MockInjector:
return MockInjector()

View file

@ -0,0 +1,74 @@
import os
import subprocess
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def file_path() -> str:
return "test.txt"
def create_test_file(file_path: str) -> None:
with open(file_path, "w") as f:
f.write("test")
def clear_log_file(log_file_path: str) -> None:
if Path(log_file_path).exists():
os.remove(log_file_path)
def read_log_file(log_file_path: str) -> str:
with open(log_file_path) as f:
return f.read()
def init_structure(folder: str, file_path: str) -> None:
clear_log_file(file_path)
os.makedirs(folder, exist_ok=True)
create_test_file(f"{folder}/{file_path}")
def test_ingest_one_file_in_allowed_folder(
file_path: str, test_client: TestClient
) -> None:
allowed_folder = "local_data/tests/allowed_folder"
init_structure(allowed_folder, file_path)
test_env = os.environ.copy()
test_env["PGPT_PROFILES"] = "test"
test_env["LOCAL_INGESTION_ENABLED"] = "True"
result = subprocess.run(
["python", "scripts/ingest_folder.py", allowed_folder],
capture_output=True,
text=True,
env=test_env,
)
assert result.returncode == 0, f"Script failed with error: {result.stderr}"
response_after = test_client.get("/v1/ingest/list")
count_ingest_after = len(response_after.json()["data"])
assert count_ingest_after > 0, "No documents were ingested"
def test_ingest_disabled(file_path: str) -> None:
allowed_folder = "local_data/tests/allowed_folder"
init_structure(allowed_folder, file_path)
test_env = os.environ.copy()
test_env["PGPT_PROFILES"] = "test"
test_env["LOCAL_INGESTION_ENABLED"] = "False"
result = subprocess.run(
["python", "scripts/ingest_folder.py", allowed_folder],
capture_output=True,
text=True,
env=test_env,
)
assert result.returncode != 0, f"Script failed with error: {result.stderr}"

View file

@ -0,0 +1,159 @@
from fastapi.testclient import TestClient
from private_gpt.server.recipes.summarize.summarize_router import (
SummarizeBody,
SummarizeResponse,
)
def test_summarize_route_produces_a_stream(test_client: TestClient) -> None:
body = SummarizeBody(
text="Test",
stream=True,
)
response = test_client.post("/v1/summarize", json=body.model_dump())
raw_events = response.text.split("\n\n")
events = [
item.removeprefix("data: ") for item in raw_events if item.startswith("data: ")
]
assert response.status_code == 200
assert "text/event-stream" in response.headers["content-type"]
assert len(events) > 0
assert events[-1] == "[DONE]"
def test_summarize_route_produces_a_single_value(test_client: TestClient) -> None:
body = SummarizeBody(
text="test",
stream=False,
)
response = test_client.post("/v1/summarize", json=body.model_dump())
# No asserts, if it validates it's good
SummarizeResponse.model_validate(response.json())
assert response.status_code == 200
def test_summarize_with_document_context(test_client: TestClient) -> None:
# Ingest a document
ingest_response = test_client.post(
"/v1/ingest/text",
json={
"file_name": "file_name",
"text": "Lorem ipsum dolor sit amet",
},
)
assert ingest_response.status_code == 200
ingested_docs = ingest_response.json()["data"]
assert len(ingested_docs) == 1
body = SummarizeBody(
use_context=True,
context_filter={"docs_ids": [doc["doc_id"] for doc in ingested_docs]},
stream=False,
)
response = test_client.post("/v1/summarize", json=body.model_dump())
completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
assert response.status_code == 200
# We can check the content of the completion, because mock LLM used in tests
# always echoes the prompt. In the case of summary, the input context is passed.
assert completion.summary.find("Lorem ipsum dolor sit amet") != -1
def test_summarize_with_non_existent_document_context_not_fails(
test_client: TestClient,
) -> None:
body = SummarizeBody(
use_context=True,
context_filter={
"docs_ids": ["non-existent-doc-id"],
},
stream=False,
)
response = test_client.post("/v1/summarize", json=body.model_dump())
completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
assert response.status_code == 200
# We can check the content of the completion, because mock LLM used in tests
# always echoes the prompt. In the case of summary, the input context is passed.
assert completion.summary.find("Empty Response") != -1
def test_summarize_with_metadata_and_document_context(test_client: TestClient) -> None:
docs = []
# Ingest a first document
document_1_content = "Content of document 1"
ingest_response = test_client.post(
"/v1/ingest/text",
json={
"file_name": "file_name_1",
"text": document_1_content,
},
)
assert ingest_response.status_code == 200
ingested_docs = ingest_response.json()["data"]
assert len(ingested_docs) == 1
docs += ingested_docs
# Ingest a second document
document_2_content = "Text of document 2"
ingest_response = test_client.post(
"/v1/ingest/text",
json={
"file_name": "file_name_2",
"text": document_2_content,
},
)
assert ingest_response.status_code == 200
ingested_docs = ingest_response.json()["data"]
assert len(ingested_docs) == 1
docs += ingested_docs
# Completions with the first document's id and the second document's metadata
body = SummarizeBody(
use_context=True,
context_filter={"docs_ids": [doc["doc_id"] for doc in docs]},
stream=False,
)
response = test_client.post("/v1/summarize", json=body.model_dump())
completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
assert response.status_code == 200
# Assert both documents are part of the used sources
# We can check the content of the completion, because mock LLM used in tests
# always echoes the prompt. In the case of summary, the input context is passed.
assert completion.summary.find(document_1_content) != -1
assert completion.summary.find(document_2_content) != -1
def test_summarize_with_prompt(test_client: TestClient) -> None:
ingest_response = test_client.post(
"/v1/ingest/text",
json={
"file_name": "file_name",
"text": "Lorem ipsum dolor sit amet",
},
)
assert ingest_response.status_code == 200
ingested_docs = ingest_response.json()["data"]
assert len(ingested_docs) == 1
body = SummarizeBody(
use_context=True,
context_filter={
"docs_ids": [doc["doc_id"] for doc in ingested_docs],
},
prompt="This is a custom summary prompt, 54321",
stream=False,
)
response = test_client.post("/v1/summarize", json=body.model_dump())
completion: SummarizeResponse = SummarizeResponse.model_validate(response.json())
assert response.status_code == 200
# We can check the content of the completion, because mock LLM used in tests
# always echoes the prompt. In the case of summary, the input context is passed.
assert completion.summary.find("This is a custom summary prompt, 54321") != -1
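Outside the test suite, the same endpoint can be exercised over plain HTTP. A hedged sketch using the requests library; the base URL and port are assumptions (8001 is the default port in settings.yaml), and the field names follow SummarizeBody/SummarizeResponse as used in the tests above:

import requests

response = requests.post(
    "http://localhost:8001/v1/summarize",
    json={"text": "Lorem ipsum dolor sit amet", "stream": False},
    timeout=120,
)
response.raise_for_status()
print(response.json()["summary"])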

View file

@ -5,6 +5,7 @@ from private_gpt.components.llm.prompt_helper import (
ChatMLPromptStyle,
DefaultPromptStyle,
Llama2PromptStyle,
Llama3PromptStyle,
MistralPromptStyle,
TagPromptStyle,
get_prompt_style,
@ -69,17 +70,21 @@ def test_tag_prompt_style_format_with_system_prompt():
def test_mistral_prompt_style_format():
prompt_style = MistralPromptStyle()
messages = [
ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM),
ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
ChatMessage(content="A", role=MessageRole.SYSTEM),
ChatMessage(content="B", role=MessageRole.USER),
]
expected_prompt = (
"<s>[INST] You are an AI assistant. [/INST]</s>"
"[INST] Hello, how are you doing? [/INST]"
)
expected_prompt = "<s>[INST] A\nB [/INST]"
assert prompt_style.messages_to_prompt(messages) == expected_prompt
messages2 = [
ChatMessage(content="A", role=MessageRole.SYSTEM),
ChatMessage(content="B", role=MessageRole.USER),
ChatMessage(content="C", role=MessageRole.ASSISTANT),
ChatMessage(content="D", role=MessageRole.USER),
]
expected_prompt2 = "<s>[INST] A\nB [/INST] C</s><s>[INST] D [/INST]"
assert prompt_style.messages_to_prompt(messages2) == expected_prompt2
def test_chatml_prompt_style_format():
prompt_style = ChatMLPromptStyle()
@ -135,3 +140,57 @@ def test_llama2_prompt_style_with_system_prompt():
)
assert prompt_style.messages_to_prompt(messages) == expected_prompt
def test_llama3_prompt_style_format():
prompt_style = Llama3PromptStyle()
messages = [
ChatMessage(content="You are a helpful assistant", role=MessageRole.SYSTEM),
ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
]
expected_prompt = (
"<|start_header_id|>system<|end_header_id|>\n\n"
"You are a helpful assistant<|eot_id|>"
"<|start_header_id|>user<|end_header_id|>\n\n"
"Hello, how are you doing?<|eot_id|>"
"<|start_header_id|>assistant<|end_header_id|>\n\n"
)
assert prompt_style.messages_to_prompt(messages) == expected_prompt
def test_llama3_prompt_style_with_default_system():
prompt_style = Llama3PromptStyle()
messages = [
ChatMessage(content="Hello!", role=MessageRole.USER),
]
expected = (
"<|start_header_id|>system<|end_header_id|>\n\n"
f"{prompt_style.DEFAULT_SYSTEM_PROMPT}<|eot_id|>"
"<|start_header_id|>user<|end_header_id|>\n\nHello!<|eot_id|>"
"<|start_header_id|>assistant<|end_header_id|>\n\n"
)
assert prompt_style._messages_to_prompt(messages) == expected
def test_llama3_prompt_style_with_assistant_response():
prompt_style = Llama3PromptStyle()
messages = [
ChatMessage(content="You are a helpful assistant", role=MessageRole.SYSTEM),
ChatMessage(content="What is the capital of France?", role=MessageRole.USER),
ChatMessage(
content="The capital of France is Paris.", role=MessageRole.ASSISTANT
),
]
expected_prompt = (
"<|start_header_id|>system<|end_header_id|>\n\n"
"You are a helpful assistant<|eot_id|>"
"<|start_header_id|>user<|end_header_id|>\n\n"
"What is the capital of France?<|eot_id|>"
"<|start_header_id|>assistant<|end_header_id|>\n\n"
"The capital of France is Paris.<|eot_id|>"
)
assert prompt_style.messages_to_prompt(messages) == expected_prompt

View file

@ -1 +1 @@
0.5.0
0.7.0