kind: live

title: private-gpt

# Other files come from https://github.com/zylon-ai/private-gpt

defaults:
  life_span: 5d
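
# `life_span: 5d` keeps every job in this flow alive for at most five days.
# Assuming neuro-flow "live" semantics, it can also be overridden per job,
# for example:
#
#   pgpt:
#     life_span: 1d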

images:
  privategpt:
    ref: image:$[[ project.id ]]:v1
    dockerfile: $[[ flow.workspace ]]/Dockerfile.external
    context: $[[ flow.workspace ]]/
    build_preset: cpu-large
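
# A usage sketch, assuming the neuro-flow CLI that consumes this file:
#
#   neuro-flow build privategpt   # build Dockerfile.external with the cpu-large preset
#   neuro-flow run pgpt           # start the main PrivateGPT job defined under jobs: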

volumes:
  cache:
    remote: storage:$[[ flow.project_id ]]/cache
    mount: /root/.cache/huggingface
    local: cache
  data:
    remote: storage:$[[ flow.project_id ]]/data
    mount: /home/worker/app/local_data
    local: local_data
  pgdata:
    remote: storage:$[[ flow.project_id ]]/pgdata
    mount: /var/lib/postgresql/data
    local: pgdata
  pgdata_onprem:
    remote: disk:pgdata
    mount: /var/lib/postgresql/data
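  # Unlike the `storage:` volumes above, `disk:pgdata` mounts a dedicated
  # platform disk; postgres generally benefits from block-device I/O, which is
  # presumably why this on-prem variant exists alongside `pgdata`.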
  ollama_models:
    remote: storage:$[[ flow.project_id ]]/ollama_models
    mount: /root/.ollama
    local: models
  project:
    remote: storage:$[[ flow.project_id ]]
    mount: /project
    local: .
  settings:
    remote: storage:$[[ flow.project_id ]]/settings
    mount: /home/worker/app/settings
    local: settings
  tiktoken_cache:
    remote: storage:$[[ flow.project_id ]]/tiktoken_cache
    mount: /home/worker/app/tiktoken_cache
    local: tiktoken_cache
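
# Each volume maps platform storage (`remote`) to a container path (`mount`);
# where `local` is set, the directory can be synced with the workspace. A usage
# sketch, assuming the neuro-flow CLI:
#
#   neuro-flow upload settings    # push ./settings to storage before the first run
#   neuro-flow download data      # pull ingested documents back to ./local_data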

jobs:
  pgpt:
    image: ${{ images.privategpt.ref }}
    name: pgpt
    preset: cpu-small
    http_port: "8080"
    # detach: true
    browse: true
    volumes:
      - ${{ volumes.data.ref_rw }}
      - ${{ upload(volumes.settings).ref_rw }}
      - ${{ volumes.tiktoken_cache.ref_rw }}
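    # `upload(...)` re-uploads the local `settings` directory to storage before
    # the job starts (assuming neuro-flow semantics), so edited profiles are
    # picked up on every run without a manual `neuro-flow upload`.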
    env:
      PORT: 8080
      PGPT_PROFILES: vllm-pgvector
      PGPT_SETTINGS_FOLDER: ${{ volumes.settings.mount }}
      VLLM_API_BASE: http://${{ inspect_job('vllm').internal_hostname_named }}:8000/v1
      OLLAMA_API_BASE: http://${{ inspect_job('ollama').internal_hostname_named }}:11434
      POSTGRES_HOST: ${{ inspect_job('pgvector').internal_hostname_named }}
      VLLM_MODEL: meta-llama/Meta-Llama-3.1-8B-Instruct
      VLLM_TOKENIZER: meta-llama/Meta-Llama-3.1-8B-Instruct
      HUGGINGFACE_TOKEN: secret:HF_TOKEN
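
  # `inspect_job(...)` resolves the internal hostnames of the backend jobs, so
  # vllm, ollama, and pgvector must already be running when pgpt starts. With
  # PGPT_PROFILES=vllm-pgvector, PrivateGPT loads settings-vllm-pgvector.yaml
  # from PGPT_SETTINGS_FOLDER; a hypothetical profile snippet consuming these
  # env vars could look like:
  #
  #   llm:
  #     mode: openailike
  #   openai:
  #     api_base: ${VLLM_API_BASE}
  #     model: ${VLLM_MODEL}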

  vllm:
    image: vllm/vllm-openai:v0.6.1.post2
    name: vllm
    preset: H100x1
    detach: true
    http_port: "8000"
    volumes:
      - ${{ volumes.cache.ref_rw }}
    env:
      HF_TOKEN: secret:HF_TOKEN
    cmd: >
      --model meta-llama/Meta-Llama-3.1-8B-Instruct
      --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
      --dtype=half
    # Alternative quantized 70B configurations:
    # cmd: >
    #   --model TechxGenus/Meta-Llama-3-70B-AWQ
    #   --tokenizer TechxGenus/Meta-Llama-3-70B-AWQ
    #   -q=awq
    # cmd: >
    #   --model mgoin/Meta-Llama-3-70B-Instruct-Marlin
    #   --tokenizer mgoin/Meta-Llama-3-70B-Instruct-Marlin
    #   --dtype=half
    #   -q=marlin
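
  # Once running, the vllm job exposes an OpenAI-compatible API; a smoke test
  # from a job on the same cluster network (hostname `vllm` is an assumption,
  # the real value comes from `internal_hostname_named`):
  #
  #   curl http://vllm:8000/v1/models
  #   curl http://vllm:8000/v1/completions -H 'Content-Type: application/json' \
  #     -d '{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "prompt": "Hello", "max_tokens": 16}'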

  ollama:
    image: ollama/ollama:0.1.35
    volumes:
      - ${{ volumes.ollama_models.ref_rw }}
    preset: H100x1
    detach: true
    env:
      MODEL: "nomic-embed-text"
    http_port: "11434"
    entrypoint: "bash -c 'ollama serve & sleep 10 && ollama pull ${MODEL} && sleep infinity'"
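
  # The entrypoint backgrounds the Ollama server, waits a fixed 10 s, pulls the
  # embedding model, then blocks to keep the job alive. A more robust sketch
  # (not in the original) polls the server instead of sleeping:
  #
  #   bash -c 'ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done; ollama pull ${MODEL} && sleep infinity'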

  pgvector:
    image: pgvector/pgvector:pg16
    detach: true
    preset: cpu-small
    env:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: postgres
      PGDATA: ${{ volumes.pgdata.mount }}/pgdata
    volumes:
      - ${{ volumes.pgdata.ref_rw }}
      # - ${{ volumes.pgdata_onprem.ref_rw }}
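
  # PGDATA points at a subdirectory of the mount because postgres refuses to
  # initialize a data directory that is not empty (the volume root may carry
  # sync metadata). The pgvector image ships the extension; if the application
  # does not enable it itself, run the standard bootstrap once:
  #
  #   CREATE EXTENSION IF NOT EXISTS vector;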