# private-gpt/.neuro/live.yaml
kind: live
title: private-gpt
# other application files are taken from https://github.com/zylon-ai/private-gpt
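
# Live workflow for the neuro-flow platform. It builds the PrivateGPT image
# and runs four cooperating jobs: the PrivateGPT web app (pgpt), a vLLM server
# for chat completions, an Ollama server for embeddings, and a pgvector
# Postgres instance as the vector store. defaults.life_span below caps every
# job at five days of runtime.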
defaults:
  life_span: 5d
images:
  privategpt:
    ref: image:$[[ project.id ]]:v1
    dockerfile: $[[ flow.workspace ]]/Dockerfile.external
    context: $[[ flow.workspace ]]/
    build_preset: cpu-large
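
# Each volume maps a storage folder (remote) to a mount point inside the job
# container (mount); local names the workspace folder used when syncing with
# `neuro-flow upload` / `neuro-flow download`. pgdata_onprem is backed by a
# persistent disk instead and therefore has no local counterpart.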
volumes:
  cache:
    remote: storage:$[[ flow.project_id ]]/cache
    mount: /root/.cache/huggingface
    local: cache
  data:
    remote: storage:$[[ flow.project_id ]]/data
    mount: /home/worker/app/local_data
    local: local_data
  pgdata:
    remote: storage:$[[ flow.project_id ]]/pgdata
    mount: /var/lib/postgresql/data
    local: pgdata
  pgdata_onprem:
    remote: disk:pgdata
    mount: /var/lib/postgresql/data
  ollama_models:
    remote: storage:$[[ flow.project_id ]]/ollama_models
    mount: /root/.ollama
    local: models
  project:
    remote: storage:$[[ flow.project_id ]]
    mount: /project
    local: .
  settings:
    remote: storage:$[[ flow.project_id ]]/settings
    mount: /home/worker/app/settings
    local: settings
  tiktoken_cache:
    remote: storage:$[[ flow.project_id ]]/tiktoken_cache
    mount: /home/worker/app/tiktoken_cache
    local: tiktoken_cache
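
# pgpt discovers its sibling services at runtime: inspect_job('...') returns
# the live job's description and internal_hostname_named its stable in-cluster
# DNS name, so vllm, ollama and pgvector must already be running when pgpt
# starts. upload(volumes.settings) pushes the local settings folder to storage
# before mounting it.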
jobs:
  pgpt:
    image: ${{ images.privategpt.ref }}
    name: pgpt
    preset: cpu-small
    http_port: "8080"
    # detach: true
    browse: true
    volumes:
      - ${{ volumes.data.ref_rw }}
      - ${{ upload(volumes.settings).ref_rw }}
      - ${{ volumes.tiktoken_cache.ref_rw }}
    env:
      PORT: 8080
      PGPT_PROFILES: vllm-pgvector
      PGPT_SETTINGS_FOLDER: ${{ volumes.settings.mount }}
      VLLM_API_BASE: http://${{ inspect_job('vllm').internal_hostname_named }}:8000/v1
      OLLAMA_API_BASE: http://${{ inspect_job('ollama').internal_hostname_named }}:11434
      POSTGRES_HOST: ${{ inspect_job('pgvector').internal_hostname_named }}
      VLLM_MODEL: meta-llama/Meta-Llama-3.1-8B-Instruct
      VLLM_TOKENIZER: meta-llama/Meta-Llama-3.1-8B-Instruct
      HUGGINGFACE_TOKEN: secret:HF_TOKEN
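
  # vllm exposes an OpenAI-compatible API on port 8000; the cmd block is passed
  # to the server as engine arguments (--dtype=half selects fp16 weights, and
  # HF_TOKEN is needed to download the gated Llama checkpoints). The commented
  # cmd variants below are alternative quantized 70B configurations; the cache
  # volume keeps downloaded weights across restarts.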
  vllm:
    image: vllm/vllm-openai:v0.6.1.post2
    name: vllm
    preset: H100x1
    detach: true
    http_port: "8000"
    volumes:
      - ${{ volumes.cache.ref_rw }}
    env:
      HF_TOKEN: secret:HF_TOKEN
    cmd: >
      --model meta-llama/Meta-Llama-3.1-8B-Instruct
      --tokenizer meta-llama/Meta-Llama-3.1-8B-Instruct
      --dtype=half
    # cmd: >
    #   --model TechxGenus/Meta-Llama-3-70B-AWQ
    #   --tokenizer TechxGenus/Meta-Llama-3-70B-AWQ
    #   -q=awq
    # cmd: >
    #   --model mgoin/Meta-Llama-3-70B-Instruct-Marlin
    #   --tokenizer mgoin/Meta-Llama-3-70B-Instruct-Marlin
    #   --dtype=half
    #   -q=marlin
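
  # ollama serves the embedding model that pgpt reaches via OLLAMA_API_BASE.
  # The entrypoint starts the server in the background, waits ten seconds for
  # it to come up, pulls $MODEL, then sleeps forever to keep the job alive.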
  ollama:
    image: ollama/ollama:0.1.35
    volumes:
      - ${{ volumes.ollama_models.ref_rw }}
    preset: H100x1
    detach: true
    env:
      MODEL: "nomic-embed-text"
    http_port: "11434"
    entrypoint: "bash -c 'ollama serve & sleep 10 && ollama pull ${MODEL} && sleep infinity'"
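
  # pgvector runs Postgres 16 with the pgvector extension as the vector store.
  # PGDATA points at a subdirectory of the mounted volume, presumably because
  # initdb refuses to run in a non-empty mount root; switch to pgdata_onprem
  # to keep the database on a persistent disk instead of storage.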
  pgvector:
    image: pgvector/pgvector:pg16
    detach: true
    preset: cpu-small
    env:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: postgres
      PGDATA: ${{ volumes.pgdata.mount }}/pgdata
    volumes:
      - ${{ volumes.pgdata.ref_rw }}
      # - ${{ volumes.pgdata_onprem.ref_rw }}
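
# Typical bring-up order (a sketch; assumes the neuro-flow CLI):
#   neuro-flow build privategpt
#   neuro-flow run pgvector
#   neuro-flow run vllm
#   neuro-flow run ollama
#   neuro-flow run pgpt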