diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx
index b41c1e6..26fc201 100644
--- a/fern/docs/pages/installation/installation.mdx
+++ b/fern/docs/pages/installation/installation.mdx
@@ -137,6 +137,8 @@ Follow these steps to set up a local TensorRT-powered PrivateGPT:
 
 - Nvidia Cuda 12.2 or higher is currently required to run TensorRT-LLM.
 
+- Install `tensorrt_llm` via pip with `pip install --no-cache-dir --extra-index-url https://pypi.nvidia.com tensorrt-llm`, as explained [here](https://pypi.org/project/tensorrt-llm/).
+
 - For this example we will use Llama2. The Llama2 model files need to be created via scripts following the instructions [here](https://github.com/NVIDIA/trt-llm-rag-windows/blob/release/1.0/README.md#building-trt-engine).
 The following files will be created from following the steps in the link:
 
diff --git a/poetry.lock b/poetry.lock
index dbe2822..ebe0ffb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4906,16 +4906,6 @@ files = [
 [package.extras]
 doc = ["reno", "sphinx", "tornado (>=4.5)"]
 
-[[package]]
-name = "tensorrt-llm"
-version = "0.8.0"
-description = "A fake package to warn the user they are not installing the correct package."
-optional = true
-python-versions = ">=3.7, <4"
-files = [
-    {file = "tensorrt-llm-0.8.0.tar.gz", hash = "sha256:8bd59bf59766bb16f81bd330ca38765a532a21a35d323fd33929c80a6ec53eaf"},
-]
-
 [[package]]
 name = "tiktoken"
 version = "0.5.2"
@@ -5971,7 +5961,7 @@ embeddings-huggingface = ["llama-index-embeddings-huggingface"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
-llms-nvidia-tensorrt = ["llama-index-llms-nvidia-tensorrt", "tensorrt_llm"]
+llms-nvidia-tensorrt = ["llama-index-llms-nvidia-tensorrt"]
 llms-ollama = ["llama-index-llms-ollama"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
@@ -5984,4 +5974,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.12"
-content-hash = "da01e96bb8eb18aa3b6608cf60384771ad674b6ec7a26a685a62274c0302c8f9"
+content-hash = "39f0ac666402807cde29f763c14dfb6b2fc9862c0cd31de398c67a1fedbb4b12"
diff --git a/pyproject.toml b/pyproject.toml
index 5db0d63..82c8775 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,8 +29,6 @@ llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true}
 llama-index-llms-nvidia-tensorrt = {version ="^0.1.2", optional = true}
 # Optional Sagemaker dependency
 boto3 = {version ="^1.34.51", optional = true}
-# Optional Nvidia TensorRT dependency
-tensorrt_llm = {version ="^0.8.0", optional = true}
 # Optional UI
 gradio = {version ="^4.19.2", optional = true}
 
@@ -41,7 +39,7 @@ llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
 llms-ollama = ["llama-index-llms-ollama"]
 llms-sagemaker = ["boto3"]
-llms-nvidia-tensorrt = ["tensorrt_llm", "llama-index-llms-nvidia-tensorrt"]
+llms-nvidia-tensorrt = ["llama-index-llms-nvidia-tensorrt"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
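
This change drops `tensorrt_llm` from the project's optional dependencies, because the `tensorrt-llm` package on the default PyPI index is only a placeholder that warns users they are installing the wrong thing (see the removed poetry.lock entry above); the real wheels live on Nvidia's index, so users of the `llms-nvidia-tensorrt` extra now install them by hand per the new docs step. A minimal sketch of the resulting setup flow; the import check and the `poetry install` invocation are illustrative assumptions, not part of this diff:

```bash
# Pull the real tensorrt-llm wheels from Nvidia's package index; the copy on
# the default PyPI index is a fake package that only prints a warning.
pip install --no-cache-dir --extra-index-url https://pypi.nvidia.com tensorrt-llm

# Illustrative sanity check that the genuine package (not the placeholder)
# was installed and imports cleanly:
python -c "import tensorrt_llm; print(tensorrt_llm.__version__)"

# Then install PrivateGPT with the TensorRT extra, which after this change
# pulls in only the llama-index binding (extras name taken from pyproject.toml):
poetry install --extras llms-nvidia-tensorrt
```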