diff --git a/Dockerfile.gpu b/Dockerfile.gpu
index 461fd87500..f67990afd9 100644
--- a/Dockerfile.gpu
+++ b/Dockerfile.gpu
@@ -2,7 +2,7 @@
 # Run nvidia-smi to check the CUDA version and the corresponding driver version
 # Then update the base image to the appropriate CUDA version refer https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
 
-FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 AS base
+FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base
 
 # 1. Install dependencies only when needed
 FROM base AS builder
@@ -73,7 +73,7 @@ RUN npm install -g serve@latest
 
 EXPOSE 1337 3000 3928
 
-ENV LD_LIBRARY_PATH=/usr/local/cuda-12.0/targets/x86_64-linux/lib:/usr/local/cuda-12.0/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+ENV LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda-12.0/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
 
 ENV JAN_API_HOST 0.0.0.0
 ENV JAN_API_PORT 1337
diff --git a/README.md b/README.md
index 934fa2da83..e839f37cb6 100644
--- a/README.md
+++ b/README.md
@@ -236,7 +236,7 @@ This will build the app MacOS m1/m2 for production (with code signing already do
   - **Option 1**: Run Jan in CPU mode
 
     ```bash
-    docker compose --profile cpu up
+    docker compose --profile cpu up -d
    ```
 
   - **Option 2**: Run Jan in GPU mode
@@ -274,18 +274,19 @@ This will build the app MacOS m1/m2 for production (with code signing already do
    |=======================================================================================|
    ```
 
-  - **Step 2**: Go to https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags and find the lates minor version of image tag that matches the cuda version from the output of `nvidia-smi` (e.g. 12.1 -> 12.1.0)
+  - **Step 2**: Go to https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags and find the smallest minor version of the image tag that matches the CUDA version from the output of `nvidia-smi` (e.g. 12.1 -> 12.1.0)
 
-  - **Step 3**: Update the `Dockerfile.gpu` line number 5 with the latest minor version of the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`)
+  - **Step 3**: Update line 5 of `Dockerfile.gpu` with the image tag from step 2 (e.g. change `FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base` to `FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base`)
 
   - **Step 4**: Run command to start Jan in GPU mode
 
     ```bash
     # GPU mode
-    docker compose --profile gpu up
+    docker compose --profile gpu up -d
    ```
 
   This will start the web server and you can access Jan at `http://localhost:3000`.
+  > Note: Currently, Docker mode only works for development on localhost; production is not supported yet. The RAG feature is not supported in Docker mode yet.
 
 ## Acknowledgements
 
diff --git a/docker-compose.yml b/docker-compose.yml
index fd0f44096d..4195a32940 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,9 @@
+# Docker Compose file for setting up Minio, createbuckets, app_cpu, and app_gpu services
+
 version: '3.7'
 
 services:
+  # Minio service for object storage
   minio:
     image: minio/minio
     volumes:
@@ -9,6 +12,7 @@ services:
       - "9000:9000"
       - "9001:9001"
     environment:
+      # Set the root user and password for Minio
       MINIO_ROOT_USER: minioadmin # This acts as AWS_ACCESS_KEY
       MINIO_ROOT_PASSWORD: minioadmin # This acts as AWS_SECRET_ACCESS_KEY
     command: server --console-address ":9001" /data
@@ -22,6 +26,7 @@ services:
       vpcbr:
         ipv4_address: 10.5.0.2
 
+  # createbuckets service to create a bucket and set its policy
   createbuckets:
     image: minio/mc
     depends_on:
@@ -36,7 +41,7 @@ services:
     networks:
       vpcbr:
 
-
+  # app_cpu service for running the CPU version of the application
   app_cpu:
     image: jan:latest
     volumes:
@@ -45,6 +50,7 @@ services:
       context: .
       dockerfile: Dockerfile
     environment:
+      # Set the AWS access key, secret access key, bucket name, endpoint, and region for app_cpu
       AWS_ACCESS_KEY_ID: minioadmin
       AWS_SECRET_ACCESS_KEY: minioadmin
       S3_BUCKET_NAME: mybucket
@@ -60,8 +66,8 @@ services:
     networks:
       vpcbr:
         ipv4_address: 10.5.0.3
 
-
+  # app_gpu service for running the GPU version of the application
   app_gpu:
     deploy:
       resources:
@@ -78,12 +84,12 @@ services:
       dockerfile: Dockerfile.gpu
     restart: always
     environment:
+      # Set the AWS access key, secret access key, bucket name, endpoint, and region for app_gpu
       AWS_ACCESS_KEY_ID: minioadmin
       AWS_SECRET_ACCESS_KEY: minioadmin
       S3_BUCKET_NAME: mybucket
       AWS_ENDPOINT: http://10.5.0.2:9000
       AWS_REGION: us-east-1
-
     profiles:
       - gpu
     ports:
@@ -106,5 +112,6 @@ networks:
       - subnet: 10.5.0.0/16
         gateway: 10.5.0.1
 
-# docker compose --profile cpu up
-# docker compose --profile gpu up
+# Usage:
+# - Run 'docker compose --profile cpu up -d' to start the app_cpu service
+# - Run 'docker compose --profile gpu up -d' to start the app_gpu service
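
After `docker compose --profile gpu up -d`, a quick way to sanity-check the Dockerfile.gpu changes is to look inside the running container. This is a minimal sketch, not part of the diff: it assumes the NVIDIA Container Toolkit is installed on the host, and it reuses the `app_gpu` service name, port 3000, and the `LD_LIBRARY_PATH` value exactly as they appear in docker-compose.yml and Dockerfile.gpu.

```bash
# Sketch only: verify the GPU-mode container after `docker compose --profile gpu up -d`.
# Assumes the NVIDIA Container Toolkit is installed on the host.

# The driver/CUDA version reported here should match the base image chosen in Dockerfile.gpu
docker compose --profile gpu exec app_gpu nvidia-smi

# Print the LD_LIBRARY_PATH that Dockerfile.gpu now sets (versionless /usr/local/cuda path)
docker compose --profile gpu exec app_gpu bash -c 'echo "$LD_LIBRARY_PATH"'

# The web server should answer on port 3000 once the container is up
curl -I http://localhost:3000
```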
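
The Minio wiring in docker-compose.yml (root credentials `minioadmin`/`minioadmin`, bucket `mybucket`, port 9000 published to the host) can likewise be checked from the host. The snippet below is an illustration only, assuming the AWS CLI is installed; the credential, region, bucket, and endpoint values are taken directly from the compose file.

```bash
# Sketch only: list the bucket that the createbuckets service provisions.
# Assumes the AWS CLI is installed; credentials/bucket/port come from docker-compose.yml.
AWS_ACCESS_KEY_ID=minioadmin \
AWS_SECRET_ACCESS_KEY=minioadmin \
AWS_DEFAULT_REGION=us-east-1 \
aws --endpoint-url http://localhost:9000 s3 ls s3://mybucket
```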