build: Make target improvements (ollama#7499)

* llama: wire up builtin runner

  This adds a new entrypoint into the ollama CLI to run the cgo-built runner. On Mac arm64, this will have GPU support, but on all other platforms it will be the lowest-common-denominator CPU build. After we fully transition to the new Go runners, more tech-debt can be removed and we can stop building the "default" runner via make and rely on the builtin always.

* build: Make target improvements

  Add a few new targets and help for building locally. This also adjusts the runner lookup to favor local builds, then runners relative to the executable, and finally payloads.

* Support customized CPU flags for runners

  This implements a simplified custom CPU flags pattern for the runners. When built without overrides, the runner name contains the vector flag we check for (AVX) to ensure we don't try to run on unsupported systems and crash. If the user builds a customized set, we omit the naming scheme and don't check for compatibility. This avoids checking requirements at runtime, so that logic has been removed as well. This can be used to build GPU runners with no vector flags, or CPU/GPU runners with additional flags (e.g. AVX512) enabled.

* Use relative paths

  If the user checks out the repo in a path that contains spaces, make gets really confused, so use relative paths for everything in-repo to avoid breakage.

* Remove payloads from main binary

* install: clean up prior libraries

  This removes support for v0.3.6 and older versions (before the tar bundle) and ensures we clean up prior libraries before extracting the bundle(s). Without this change, runners and dependent libraries could leak when we update and lead to subtle runtime errors.
buyaka · Dec 10, 2024 · 4879a23 · 4879a23
1 parent 6326966
commit 4879a23
Show file tree

Hide file tree

Showing 58 changed files with 873 additions and 1,164 deletions.
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -85,13 +85,12 @@ jobs:
           import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
           Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
           if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make
+          make dist
         name: make
       - uses: actions/upload-artifact@v4
         with:
           name: generate-windows-cpu
           path: |
-            build/**/*
             dist/windows-amd64/**
 
   # ROCm generation step
@@ -143,13 +142,12 @@ jobs:
           import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
           Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
           if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -C llama print-HIP_PATH print-HIP_LIB_DIR
-          make rocm
+          make help-runners
+          make dist_rocm
       - uses: actions/upload-artifact@v4
         with:
           name: generate-windows-rocm
           path: |
-            build/**/*
             dist/windows-amd64/**
 
   # CUDA generation step
@@ -226,12 +224,11 @@ jobs:
           import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
           Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
           if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
+          make dist_cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
       - uses: actions/upload-artifact@v4
         with:
           name: generate-windows-cuda-${{ matrix.cuda.version }}
           path: |
-            build/**/*
             dist/windows-amd64/**
 
   # windows arm64 generate, go build, and zip file (no installer)

diff --git a/.gitignore b/.gitignore
@@ -10,9 +10,6 @@ ollama
 .idea
 test_data
 *.crt
-llm/build
-build/*/*/*
-!build/**/placeholder
 llama/build
 __debug_bin*
 llama/vendor
diff --git a/Dockerfile b/Dockerfile
@@ -1,9 +1,6 @@
 ARG GOLANG_VERSION=1.22.8
-ARG CMAKE_VERSION=3.22.1
 ARG CUDA_VERSION_11=11.3.1
-ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
 ARG CUDA_VERSION_12=12.4.0
-ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
 ARG ROCM_VERSION=6.1.2
 ARG JETPACK_6=r36.2.0
 ARG JETPACK_5=r35.4.1
@@ -15,24 +12,22 @@ ARG JETPACK_5=r35.4.1
 #
 ### Then incremental builds will be much faster in this container
 #
-# make -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
+# make -j 10 dist
 #
 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
-ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 ARG CUDA_VERSION_11
 ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
 ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
+RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
     dnf clean all && \
     dnf install -y \
     zsh \
-    cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
+    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
+    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
 # TODO intel oneapi goes here...
 ENV GOARCH amd64
 ENV CGO_ENABLED 1
@@ -46,12 +41,11 @@ ENTRYPOINT [ "zsh" ]
 # docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
 #
 FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
-ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 ARG CUDA_VERSION_11
 ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
+RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
     dnf config-manager --set-enabled appstream && \
     dnf clean all && \
@@ -62,37 +56,28 @@ RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
 ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
 ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-ENV GOARCH amd64
+ENV GOARCH arm64
 ENV CGO_ENABLED 1
 WORKDIR /go/src/github.com/ollama/ollama/
 ENTRYPOINT [ "zsh" ]
 
-FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
+FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
 COPY . .
 ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_CUDA_11_GENERATE
-ARG OLLAMA_SKIP_CUDA_12_GENERATE
 ARG OLLAMA_SKIP_ROCM_GENERATE
-ARG CUDA_V11_ARCHITECTURES
-ARG CUDA_V12_ARCHITECTURES
 ARG OLLAMA_FAST_BUILD
 RUN --mount=type=cache,target=/root/.ccache \
     if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
-        make -j $(expr $(nproc) / 2 ) ; \
+        make -j $(expr $(nproc) / 2 ) dist ; \
     else \
-        make -j 5 ; \
+        make -j 5 dist ; \
+    fi
+RUN cd dist/linux-$GOARCH && \
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
+RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
+    cd dist/linux-$GOARCH-rocm && \
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
     fi
-
-FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_CUDA_11_GENERATE
-ARG OLLAMA_SKIP_CUDA_12_GENERATE
-ARG CUDA_V11_ARCHITECTURES
-ARG CUDA_V12_ARCHITECTURES
-ARG OLLAMA_FAST_BUILD
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5
 
 # Jetsons need to be built in discrete stages
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
@@ -107,10 +92,9 @@ COPY . .
 ARG CGO_CFLAGS
 ENV GOARCH arm64
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 cuda_v11 \
+    make -j 5 dist_cuda_v11 \
         CUDA_ARCHITECTURES="72;87" \
         GPU_RUNNER_VARIANT=_jetpack5 \
-        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
         DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
         DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
 
@@ -126,69 +110,26 @@ COPY . .
 ARG CGO_CFLAGS
 ENV GOARCH arm64
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 cuda_v12 \
+    make -j 5 dist_cuda_v12 \
         CUDA_ARCHITECTURES="87" \
         GPU_RUNNER_VARIANT=_jetpack6 \
-        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
         DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
         DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
 
-
-# Intermediate stages used for ./scripts/build_linux.sh
-FROM --platform=linux/amd64 centos:7 AS builder-amd64
-ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV CGO_ENABLED 1
-ENV GOARCH amd64
-WORKDIR /go/src/github.com/ollama/ollama
-
-FROM --platform=linux/amd64 builder-amd64 AS build-amd64
+FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
 COPY . .
-COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
-ARG GOFLAGS
-ARG CGO_CFLAGS
-ARG OLLAMA_SKIP_ROCM_GENERATE
+ARG OLLAMA_SKIP_CUDA_GENERATE
+ARG OLLAMA_FAST_BUILD
 RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-amd64/bin/ollama .
-RUN cd dist/linux-$GOARCH && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
-    cd dist/linux-$GOARCH-rocm && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
-    fi
-
-FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
-ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
-ENV CGO_ENABLED 1
-ENV GOARCH arm64
-WORKDIR /go/src/github.com/ollama/ollama
-
-FROM --platform=linux/arm64 builder-arm64 AS build-arm64
-COPY . .
-COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
+    make -j 5 dist
 COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/build/ build/
 COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/build/ build/
-ARG GOFLAGS
-ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-arm64/bin/ollama .
 RUN cd dist/linux-$GOARCH && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
 RUN cd dist/linux-$GOARCH-jetpack5 && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
 RUN cd dist/linux-$GOARCH-jetpack6 && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
 
 FROM --platform=linux/amd64 scratch AS dist-amd64
 COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
@@ -197,30 +138,13 @@ COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz
 FROM dist-$TARGETARCH AS dist
 
 
-# Optimized container images do not cary nested payloads
-FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
-WORKDIR /go/src/github.com/ollama/ollama
-COPY . .
-ARG GOFLAGS
-ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-amd64/bin/ollama .
-
-FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
-WORKDIR /go/src/github.com/ollama/ollama
-COPY . .
-ARG GOFLAGS
-ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-arm64/bin/ollama .
-
 # For amd64 container images, filter out cuda/rocm to minimize size
-FROM runners-amd64 AS runners-cuda-amd64
+FROM build-amd64 AS runners-cuda-amd64
 RUN rm -rf \
     ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
     ./dist/linux-amd64/lib/ollama/runners/rocm*
 
-FROM runners-amd64 AS runners-rocm-amd64
+FROM build-amd64 AS runners-rocm-amd64
 RUN rm -rf \
     ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
     ./dist/linux-amd64/lib/ollama/libcu*.so* \
@@ -230,15 +154,15 @@ FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
 RUN apt-get update && \
     apt-get install -y ca-certificates && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
 COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
 
 FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
 RUN apt-get update && \
     apt-get install -y ca-certificates && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
 COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
 COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
 
@@ -251,7 +175,7 @@ COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/l
 RUN apt-get update && \
     apt-get install -y ca-certificates && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
 COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
 
 EXPOSE 11434