Skip to content

Commit

Permalink
feat: quantize llm on cpu by ggml (apecloud#4781)
Browse files Browse the repository at this point in the history
  • Loading branch information
lynnleelhl authored Aug 23, 2023
1 parent c855898 commit 86e4f88
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 0 deletions.
24 changes: 24 additions & 0 deletions deploy/ggml/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
apiVersion: v2
name: ggml
description: KubeBlocks ClusterDefinition and ClusterVersion for serving GGML-quantized LLMs on CPU

# Chart type. An 'application' chart ships templates that are rendered and
# deployed; a 'library' chart only supplies helpers to other charts and cannot
# be installed by itself.
type: application

# Version of the chart itself (Semantic Versioning, https://semver.org/).
# Bump it whenever the chart or any of its templates change. It is rendered
# into the helm.sh/chart label by the "llm.chart" helper.
version: 0.1.0

# Version of the application packaged by this chart. Kept quoted so it is
# always read as a string. It is rendered into the app.kubernetes.io/version
# label and is the fallback image tag when image.tag is not set in the values.
# NOTE(review): "1.16.0" looks like the `helm create` scaffold default rather
# than a release of the served image - confirm and align with a real tag.
appVersion: "1.16.0"
Empty file added deploy/ggml/templates/NOTES.txt
Empty file.
33 changes: 33 additions & 0 deletions deploy/ggml/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{/*
Short name of the chart: .Values.nameOverride when it is set, otherwise the
chart's own name. The result is cut to 63 characters and a trailing "-" is
removed.
*/}}
{{- define "llm.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Value for the helm.sh/chart label: "<chart name>-<chart version>", with every
"+" replaced by "_", cut to 63 characters and stripped of a trailing "-".
*/}}
{{- define "llm.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Full label set for chart resources: the chart label, the selector labels, the
application version (only when the chart declares an appVersion) and the
managing service.
*/}}
{{- define "llm.labels" -}}
helm.sh/chart: {{ include "llm.chart" . }}
{{ include "llm.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Labels that identify this chart and release: the chart short name from
"llm.name" and the Helm release name.
*/}}
{{- define "llm.selectorLabels" -}}
{{- $shortName := include "llm.name" . -}}
app.kubernetes.io/name: {{ $shortName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
36 changes: 36 additions & 0 deletions deploy/ggml/templates/clusterdefinition.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# ClusterDefinition "ggml": a single stateful component that exposes a model
# endpoint on port 8000 and mounts a shared "models" volume.
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterDefinition
metadata:
  name: ggml
  labels:
    {{- include "llm.labels" . | nindent 4 }}
spec:
  componentDefs:
    - name: ggml
      workloadType: Stateful
      characterType: ggml
      service:
        ports:
          # targetPort refers to the container port named "model" below.
          - name: model
            port: 8000
            targetPort: model
      podSpec:
        volumes:
          # Scratch volume for the model file; the ClusterVersion's init
          # container copies the model into it before the server starts.
          - name: models
            emptyDir: {}
        containers:
          # The image for this container is supplied by the ClusterVersion.
          - name: ggml
            # Sprig's default takes the fallback first and the value second.
            # The previous argument order always produced "IfNotPresent" and
            # ignored image.pullPolicy from the chart values.
            imagePullPolicy: {{ default "IfNotPresent" .Values.image.pullPolicy | quote }}
            securityContext:
              # UID 0: the container runs as root.
              runAsUser: 0
            terminationMessagePath: /dev/termination-log
            terminationMessagePolicy: File
            volumeMounts:
              - name: models
                mountPath: /models
            ports:
              - name: model
                containerPort: 8000
  # NOTE(review): root with an empty password looks like a placeholder
  # credential - confirm that consumers do not rely on it.
  connectionCredential:
    username: root
    password: ""
27 changes: 27 additions & 0 deletions deploy/ggml/templates/clusterversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# ClusterVersion for the "ggml" ClusterDefinition that bundles the
# baichuan-llama-7b model in ggmlv3 q4_0 format.
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
  name: ggml-baichuan-7b-q4
  labels:
    {{- include "llm.labels" . | nindent 4 }}
spec:
  clusterDefinitionRef: ggml
  componentVersions:
    - componentDefRef: ggml
      versionsContext:
        initContainers:
          # Copies the model file shipped inside the model image into the
          # shared "models" volume, mounted here at /models-target.
          - name: download
            image: apecloud/baichuan-llama-7b:ggmlv3.q4_0
            command:
              - "sh"
              - "-c"
              - "cp /models/baichuan-llama-7b.ggmlv3.q4_0.bin /models-target/"
            volumeMounts:
              - name: models
                mountPath: /models-target
        containers:
          - name: ggml
            # Registry falls back to docker.io and the tag falls back to the
            # chart appVersion when the corresponding value is not set.
            image: {{ default "docker.io" .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}
            env:
              # Path of the model file as seen through the /models mount.
              - name: MODEL
                value: /models/baichuan-llama-7b.ggmlv3.q4_0.bin
            volumeMounts:
              - name: models
                mountPath: /models
6 changes: 6 additions & 0 deletions deploy/ggml/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Container image settings for the ggml server container.
image:
  # Registry host that prefixes the repository; the ClusterVersion template
  # falls back to docker.io when this is empty.
  registry: docker.io
  repository: apecloud/llama-cpp-python
  # Image tag. When left empty, the chart appVersion is used instead.
  tag: latest
  # Kubernetes image pull policy.
  pullPolicy: IfNotPresent

0 comments on commit 86e4f88

Please sign in to comment.