Skip to content

Commit

Permalink
optimize for default setting in alluxio (fluid-cloudnative#172)
Browse files Browse the repository at this point in the history
* Set default value, to #30565662

* Add test for setting default value, to #30565662

* Add test for setting default value, to #30565662

* Add test for setting default value, to #30565662

* Add test for setting default value, to #30565662

* Add test for setting default value, to #30565662

* fix worker issue, to #30421063
  • Loading branch information
cheyang authored Sep 20, 2020
1 parent fed68f1 commit b09c902
Show file tree
Hide file tree
Showing 12 changed files with 626 additions and 253 deletions.
5 changes: 5 additions & 0 deletions charts/alluxio/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,8 @@
- move passwdpath to init-user.passwdpath
- add init container for fuse

0.6.15

- Optimize for default mode
- remove default limits

40 changes: 20 additions & 20 deletions charts/alluxio/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,11 @@ master:
resources:
# The default xmx is 8G
limits:
cpu: "4"
memory: "8G"
# cpu: "4"
# memory: "8G"
requests:
cpu: "1"
memory: "1G"
# cpu: "1"
# memory: "1G"
ports:
embedded: 19200
rpc: 19998
Expand All @@ -100,11 +100,11 @@ jobMaster:
properties:
resources:
limits:
cpu: "4"
memory: "8G"
# cpu: "4"
# memory: "8G"
requests:
cpu: "1"
memory: "1G"
# cpu: "1"
# memory: "1G"
ports:
embedded: 20003
rpc: 20001
Expand Down Expand Up @@ -167,11 +167,11 @@ worker:
properties:
resources:
limits:
cpu: "4"
memory: "4G"
# cpu: "4"
# memory: "4G"
requests:
cpu: "1"
memory: "2G"
# cpu: "1"
# memory: "2G"
ports:
rpc: 29999
web: 30000
Expand All @@ -193,11 +193,11 @@ jobWorker:
properties:
resources:
limits:
cpu: "4"
memory: "4G"
# cpu: "4"
# memory: "4G"
requests:
cpu: "1"
memory: "1G"
# cpu: "1"
# memory: "1G"
ports:
rpc: 30001
data: 30002
Expand Down Expand Up @@ -303,11 +303,11 @@ fuse:
mountPath: /mnt/alluxio-fuse
resources:
requests:
cpu: "0.5"
memory: "1G"
# cpu: "0.5"
# memory: "1G"
limits:
cpu: "4"
memory: "4G"
# cpu: "4"
# memory: "4G"


## Secrets ##
Expand Down
5 changes: 2 additions & 3 deletions charts/fluid/fluid/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
# Declare variables to be passed into your templates.

controller:
image: registry.cn-hangzhou.aliyuncs.com/fluid/runtime-controller:v0.3.0-e48ebca
image: registry.cn-hangzhou.aliyuncs.com/fluid/runtime-controller:v0.3.0-26699d3


csi:
registrar:
image: registry.cn-hangzhou.aliyuncs.com/acs/csi-node-driver-registrar:v1.2.0
plugins:
image: registry.cn-hangzhou.aliyuncs.com/fluid/fluid-csi:v0.3.0-e48ebca

image: registry.cn-hangzhou.aliyuncs.com/fluid/fluid-csi:v0.3.0-26699d3
10 changes: 8 additions & 2 deletions docs/zh/samples/hostpath.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ usermod -a -G root fluid-user-1
chown -R fluid-user-1:fluid-user-1 /mnt/test1
```

### 在这些缓存节点中,给本地的缓存目录赋予写权限

```
chmod -R 777 /var/lib/docker/alluxio
```

### 给这样的节点打label

```
Expand Down Expand Up @@ -75,8 +81,8 @@ spec:
replicas: 2
tieredstore:
levels:
- mediumtype: MEM
path: /dev/shm
- mediumtype: SSD
path: /var/lib/docker/alluxio
quota: 2Gi
high: "0.95"
low: "0.7"
Expand Down
98 changes: 11 additions & 87 deletions pkg/ddc/alluxio/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,8 @@ import (
"strings"

datav1alpha1 "github.com/fluid-cloudnative/fluid/api/v1alpha1"
"github.com/fluid-cloudnative/fluid/pkg/common"
"github.com/fluid-cloudnative/fluid/pkg/utils"
"github.com/fluid-cloudnative/fluid/pkg/utils/tieredstore"
corev1 "k8s.io/api/core/v1"

"k8s.io/apimachinery/pkg/api/resource"
)

func (e *AlluxioEngine) transform(runtime *datav1alpha1.AlluxioRuntime) (value *Alluxio, err error) {
Expand Down Expand Up @@ -201,9 +197,8 @@ func (e *AlluxioEngine) transformMasters(runtime *datav1alpha1.AlluxioRuntime, v
// if len(runtime.Spec.Master.JvmOptions) > 0 {
// value.Master.JvmOptions = strings.Join(runtime.Spec.Master.JvmOptions, " ")
// }
if len(value.Master.JvmOptions) > 0 {
value.Master.JvmOptions = runtime.Spec.Master.JvmOptions
}

e.optimizeDefaultForMaster(runtime, value)

if len(runtime.Spec.Master.Env) > 0 {
value.Master.Env = runtime.Spec.Master.Env
Expand Down Expand Up @@ -233,9 +228,7 @@ func (e *AlluxioEngine) transformMasters(runtime *datav1alpha1.AlluxioRuntime, v
// 3. Transform the workers
func (e *AlluxioEngine) transformWorkers(runtime *datav1alpha1.AlluxioRuntime, value *Alluxio) (err error) {
value.Worker = Worker{}
if len(runtime.Spec.Worker.JvmOptions) > 0 {
value.Worker.JvmOptions = runtime.Spec.Worker.JvmOptions
}
e.optimizeDefaultForWorker(runtime, value)

// labelName := common.LabelAnnotationStorageCapacityPrefix + e.runtimeType + "-" + e.name
labelName := e.getCommonLabelname()
Expand Down Expand Up @@ -267,40 +260,7 @@ func (e *AlluxioEngine) transformWorkers(runtime *datav1alpha1.AlluxioRuntime, v

value.Worker.HostNetwork = true

value.Worker.Resources = utils.TransformRequirementsToResources(runtime.Spec.Worker.Resources)

storageMap := tieredstore.GetLevelStorageMap(runtime)

e.Log.Info("transformWorkers", "storageMap", storageMap)

// TODO(iluoeli): it should be xmx + direct memory
memLimit := resource.MustParse("20Gi")
if quantity, exists := runtime.Spec.Worker.Resources.Limits[corev1.ResourceMemory]; exists && !quantity.IsZero() {
memLimit = quantity
}

for key, requirement := range storageMap {
if value.Worker.Resources.Limits == nil {
value.Worker.Resources.Limits = make(common.ResourceList)
}
if key == common.MemoryCacheStore {
req := requirement.DeepCopy()

memLimit.Add(req)

e.Log.Info("update the requirement for memory", "requirement", memLimit)

}
// } else if key == common.DiskCacheStore {
// req := requirement.DeepCopy()

// e.Log.Info("update the requiremnet for disk", "requirement", req)

// value.Worker.Resources.Limits[corev1.ResourceEphemeralStorage] = req.String()
// }
}

value.Worker.Resources.Limits[corev1.ResourceMemory] = memLimit.String()
e.transformResourcesForWorker(runtime, value)

return
}
Expand Down Expand Up @@ -328,11 +288,6 @@ func (e *AlluxioEngine) transformFuse(runtime *datav1alpha1.AlluxioRuntime, data
value.Fuse.Properties = runtime.Spec.Fuse.Properties
}

// TODO: support JVMOpitons from string to array
if len(runtime.Spec.Fuse.JvmOptions) > 0 {
value.Fuse.JvmOptions = runtime.Spec.Fuse.JvmOptions
}

if len(runtime.Spec.Fuse.Env) > 0 {
value.Fuse.Env = runtime.Spec.Fuse.Env
} else {
Expand All @@ -348,11 +303,12 @@ func (e *AlluxioEngine) transformFuse(runtime *datav1alpha1.AlluxioRuntime, data
value.Fuse.MountPath = e.getMountPoint()
value.Fuse.Env["MOUNT_POINT"] = value.Fuse.MountPath

if len(runtime.Spec.Fuse.Args) > 0 {
value.Fuse.Args = runtime.Spec.Fuse.Args
} else {
value.Fuse.Args = []string{"fuse", "--fuse-opts=kernel_cache"}
}
// if len(runtime.Spec.Fuse.Args) > 0 {
// value.Fuse.Args = runtime.Spec.Fuse.Args
// } else {
// value.Fuse.Args = []string{"fuse", "--fuse-opts=kernel_cache"}
// }
e.optimizeDefaultFuse(runtime, value)

if dataset.Spec.Owner != nil {
value.Fuse.Args[len(value.Fuse.Args)-1] = strings.Join([]string{value.Fuse.Args[len(value.Fuse.Args)-1], fmt.Sprintf("uid=%d,gid=%d", *dataset.Spec.Owner.UID, *dataset.Spec.Owner.GID)}, ",")
Expand All @@ -373,39 +329,7 @@ func (e *AlluxioEngine) transformFuse(runtime *datav1alpha1.AlluxioRuntime, data
value.Fuse.HostNetwork = true
value.Fuse.Enabled = true

value.Fuse.Resources = utils.TransformRequirementsToResources(runtime.Spec.Fuse.Resources)

storageMap := tieredstore.GetLevelStorageMap(runtime)

e.Log.Info("transformFuse", "storageMap", storageMap)

// TODO(iluoeli): it should be xmx + direct memory
memLimit := resource.MustParse("50Gi")
if quantity, exists := runtime.Spec.Fuse.Resources.Limits[corev1.ResourceMemory]; exists && !quantity.IsZero() {
memLimit = quantity
}

for key, requirement := range storageMap {
if value.Fuse.Resources.Limits == nil {
value.Fuse.Resources.Limits = make(common.ResourceList)
}
if key == common.MemoryCacheStore {
req := requirement.DeepCopy()

memLimit.Add(req)

e.Log.Info("update the requiremnet for memory", "requirement", memLimit)

}
// } else if key == common.DiskCacheStore {
// req := requirement.DeepCopy()
// e.Log.Info("update the requiremnet for disk", "requirement", req)
// value.Fuse.Resources.Limits[corev1.ResourceEphemeralStorage] = req.String()
// }
}
if value.Fuse.Resources.Limits != nil {
value.Fuse.Resources.Limits[corev1.ResourceMemory] = memLimit.String()
}
e.transformResourcesForFuse(runtime, value)

return

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,53 @@ func setDefaultProperties(runtime *datav1alpha1.AlluxioRuntime, alluxioValue *Al
alluxioValue.Properties[key] = value
}
}

// optimizeDefaultForMaster resolves the JVM options for the Alluxio master.
// Options explicitly given in the runtime spec always win; when no options
// are set at all, a default option set is applied.
func (e *AlluxioEngine) optimizeDefaultForMaster(runtime *datav1alpha1.AlluxioRuntime, value *Alluxio) {
	if userOpts := runtime.Spec.Master.JvmOptions; len(userOpts) > 0 {
		value.Master.JvmOptions = userOpts
	}
	if len(value.Master.JvmOptions) > 0 {
		// Options already present (from the spec or a prior transform step).
		return
	}
	// Nothing configured anywhere: fall back to the defaults.
	value.Master.JvmOptions = []string{
		"-Xmx6G",
		"-XX:+UnlockExperimentalVMOptions",
	}
}

// optimizeDefaultForWorker resolves the JVM options for the Alluxio worker.
// Spec-provided options take precedence; a default option set is used only
// when no options have been configured at all.
func (e *AlluxioEngine) optimizeDefaultForWorker(runtime *datav1alpha1.AlluxioRuntime, value *Alluxio) {
	if userOpts := runtime.Spec.Worker.JvmOptions; len(userOpts) > 0 {
		value.Worker.JvmOptions = userOpts
	}
	if len(value.Worker.JvmOptions) > 0 {
		// Options already present (from the spec or a prior transform step).
		return
	}
	// Nothing configured anywhere: fall back to the defaults.
	value.Worker.JvmOptions = []string{
		"-Xmx12G",
		"-XX:+UnlockExperimentalVMOptions",
		"-XX:MaxDirectMemorySize=32g",
	}
}

// optimizeDefaultFuse resolves the JVM options and launch arguments for the
// Alluxio FUSE client. Spec-provided JVM options take precedence, with a
// default option set used when nothing is configured; launch arguments are
// taken from the spec when present, otherwise a default "fuse" invocation
// (kernel_cache, read-only, long attr/entry timeouts) is used.
func (e *AlluxioEngine) optimizeDefaultFuse(runtime *datav1alpha1.AlluxioRuntime, value *Alluxio) {
	if userOpts := runtime.Spec.Fuse.JvmOptions; len(userOpts) > 0 {
		value.Fuse.JvmOptions = userOpts
	}
	if len(value.Fuse.JvmOptions) == 0 {
		// Nothing configured anywhere: fall back to the defaults.
		value.Fuse.JvmOptions = []string{
			"-Xmx16G",
			"-Xms16G",
			"-XX:+UseG1GC",
			"-XX:MaxDirectMemorySize=32g",
			"-XX:+UnlockExperimentalVMOptions",
		}
	}

	// Launch args: user-provided args win unconditionally.
	args := runtime.Spec.Fuse.Args
	if len(args) == 0 {
		args = []string{"fuse", "--fuse-opts=kernel_cache,ro,max_read=131072,attr_timeout=7200,entry_timeout=7200,nonempty"}
	}
	value.Fuse.Args = args
}
Loading

0 comments on commit b09c902

Please sign in to comment.