diff --git a/cli/api/metaapi.go b/cli/api/metaapi.go
index 448b80e4a9..c040d6c351 100644
--- a/cli/api/metaapi.go
+++ b/cli/api/metaapi.go
@@ -87,6 +87,11 @@ type MetaHttpClient struct {
 // NewMetaHttpClient returns a new MetaHttpClient instance.
 func NewMetaHttpClient(host string, useSSL bool) *MetaHttpClient {
 	mc := &MetaHttpClient{host: host, useSSL: useSSL}
+	var err error
+	_, err = log.InitLog("/tmp/cfs", "cli", log.DebugLevel, nil)
+	if err != nil {
+		fmt.Printf("init cli log err[%v]\n", err)
+	}
 	return mc
 }
 
@@ -213,6 +218,7 @@ func (mc *MetaHttpClient) GetMetaPartition(pid uint64) (cursor uint64, err error
 		if err != nil {
 			log.LogErrorf("action[GetMetaPartition],pid:%v,err:%v", pid, err)
 		}
+		log.LogFlush()
 	}()
 	request := newAPIRequest(http.MethodGet, "/getPartitionById")
 	request.params["pid"] = fmt.Sprintf("%v", pid)
@@ -236,6 +242,7 @@ func (mc *MetaHttpClient) GetAllDentry(pid uint64) (dentryMap map[string]*metano
 		if err != nil {
 			log.LogErrorf("action[GetAllDentry],pid:%v,err:%v", pid, err)
 		}
+		log.LogFlush()
 	}()
 	dentryMap = make(map[string]*metanode.Dentry, 0)
 	request := newAPIRequest(http.MethodGet, "/getAllDentry")
@@ -286,6 +293,7 @@ func (mc *MetaHttpClient) GetAllInodes(pid uint64) (rstMap map[uint64]*Inode, er
 		if err != nil {
 			log.LogErrorf("action[GetAllInodes],pid:%v,err:%v", pid, err)
 		}
+		log.LogFlush()
 	}()
 	reqURL := fmt.Sprintf("http://%v%v?pid=%v", mc.host, "/getAllInodes", pid)
 	log.LogDebugf("reqURL=%v", reqURL)
diff --git a/cli/build.sh b/cli/build.sh
index 0d693d1654..b57530b288 100755
--- a/cli/build.sh
+++ b/cli/build.sh
@@ -1,22 +1,8 @@
 #!/usr/bin/env bash
-RootPath=$(cd $(dirname $0)/..; pwd)
-
-Version=`git describe --abbrev=0 --tags 2>/dev/null`
-BranchName=`git rev-parse --abbrev-ref HEAD 2>/dev/null`
-CommitID=`git rev-parse HEAD 2>/dev/null`
+BranchName=`git rev-parse --abbrev-ref HEAD`
+CommitID=`git rev-parse HEAD`
 BuildTime=`date +%Y-%m-%d\ %H:%M`
-SrcPath=${RootPath}/cli
-TargetFile=${1:-$RootPath/cli/cfs-cli}
-
 [[ "-$GOPATH" == "-" ]] && { echo "GOPATH not set"; exit 1; }
-LDFlags="-X github.com/chubaofs/chubaofs/proto.Version=${Version} \
-    -X github.com/chubaofs/chubaofs/proto.CommitID=${CommitID} \
-    -X github.com/chubaofs/chubaofs/proto.BranchName=${BranchName} \
-    -X 'github.com/chubaofs/chubaofs/proto.BuildTime=${BuildTime}' "
-
-go build \
-    -ldflags "${LDFlags}" \
-    -o $TargetFile \
-    ${SrcPath}/*.go
+go build -ldflags "-X main.CommitID=${CommitID} -X main.BranchName=${BranchName} -X 'main.BuildTime=${BuildTime}'" -o cfs-cli
diff --git a/cli/cli.go b/cli/cli.go
index 50022ce7e3..c46d76d0b4 100644
--- a/cli/cli.go
+++ b/cli/cli.go
@@ -16,11 +16,11 @@ package cmd
 
 import (
 	"fmt"
+	"os"
+
 	"github.com/chubaofs/chubaofs/cli/cmd"
 	"github.com/chubaofs/chubaofs/sdk/master"
-	"github.com/chubaofs/chubaofs/util/log"
 	"github.com/spf13/cobra"
-	"os"
 )
 
 var (
@@ -32,19 +32,17 @@ var (
 func runCLI() (err error) {
 	var cfg *cmd.Config
 	if cfg, err = cmd.LoadConfig(); err != nil {
-		fmt.Printf("init cli log err[%v]", err)
 		return
 	}
-	cfsCli := setupCommands(cfg)
-	if err = cfsCli.Execute(); err != nil {
-		log.LogErrorf("Command fail, err:%v", err)
-	}
+	cfscli := setupCommands(cfg)
+	err = cfscli.Execute()
 	return
 }
 
 func setupCommands(cfg *cmd.Config) *cobra.Command {
 	var mc = master.NewMasterClient(cfg.MasterAddr, false)
-	mc.SetTimeout(cfg.Timeout)
+	mc.DataNodeProfPort = cfg.DataNodeProfPort
+	mc.MetaNodeProfPort = cfg.MetaNodeProfPort
 	cfsRootCmd := cmd.NewRootCmd(mc)
 	var completionCmd = &cobra.Command{
 		Use: "completion",
@@ -77,10 +75,7 @@ following command to execute:
 func main() {
 	var err error
-	_, err = log.InitLog("/tmp/cfs", "cli", log.DebugLevel, nil)
-	defer log.LogFlush()
 	if err = runCLI(); err != nil {
-		log.LogFlush()
 		_, _ = fmt.Fprintf(os.Stderr, "Error: %v\n", err)
 		os.Exit(1)
 	}
diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index dd0447de62..e9890e51a8 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -16,6 +16,7 @@ package cmd
 
 import (
 	"fmt"
+	"os"
 	"strconv"
 
 	"github.com/chubaofs/chubaofs/proto"
@@ -64,12 +65,14 @@ func newClusterInfoCmd(client *master.MasterClient) *cobra.Command {
 			var cv *proto.ClusterView
 			var delPara map[string]string
 			if cv, err = client.AdminAPI().GetCluster(); err != nil {
-				errout("Error: %v", err)
+				errout("Get cluster info fail:\n%v\n", err)
+				os.Exit(1)
 			}
 			stdout("[Cluster]\n")
 			stdout(formatClusterView(cv))
 			if delPara, err = client.AdminAPI().GetDeleteParas(); err != nil {
-				errout("Error: %v", err)
+				errout("Get delete param fail:\n%v\n", err)
+				os.Exit(1)
 			}
 			stdout(fmt.Sprintf(" BatchCount     : %v\n", delPara[nodeDeleteBatchCountKey]))
 			stdout(fmt.Sprintf(" MarkDeleteRate : %v\n", delPara[nodeMarkDeleteRateKey]))
@@ -86,18 +89,11 @@ func newClusterStatCmd(client *master.MasterClient) *cobra.Command {
 		Use:   CliOpStatus,
 		Short: cmdClusterStatShort,
 		Run: func(cmd *cobra.Command, args []string) {
-			var (
-				err error
-				cs  *proto.ClusterStatInfo
-			)
-			defer func() {
-				if err != nil {
-					errout("Error: %v", err)
-				}
-			}()
+			var err error
+			var cs *proto.ClusterStatInfo
 			if cs, err = client.AdminAPI().GetClusterStat(); err != nil {
-				err = fmt.Errorf("Get cluster info fail:\n%v\n", err)
-				return
+				errout("Get cluster info fail:\n%v\n", err)
+				os.Exit(1)
 			}
 			stdout("[Cluster Status]\n")
 			stdout(formatClusterStat(cs))
@@ -109,10 +105,10 @@ func newClusterFreezeCmd(client *master.MasterClient) *cobra.Command {
 	var cmd = &cobra.Command{
-		Use:   CliOpFreeze + " [ENABLE]",
+		Use:       CliOpFreeze + " [ENABLE]",
+		ValidArgs: []string{"true", "false"},
-		Short: cmdClusterFreezeShort,
-		Args:  cobra.MinimumNArgs(1),
+		Short:     cmdClusterFreezeShort,
+		Args:      cobra.MinimumNArgs(1),
 		Long: `Turn on or off the automatic allocation of the data partitions.
 If 'freeze=false', ChubaoFS WILL automatically allocate new data partitions for the volume when:
   1. the used space is below the max capacity,
@@ -120,21 +116,15 @@ If 'freeze=false', ChubaoFS WILL automatically allocate new data partitions for
 If 'freeze=true', ChubaoFS WILL NOT automatically allocate new data partitions `,
 		Run: func(cmd *cobra.Command, args []string) {
-			var (
-				err    error
-				enable bool
-			)
-			defer func() {
-				if err != nil {
-					errout("Error: %v", err)
-				}
-			}()
+			var err error
+			var enable bool
 			if enable, err = strconv.ParseBool(args[0]); err != nil {
-				err = fmt.Errorf("Parse bool fail: %v\n", err)
-				return
+				errout("Parse bool fail: %v\n", err)
+				os.Exit(1)
 			}
 			if err = client.AdminAPI().IsFreezeCluster(enable); err != nil {
-				return
+				errout("Failed: %v\n", err)
+				os.Exit(1)
 			}
 			if enable {
 				stdout("Freeze cluster successful!\n")
@@ -154,25 +144,19 @@ func newClusterSetThresholdCmd(client *master.MasterClient) *cobra.Command {
 		Long: `Set the threshold of memory on each meta node.
 If the memory usage reaches this threshold, all the meta partitions will be read-only.`,
 		Run: func(cmd *cobra.Command, args []string) {
-			var (
-				err       error
-				threshold float64
-			)
-			defer func() {
-				if err != nil {
-					errout("Error: %v", err)
-				}
-			}()
+			var err error
+			var threshold float64
 			if threshold, err = strconv.ParseFloat(args[0], 64); err != nil {
-				err = fmt.Errorf("Parse Float fail: %v\n", err)
-				return
+				errout("Parse Float fail: %v\n", err)
+				os.Exit(1)
 			}
 			if threshold > 1.0 {
-				err = fmt.Errorf("Threshold too big\n")
-				return
+				errout("Threshold too big\n")
+				os.Exit(1)
 			}
 			if err = client.AdminAPI().SetMetaNodeThreshold(threshold); err != nil {
-				return
+				errout("Failed: %v\n", err)
+				os.Exit(1)
 			}
 			stdout("MetaNode threshold is set to %v!\n", threshold)
 		},
diff --git a/cli/cmd/compatibility.go b/cli/cmd/compatibility.go
index b1e402c6c8..9966f92f1c 100644
--- a/cli/cmd/compatibility.go
+++ b/cli/cmd/compatibility.go
@@ -15,13 +15,15 @@ package cmd
 
 import (
-	"fmt"
+	"os"
 	"github.com/chubaofs/chubaofs/cli/api"
+	"github.com/spf13/cobra"
 	"github.com/chubaofs/chubaofs/metanode"
+	"fmt"
+	"strconv"
+	"github.com/chubaofs/chubaofs/util/log"
 	"github.com/chubaofs/chubaofs/proto"
-	"github.com/spf13/cobra"
 	"reflect"
-	"strconv"
 )
 
 const (
@@ -53,8 +55,8 @@ func newMetaCompatibilityCmd() *cobra.Command {
 		Aliases: []string{"meta"},
 		Args:    cobra.MinimumNArgs(3),
 		Run: func(cmd *cobra.Command, args []string) {
+			var err error
 			var (
-				err          error
 				snapshotPath = args[0]
 				host         = args[1]
 				pid          = args[2]
 			)
 			client := api.NewMetaHttpClient(host, false)
 			defer func() {
 				if err != nil {
-					errout("Error: %v", err)
+					errout("Verify metadata consistency failed: %v\n", err)
+					log.LogError(err)
+					log.LogFlush()
+					os.Exit(1)
 				}
 			}()
 			id, err := strconv.ParseUint(pid, 10, 64)
 			if err != nil {
-				err = fmt.Errorf("parse pid[%v] failed: %v\n", pid, err)
+				errout("parse pid[%v] failed: %v\n", pid, err)
 				return
 			}
 			cursor, err := client.GetMetaPartition(id)
@@ -85,9 +90,11 @@
 			}
 			stdout("[Meta partition is %v, verify result]\n", id)
 			if err = verifyDentry(client, mp); err != nil {
+				stdout("%v\n", err)
 				return
 			}
 			if err = verifyInode(client, mp); err != nil {
+				stdout("%v\n", err)
 				return
 			}
 			stdout("All meta has checked\n")
@@ -104,27 +111,25 @@ func verifyDentry(client *api.MetaHttpClient, mp metanode.MetaPartition) (err er
 	mp.GetDentryTree().Ascend(func(d metanode.BtreeItem) bool {
 		dentry, ok := d.(*metanode.Dentry)
 		if !ok {
-			stdout("item type is not *metanode.Dentry \n")
+			stdout("item type is not *metanode.Dentry")
 			err = fmt.Errorf("item type is not *metanode.Dentry")
-			return true
+			return false
 		}
 		key := fmt.Sprintf("%v_%v", dentry.ParentId, dentry.Name)
 		oldDentry, ok := dentryMap[key]
 		if !ok {
-			stdout("dentry %v is not in old version \n", key)
+			stdout("dentry %v is not in old version", key)
 			err = fmt.Errorf("dentry %v is not in old version", key)
 			return false
 		}
 		if !reflect.DeepEqual(dentry, oldDentry) {
-			stdout("dentry %v is not equal with old version \n", key)
+			stdout("dentry %v is not equal with old version", key)
 			err = fmt.Errorf("dentry %v is not equal with old version,dentry[%v],oldDentry[%v]", key, dentry, oldDentry)
 			return false
 		}
 		return true
 	})
-	if err == nil {
-		stdout("The number of dentry is %v, all dentry are consistent \n", mp.GetDentryTree().Len())
-	}
+	stdout("The number of dentry is %v, all dentry are consistent \n", mp.GetDentryTree().Len())
 	return
 }
 
@@ -137,15 +142,12 @@ func verifyInode(client *api.MetaHttpClient, mp metanode.MetaPartition) (err err
 	mp.GetInodeTree().Ascend(func(d metanode.BtreeItem) bool {
 		inode, ok := d.(*metanode.Inode)
 		if !ok {
-			stdout("item type is not *metanode.Inode \n")
-			err = fmt.Errorf("item type is not *metanode.Inode")
 			return true
 		}
 		oldInode, ok := inodesMap[inode.Inode]
 		if !ok {
 			stdout("inode %v is not in old version \n", inode.Inode)
-			err = fmt.Errorf("inode %v is not in old version", inode.Inode)
-			return false
+			return true
 		}
 		localInode = &api.Inode{
 			Inode: inode.Inode,
@@ -169,13 +171,9 @@
 		})
 		if !reflect.DeepEqual(oldInode, localInode) {
 			stdout("inode %v is not equal with old version,inode[%v],oldInode[%v]\n", inode.Inode, inode, oldInode)
-			err = fmt.Errorf("inode %v is not equal with old version,inode[%v],oldInode[%v]\n", inode.Inode, inode, oldInode)
-			return false
 		}
 		return true
 	})
-	if err == nil {
-		stdout("The number of inodes is %v, all inodes are consistent \n", mp.GetInodeTree().Len())
-	}
+	stdout("The number of inodes is %v, all inodes are consistent \n", mp.GetInodeTree().Len())
 	return
 }
diff --git a/cli/cmd/config.go b/cli/cmd/config.go
index 3e919c2d01..62c03bbc68 100644
--- a/cli/cmd/config.go
+++ b/cli/cmd/config.go
@@ -37,6 +37,8 @@ var (
   "masterAddr": [
     "master.chubao.io"
   ],
+  "dnProf": 17320,
+  "mnProf": 17220,
   "timeout": 60
 }
 `)
 )
 
 type Config struct {
-	MasterAddr []string `json:"masterAddr"`
-	Timeout    uint16   `json:"timeout"`
+	MasterAddr       []string `json:"masterAddr"`
+	DataNodeProfPort uint16   `json:"dnProf"`
+	MetaNodeProfPort uint16   `json:"mnProf"`
+	Timeout          uint16   `json:"timeout"`
 }
 
 func newConfigCmd() *cobra.Command {
@@ -65,35 +69,50 @@ const (
 
 func newConfigSetCmd() *cobra.Command {
 	var optMasterHost string
+	var optDNProfPort uint16
+	var optMNProfPort uint16
 	var optTimeout uint16
 	var cmd = &cobra.Command{
 		Use:   CliOpSet,
 		Short: cmdConfigSetShort,
 		Long:  `Set the config file`,
 		Run: func(cmd *cobra.Command, args []string) {
-			var (
-				err         error
-				masterHosts []string
-			)
-			defer func() {
-				if err != nil {
-					errout("Error: %v", err)
-				}
-			}()
-			if optMasterHost == "" && optTimeout == 0 {
-				stdout(fmt.Sprintf("No change. Input 'cfs-cli config set -h' for help.\n"))
+			var masterHosts []string
+			var config *Config
+			var err error
+			if optMasterHost == "" && optDNProfPort == 0 && optMNProfPort == 0 && optTimeout == 0 {
+				stdout(fmt.Sprintf("No changes have been set. Input 'cfs-cli config set -h' for help.\n"))
 				return
 			}
 			if len(optMasterHost) != 0 {
 				masterHosts = append(masterHosts, optMasterHost)
 			}
-			if err = setConfig(masterHosts, optTimeout); err != nil {
+			if config, err = LoadConfig(); err != nil {
+				stdout("load config file failed\n")
+				return
+			}
+			if len(masterHosts) > 0 {
+				config.MasterAddr = masterHosts
+			}
+			if optDNProfPort > 0 {
+				config.DataNodeProfPort = optDNProfPort
+			}
+			if optMNProfPort > 0 {
+				config.MetaNodeProfPort = optMNProfPort
+			}
+			if optTimeout > 0 {
+				config.Timeout = optTimeout
+			}
+			if _, err := setConfig(config); err != nil {
+				stdout("error: %v\n", err)
 				return
 			}
 			stdout(fmt.Sprintf("Config has been set successfully!\n"))
 		},
 	}
 	cmd.Flags().StringVar(&optMasterHost, "addr", "", "Specify master address [{HOST}:{PORT}]")
+	cmd.Flags().Uint16Var(&optDNProfPort, "dnProf", 0, "Specify prof port for DataNode")
+	cmd.Flags().Uint16Var(&optMNProfPort, "mnProf", 0, "Specify prof port for MetaNode")
 	cmd.Flags().Uint16Var(&optTimeout, "timeout", 0, "Specify timeout for requests [Unit: s]")
 	return cmd
 }
@@ -107,9 +126,10 @@
 			config, err := LoadConfig()
 			if err != nil {
 				_, _ = fmt.Fprintf(os.Stderr, "Error: %v\n", err)
-				OsExitWithLogFlush()
+				os.Exit(1)
 			}
-			printConfigInfo(config)
+			stdout(fmt.Sprintf("Config info:\n %v\n", config.MasterAddr))
+
 		},
 	}
 	cmd.Flags().StringVar(&optFilterWritable, "filter-writable", "", "Filter node writable status")
@@ -117,33 +137,17 @@
 	return cmd
 }
 
-func printConfigInfo(config *Config) {
-	stdout("Config info:\n")
-	stdout(" Master Address     : %v\n", config.MasterAddr)
-	stdout(" Request Timeout [s]: %v\n", config.Timeout)
-}
-
-func setConfig(masterHosts []string, timeout uint16) (err error) {
-	var config *Config
-	if config, err = LoadConfig(); err != nil {
-		return
-	}
-	if len(masterHosts) > 0 {
-		config.MasterAddr = masterHosts
-	}
-	if timeout != 0 {
-		config.Timeout = timeout
-	}
+func setConfig(config *Config) (*Config, error) {
+	var err error
 	var configData []byte
 	if configData, err = json.Marshal(config); err != nil {
-		return
+		return nil, err
 	}
 	if err = ioutil.WriteFile(defaultConfigPath, configData, 0600); err != nil {
-		return
+		return nil, err
 	}
-	return nil
+	return config, nil
 }
-
 func LoadConfig() (*Config, error) {
 	var err error
 	var configData []byte
@@ -160,8 +164,5 @@
 	if err = json.Unmarshal(configData, config); err != nil {
 		return nil, err
 	}
-	if config.Timeout == 0 {
-		config.Timeout = defaultConfigTimeout
-	}
 	return config, nil
 }
diff --git a/cli/cmd/const.go b/cli/cmd/const.go
index c96d0c3758..e46b1cfdff 100644
--- a/cli/cmd/const.go
+++ b/cli/cmd/const.go
@@ -34,8 +34,8 @@ const (
 	CliOpReset      = "reset"
 	CliOpReplicate  = "add-replica"
 	CliOpDelReplica = "del-replica"
-	CliOpExpand     = "expand"
-	CliOpShrink     = "shrink"
+	CliOpExpand = "expand"
+	CliOpShrink = "shrink"
 
 	//Shorthand format of operation name
 	CliOpDecommissionShortHand = "dec"
@@ -59,8 +59,10 @@ const (
 	CliFlagReplicas           = "replicas"
 	CliFlagEnable             = "enable"
 	CliFlagEnableFollowerRead = "follower-read"
+	CliFlagAutoRepair         = "auto-repair"
 	CliFlagAuthenticate       = "authenticate"
 	CliFlagEnableToken        = "enable-token"
+	CliFlagEnableAutoFill     = "auto-fill"
 	CliFlagCapacity           = "capacity"
 	CliFlagThreshold          = "threshold"
 	CliFlagAddress            = "addr"
@@ -82,10 +84,3 @@ const (
 	ResourceDataPartitionShortHand = "dp"
 	ResourceMetaPartitionShortHand = "mp"
 )
-type MasterOp int
-const (
-	OpExpandVol MasterOp = iota
-	OpShrinkVol
-
OpCreateVol - OpDeleteVol -) diff --git a/cli/cmd/datanode.go b/cli/cmd/datanode.go index 05d5e5a27e..013b8ed401 100644 --- a/cli/cmd/datanode.go +++ b/cli/cmd/datanode.go @@ -15,6 +15,7 @@ package cmd import ( + "os" "sort" "strings" @@ -57,7 +58,8 @@ func newDataNodeListCmd(client *master.MasterClient) *cobra.Command { var err error defer func() { if err != nil { - errout("Error: %v", err) + errout("List cluster data nodes failed: %v\n", err) + os.Exit(1) } }() var view *proto.ClusterView @@ -98,7 +100,8 @@ func newDataNodeInfoCmd(client *master.MasterClient) *cobra.Command { var datanodeInfo *proto.DataNodeInfo defer func() { if err != nil { - errout("Error: %v", err) + errout("Show data node info failed: %v\n", err) + os.Exit(1) } }() nodeAddr = args[0] @@ -129,7 +132,8 @@ func newDataNodeDecommissionCmd(client *master.MasterClient) *cobra.Command { var nodeAddr string defer func() { if err != nil { - errout("Error: %v", err) + errout("decommission data node failed, err[%v]\n", err) + os.Exit(1) } }() nodeAddr = args[0] diff --git a/cli/cmd/datapartition.go b/cli/cmd/datapartition.go index b5a0ef31cd..248c328f30 100644 --- a/cli/cmd/datapartition.go +++ b/cli/cmd/datapartition.go @@ -21,6 +21,9 @@ import ( "github.com/spf13/cobra" "sort" "strconv" + "strings" + "sync" + "time" ) const ( @@ -44,12 +47,12 @@ func newDataPartitionCmd(client *master.MasterClient) *cobra.Command { } const ( - cmdDataPartitionGetShort = "Display detail information of a data partition" - cmdCheckCorruptDataPartitionShort = "Check and list unhealthy data partitions" - cmdDataPartitionDecommissionShort = "Decommission a replication of the data partition to a new address" - cmdDataPartitionReplicateShort = "Add a replication of the data partition on a new address" - cmdDataPartitionDeleteReplicaShort = "Delete a replication of the data partition on a fixed address" - ) + cmdDataPartitionGetShort = "Display detail information of a data partition" + cmdCheckCorruptDataPartitionShort = "Check out corrupt data partitions" + cmdDataPartitionDecommissionShort = "Decommission a replication of the data partition to a new address" + cmdDataPartitionReplicateShort = "Add a replication of the data partition on a new address" + cmdDataPartitionDeleteReplicaShort = "Delete a replication of the data partition on a fixed address" +) func newDataPartitionGetCmd(client *master.MasterClient) *cobra.Command { var cmd = &cobra.Command{ @@ -58,16 +61,10 @@ func newDataPartitionGetCmd(client *master.MasterClient) *cobra.Command { Args: cobra.MinimumNArgs(1), Run: func(cmd *cobra.Command, args []string) { var ( - err error - partitionID uint64 - partition *proto.DataPartitionInfo + partition *proto.DataPartitionInfo ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() - if partitionID, err = strconv.ParseUint(args[0], 10, 64); err != nil { + partitionID, err := strconv.ParseUint(args[0], 10, 64) + if err != nil { return } if partition, err = client.AdminAPI().GetDataPartition("", partitionID); err != nil { @@ -80,6 +77,8 @@ func newDataPartitionGetCmd(client *master.MasterClient) *cobra.Command { } func newListCorruptDataPartitionCmd(client *master.MasterClient) *cobra.Command { + var optEnableAutoFullfill bool + var optCheckAll bool var cmd = &cobra.Command{ Use: CliOpCheck, Short: cmdCheckCorruptDataPartitionShort, @@ -91,25 +90,26 @@ you can use the "reset" command to fix the problem.The "reset" command may lead The "reset" command will be released in next version`, Run: func(cmd *cobra.Command, args []string) 
{ var ( - diagnosis *proto.DataPartitionDiagnosis - dataNodes []*proto.DataNodeInfo - err error + diagnosis *proto.DataPartitionDiagnosis + dataNodes []*proto.DataNodeInfo + err error ) - defer func() { + if optCheckAll { + err = checkAllDataPartitions(client) if err != nil { - errout("Error: %v", err) + stdout("%v\n", err) } - }() + return + } if diagnosis, err = client.AdminAPI().DiagnoseDataPartition(); err != nil { + stdout("%v\n", err) return } stdout("[Inactive Data nodes]:\n") stdout("%v\n", formatDataNodeDetailTableHeader()) for _, addr := range diagnosis.InactiveDataNodes { var node *proto.DataNodeInfo - if node, err = client.NodeAPI().GetDataNode(addr); err != nil { - return - } + node, err = client.NodeAPI().GetDataNode(addr) dataNodes = append(dataNodes, node) } sort.SliceStable(dataNodes, func(i, j int) bool { @@ -118,7 +118,7 @@ The "reset" command will be released in next version`, for _, node := range dataNodes { stdout("%v\n", formatDataNodeDetail(node, true)) } - stdout("\n") + /*stdout("\n") stdout("[Corrupt data partitions](no leader):\n") stdout("%v\n", partitionInfoTableHeader) sort.SliceStable(diagnosis.CorruptDataPartitionIDs, func(i, j int) bool { @@ -127,11 +127,11 @@ The "reset" command will be released in next version`, for _, pid := range diagnosis.CorruptDataPartitionIDs { var partition *proto.DataPartitionInfo if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil { - err = fmt.Errorf("Partition not found, err:[%v] ", err) + stdout("Partition not found, err:[%v]", err) return } stdout("%v\n", formatDataPartitionInfoRow(partition)) - } + }*/ stdout("\n") stdout("%v\n", "[Partition lack replicas]:") @@ -139,57 +139,206 @@ The "reset" command will be released in next version`, sort.SliceStable(diagnosis.LackReplicaDataPartitionIDs, func(i, j int) bool { return diagnosis.LackReplicaDataPartitionIDs[i] < diagnosis.LackReplicaDataPartitionIDs[j] }) + cv, _ := client.AdminAPI().GetCluster() + dns := cv.DataNodes + var sb = strings.Builder{} + for _, pid := range diagnosis.LackReplicaDataPartitionIDs { var partition *proto.DataPartitionInfo if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil { - err = fmt.Errorf("Partition not found, err:[%v] ", err) + stdout("Partition is not found, err:[%v]", err) return } if partition != nil { stdout("%v\n", formatDataPartitionInfoRow(partition)) - } - } + sort.Strings(partition.Hosts) + if len(partition.MissingNodes) > 0 || partition.Status == -1 { + stdoutRed(fmt.Sprintf("partition not ready to repair")) + continue + } + var leaderRps map[uint64]*proto.ReplicaStatus + var canAutoRepair bool + var peerStrings []string + canAutoRepair = true + for i, r := range partition.Replicas { + var rps map[uint64]*proto.ReplicaStatus + var dnPartition *proto.DNDataPartitionInfo + var err error + addr := strings.Split(r.Addr, ":")[0] + if dnPartition, err = client.NodeAPI().DataNodeGetPartition(addr, partition.PartitionID); err != nil { + fmt.Printf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v(hosts)", r.Addr), fmt.Sprintf("%v/%v", "nil", partition.ReplicaNum), "get partition info failed") + continue + } + sort.Strings(dnPartition.Replicas) + fmt.Printf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v(hosts)", r.Addr), fmt.Sprintf("%v/%v", len(dnPartition.Replicas), partition.ReplicaNum), strings.Join(dnPartition.Replicas, "; ")) - - stdout("\n") - stdout("%v\n", "[Bad data partitions(decommission not completed)]:") - badPartitionTablePattern := "%-8v 
%-10v\n" - stdout(badPartitionTablePattern, "PATH", "PARTITION ID") - for _, bdpv := range diagnosis.BadDataPartitionIDs { - sort.SliceStable(bdpv.PartitionIDs, func(i, j int) bool { - return bdpv.PartitionIDs[i] < bdpv.PartitionIDs[j] - }) - for _, pid := range bdpv.PartitionIDs { - stdout(badPartitionTablePattern, bdpv.Path, pid) + if rps = dnPartition.RaftStatus.Replicas; rps != nil { + leaderRps = rps + } + peers := convertPeersToArray(dnPartition.Peers) + sort.Strings(peers) + if i == 0 { + peerStrings = peers + } else { + if !isEqualStrings(peers, peerStrings) { + canAutoRepair = false + } + } + fmt.Printf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v(peers)", r.Addr), fmt.Sprintf("%v/%v", len(peers), partition.ReplicaNum), strings.Join(peers, "; ")) + } + if len(leaderRps) != 3 || len(partition.Hosts) != 2 { + stdoutRed(fmt.Sprintf("raft peer number(expected is 3, but is %v) or replica number(expected is 2, but is %v) not match ", len(leaderRps), len(partition.Hosts))) + continue + } + var lackAddr []string + for _, dn := range dns { + if _, ok := leaderRps[dn.ID]; ok { + if !contains(partition.Hosts, dn.Addr) { + lackAddr = append(lackAddr, dn.Addr) + } + } + } + if len(lackAddr) != 1 { + stdoutRed(fmt.Sprintf("Not classic partition, please check and repair it manually")) + continue + } + stdoutGreen(fmt.Sprintf(" The Lack Address is: %v", lackAddr)) + if canAutoRepair { + sb.WriteString(fmt.Sprintf("cfs-cli datapartition add-replica %v %v\n", lackAddr[0], partition.PartitionID)) + } + if optEnableAutoFullfill && canAutoRepair { + stdoutGreen(" Auto Repair Begin:") + if err = client.AdminAPI().AddDataReplica(partition.PartitionID, lackAddr[0]); err != nil { + stdoutRed(fmt.Sprintf("%v err:%v", " Failed.", err)) + continue + } + stdoutGreen(" Done.") + time.Sleep(2 * time.Second) + } + stdoutGreen(strings.Repeat("_ ", len(partitionInfoTableHeader)/2+20) + "\n") } } + if !optEnableAutoFullfill { + stdout(sb.String()) + } return }, } + cmd.Flags().BoolVar(&optEnableAutoFullfill, CliFlagEnableAutoFill, false, "true - automatically full fill the missing replica") + cmd.Flags().BoolVar(&optCheckAll, "all", false, "true - check all partitions; false - only check partitions which lack of replica") return cmd } +func checkAllDataPartitions(client *master.MasterClient) (err error) { + var volInfo []*proto.VolInfo + if volInfo, err = client.AdminAPI().ListVols(""); err != nil { + stdout("%v\n", err) + return + } + stdout("\n") + stdout("%v\n", "[Partition peer info not valid]:") + stdout("%v\n", partitionInfoTableHeader) + for _, vol := range volInfo { + var volView *proto.VolView + if volView, err = client.ClientAPI().GetVolume(vol.Name, calcAuthKey(vol.Owner)); err != nil { + stdout("Found an invalid vol: %v\n", vol.Name) + continue + } + sort.SliceStable(volView.DataPartitions, func(i, j int) bool { + return volView.DataPartitions[i].PartitionID < volView.DataPartitions[j].PartitionID + }) + var wg sync.WaitGroup + for _, dp := range volView.DataPartitions { + wg.Add(1) + go func(dp *proto.DataPartitionResponse) { + defer wg.Done() + var outPut string + var isHealthy bool + outPut, isHealthy, _ = checkDataPartition(dp.PartitionID, client) + if !isHealthy { + fmt.Printf(outPut) + stdoutGreen(strings.Repeat("_ ", len(partitionInfoTableHeader)/2+20) + "\n") + } + }(dp) + } + wg.Wait() + } + return +} +func checkDataPartition(pid uint64, client *master.MasterClient) (outPut string, isHealthy bool, err error) { + var partition *proto.DataPartitionInfo + var sb = 
strings.Builder{} + isHealthy = true + if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil { + sb.WriteString(fmt.Sprintf("Partition is not found, err:[%v]", err)) + return + } + if partition != nil { + sb.WriteString(fmt.Sprintf("%v\n", formatDataPartitionInfoRow(partition))) + sort.Strings(partition.Hosts) + if len(partition.MissingNodes) > 0 || partition.Status == -1 || len(partition.Hosts) != int(partition.ReplicaNum) { + errMsg := fmt.Sprintf("The partition is not healthy according to the report message from master") + sb.WriteString(fmt.Sprintf("\033[1;40;31m%-8v\033[0m\n", errMsg)) + isHealthy = false + } + var leaderRps map[uint64]*proto.ReplicaStatus + for _, r := range partition.Replicas { + var rps map[uint64]*proto.ReplicaStatus + var dnPartition *proto.DNDataPartitionInfo + var err error + addr := strings.Split(r.Addr, ":")[0] + if dnPartition, err = client.NodeAPI().DataNodeGetPartition(addr, partition.PartitionID); err != nil { + sb.WriteString(fmt.Sprintf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v", r.Addr), fmt.Sprintf("%v/%v", "nil", partition.ReplicaNum), fmt.Sprintf("get partition info failed, err:%v", err))) + isHealthy = false + continue + } + sort.Strings(dnPartition.Replicas) + sb.WriteString(fmt.Sprintf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v(hosts)", r.Addr), fmt.Sprintf("%v/%v", len(dnPartition.Replicas), partition.ReplicaNum), strings.Join(dnPartition.Replicas, "; "))) + if rps = dnPartition.RaftStatus.Replicas; rps != nil { + leaderRps = rps + } + peerStrings := convertPeersToArray(dnPartition.Peers) + sort.Strings(peerStrings) + sb.WriteString(fmt.Sprintf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v(peers)", r.Addr), fmt.Sprintf("%v/%v", len(peerStrings), partition.ReplicaNum), strings.Join(peerStrings, "; "))) + if !isEqualStrings(peerStrings, dnPartition.Replicas) { + isHealthy = false + } + if !isEqualStrings(partition.Hosts, peerStrings) { + isHealthy = false + } + if len(peerStrings) != int(partition.ReplicaNum) || len(dnPartition.Replicas) != int(partition.ReplicaNum) { + isHealthy = false + } + } + if len(leaderRps) == 0 { + isHealthy = false + errMsg := fmt.Sprintf("no raft leader") + sb.WriteString(fmt.Sprintf("\033[1;40;31m%-8v\033[0m\n", errMsg)) + } + } + outPut = sb.String() + return +} func newDataPartitionDecommissionCmd(client *master.MasterClient) *cobra.Command { var cmd = &cobra.Command{ Use: CliOpDecommission + " [ADDRESS] [DATA PARTITION ID]", Short: cmdDataPartitionDecommissionShort, Args: cobra.MinimumNArgs(2), Run: func(cmd *cobra.Command, args []string) { - var ( - err error - partitionID uint64 - ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() address := args[0] - partitionID, err = strconv.ParseUint(args[1], 10, 64) + partitionID, err := strconv.ParseUint(args[1], 10, 64) if err != nil { + stdout("%v\n", err) return } if err = client.AdminAPI().DecommissionDataPartition(partitionID, address); err != nil { + stdout("%v\n", err) return } }, @@ -209,20 +358,14 @@ func newDataPartitionReplicateCmd(client *master.MasterClient) *cobra.Command { Short: cmdDataPartitionReplicateShort, Args: cobra.MinimumNArgs(2), Run: func(cmd *cobra.Command, args []string) { - var ( - err error - partitionID uint64 - ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() address := args[0] - if partitionID, err = strconv.ParseUint(args[1], 10, 64); err != nil { + partitionID, err := strconv.ParseUint(args[1], 10, 64) 
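// [Editor's note] The checks in checkDataPartition above reduce to one
// invariant: the master's host list and every replica's self-reported member
// lists (hosts and raft peers) must agree, and each must contain exactly
// ReplicaNum entries. A minimal, self-contained sketch of that predicate
// (replicaSetsConsistent is a hypothetical helper, not part of this patch):
package main

import (
	"fmt"
	"sort"
)

// replicaSetsConsistent reports whether the master view and every
// per-replica view agree on membership and on the expected size.
func replicaSetsConsistent(masterHosts []string, replicaViews [][]string, replicaNum int) bool {
	sort.Strings(masterHosts)
	if len(masterHosts) != replicaNum {
		return false
	}
	for _, view := range replicaViews {
		sort.Strings(view)
		if len(view) != replicaNum {
			return false
		}
		for i := range view {
			if view[i] != masterHosts[i] {
				return false
			}
		}
	}
	return true
}

func main() {
	master := []string{"192.168.0.2:17310", "192.168.0.1:17310", "192.168.0.3:17310"}
	views := [][]string{
		{"192.168.0.1:17310", "192.168.0.2:17310", "192.168.0.3:17310"},
		{"192.168.0.1:17310", "192.168.0.2:17310"}, // this replica lacks one member
	}
	fmt.Println(replicaSetsConsistent(master, views, 3)) // false: partition needs repair
}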
+ if err != nil { + stdout("%v\n", err) return } if err = client.AdminAPI().AddDataReplica(partitionID, address); err != nil { + stdout("%v\n", err) return } }, @@ -242,20 +385,14 @@ func newDataPartitionDeleteReplicaCmd(client *master.MasterClient) *cobra.Comman Short: cmdDataPartitionDeleteReplicaShort, Args: cobra.MinimumNArgs(2), Run: func(cmd *cobra.Command, args []string) { - var ( - err error - partitionID uint64 - ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() address := args[0] - if partitionID, err = strconv.ParseUint(args[1], 10, 64); err != nil { + partitionID, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + stdout("%v\n", err) return } if err = client.AdminAPI().DeleteDataReplica(partitionID, address); err != nil { + stdout("%v\n", err) return } }, diff --git a/cli/cmd/fmt.go b/cli/cmd/fmt.go index caafb86600..4fa6e6bedc 100644 --- a/cli/cmd/fmt.go +++ b/cli/cmd/fmt.go @@ -17,6 +17,7 @@ package cmd import ( "fmt" "math" + "sort" "strconv" "strings" "time" @@ -26,16 +27,18 @@ import ( func formatClusterView(cv *proto.ClusterView) string { var sb = strings.Builder{} - sb.WriteString(fmt.Sprintf(" Cluster name : %v\n", cv.Name)) - sb.WriteString(fmt.Sprintf(" Master leader : %v\n", cv.LeaderAddr)) - sb.WriteString(fmt.Sprintf(" Auto allocate : %v\n", formatEnabledDisabled(!cv.DisableAutoAlloc))) - sb.WriteString(fmt.Sprintf(" MetaNode count : %v\n", len(cv.MetaNodes))) - sb.WriteString(fmt.Sprintf(" MetaNode used : %v GB\n", cv.MetaNodeStatInfo.UsedGB)) - sb.WriteString(fmt.Sprintf(" MetaNode total : %v GB\n", cv.MetaNodeStatInfo.TotalGB)) - sb.WriteString(fmt.Sprintf(" DataNode count : %v\n", len(cv.DataNodes))) - sb.WriteString(fmt.Sprintf(" DataNode used : %v GB\n", cv.DataNodeStatInfo.UsedGB)) - sb.WriteString(fmt.Sprintf(" DataNode total : %v GB\n", cv.DataNodeStatInfo.TotalGB)) - sb.WriteString(fmt.Sprintf(" Volume count : %v\n", len(cv.VolStatInfo))) + sb.WriteString(fmt.Sprintf(" Cluster name : %v\n", cv.Name)) + sb.WriteString(fmt.Sprintf(" Master leader : %v\n", cv.LeaderAddr)) + sb.WriteString(fmt.Sprintf(" Auto allocate : %v\n", formatEnabledDisabled(!cv.DisableAutoAlloc))) + sb.WriteString(fmt.Sprintf(" MetaNode count : %v\n", len(cv.MetaNodes))) + sb.WriteString(fmt.Sprintf(" MetaNode used : %v GB\n", cv.MetaNodeStatInfo.UsedGB)) + sb.WriteString(fmt.Sprintf(" MetaNode total : %v GB\n", cv.MetaNodeStatInfo.TotalGB)) + sb.WriteString(fmt.Sprintf(" DataNode count : %v\n", len(cv.DataNodes))) + sb.WriteString(fmt.Sprintf(" DataNode used : %v GB\n", cv.DataNodeStatInfo.UsedGB)) + sb.WriteString(fmt.Sprintf(" DataNode total : %v GB\n", cv.DataNodeStatInfo.TotalGB)) + sb.WriteString(fmt.Sprintf(" Volume count : %v\n", len(cv.VolStatInfo))) + sb.WriteString(fmt.Sprintf(" Dp recover pool : %v\n", cv.DpRecoverPool)) + sb.WriteString(fmt.Sprintf(" Mp recover pool : %v\n", cv.MpRecoverPool)) return sb.String() } @@ -99,8 +102,8 @@ func formatSimpleVolView(svv *proto.SimpleVolView) string { sb.WriteString(fmt.Sprintf(" Create time : %v\n", svv.CreateTime)) sb.WriteString(fmt.Sprintf(" Authenticate : %v\n", formatEnabledDisabled(svv.Authenticate))) sb.WriteString(fmt.Sprintf(" Follower read : %v\n", formatEnabledDisabled(svv.FollowerRead))) - sb.WriteString(fmt.Sprintf(" Enable token : %v\n", formatEnabledDisabled(svv.EnableToken))) sb.WriteString(fmt.Sprintf(" Cross zone : %v\n", formatEnabledDisabled(svv.CrossZone))) + sb.WriteString(fmt.Sprintf(" Auto repair : %v\n", formatEnabledDisabled(svv.AutoRepair))) sb.WriteString(fmt.Sprintf(" 
Inode count : %v\n", svv.InodeCount)) sb.WriteString(fmt.Sprintf(" Dentry count : %v\n", svv.DentryCount)) sb.WriteString(fmt.Sprintf(" Max metaPartition ID : %v\n", svv.MaxMetaPartitionID)) @@ -133,6 +136,17 @@ func formatVolInfoTableRow(vi *proto.VolInfo) string { formatVolumeStatus(vi.Status), time.Unix(vi.CreateTime, 0).Local().Format(time.RFC1123)) } +var ( + volumeDetailInfoTablePattern = "%-63v %-20v %-30v %-10v %-12v %-8v %-8v %-8v %-8v %-10v" + volumeDetailInfoTableHeader = fmt.Sprintf(volumeDetailInfoTablePattern, "VOLUME", "OWNER", "ZONE NAME", "CROSS ZONE", "INODE COUNT", "DP COUNT", "USED", "TOTAL", "STATUS", "CREATE TIME") +) + +func formatVolDetailInfoTableRow(vv *proto.SimpleVolView, vi *proto.VolInfo) string { + return fmt.Sprintf(volumeDetailInfoTablePattern, + vv.Name, vv.Owner, vv.ZoneName, vv.CrossZone, vv.InodeCount, vv.DpCnt, formatSize(vi.UsedSize), formatSize(vi.TotalSize), + formatVolumeStatus(vi.Status), time.Unix(vi.CreateTime, 0).Local().Format(time.RFC1123)) +} + var ( dataPartitionTablePattern = "%-8v %-8v %-10v %-10v %-18v %-18v" dataPartitionTableHeader = fmt.Sprintf(dataPartitionTablePattern, @@ -146,19 +160,25 @@ func formatDataPartitionTableRow(view *proto.DataPartitionResponse) string { } var ( - partitionInfoTablePattern = "%-8v %-8v %-10v %-18v %-18v" - partitionInfoTableHeader = fmt.Sprintf(partitionInfoTablePattern, - "ID", "VOLUME", "REPLICAS", "STATUS", "MEMBERS") + partitionInfoTablePattern = "%-8v %-25v %-10v %-28v %-10v %-18v" + partitionInfoColorTablePattern = "%-8v %-25v %-10v %-28v \033[1;40;32m%-10v\033[0m %-18v" + partitionInfoTableHeader = fmt.Sprintf(partitionInfoTablePattern, + "ID", "VOLUME", "STATUS", "POSITION", "REPLICANUM", "HOSTS") ) func formatDataPartitionInfoRow(partition *proto.DataPartitionInfo) string { - return fmt.Sprintf(partitionInfoTablePattern, - partition.PartitionID, partition.VolName, partition.ReplicaNum, formatDataPartitionStatus(partition.Status), strings.Join(partition.Hosts, ", ")) + var sb = strings.Builder{} + sort.Strings(partition.Hosts) + sb.WriteString(fmt.Sprintf(partitionInfoTablePattern, + partition.PartitionID, partition.VolName, formatDataPartitionStatus(partition.Status), "Master", fmt.Sprintf("%v/%v", len(partition.Hosts), partition.ReplicaNum), strings.Join(partition.Hosts, "; "))) + return sb.String() } func formatMetaPartitionInfoRow(partition *proto.MetaPartitionInfo) string { - return fmt.Sprintf(partitionInfoTablePattern, - partition.PartitionID, partition.VolName, partition.ReplicaNum, formatDataPartitionStatus(partition.Status), strings.Join(partition.Hosts, ", ")) + var sb = strings.Builder{} + sb.WriteString(fmt.Sprintf(partitionInfoTablePattern, + partition.PartitionID, partition.VolName, formatDataPartitionStatus(partition.Status), "Master", fmt.Sprintf("%v/%v", len(partition.Hosts), partition.ReplicaNum), strings.Join(partition.Hosts, "; "))) + return sb.String() } func formatDataPartitionInfo(partition *proto.DataPartitionInfo) string { @@ -179,7 +199,7 @@ func formatDataPartitionInfo(partition *proto.DataPartitionInfo) string { sb.WriteString(fmt.Sprintf("Peers :\n")) sb.WriteString(fmt.Sprintf("%v\n", formatPeerTableHeader())) for _, peer := range partition.Peers { - sb.WriteString(fmt.Sprintf("%v\n", formatPeer( peer))) + sb.WriteString(fmt.Sprintf("%v\n", formatPeer(peer))) } sb.WriteString("\n") sb.WriteString(fmt.Sprintf("Hosts :\n")) @@ -223,7 +243,7 @@ func formatMetaPartitionInfo(partition *proto.MetaPartitionInfo) string { sb.WriteString("\n") 
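// [Editor's note] The table patterns above ("%-8v %-25v ...") rely on fmt's
// width flags: "-" left-aligns a field and the number is its minimum width,
// which is what keeps these plain-text tables in columns. A tiny standalone
// illustration (the row values are made up):
package main

import "fmt"

func main() {
	pattern := "%-8v %-25v %-10v\n" // same style as partitionInfoTablePattern
	fmt.Printf(pattern, "ID", "VOLUME", "STATUS")
	fmt.Printf(pattern, 17, "ltptest", "ReadWrite")
}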
sb.WriteString(fmt.Sprintf("Peers :\n")) for _, peer := range partition.Peers { - sb.WriteString(fmt.Sprintf("%v\n", formatPeer( peer))) + sb.WriteString(fmt.Sprintf("%v\n", formatPeer(peer))) } sb.WriteString("\n") sb.WriteString(fmt.Sprintf("Hosts :\n")) @@ -245,7 +265,7 @@ func formatMetaPartitionInfo(partition *proto.MetaPartitionInfo) string { } var ( - metaPartitionTablePattern = "%-8v %-12v %-10v %-12v %-12v %-12v %-8v %-12v %-18v" + metaPartitionTablePattern = "%-8v %-12v %-12v %-12v %-12v %-12v %-10v %-20v %-18v" metaPartitionTableHeader = fmt.Sprintf(metaPartitionTablePattern, "ID", "MAX INODE", "DENTRY COUNT", "INODE COUNT", "START", "END", "STATUS", "LEADER", "MEMBERS") ) @@ -361,11 +381,11 @@ func formatTime(timeUnix int64) string { return time.Unix(timeUnix, 0).Format("2006-01-02 15:04:05") } -func formatTimeToString(t time.Time) string{ +func formatTimeToString(t time.Time) string { return t.Format("2006-01-02 15:04:05") } -var dataReplicaTableRowPattern = "%-18v %-6v %-6v %-6v %-6v %-6v %-10v" +var dataReplicaTableRowPattern = "%-20v %-8v %-8v %-8v %-12v %-10v %-12v" func formatDataReplicaTableHeader() string { return fmt.Sprintf(dataReplicaTableRowPattern, "ADDRESS", "USED", "TOTAL", "ISLEADER", "FILECOUNT", "STATUS", "REPORT TIME") @@ -374,7 +394,7 @@ func formatDataReplicaTableHeader() string { func formatDataReplica(indentation string, replica *proto.DataReplica, rowTable bool) string { if rowTable { return fmt.Sprintf(dataReplicaTableRowPattern, replica.Addr, formatSize(replica.Used), formatSize(replica.Total), - replica.IsLeader, replica.FileCount, formatDataPartitionStatus(replica.Status), formatTime(replica.ReportTime)) + replica.IsLeader, replica.FileCount, formatDataPartitionStatus(replica.Status), formatTime(replica.ReportTime)) } var sb = strings.Builder{} sb.WriteString(fmt.Sprintf("%v- Addr : %v\n", indentation, replica.Addr)) @@ -390,7 +410,7 @@ func formatDataReplica(indentation string, replica *proto.DataReplica, rowTable return sb.String() } -var metaReplicaTableRowPattern = "%-18v %-6v %-6v %-10v" +var metaReplicaTableRowPattern = "%-20v %-8v %-10v %-12v" func formatMetaReplicaTableHeader() string { return fmt.Sprintf(metaReplicaTableRowPattern, "ADDRESS", "ISLEADER", "STATUS", "REPORT TIME") @@ -399,7 +419,7 @@ func formatMetaReplicaTableHeader() string { func formatMetaReplica(indentation string, replica *proto.MetaReplicaInfo, rowTable bool) string { if rowTable { return fmt.Sprintf(metaReplicaTableRowPattern, replica.Addr, replica.IsLeader, formatMetaPartitionStatus(replica.Status), - formatTime(replica.ReportTime)) + formatTime(replica.ReportTime)) } var sb = strings.Builder{} sb.WriteString(fmt.Sprintf("%v- Addr : %v\n", indentation, replica.Addr)) @@ -409,8 +429,6 @@ func formatMetaReplica(indentation string, replica *proto.MetaReplicaInfo, rowTa return sb.String() } - - var peerTableRowPattern = "%-6v %-18v" func formatPeerTableHeader() string { @@ -420,7 +438,6 @@ func formatPeer(peer proto.Peer) string { return fmt.Sprintf(peerTableRowPattern, peer.ID, peer.Addr) } - var dataNodeDetailTableRowPattern = "%-6v %-6v %-18v %-6v %-6v %-6v %-10v" func formatDataNodeDetailTableHeader() string { @@ -475,24 +492,37 @@ func formatMetaNodeDetail(mn *proto.MetaNodeInfo, rowTable bool) string { return sb.String() } -func formatZoneView(zv *proto.ZoneView) string { - var sb = strings.Builder{} - sb.WriteString(fmt.Sprintf("Zone Name: %v\n", zv.Name)) - sb.WriteString(fmt.Sprintf("Status: %v\n", zv.Status)) - sb.WriteString(fmt.Sprintf("\n")) - for index, ns 
:= range zv.NodeSet { - sb.WriteString(fmt.Sprintf("NodeSet-%v:\n", index)) - sb.WriteString(fmt.Sprintf(" DataNodes[%v]:\n", ns.DataNodeLen)) - sb.WriteString(fmt.Sprintf(" %v\n", formatNodeViewTableHeader())) - for _, nv := range ns.DataNodes{ - sb.WriteString(fmt.Sprintf(" %v\n", formatNodeView(&nv, true))) +func contains(arr []string, element string) (ok bool) { + if arr == nil || len(arr) == 0 { + return + } + + for _, e := range arr { + if e == element { + ok = true + break } - sb.WriteString(fmt.Sprintf("\n")) - sb.WriteString(fmt.Sprintf(" MetaNodes[%v]:\n", ns.MetaNodeLen)) - sb.WriteString(fmt.Sprintf(" %v\n", formatNodeViewTableHeader())) - for _, nv := range ns.MetaNodes{ - sb.WriteString(fmt.Sprintf(" %v\n", formatNodeView(&nv, true))) + } + return +} +func convertPeersToArray(peers []*proto.Peer) (addrs []string) { + addrs = make([]string, 0) + for _, peer := range peers { + addrs = append(addrs, peer.Addr) + } + return +} + +func isEqualStrings(strs1, strs2 []string) bool { + sort.Strings(strs1) + sort.Strings(strs2) + if len(strs1) != len(strs2) { + return false + } + for i, s := range strs1 { + if strs2[i] != s { + return false } } - return sb.String() + return true } diff --git a/cli/cmd/http_service.go b/cli/cmd/http_service.go deleted file mode 100644 index 3748ff9432..0000000000 --- a/cli/cmd/http_service.go +++ /dev/null @@ -1,50 +0,0 @@ -package cmd - -import ( - "github.com/chubaofs/chubaofs/sdk/master" - "github.com/chubaofs/chubaofs/proto" -) - -type clientHandler interface { - excuteHttp() (err error) -} - -type volumeClient struct { - name string - capacity uint64 - opCode MasterOp - client *master.MasterClient -} - -func NewVolumeClient(opCode MasterOp, client *master.MasterClient) (vol *volumeClient){ - vol = new(volumeClient) - vol.opCode = opCode - vol.client = client - return -} - -func (vol *volumeClient) excuteHttp() (err error) { - switch vol.opCode { - case OpExpandVol: - var vv *proto.SimpleVolView - if vv, err = vol.client.AdminAPI().GetVolumeSimpleInfo(vol.name); err != nil { - return - } - if err = vol.client.AdminAPI().VolExpand(vol.name, vol.capacity, calcAuthKey(vv.Owner)); err != nil { - return - } - case OpShrinkVol: - var vv *proto.SimpleVolView - if vv, err = vol.client.AdminAPI().GetVolumeSimpleInfo(vol.name); err != nil { - return - } - if err = vol.client.AdminAPI().VolShrink(vol.name, vol.capacity, calcAuthKey(vv.Owner)); err != nil { - return - } - case OpDeleteVol: - default: - - } - - return -} diff --git a/cli/cmd/metanode.go b/cli/cmd/metanode.go index 4459c15255..6e7f0e5bf5 100644 --- a/cli/cmd/metanode.go +++ b/cli/cmd/metanode.go @@ -15,6 +15,7 @@ package cmd import ( + "os" "sort" "strings" @@ -58,7 +59,8 @@ func newMetaNodeListCmd(client *master.MasterClient) *cobra.Command { var err error defer func() { if err != nil { - errout("Error: %v", err) + errout("List cluster meta nodes failed: %v\n", err) + os.Exit(1) } }() var view *proto.ClusterView @@ -99,7 +101,8 @@ func newMetaNodeInfoCmd(client *master.MasterClient) *cobra.Command { var metanodeInfo *proto.MetaNodeInfo defer func() { if err != nil { - errout("Error: %v", err) + errout("Show meta node info failed: %v\n", err) + os.Exit(1) } }() nodeAddr = args[0] @@ -129,7 +132,8 @@ func newMetaNodeDecommissionCmd(client *master.MasterClient) *cobra.Command { var nodeAddr string defer func() { if err != nil { - errout("Error: %v", err) + errout("decommission meta node failed: %v\n", err) + os.Exit(1) } }() nodeAddr = args[0] diff --git a/cli/cmd/metapartition.go 
b/cli/cmd/metapartition.go index c735842102..aaf0b46c3b 100644 --- a/cli/cmd/metapartition.go +++ b/cli/cmd/metapartition.go @@ -21,6 +21,9 @@ import ( "github.com/spf13/cobra" "sort" "strconv" + "strings" + "sync" + "time" ) const ( @@ -44,12 +47,12 @@ func newMetaPartitionCmd(client *master.MasterClient) *cobra.Command { } const ( - cmdMetaPartitionGetShort = "Display detail information of a meta partition" - cmdCheckCorruptMetaPartitionShort = "Check out corrupt meta partitions" - cmdMetaPartitionDecommissionShort = "Decommission a replication of the meta partition to a new address" - cmdMetaPartitionReplicateShort = "Add a replication of the meta partition on a new address" - cmdMetaPartitionDeleteReplicaShort = "Delete a replication of the meta partition on a fixed address" - ) + cmdMetaPartitionGetShort = "Display detail information of a meta partition" + cmdCheckCorruptMetaPartitionShort = "Check out corrupt meta partitions" + cmdMetaPartitionDecommissionShort = "Decommission a replication of the meta partition to a new address" + cmdMetaPartitionReplicateShort = "Add a replication of the meta partition on a new address" + cmdMetaPartitionDeleteReplicaShort = "Delete a replication of the meta partition on a fixed address" +) func newMetaPartitionGetCmd(client *master.MasterClient) *cobra.Command { var cmd = &cobra.Command{ @@ -58,16 +61,10 @@ func newMetaPartitionGetCmd(client *master.MasterClient) *cobra.Command { Args: cobra.MinimumNArgs(1), Run: func(cmd *cobra.Command, args []string) { var ( - err error - partitionID uint64 partition *proto.MetaPartitionInfo ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() - if partitionID, err = strconv.ParseUint(args[0], 10, 64); err != nil { + partitionID, err := strconv.ParseUint(args[0], 10, 64) + if err != nil { return } if partition, err = client.ClientAPI().GetMetaPartition(partitionID); err != nil { @@ -80,6 +77,7 @@ func newMetaPartitionGetCmd(client *master.MasterClient) *cobra.Command { } func newListCorruptMetaPartitionCmd(client *master.MasterClient) *cobra.Command { + var optCheckAll bool var cmd = &cobra.Command{ Use: CliOpCheck, Short: cmdCheckCorruptMetaPartitionShort, @@ -91,18 +89,20 @@ the corrupt nodes, the few remaining replicas can not reach an agreement with on "reset" command will be released in next version.`, Run: func(cmd *cobra.Command, args []string) { var ( - diagnosis *proto.MetaPartitionDiagnosis - metaNodes []*proto.MetaNodeInfo - err error + diagnosis *proto.MetaPartitionDiagnosis + metaNodes []*proto.MetaNodeInfo + err error ) - defer func() { + if optCheckAll { + err = checkAllMetaPartitions(client) if err != nil { - errout("Error: %v", err) + stdout("%v\n", err) } - }() - if diagnosis, err = client.AdminAPI().DiagnoseMetaPartition(); err != nil { return } + if diagnosis, err = client.AdminAPI().DiagnoseMetaPartition(); err != nil { + stdout("%v\n", err) + } stdout("[Inactive Meta nodes]:\n") stdout("%v\n", formatMetaNodeDetailTableHeader()) sort.SliceStable(diagnosis.InactiveMetaNodes, func(i, j int) bool { @@ -129,65 +129,146 @@ the corrupt nodes, the few remaining replicas can not reach an agreement with on for _, pid := range diagnosis.CorruptMetaPartitionIDs { var partition *proto.MetaPartitionInfo if partition, err = client.ClientAPI().GetMetaPartition(pid); err != nil { - err = fmt.Errorf("Partition not found, err:[%v] ", err) + stdout("Partition not found, err:[%v]", err) return } stdout("%v\n", formatMetaPartitionInfoRow(partition)) } stdout("\n") - stdout("%v\n", "[Meta 
partition lack replicas]:") + stdout("%v\n", "[Partition lack replicas]:") stdout("%v\n", partitionInfoTableHeader) sort.SliceStable(diagnosis.LackReplicaMetaPartitionIDs, func(i, j int) bool { return diagnosis.LackReplicaMetaPartitionIDs[i] < diagnosis.LackReplicaMetaPartitionIDs[j] }) for _, pid := range diagnosis.LackReplicaMetaPartitionIDs { var partition *proto.MetaPartitionInfo - if partition, err = client.ClientAPI().GetMetaPartition( pid); err != nil { - err = fmt.Errorf("Partition not found, err:[%v] ", err) + if partition, err = client.ClientAPI().GetMetaPartition(pid); err != nil { + stdout("Partition not found, err:[%v]", err) return } if partition != nil { stdout("%v\n", formatMetaPartitionInfoRow(partition)) - } - } - - stdout("\n") - stdout("%v\n", "[Bad meta partitions(decommission not completed)]:") - badPartitionTablePattern := "%-8v %-10v\n" - stdout(badPartitionTablePattern, "PATH", "PARTITION ID") - for _, bmpv := range diagnosis.BadMetaPartitionIDs { - sort.SliceStable(bmpv.PartitionIDs, func(i, j int) bool { - return bmpv.PartitionIDs[i] < bmpv.PartitionIDs[j] - }) - for _, pid := range bmpv.PartitionIDs { - stdout(badPartitionTablePattern, bmpv.Path, pid) + sort.Strings(partition.Hosts) + for _, r := range partition.Replicas { + var mnPartition *proto.MNMetaPartitionInfo + var err error + addr := strings.Split(r.Addr, ":")[0] + if mnPartition, err = client.NodeAPI().MetaNodeGetPartition(addr, partition.PartitionID); err != nil { + fmt.Printf(partitionInfoColorTablePattern+"\n", + "", "", "", r.Addr, fmt.Sprintf("%v/%v", 0, partition.ReplicaNum), "no data") + continue + } + mnHosts := make([]string, 0) + for _, peer := range mnPartition.Peers { + mnHosts = append(mnHosts, peer.Addr) + } + sort.Strings(mnHosts) + fmt.Printf(partitionInfoColorTablePattern+"\n", + "", "", "", r.Addr, fmt.Sprintf("%v/%v", len(mnPartition.Peers), partition.ReplicaNum), strings.Join(mnHosts, "; ")) + } + fmt.Printf("\033[1;40;32m%-8v\033[0m", strings.Repeat("_ ", len(partitionInfoTableHeader)/2+5)+"\n") } } return }, } + cmd.Flags().BoolVar(&optCheckAll, "all", false, "true - check all partitions; false - only check partitions which lack of replica") return cmd } +func checkAllMetaPartitions(client *master.MasterClient) (err error) { + var volInfo []*proto.VolInfo + if volInfo, err = client.AdminAPI().ListVols(""); err != nil { + stdout("%v\n", err) + return + } + stdout("\n") + stdout("%v\n", "[Partition peer info not valid]:") + stdout("%v\n", partitionInfoTableHeader) + for _, vol := range volInfo { + var volView *proto.VolView + if volView, err = client.ClientAPI().GetVolume(vol.Name, calcAuthKey(vol.Owner)); err != nil { + stdout("Found an invalid vol: %v\n", vol.Name) + continue + } + sort.SliceStable(volView.MetaPartitions, func(i, j int) bool { + return volView.MetaPartitions[i].PartitionID < volView.MetaPartitions[j].PartitionID + }) + var wg sync.WaitGroup + for _, mp := range volView.MetaPartitions { + wg.Add(1) + go func(mp *proto.MetaPartitionView) { + defer wg.Done() + var outPut string + var isHealthy bool + outPut, isHealthy, _ = checkMetaPartition(mp.PartitionID, client) + if !isHealthy { + fmt.Printf(outPut) + stdoutGreen(strings.Repeat("_ ", len(partitionInfoTableHeader)/2+20) + "\n") + } + time.Sleep(time.Millisecond * 10) + }(mp) + } + wg.Wait() + } + return +} +func checkMetaPartition(pid uint64, client *master.MasterClient) (outPut string, isHealthy bool, err error) { + var partition *proto.MetaPartitionInfo + var sb = strings.Builder{} + isHealthy = true + if 
partition, err = client.ClientAPI().GetMetaPartition(pid); err != nil { + sb.WriteString(fmt.Sprintf("Partition is not found, err:[%v]", err)) + return + } + if partition != nil { + sb.WriteString(fmt.Sprintf("%v\n", formatMetaPartitionInfoRow(partition))) + sort.Strings(partition.Hosts) + if len(partition.MissNodes) > 0 || partition.Status == -1 || len(partition.Hosts) != int(partition.ReplicaNum) { + errMsg := fmt.Sprintf("The partition is unhealthy according to the report message from master") + sb.WriteString(fmt.Sprintf("\033[1;40;31m%-8v\033[0m\n", errMsg)) + isHealthy = false + } + for _, r := range partition.Replicas { + var mnPartition *proto.MNMetaPartitionInfo + var err error + addr := strings.Split(r.Addr, ":")[0] + if mnPartition, err = client.NodeAPI().MetaNodeGetPartition(addr, partition.PartitionID); err != nil { + sb.WriteString(fmt.Sprintf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v", r.Addr), fmt.Sprintf("%v/%v", "nil", partition.ReplicaNum), fmt.Sprintf("get partition info failed, err:%v", err))) + isHealthy = false + continue + } + peerStrings := convertPeersToArray(mnPartition.Peers) + sort.Strings(peerStrings) + sb.WriteString(fmt.Sprintf(partitionInfoColorTablePattern+"\n", + "", "", "", fmt.Sprintf("%v(peers)", r.Addr), fmt.Sprintf("%v/%v", len(peerStrings), partition.ReplicaNum), strings.Join(peerStrings, "; "))) + if !isEqualStrings(partition.Hosts, peerStrings) { + isHealthy = false + } + if len(peerStrings) != int(partition.ReplicaNum) { + isHealthy = false + } + } + } + outPut = sb.String() + return +} func newMetaPartitionDecommissionCmd(client *master.MasterClient) *cobra.Command { var cmd = &cobra.Command{ Use: CliOpDecommission + " [ADDRESS] [META PARTITION ID]", Short: cmdMetaPartitionDecommissionShort, Args: cobra.MinimumNArgs(2), Run: func(cmd *cobra.Command, args []string) { - var ( - err error - partitionID uint64 - ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() address := args[0] - partitionID, err = strconv.ParseUint(args[1], 10, 64) + partitionID, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + stdout("%v\n", err) + return + } if err = client.AdminAPI().DecommissionMetaPartition(partitionID, address); err != nil { + stdout("%v\n", err) return } }, @@ -207,18 +288,14 @@ func newMetaPartitionReplicateCmd(client *master.MasterClient) *cobra.Command { Short: cmdMetaPartitionReplicateShort, Args: cobra.MinimumNArgs(2), Run: func(cmd *cobra.Command, args []string) { - var ( - err error - partitionID uint64 - ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() address := args[0] - partitionID, err = strconv.ParseUint(args[1], 10, 64) + partitionID, err := strconv.ParseUint(args[1], 10, 64) + if err != nil { + stdout("%v\n", err) + return + } if err = client.AdminAPI().AddMetaReplica(partitionID, address); err != nil { + stdout("%v\n", err) return } }, @@ -238,21 +315,14 @@ func newMetaPartitionDeleteReplicaCmd(client *master.MasterClient) *cobra.Comman Short: cmdMetaPartitionDeleteReplicaShort, Args: cobra.MinimumNArgs(2), Run: func(cmd *cobra.Command, args []string) { - var ( - err error - partitionID uint64 - ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() address := args[0] - partitionID, err = strconv.ParseUint(args[1], 10, 64) + partitionID, err := strconv.ParseUint(args[1], 10, 64) if err != nil { + stdout("%v\n", err) return } if err = client.AdminAPI().DeleteMetaReplica(partitionID, address); err != nil { + stdout("%v\n", err) return } 
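// [Editor's note] checkAllDataPartitions and checkAllMetaPartitions above
// start one goroutine per partition and throttle only with a short sleep. A
// common alternative is a buffered-channel semaphore that caps the number of
// in-flight checks; a sketch under that assumption (the limit of 10 and the
// checkOne callback are illustrative, not part of this patch):
package main

import "sync"

func checkAllBounded(pids []uint64, checkOne func(uint64)) {
	var wg sync.WaitGroup
	sem := make(chan struct{}, 10) // at most 10 checks run concurrently
	for _, pid := range pids {
		wg.Add(1)
		sem <- struct{}{} // acquire a slot before spawning
		go func(pid uint64) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot when the check ends
			checkOne(pid)
		}(pid)
	}
	wg.Wait()
}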
}, diff --git a/cli/cmd/root.go b/cli/cmd/root.go index 17332e848c..10adb35b02 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -16,11 +16,9 @@ package cmd import ( "fmt" - "github.com/chubaofs/chubaofs/util/log" "os" "path" - - "github.com/chubaofs/chubaofs/proto" + "strings" "github.com/chubaofs/chubaofs/sdk/master" "github.com/spf13/cobra" @@ -35,23 +33,14 @@ type ChubaoFSCmd struct { } func NewRootCmd(client *master.MasterClient) *ChubaoFSCmd { - var optShowVersion bool var cmd = &ChubaoFSCmd{ CFSCmd: &cobra.Command{ Use: path.Base(os.Args[0]), Short: cmdRootShort, Args: cobra.MinimumNArgs(0), - Run: func(cmd *cobra.Command, args []string) { - if optShowVersion { - stdout(proto.DumpVersion("CLI")) - return - } - }, }, } - cmd.CFSCmd.Flags().BoolVarP(&optShowVersion, "version", "v", false, "Show version information") - cmd.CFSCmd.AddCommand( cmd.newClusterCmd(client), newVolCmd(client), @@ -62,7 +51,6 @@ func NewRootCmd(client *master.MasterClient) *ChubaoFSCmd { newMetaPartitionCmd(client), newConfigCmd(), newCompatibilityCmd(), - newZoneCmd(client), ) return cmd } @@ -71,13 +59,15 @@ func stdout(format string, a ...interface{}) { _, _ = fmt.Fprintf(os.Stdout, format, a...) } -func errout(format string, a ...interface{}) { - log.LogErrorf(format + "\n", a...) - _, _ = fmt.Fprintf(os.Stderr, format, a...) - OsExitWithLogFlush() +func stdoutGreen(str string) { + fmt.Printf("\033[1;40;32m%-8v\033[0m\n", str) } -func OsExitWithLogFlush() { - log.LogFlush() - os.Exit(1) +func stdoutRed(str string) { + fmt.Printf("\033[1;40;31m%-8v\033[0m\n", str) + stdoutGreen(strings.Repeat("_ ", len(partitionInfoTableHeader)/2+10) + "\n") +} + +func errout(format string, a ...interface{}) { + _, _ = fmt.Fprintf(os.Stderr, format, a...) } diff --git a/cli/cmd/user.go b/cli/cmd/user.go index 10d4b4d7ac..eaa26a40e6 100644 --- a/cli/cmd/user.go +++ b/cli/cmd/user.go @@ -16,6 +16,7 @@ package cmd import ( "fmt" + "os" "strings" "github.com/chubaofs/chubaofs/proto" @@ -67,14 +68,10 @@ func newUserCreateCmd(client *master.MasterClient) *cobra.Command { var accessKey = optAccessKey var secretKey = optSecretKey var userType = proto.UserTypeFromString(optUserType) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() + if !userType.Valid() { - err = fmt.Errorf("Invalid user type. 
") - return + errout("Invalid user type.") + os.Exit(1) } // ask user for confirm @@ -101,7 +98,7 @@ func newUserCreateCmd(client *master.MasterClient) *cobra.Command { var userConfirm string _, _ = fmt.Scanln(&userConfirm) if userConfirm != "yes" && len(userConfirm) != 0 { - err = fmt.Errorf("Abort by user.\n") + stdout("Abort by user.\n") return } } @@ -115,8 +112,8 @@ func newUserCreateCmd(client *master.MasterClient) *cobra.Command { } var userInfo *proto.UserInfo if userInfo, err = client.UserAPI().CreateUser(¶m); err != nil { - err = fmt.Errorf("Create user failed: %v\n", err) - return + errout("Create user failed: %v\n", err) + os.Exit(1) } // display operation result @@ -153,16 +150,11 @@ func newUserUpdateCmd(client *master.MasterClient) *cobra.Command { var accessKey = optAccessKey var secretKey = optSecretKey var userType proto.UserType - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() if optUserType != "" { userType = proto.UserTypeFromString(optUserType) if !userType.Valid() { - err = fmt.Errorf("Invalid user type ") - return + errout("Invalid user type.\n") + os.Exit(1) } } @@ -188,12 +180,14 @@ func newUserUpdateCmd(client *master.MasterClient) *cobra.Command { var userConfirm string _, _ = fmt.Scanln(&userConfirm) if userConfirm != "yes" && len(userConfirm) != 0 { - err = fmt.Errorf("Abort by user.\n") + stdout("Abort by user.\n") + os.Exit(1) return } } if accessKey == "" && secretKey == "" && optUserType == "" { - err = fmt.Errorf("no update") + stdout("No update.\n") + os.Exit(1) return } var param = proto.UserUpdateParam{ @@ -204,7 +198,8 @@ func newUserUpdateCmd(client *master.MasterClient) *cobra.Command { } var userInfo *proto.UserInfo if userInfo, err = client.UserAPI().UpdateUser(¶m); err != nil { - return + errout("Update user failed: %v\n", err) + os.Exit(1) } stdout("Update user success:\n") @@ -234,24 +229,21 @@ func newUserDeleteCmd(client *master.MasterClient) *cobra.Command { Run: func(cmd *cobra.Command, args []string) { var err error var userID = args[0] - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() + if !optYes { stdout("Delete user [%v] (yes/no)[no]:", userID) var userConfirm string _, _ = fmt.Scanln(&userConfirm) if userConfirm != "yes" { - err = fmt.Errorf("Abort by user.\n") + stdout("Abort by user.\n") + os.Exit(1) return } } if err = client.UserAPI().DeleteUser(userID); err != nil { - err = fmt.Errorf("Delete user failed:\n%v\n", err) - return + errout("Delete user failed:\n%v\n", err) + os.Exit(1) } stdout("Delete user success.\n") return @@ -282,14 +274,9 @@ func newUserInfoCmd(client *master.MasterClient) *cobra.Command { var err error var userID = args[0] var userInfo *proto.UserInfo - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() if userInfo, err = client.UserAPI().GetUserInfo(userID); err != nil { - err = fmt.Errorf("Get user info failed: %v\n", err) - return + errout("Get user info failed: %v\n", err) + os.Exit(1) } printUserInfo(userInfo) }, @@ -310,42 +297,28 @@ const ( ) func newUserPermCmd(client *master.MasterClient) *cobra.Command { - var subdir string var cmd = &cobra.Command{ Use: cmdUserPermUse, Short: cmdUserPermShort, Args: cobra.MinimumNArgs(3), Run: func(cmd *cobra.Command, args []string) { - var err error var userID = args[0] var volume = args[1] var perm proto.Permission - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() - - perm = proto.BuiltinPermissionPrefix - if subdir != "" && subdir != "/" { - perm = proto.Permission(string(perm) 
+ subdir + ":") - } - switch strings.ToLower(args[2]) { case "ro", "readonly": - perm = perm + "ReadOnly" + perm = proto.BuiltinPermissionReadOnly case "rw", "readwrite": - perm = perm + "Writable" + perm = proto.BuiltinPermissionWritable case "none": perm = proto.NonePermission default: - err = fmt.Errorf("Permission must be on of ro, rw, none ") + stdout("Permission must be on of ro, rw, none") return } stdout("Setup volume permission\n") stdout(" User ID : %v\n", userID) stdout(" Volume : %v\n", volume) - stdout(" Subdir : %v\n", subdir) stdout(" Permission: %v\n", perm.ReadableString()) // ask user for confirm @@ -353,9 +326,16 @@ func newUserPermCmd(client *master.MasterClient) *cobra.Command { var userConfirm string _, _ = fmt.Scanln(&userConfirm) if userConfirm != "yes" && len(userConfirm) != 0 { - err = fmt.Errorf("Abort by user.\n") + stdout("Abort by user.\n") return } + var err error + defer func() { + if err != nil { + errout("Setup permission failed:\n%v\n", err) + os.Exit(1) + } + }() var userInfo *proto.UserInfo if userInfo, err = client.UserAPI().GetUserInfo(userID); err != nil { return @@ -383,7 +363,6 @@ func newUserPermCmd(client *master.MasterClient) *cobra.Command { return validUsers(client, toComplete), cobra.ShellCompDirectiveNoFileComp }, } - cmd.Flags().StringVar(&subdir, "subdir", "", "Subdir") return cmd } @@ -402,7 +381,8 @@ func newUserListCmd(client *master.MasterClient) *cobra.Command { var err error defer func() { if err != nil { - errout("Error: %v", err) + errout("List cluster user failed: %v\n", err) + os.Exit(1) } }() if users, err = client.UserAPI().ListUsers(optKeyword); err != nil { @@ -437,3 +417,18 @@ func printUserInfo(userInfo *proto.UserInfo) { stdout("%-20v %-12v\n", vol, strings.Join(perms, ",")) } } + +func validUsers(client *master.MasterClient, toComplete string) []string { + var ( + validUsers []string + users []*proto.UserInfo + err error + ) + if users, err = client.UserAPI().ListUsers(toComplete); err != nil { + errout("Get user list failed:\n%v\n", err) + } + for _, user := range users { + validUsers = append(validUsers, user.UserID) + } + return validUsers +} diff --git a/cli/cmd/valid.go b/cli/cmd/valid.go index 1878000605..b8f5ed46ea 100644 --- a/cli/cmd/valid.go +++ b/cli/cmd/valid.go @@ -16,19 +16,17 @@ package cmd import ( "github.com/chubaofs/chubaofs/proto" - sdk "github.com/chubaofs/chubaofs/sdk/master" + "github.com/chubaofs/chubaofs/sdk/master" ) -func validVols(client, complete interface{}) []string { +func validVols(client *master.MasterClient, toComplete string) []string { var ( validVols []string vols []*proto.VolInfo err error ) - clientSdk := client.(*sdk.MasterClient) - completeStr := complete.(string) - if vols, err = clientSdk.AdminAPI().ListVols(completeStr); err != nil { - errout("Error: %v", err) + if vols, err = client.AdminAPI().ListVols(toComplete); err != nil { + errout("Get volume list failed:\n%v\n", err) } for _, vol := range vols { validVols = append(validVols, vol.Name) @@ -36,7 +34,7 @@ func validVols(client, complete interface{}) []string { return validVols } -func validDataNodes(client *sdk.MasterClient, toComplete string) []string { +func validDataNodes(client *master.MasterClient, toComplete string) []string { var ( validDataNodes []string clusterView *proto.ClusterView @@ -44,7 +42,7 @@ func validDataNodes(client *sdk.MasterClient, toComplete string) []string { err error ) if clusterView, err = client.AdminAPI().GetCluster(); err != nil { - errout("Error: %v", err) + errout("Get data node list 
failed:\n%v\n", err) } for _, dn := range clusterView.DataNodes { validDataNodes = append(validDataNodes, dn.Addr) @@ -52,47 +50,18 @@ func validDataNodes(client *sdk.MasterClient, toComplete string) []string { return validDataNodes } -func validMetaNodes(client *sdk.MasterClient, toComplete string) []string { +func validMetaNodes(client *master.MasterClient, toComplete string) []string { var ( validMetaNodes []string clusterView *proto.ClusterView + err error ) if clusterView, err = client.AdminAPI().GetCluster(); err != nil { - errout("Error: %v", err) + errout("Get meta node list failed:\n%v\n", err) } for _, mn := range clusterView.MetaNodes { validMetaNodes = append(validMetaNodes, mn.Addr) } return validMetaNodes } - -func validUsers(client *sdk.MasterClient, toComplete string) []string { - var ( - validUsers []string - users []*proto.UserInfo - err error - ) - if users, err = client.UserAPI().ListUsers(toComplete); err != nil { - errout("Error: %v", err) - } - for _, user := range users { - validUsers = append(validUsers, user.UserID) - } - return validUsers -} - -func validZones(client *sdk.MasterClient, toComplete string) []string { - var ( - validZones []string - zones []*proto.ZoneView - err error - ) - if zones, err = client.AdminAPI().ListZones(); err != nil { - errout("Error: %v", err) - } - for _, zone := range zones { - validZones = append(validZones, zone.Name) - } - return validZones -} diff --git a/cli/cmd/vol.go b/cli/cmd/vol.go index 446c7cb3f8..ec2487cbc9 100644 --- a/cli/cmd/vol.go +++ b/cli/cmd/vol.go @@ -18,10 +18,13 @@ import ( "crypto/md5" "encoding/hex" "fmt" + "os" "sort" "strconv" "strings" + "github.com/chubaofs/chubaofs/util/errors" + "github.com/chubaofs/chubaofs/proto" "github.com/chubaofs/chubaofs/sdk/master" "github.com/spf13/cobra" @@ -42,13 +45,11 @@ func newVolCmd(client *master.MasterClient) *cobra.Command { cmd.AddCommand( newVolListCmd(client), newVolCreateCmd(client), - newVolExpandCmd(client), - newVolShrinkCmd(client), - newVolSetCmd(client), newVolInfoCmd(client), newVolDeleteCmd(client), newVolTransferCmd(client), newVolAddDPCmd(client), + newVolSetCmd(client), ) return cmd } @@ -59,6 +60,7 @@ const ( func newVolListCmd(client *master.MasterClient) *cobra.Command { var optKeyword string + var optDetailMod bool var cmd = &cobra.Command{ Use: CliOpList, Short: cmdVolListShort, @@ -68,19 +70,34 @@ func newVolListCmd(client *master.MasterClient) *cobra.Command { var err error defer func() { if err != nil { - errout("Error: %v", err) + errout("List cluster volume failed:\n%v\n", err) + os.Exit(1) } }() if vols, err = client.AdminAPI().ListVols(optKeyword); err != nil { return } - stdout("%v\n", volumeInfoTableHeader) + if optDetailMod { + stdout("%v\n", volumeDetailInfoTableHeader) + } else { + stdout("%v\n", volumeInfoTableHeader) + } for _, vol := range vols { - stdout("%v\n", formatVolInfoTableRow(vol)) + var vv *proto.SimpleVolView + if vv, err = client.AdminAPI().GetVolumeSimpleInfo(vol.Name); err != nil { + return + } + if optDetailMod { + stdout("%v\n", formatVolDetailInfoTableRow(vv, vol)) + } else { + stdout("%v\n", formatVolInfoTableRow(vol)) + } } }, } + cmd.Flags().BoolVarP(&optDetailMod, "detail-mod", "d", false, "list the volumes with empty zone name") cmd.Flags().StringVar(&optKeyword, "keyword", "", "Specify keyword of volume name to filter") + return cmd } @@ -92,7 +109,7 @@ const ( cmdVolDefaultCapacity = 10 // 100GB cmdVolDefaultReplicas = 3 cmdVolDefaultFollowerReader = true - cmdVolDefaultZoneName = "default" + 
cmdVolDefaultZoneName = "default" ) func newVolCreateCmd(client *master.MasterClient) *cobra.Command { @@ -101,6 +118,7 @@ func newVolCreateCmd(client *master.MasterClient) *cobra.Command { var optCapacity uint64 var optReplicas int var optFollowerRead bool + var optAutoRepair bool var optYes bool var optZoneName string var cmd = &cobra.Command{ @@ -111,11 +129,7 @@ func newVolCreateCmd(client *master.MasterClient) *cobra.Command { var err error var volumeName = args[0] var userID = args[1] - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() + // ask user for confirm if !optYes { stdout("Create a new volume:\n") @@ -126,22 +140,22 @@ func newVolCreateCmd(client *master.MasterClient) *cobra.Command { stdout(" Capacity : %v GB\n", optCapacity) stdout(" Replicas : %v\n", optReplicas) stdout(" Allow follower read : %v\n", formatEnabledDisabled(optFollowerRead)) + stdout(" Auto repair : %v\n", formatEnabledDisabled(optAutoRepair)) + stdout(" ZoneName : %v\n", optZoneName) stdout("\nConfirm (yes/no)[yes]: ") var userConfirm string _, _ = fmt.Scanln(&userConfirm) if userConfirm != "yes" && len(userConfirm) != 0 { - err = fmt.Errorf("Abort by user.\n") + stdout("Abort by user.\n") return } } - err = client.AdminAPI().CreateVolume( - volumeName, userID, optMPCount, optDPSize, - optCapacity, optReplicas, optFollowerRead, optZoneName) + err = client.AdminAPI().CreateVolume(volumeName, userID, optMPCount, optDPSize, optCapacity, optReplicas, optFollowerRead, optAutoRepair, optZoneName) if err != nil { - err = fmt.Errorf("Create volume failed case:\n%v\n", err) - return + errout("Create volume failed case:\n%v\n", err) + os.Exit(1) } stdout("Create volume success.\n") return @@ -152,23 +166,31 @@ func newVolCreateCmd(client *master.MasterClient) *cobra.Command { cmd.Flags().Uint64Var(&optCapacity, CliFlagCapacity, cmdVolDefaultCapacity, "Specify volume capacity [Unit: GB]") cmd.Flags().IntVar(&optReplicas, CliFlagReplicas, cmdVolDefaultReplicas, "Specify data partition replicas number") cmd.Flags().BoolVar(&optFollowerRead, CliFlagEnableFollowerRead, cmdVolDefaultFollowerReader, "Enable read form replica follower") + cmd.Flags().BoolVar(&optAutoRepair, CliFlagAutoRepair, false, "Enable auto balance partition distribution according to zoneName") cmd.Flags().StringVar(&optZoneName, CliFlagZoneName, cmdVolDefaultZoneName, "Specify volume zone name") cmd.Flags().BoolVarP(&optYes, "yes", "y", false, "Answer yes for all questions") return cmd } + const ( - cmdVolSetShort = "Set configuration of the volume" + cmdVolInfoUse = "info [VOLUME NAME]" + cmdVolInfoShort = "Show volume information" + cmdVolSetShort = "Set configuration of the volume" ) + func newVolSetCmd(client *master.MasterClient) *cobra.Command { - var optCapacity uint64 - var optReplicas int - var optFollowerRead string - var optAuthenticate string - var optEnableToken string - var optZoneName string - var optYes bool - var confirmString = strings.Builder{} - var vv *proto.SimpleVolView + var ( + optCapacity uint64 + optReplicas int + optFollowerRead string + optAuthenticate string + optEnableToken string + optAutoRepair string + optZoneName string + optYes bool + confirmString = strings.Builder{} + vv *proto.SimpleVolView + ) var cmd = &cobra.Command{ Use: CliOpSet + " [VOLUME NAME]", Short: cmdVolSetShort, @@ -190,7 +212,7 @@ func newVolSetCmd(client *master.MasterClient) *cobra.Command { if optCapacity > 0 { isChange = true confirmString.WriteString(fmt.Sprintf(" Capacity : %v GB -> %v GB\n", vv.Capacity, optCapacity)) - 
vv.Capacity = optCapacity + vv.Capacity = optCapacity } else { confirmString.WriteString(fmt.Sprintf(" Capacity : %v GB\n", vv.Capacity)) } @@ -235,16 +257,24 @@ func newVolSetCmd(client *master.MasterClient) *cobra.Command { } else { confirmString.WriteString(fmt.Sprintf(" EnableToken : %v\n", formatEnabledDisabled(vv.EnableToken))) } - if vv.CrossZone == false && "" != optZoneName { + if optAutoRepair != "" { + isChange = true + var enable bool + if enable, err = strconv.ParseBool(optAutoRepair); err != nil { + return + } + confirmString.WriteString(fmt.Sprintf(" AutoRepair : %v -> %v\n", formatEnabledDisabled(vv.AutoRepair), formatEnabledDisabled(enable))) + vv.AutoRepair = enable + } else { + confirmString.WriteString(fmt.Sprintf(" AutoRepair : %v\n", formatEnabledDisabled(vv.AutoRepair))) + } + if "" != optZoneName { isChange = true confirmString.WriteString(fmt.Sprintf(" ZoneName : %v -> %v\n", vv.ZoneName, optZoneName)) vv.ZoneName = optZoneName } else { confirmString.WriteString(fmt.Sprintf(" ZoneName : %v\n", vv.ZoneName)) } - if vv.CrossZone == true && "" != optZoneName { - err = fmt.Errorf("Can not set zone name of the volume that cross zone\n") - } if err != nil { return } @@ -264,7 +294,7 @@ func newVolSetCmd(client *master.MasterClient) *cobra.Command { } } err = client.AdminAPI().UpdateVolume(vv.Name, vv.Capacity, int(vv.DpReplicaNum), - vv.FollowerRead, vv.Authenticate, vv.EnableToken, calcAuthKey(vv.Owner), vv.ZoneName) + vv.FollowerRead, vv.Authenticate, vv.EnableToken, vv.AutoRepair, calcAuthKey(vv.Owner), vv.ZoneName) if err != nil { return } @@ -285,14 +315,10 @@ func newVolSetCmd(client *master.MasterClient) *cobra.Command { cmd.Flags().StringVar(&optEnableToken, CliFlagEnableToken, "", "ReadOnly/ReadWrite token validation for fuse client") cmd.Flags().StringVar(&optZoneName, CliFlagZoneName, "", "Specify volume zone name") cmd.Flags().BoolVarP(&optYes, "yes", "y", false, "Answer yes for all questions") + cmd.Flags().StringVar(&optAutoRepair, CliFlagAutoRepair, "", "Enable auto balance partition distribution according to zoneName") + return cmd } - -const ( - cmdVolInfoUse = "info [VOLUME NAME]" - cmdVolInfoShort = "Show volume information" -) - func newVolInfoCmd(client *master.MasterClient) *cobra.Command { var ( optMetaDetail bool @@ -307,14 +333,10 @@ func newVolInfoCmd(client *master.MasterClient) *cobra.Command { var err error var volumeName = args[0] var svv *proto.SimpleVolView - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() + if svv, err = client.AdminAPI().GetVolumeSimpleInfo(volumeName); err != nil { - err = fmt.Errorf("Get volume info failed:\n%v\n", err) - return + errout("Get volume info failed:\n%v\n", err) + os.Exit(1) } // print summary info stdout("Summary:\n%s\n", formatSimpleVolView(svv)) @@ -323,8 +345,8 @@ func newVolInfoCmd(client *master.MasterClient) *cobra.Command { if optMetaDetail { var views []*proto.MetaPartitionView if views, err = client.ClientAPI().GetMetaPartitions(volumeName); err != nil { - err = fmt.Errorf("Get volume metadata detail information failed:\n%v\n", err) - return + errout("Get volume metadata detail information failed:\n%v\n", err) + os.Exit(1) } stdout("Meta partitions:\n") stdout("%v\n", metaPartitionTableHeader) @@ -340,8 +362,8 @@ func newVolInfoCmd(client *master.MasterClient) *cobra.Command { if optDataDetail { var view *proto.DataPartitionsView if view, err = client.ClientAPI().GetDataPartitions(volumeName); err != nil { - err = fmt.Errorf("Get volume data detail information 
failed:\n%v\n", err) - return + errout("Get volume data detail information failed:\n%v\n", err) + os.Exit(1) } stdout("Data partitions:\n") stdout("%v\n", dataPartitionTableHeader) @@ -382,31 +404,26 @@ func newVolDeleteCmd(client *master.MasterClient) *cobra.Command { Run: func(cmd *cobra.Command, args []string) { var err error var volumeName = args[0] - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() // ask user for confirm if !optYes { stdout("Delete volume [%v] (yes/no)[no]:", volumeName) var userConfirm string _, _ = fmt.Scanln(&userConfirm) if userConfirm != "yes" { - err = fmt.Errorf("Abort by user.\n") + stdout("Abort by user.\n") return } } var svv *proto.SimpleVolView if svv, err = client.AdminAPI().GetVolumeSimpleInfo(volumeName); err != nil { - err = fmt.Errorf("Delete volume failed:\n%v\n", err) - return + errout("Delete volume failed:\n%v\n", err) + os.Exit(1) } if err = client.AdminAPI().DeleteVolume(volumeName, calcAuthKey(svv.Owner)); err != nil { - err = fmt.Errorf("Delete volume failed:\n%v\n", err) - return + errout("Delete volume failed:\n%v\n", err) + os.Exit(1) } stdout("Delete volume success.\n") }, @@ -441,7 +458,8 @@ func newVolTransferCmd(client *master.MasterClient) *cobra.Command { defer func() { if err != nil { - errout("Error: %v", err) + errout("Transfer volume [%v] to user [%v] failed: %v\n", volume, userID, err) + os.Exit(1) } }() @@ -451,7 +469,7 @@ func newVolTransferCmd(client *master.MasterClient) *cobra.Command { var confirm string _, _ = fmt.Scanln(&confirm) if confirm != "yes" { - err = fmt.Errorf("Abort by user.\n") + stdout("Abort by user.\n") return } } @@ -501,7 +519,8 @@ func newVolAddDPCmd(client *master.MasterClient) *cobra.Command { var err error defer func() { if err != nil { - errout("Error: %v", err) + errout("Create data partition failed: %v\n", err) + os.Exit(1) } }() var count int64 @@ -509,7 +528,7 @@ func newVolAddDPCmd(client *master.MasterClient) *cobra.Command { return } if count < 1 { - err = fmt.Errorf("number must be larger than 0") + err = errors.New("number must be larger than 0") return } if err = client.AdminAPI().CreateDataPartition(volume, int(count)); err != nil { @@ -527,56 +546,6 @@ func newVolAddDPCmd(client *master.MasterClient) *cobra.Command { return cmd } -const ( - cmdExpandVolCmdShort = "Expand capacity of a volume" - cmdShrinkVolCmdShort = "Shrink capacity of a volume" -) - -func newVolExpandCmd(client *master.MasterClient) *cobra.Command { - volClient := NewVolumeClient(OpExpandVol, client) - return newVolSetCapacityCmd(CliOpExpand, cmdExpandVolCmdShort, volClient) -} - -func newVolShrinkCmd(client *master.MasterClient) *cobra.Command { - volClient := NewVolumeClient(OpShrinkVol, client) - return newVolSetCapacityCmd(CliOpShrink, cmdShrinkVolCmdShort, volClient) -} - -func newVolSetCapacityCmd(use, short string, r clientHandler) *cobra.Command { - var cmd = &cobra.Command{ - Use: use + " [VOLUME] [CAPACITY]", - Short: short, - Args: cobra.MinimumNArgs(2), - Run: func(cmd *cobra.Command, args []string) { - var name = args[0] - var capacityStr = args[1] - var err error - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() - volume := r.(*volumeClient) - if volume.capacity, err = strconv.ParseUint(capacityStr, 10, 64); err != nil { - return - } - volume.name = name - if err = volume.excuteHttp(); err != nil { - return - } - return - }, - ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - if len(args) != 0 { 
- return nil, cobra.ShellCompDirectiveNoFileComp - } - volume := r.(*volumeClient) - return validVols(volume.client, toComplete), cobra.ShellCompDirectiveNoFileComp - }, - } - return cmd -} - func calcAuthKey(key string) (authKey string) { h := md5.New() _, _ = h.Write([]byte(key)) diff --git a/cli/cmd/zone.go b/cli/cmd/zone.go deleted file mode 100644 index d0258d6596..0000000000 --- a/cli/cmd/zone.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2018 The Chubao Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -// implied. See the License for the specific language governing -// permissions and limitations under the License. - -package cmd - -import ( - "fmt" - "github.com/chubaofs/chubaofs/proto" - sdk "github.com/chubaofs/chubaofs/sdk/master" - "github.com/spf13/cobra" -) - -const ( - cmdZoneUse = "zone [COMMAND]" - cmdZoneShort = "Manage zone" -) - -func newZoneCmd(client *sdk.MasterClient) *cobra.Command { - var cmd = &cobra.Command{ - Use: cmdZoneUse, - Short: cmdZoneShort, - Args: cobra.MinimumNArgs(0), - } - cmd.AddCommand( - newZoneListCmd(client), - newZoneInfoCmd(client), - ) - return cmd -} - -const ( - cmdZoneListShort = "List cluster zones" - cmdZoneInfoShort = "Show zone information" -) - -func newZoneListCmd(client *sdk.MasterClient) *cobra.Command { - var cmd = &cobra.Command{ - Use: CliOpList, - Short: cmdZoneListShort, - Aliases: []string{"ls"}, - Run: func(cmd *cobra.Command, args []string) { - var zones []*proto.ZoneView - var err error - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() - if zones, err = client.AdminAPI().ListZones(); err != nil { - return - } - zoneTablePattern := "%-8v %-10v\n" - stdout(zoneTablePattern, "ZONE", "STATUS") - for _, zone := range zones { - stdout(zoneTablePattern, zone.Name, zone.Status) - } - return - }, - } - return cmd -} - - -func newZoneInfoCmd(client *sdk.MasterClient) *cobra.Command { - var cmd = &cobra.Command{ - Use: CliOpInfo + " [NAME]", - Short: cmdZoneInfoShort, - Args: cobra.MinimumNArgs(1), - Run: func(cmd *cobra.Command, args []string) { - var topo *proto.TopologyView - var ( - err error - zoneName string - zoneView *proto.ZoneView - ) - defer func() { - if err != nil { - errout("Error: %v", err) - } - }() - zoneName = args[0] - if topo, err = client.AdminAPI().Topo(); err != nil { - return - } - - for _, zone := range topo.Zones { - if zoneName == zone.Name { - zoneView = zone - } - } - if zoneView == nil { - err = fmt.Errorf("Zone[%v] not exists in cluster\n ", zoneName) - return - } - stdout(formatZoneView(zoneView)) - return - }, - ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - if len(args) != 0 { - return nil, cobra.ShellCompDirectiveNoFileComp - } - return validZones(client, toComplete), cobra.ShellCompDirectiveNoFileComp - }, - } - return cmd -} diff --git a/datanode/data_partition_repair.go b/datanode/data_partition_repair.go index 39309a85d7..59aa633ac0 100644 --- a/datanode/data_partition_repair.go +++ b/datanode/data_partition_repair.go @@ -413,7 +413,7 @@ func (dp *DataPartition) notifyFollower(wg *sync.WaitGroup, 
index int, members [ if err = p.WriteToConn(conn); err != nil { return err } - if err = p.ReadFromConn(conn, proto.NoReadDeadlineTime); err != nil { + if err = p.ReadFromConn(conn, proto.MaxWaitFollowerRepairTime); err != nil { return err } return err diff --git a/datanode/disk.go b/datanode/disk.go index e092499bc4..aea77d1f84 100644 --- a/datanode/disk.go +++ b/datanode/disk.go @@ -26,10 +26,11 @@ import ( "syscall" "time" + "os" + "github.com/chubaofs/chubaofs/proto" "github.com/chubaofs/chubaofs/util/exporter" "github.com/chubaofs/chubaofs/util/log" - "os" ) var ( diff --git a/datanode/partition.go b/datanode/partition.go index 53f335b4aa..9d0c9d923a 100644 --- a/datanode/partition.go +++ b/datanode/partition.go @@ -794,6 +794,10 @@ func (dp *DataPartition) doStreamFixTinyDeleteRecord(repairTask *DataPartitionRe // ChangeRaftMember is a wrapper function of changing the raft member. func (dp *DataPartition) ChangeRaftMember(changeType raftProto.ConfChangeType, peer raftProto.Peer, context []byte) (resp interface{}, err error) { + log.LogErrorf("[DataPartition->ChangeRaftMember] [partitionID: %v] start [changeType: %v, peer: %v]", dp.partitionID, changeType, peer) + defer func() { + log.LogErrorf("[DataPartition->ChangeRaftMember] [partitionID: %v] finish [changeType: %v, peer: %v]", dp.partitionID, changeType, peer) + }() resp, err = dp.raftPartition.ChangeMember(changeType, peer, context) return } diff --git a/datanode/partition_raft.go b/datanode/partition_raft.go index 9198587347..1e7013d138 100644 --- a/datanode/partition_raft.go +++ b/datanode/partition_raft.go @@ -361,8 +361,12 @@ func (dp *DataPartition) removeRaftNode(req *proto.RemoveDataPartitionRaftMember } dp.config.Peers = append(dp.config.Peers[:peerIndex], dp.config.Peers[peerIndex+1:]...) 
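// The branch just below distinguishes two clean-up modes for a replica that
// removes itself: when the admin task sets ReserveResource, the partition is
// only dropped from the in-memory cache and its data stays on disk;
// otherwise the raft instance is expired and the data directory is renamed
// rather than removed (see SpaceManager.ExpiredPartition later in this
// diff), so the data remains recoverable until the expired directories are
// cleaned up. A sketch of the rename it performs, assuming
// ExpiredPartitionPrefix is "expired_" and the path, strconv, and time
// imports:

func expiredPath(current string) string {
	current = path.Clean(current)
	suffix := "_" + strconv.FormatInt(time.Now().Unix(), 10)
	return path.Join(path.Dir(current), "expired_"+path.Base(current)+suffix)
}

// expiredPath("/disk0/datapartition_1_128849018880")
//   -> "/disk0/expired_datapartition_1_128849018880_1600054521" (timestamp varies)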
if dp.config.NodeID == req.RemovePeer.ID && !dp.isLoadingDataPartition && canRemoveSelf { - dp.raftPartition.Delete() - dp.Disk().space.DeletePartition(dp.partitionID) + if req.ReserveResource { + dp.Disk().space.DeletePartitionFromCache(dp.partitionID) + } else { + dp.raftPartition.Expired() + dp.Disk().space.ExpiredPartition(dp.partitionID) + } isUpdated = false } log.LogInfof("Fininsh RemoveRaftNode PartitionID(%v) nodeID(%v) do RaftLog (%v) ", @@ -475,6 +479,7 @@ func (s *DataNode) startRaftServer(cfg *config.Config) (err error) { raftConf := &raftstore.Config{ NodeID: s.nodeID, RaftPath: s.raftDir, + TickInterval: s.tickInterval, IPAddr: LocalIP, HeartbeatPort: heartbeatPort, ReplicaPort: replicatePort, diff --git a/datanode/partition_raftfsm.go b/datanode/partition_raftfsm.go index c5483b40d6..cb6719846f 100644 --- a/datanode/partition_raftfsm.go +++ b/datanode/partition_raftfsm.go @@ -44,6 +44,8 @@ func (dp *DataPartition) ApplyMemberChange(confChange *raftproto.ConfChange, ind dp.uploadApplyID(index) }(index) + defer log.LogErrorf("[DataPartition->ApplyMemberChange] [partitionID: %v] finish apply [index: %v, changeType: %v, peer: %v]", + dp.partitionID, index, confChange.Type, confChange.Peer) // Change memory the status var ( isUpdated bool diff --git a/datanode/server.go b/datanode/server.go index 2a9e37f00f..fa43a95cd4 100644 --- a/datanode/server.go +++ b/datanode/server.go @@ -65,14 +65,15 @@ const ( ) const ( - ConfigKeyLocalIP = "localIP" // string - ConfigKeyPort = "port" // int - ConfigKeyMasterAddr = "masterAddr" // array - ConfigKeyZone = "zoneName" // string - ConfigKeyDisks = "disks" // array - ConfigKeyRaftDir = "raftDir" // string - ConfigKeyRaftHeartbeat = "raftHeartbeat" // string - ConfigKeyRaftReplica = "raftReplica" // string + ConfigKeyLocalIP = "localIP" // string + ConfigKeyPort = "port" // int + ConfigKeyMasterAddr = "masterAddr" // array + ConfigKeyZone = "zoneName" // string + ConfigKeyDisks = "disks" // array + ConfigKeyRaftDir = "raftDir" // string + ConfigKeyRaftHeartbeat = "raftHeartbeat" // string + ConfigKeyRaftReplica = "raftReplica" // string + cfgTickIntervalMs = "tickIntervalMs" // int ) // DataNode defines the structure of a data node. 
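// How the new tickIntervalMs setting flows end to end: the datanode JSON
// config (docker/conf/datanode*.json below) supplies "tickIntervalMs";
// parseConfig in the next hunk reads and sanity-checks it; startRaftServer
// above hands it to raftstore.Config as TickInterval. Condensed view of the
// added lines, side by side (not new API, just this PR's pieces together):

s.tickInterval = int(cfg.GetFloat(cfgTickIntervalMs)) // 0 when the key is absent
if s.tickInterval <= 300 {
	s.tickInterval = 500 // floor: avoid an overly aggressive raft tick
}
raftConf := &raftstore.Config{
	NodeID:       s.nodeID,
	RaftPath:     s.raftDir,
	TickInterval: s.tickInterval,
}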
@@ -88,6 +89,7 @@ type DataNode struct {
 	raftHeartbeat string
 	raftReplica   string
 	raftStore     raftstore.RaftStore
+	tickInterval  int
 	tcpListener net.Listener
 	stopC       chan bool

@@ -197,6 +199,12 @@ func (s *DataNode) parseConfig(cfg *config.Config) (err error) {
 		s.zoneName = DefaultZoneName
 	}

+	s.tickInterval = int(cfg.GetFloat(cfgTickIntervalMs))
+	if s.tickInterval <= 300 {
+		log.LogWarnf("config [%s]:[%v] is less than 300, using default 500", cfgTickIntervalMs, cfg.GetString(cfgTickIntervalMs))
+		s.tickInterval = 500
+	}
+
 	log.LogDebugf("action[parseConfig] load masterAddrs(%v).", MasterClient.Nodes())
 	log.LogDebugf("action[parseConfig] load port(%v).", s.port)
 	log.LogDebugf("action[parseConfig] load zoneName(%v).", s.zoneName)
diff --git a/datanode/server_handler.go b/datanode/server_handler.go
index 28e082d058..d76a83a1a2 100644
--- a/datanode/server_handler.go
+++ b/datanode/server_handler.go
@@ -181,6 +181,7 @@ func (s *DataNode) getPartitionAPI(w http.ResponseWriter, r *http.Request) {
 		Files                []*storage.ExtentInfo `json:"extents"`
 		FileCount            int                   `json:"fileCount"`
 		Replicas             []string              `json:"replicas"`
+		Peers                []proto.Peer          `json:"peers"`
 		TinyDeleteRecordSize int64                 `json:"tinyDeleteRecordSize"`
 		RaftStatus           *raft.Status          `json:"raftStatus"`
 	}{
@@ -195,6 +196,7 @@
 		Replicas:             partition.Replicas(),
 		TinyDeleteRecordSize: tinyDeleteRecordSize,
 		RaftStatus:           partition.raftPartition.Status(),
+		Peers:                partition.config.Peers,
 	}
 	s.buildSuccessResp(w, result)
 }
diff --git a/datanode/space_manager.go b/datanode/space_manager.go
index c5c0be5279..d3de1a3d4f 100644
--- a/datanode/space_manager.go
+++ b/datanode/space_manager.go
@@ -16,15 +16,18 @@ package datanode

 import (
 	"fmt"
+	"path"
+	"strconv"
 	"sync"
 	"time"

+	"math"
+	"os"
+
 	"github.com/chubaofs/chubaofs/proto"
 	"github.com/chubaofs/chubaofs/raftstore"
 	"github.com/chubaofs/chubaofs/util"
 	"github.com/chubaofs/chubaofs/util/log"
-	"math"
-	"os"
 )

 // SpaceManager manages the disk space.
@@ -300,6 +303,51 @@ func (manager *SpaceManager) DeletePartition(dpID uint64) {
 	os.RemoveAll(dp.Path())
 }

+// ExpiredPartition marks the specified partition as expired.
+// It renames the data path, adding the 'expired_' prefix and the operation timestamp as a suffix.
+// (e.g. '/disk0/datapartition_1_128849018880' to '/disk0/expired_datapartition_1_128849018880_1600054521')
+func (manager *SpaceManager) ExpiredPartition(partitionID uint64) {
+	dp := manager.Partition(partitionID)
+	if dp == nil {
+		return
+	}
+	manager.partitionMutex.Lock()
+	delete(manager.partitions, partitionID)
+	manager.partitionMutex.Unlock()
+	dp.Stop()
+	dp.Disk().DetachDataPartition(dp)
+	var currentPath = path.Clean(dp.Path())
+	var newPath = path.Join(path.Dir(currentPath),
+		ExpiredPartitionPrefix+path.Base(currentPath)+"_"+strconv.FormatInt(time.Now().Unix(), 10))
+	if err := os.Rename(currentPath, newPath); err != nil {
+		log.LogErrorf("ExpiredPartition: mark expired partition fail: volume(%v) partitionID(%v) path(%v) newPath(%v) err(%v)",
+			dp.volumeID,
+			dp.partitionID,
+			dp.path,
+			newPath,
+			err)
+		return
+	}
+	log.LogInfof("ExpiredPartition: mark expired partition: volume(%v) partitionID(%v) path(%v) newPath(%v)",
+		dp.volumeID,
+		dp.partitionID,
+		dp.path,
+		newPath)
+}
+
+// DeletePartitionFromCache deletes a partition from the cache based on the partition id; unlike DeletePartition, it leaves the on-disk data intact.
+func (manager *SpaceManager) DeletePartitionFromCache(dpID uint64) { + dp := manager.Partition(dpID) + if dp == nil { + return + } + manager.partitionMutex.Lock() + delete(manager.partitions, dpID) + manager.partitionMutex.Unlock() + dp.Stop() + dp.Disk().DetachDataPartition(dp) +} + func (s *DataNode) buildHeartBeatResponse(response *proto.DataNodeHeartbeatResponse) { response.Status = proto.TaskSucceeds stat := s.space.Stats() diff --git a/datanode/wrap_operator.go b/datanode/wrap_operator.go index b96dd6d4ce..039982986a 100644 --- a/datanode/wrap_operator.go +++ b/datanode/wrap_operator.go @@ -249,7 +249,7 @@ func (s *DataNode) handlePacketToDeleteDataPartition(p *repl.Packet) { if err != nil { return } else { - s.space.DeletePartition(request.PartitionId) + s.space.ExpiredPartition(request.PartitionId) } } else { err = fmt.Errorf("illegal opcode ") @@ -939,7 +939,6 @@ func (s *DataNode) handlePacketToRemoveDataPartitionRaftMember(p *repl.Packet) { if err = decode.Decode(adminTask); err != nil { return } - reqData, err = json.Marshal(adminTask.Request) p.AddMesgLog(string(reqData)) if err != nil { @@ -948,7 +947,7 @@ func (s *DataNode) handlePacketToRemoveDataPartitionRaftMember(p *repl.Packet) { if err = json.Unmarshal(reqData, req); err != nil { return } - + req.ReserveResource = adminTask.ReserveResource dp := s.space.Partition(req.PartitionId) if dp == nil { return diff --git a/docker/conf/datanode.json b/docker/conf/datanode.json index b527a88037..1dd52993dc 100644 --- a/docker/conf/datanode.json +++ b/docker/conf/datanode.json @@ -7,7 +7,8 @@ "raftDir": "/cfs/log", "consulAddr": "http://192.168.0.101:8500", "exporterPort": 9500, - "cell": "cell-01", + "tickIntervalMs": 500, + "zoneName": "zone-01", "logDir": "/cfs/log", "logLevel": "info", "disks": [ diff --git a/docker/conf/datanode2.json b/docker/conf/datanode2.json new file mode 100644 index 0000000000..5d06c68dae --- /dev/null +++ b/docker/conf/datanode2.json @@ -0,0 +1,22 @@ +{ + "role": "datanode", + "listen": "17310", + "prof": "17320", + "raftHeartbeat": "17330", + "raftReplica": "17340", + "raftDir": "/cfs/log", + "consulAddr": "http://192.168.0.101:8500", + "exporterPort": 9500, + "tickIntervalMs": 500, + "zoneName": "zone-02", + "logDir": "/cfs/log", + "logLevel": "info", + "disks": [ + "/cfs/disk:10737418240" + ], + "masterAddr": [ + "192.168.0.11:17010", + "192.168.0.12:17010", + "192.168.0.13:17010" + ] +} diff --git a/docker/conf/datanode3.json b/docker/conf/datanode3.json new file mode 100644 index 0000000000..f91e84db1b --- /dev/null +++ b/docker/conf/datanode3.json @@ -0,0 +1,22 @@ +{ + "role": "datanode", + "listen": "17310", + "prof": "17320", + "raftHeartbeat": "17330", + "raftReplica": "17340", + "raftDir": "/cfs/log", + "consulAddr": "http://192.168.0.101:8500", + "exporterPort": 9500, + "tickIntervalMs": 500, + "zoneName": "zone-03", + "logDir": "/cfs/log", + "logLevel": "info", + "disks": [ + "/cfs/disk:10737418240" + ], + "masterAddr": [ + "192.168.0.11:17010", + "192.168.0.12:17010", + "192.168.0.13:17010" + ] +} diff --git a/docker/conf/metanode.json b/docker/conf/metanode.json index 4b2e5de0c7..f34b3ff503 100644 --- a/docker/conf/metanode.json +++ b/docker/conf/metanode.json @@ -6,9 +6,11 @@ "raftReplicaPort": "17240", "consulAddr": "http://192.168.0.101:8500", "exporterPort": 9500, + "tickIntervalMs": 500, "logLevel": "info", "logDir": "/cfs/log", "warnLogDir": "/cfs/log", + "zoneName": "zone-01", "totalMem": "536870912", "metadataDir": "/cfs/data/meta", "raftDir": "/cfs/data/raft", diff --git 
a/docker/conf/metanode2.json b/docker/conf/metanode2.json new file mode 100644 index 0000000000..e1bd8f69c9 --- /dev/null +++ b/docker/conf/metanode2.json @@ -0,0 +1,22 @@ +{ + "role": "metanode", + "listen": "17210", + "prof": "17220", + "raftHeartbeatPort": "17230", + "raftReplicaPort": "17240", + "consulAddr": "http://192.168.0.101:8500", + "exporterPort": 9500, + "tickIntervalMs": 500, + "logLevel": "info", + "logDir": "/cfs/log", + "warnLogDir": "/cfs/log", + "zoneName": "zone-02", + "totalMem": "536870912", + "metadataDir": "/cfs/data/meta", + "raftDir": "/cfs/data/raft", + "masterAddr": [ + "192.168.0.11:17010", + "192.168.0.12:17010", + "192.168.0.13:17010" + ] +} diff --git a/docker/conf/metanode3.json b/docker/conf/metanode3.json new file mode 100644 index 0000000000..93be973434 --- /dev/null +++ b/docker/conf/metanode3.json @@ -0,0 +1,22 @@ +{ + "role": "metanode", + "listen": "17210", + "prof": "17220", + "raftHeartbeatPort": "17230", + "raftReplicaPort": "17240", + "consulAddr": "http://192.168.0.101:8500", + "exporterPort": 9500, + "tickIntervalMs": 500, + "logLevel": "info", + "logDir": "/cfs/log", + "warnLogDir": "/cfs/log", + "zoneName": "zone-03", + "totalMem": "536870912", + "metadataDir": "/cfs/data/meta", + "raftDir": "/cfs/data/raft", + "masterAddr": [ + "192.168.0.11:17010", + "192.168.0.12:17010", + "192.168.0.13:17010" + ] +} diff --git a/docker/docker-compose-multi-zone.yaml b/docker/docker-compose-multi-zone.yaml new file mode 100644 index 0000000000..02f7894be1 --- /dev/null +++ b/docker/docker-compose-multi-zone.yaml @@ -0,0 +1,572 @@ +version: '2.1' + +networks: + extnetwork: + ipam: + config: + - subnet: 192.168.0.0/24 + gateway: 192.168.0.1 + +services: + monitor: + image: chubaofs/cfs-base:1.1 + depends_on: + - consul + - prometheus + - grafana + networks: + extnetwork: + + servers: + image: chubaofs/cfs-base:1.1 + depends_on: + - master1 + - master2 + - master3 + - metanode1 + - metanode2 + - metanode3 + - metanode4 + - metanode5 + - metanode6 + - metanode7 + - datanode1 + - datanode2 + - datanode3 + - datanode4 + - datanode5 + - datanode6 + - datanode7 + - objectnode1 + - objectnode2 + - objectnode3 + - console1 + - nginx + networks: + extnetwork: + + master1: + image: chubaofs/cfs-base:1.1 + ports: + - "5901" + - "5902" + - "17010" + - "17020" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/master1/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/master1/log:/cfs/log + - ./conf/master1.json:/cfs/conf/master.json + - ./script/start_master.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.11 + + master2: + image: chubaofs/cfs-base:1.1 + ports: + - "5901" + - "5902" + - "17010" + - "17020" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/master2/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/master2/log:/cfs/log + - ./conf/master2.json:/cfs/conf/master.json + - ./script/start_master.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.12 + master3: + image: chubaofs/cfs-base:1.1 + ports: + - "5901" + - "5902" + - "17010" + - "17020" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/master3/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/master3/log:/cfs/log + - ./conf/master3.json:/cfs/conf/master.json + - ./script/start_master.sh:/cfs/script/start.sh + command: /bin/sh 
/cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.13 + + metanode1: + image: chubaofs/cfs-base:1.1 + ports: + - "17210" + - "17220" + - "17230" + - "17240" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/metanode1/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/metanode1/log:/cfs/log + - ./conf/metanode.json:/cfs/conf/metanode.json + - ./script/start_meta.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.21 + + metanode2: + image: chubaofs/cfs-base:1.1 + ports: + - "17210" + - "17220" + - "17230" + - "17240" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/metanode2/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/metanode2/log:/cfs/log + - ./conf/metanode.json:/cfs/conf/metanode.json + - ./script/start_meta.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.22 + + metanode3: + image: chubaofs/cfs-base:1.1 + ports: + - "17210" + - "17220" + - "17230" + - "17240" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/metanode3/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/metanode3/log:/cfs/log + - ./conf/metanode.json:/cfs/conf/metanode.json + - ./script/start_meta.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.23 + + metanode4: + image: chubaofs/cfs-base:1.1 + ports: + - "17210" + - "17220" + - "17230" + - "17240" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/metanode4/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/metanode4/log:/cfs/log + - ./conf/metanode2.json:/cfs/conf/metanode.json + - ./script/start_meta.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.24 + + metanode5: + image: chubaofs/cfs-base:1.1 + ports: + - "17210" + - "17220" + - "17230" + - "17240" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/metanode5/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/metanode5/log:/cfs/log + - ./conf/metanode2.json:/cfs/conf/metanode.json + - ./script/start_meta.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.25 + + metanode6: + image: chubaofs/cfs-base:1.1 + ports: + - "17210" + - "17220" + - "17230" + - "17240" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/metanode6/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/metanode6/log:/cfs/log + - ./conf/metanode3.json:/cfs/conf/metanode.json + - ./script/start_meta.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.26 + + metanode7: + image: chubaofs/cfs-base:1.1 + ports: + - "17210" + - "17220" + - "17230" + - "17240" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/metanode7/data:/cfs/data + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/metanode7/log:/cfs/log + - ./conf/metanode3.json:/cfs/conf/metanode.json + - ./script/start_meta.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.27 + + 
datanode1: + image: chubaofs/cfs-base:1.1 + ports: + - "17310" + - "17320" + - "17330" + - "17340" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/datanode1/disk:/cfs/disk + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/datanode1/log:/cfs/log + - ./conf/datanode.json:/cfs/conf/datanode.json + - ./script/start_datanode.sh:/cfs/script/start.sh + command: /bin/bash /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.31 + + datanode2: + image: chubaofs/cfs-base:1.1 + ports: + - "17310" + - "17320" + - "17330" + - "17340" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/datanode2/disk:/cfs/disk + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/datanode2/log:/cfs/log + - ./conf/datanode.json:/cfs/conf/datanode.json + - ./script/start_datanode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.32 + + datanode3: + image: chubaofs/cfs-base:1.1 + ports: + - "17310" + - "17320" + - "17330" + - "17340" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/datanode3/disk:/cfs/disk + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/datanode3/log:/cfs/log + - ./conf/datanode.json:/cfs/conf/datanode.json + - ./script/start_datanode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.33 + + datanode4: + image: chubaofs/cfs-base:1.1 + ports: + - "17310" + - "17320" + - "17330" + - "17340" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/datanode4/disk:/cfs/disk + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/datanode4/log:/cfs/log + - ./conf/datanode2.json:/cfs/conf/datanode.json + - ./script/start_datanode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.34 + + datanode5: + image: chubaofs/cfs-base:1.1 + ports: + - "17310" + - "17320" + - "17330" + - "17340" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/datanode5/disk:/cfs/disk + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/datanode5/log:/cfs/log + - ./conf/datanode2.json:/cfs/conf/datanode.json + - ./script/start_datanode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.35 + + datanode6: + image: chubaofs/cfs-base:1.1 + ports: + - "17310" + - "17320" + - "17330" + - "17340" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/datanode6/disk:/cfs/disk + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/datanode6/log:/cfs/log + - ./conf/datanode3.json:/cfs/conf/datanode.json + - ./script/start_datanode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.36 + + datanode7: + image: chubaofs/cfs-base:1.1 + ports: + - "17310" + - "17320" + - "17330" + - "17340" + - 9500 + volumes: + - ${DiskPath:-./docker_data}/datanode7/disk:/cfs/disk + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/datanode7/log:/cfs/log + - ./conf/datanode3.json:/cfs/conf/datanode.json + - ./script/start_datanode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.37 + + objectnode1: + image: chubaofs/cfs-base:1.1 + ports: + - "80" + - 9500 + volumes: + - ./bin:/cfs/bin:ro 
+ - ${DiskPath:-./docker_data}/objectnode1/log:/cfs/log + - ./conf/objectnode.json:/cfs/conf/objectnode.json + - ./script/start_objectnode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + environment: + - TZ=Asia/Shanghai + networks: + extnetwork: + ipv4_address: 192.168.0.41 + + objectnode2: + image: chubaofs/cfs-base:1.1 + ports: + - "80" + - 9500 + volumes: + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/objectnode2/log:/cfs/log + - ./conf/objectnode.json:/cfs/conf/objectnode.json + - ./script/start_objectnode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + environment: + - TZ=Asia/Shanghai + networks: + extnetwork: + ipv4_address: 192.168.0.42 + + objectnode3: + image: chubaofs/cfs-base:1.1 + ports: + - "80" + - 9500 + volumes: + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/objectnode3/log:/cfs/log + - ./conf/objectnode.json:/cfs/conf/objectnode.json + - ./script/start_objectnode.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + environment: + - TZ=Asia/Shanghai + networks: + extnetwork: + ipv4_address: 192.168.0.43 + + + console1: + image: chubaofs/cfs-base:1.1 + ports: + - "80" + volumes: + - ./bin:/cfs/bin:ro + - ${DiskPath:-./docker_data}/console/log:/cfs/log + - ./conf/console.json:/cfs/conf/console.json + - ./script/start_console.sh:/cfs/script/start.sh + command: /bin/sh /cfs/script/start.sh + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.50 + + client: + image: chubaofs/cfs-base:1.1 + ports: + - 9500 + volumes: + - ./bin:/cfs/bin:ro + - ./conf/hosts:/etc/hosts:ro + - ./conf/client.json:/cfs/conf/client.json + - ${DiskPath:-./docker_data}/client/log:/cfs/log + - ./script/run_test.sh:/cfs/script/start.sh + - ./script/start_client.sh:/cfs/script/start_client.sh + - ./ltp/runtest/fs:/opt/ltp/runtest/fs + - ./s3tests:/opt/s3tests:ro + privileged: true + devices: + - /dev/fuse:/dev/fuse:rwm + cap_add: + - SYS_ADMIN + command: /bin/bash /cfs/script/start.sh + networks: + extnetwork: + + consul: + image: consul:1.5 + ports: + - 8500:8500 + volumes: + - ./monitor:/monitor + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.101 + + prometheus: + image: prom/prometheus + ports: + - 9090:9090 + volumes: + - ./monitor/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.102 + + grafana: + image: grafana/grafana:6.4.4 + environment: + - GF_SECURITY_ADMIN_PASSWORD=123456 + ports: + - 3000:3000 + volumes: + - ./monitor/grafana/grafana.ini:/etc/grafana/grafna.ini + - ./monitor/grafana/provisioning:/etc/grafana/provisioning + - ./monitor/grafana/init.sh:/grafana/init.sh + privileged: true + #command: /bin/bash + networks: + extnetwork: + ipv4_address: 192.168.0.103 + + nginx: + image: nginx:1.17.8 + ports: + - "80:80" + volumes: + - ./conf/nginx.conf:/etc/nginx/nginx.conf:ro + command: /bin/bash -c "nginx -g 'daemon off;'" + restart: on-failure + privileged: true + networks: + extnetwork: + ipv4_address: 192.168.0.104 + + build: + image: chubaofs/cfs-base:1.1 + volumes: + - ../:/go/src/github.com/chubaofs/chubaofs + command: + /bin/bash /go/src/github.com/chubaofs/chubaofs/docker/script/build.sh + networks: + extnetwork: + + unit_test: + image: chubaofs/cfs-base:1.1 + volumes: + - ../:/go/src/github.com/chubaofs/chubaofs + command: + - bash + - "-c" + - >- + set -e; + mkdir -p 
/go/src/github.com/chubaofs/chubaofs/docker/bin && + cd /go/src/github.com/chubaofs/chubaofs && make test + networks: + extnetwork: diff --git a/docker/script/run_test.sh b/docker/script/run_test.sh index 8455abe26c..7906346929 100755 --- a/docker/script/run_test.sh +++ b/docker/script/run_test.sh @@ -31,8 +31,10 @@ AuthKey="0e20229116d5a9a4a9e876806b514a85" init_cli() { cp ${cli} /usr/bin/ cd ${conf_path} - ${cli} completion + ${cli} completion &> /dev/null echo 'source '${conf_path}'/cfs-cli.sh' >> ~/.bashrc + echo -n "cli tool ... " + echo -e "\033[32mdone\033[0m" } check_cluster() { echo -n "Checking cluster ... " @@ -55,7 +57,7 @@ ensure_node_writable() { for i in $(seq 1 300) ; do ${cli} ${node} list &> /tmp/cli_${node}_list; res=`cat /tmp/cli_${node}_list | grep "Yes" | grep "Active" | wc -l` - if [[ ${res} -eq 4 ]]; then + if [[ ${res} -ge 4 ]]; then echo -e "\033[32mdone\033[0m" return fi @@ -95,7 +97,7 @@ create_volume() { echo -e "\033[32mdone\033[0m" return fi - ${cli} volume create ${VolName} ${Owner} --capacity=30 -y > /dev/null + ${cli} volume create ${VolName} ${Owner} --zonename=zone-01 --capacity=30 -y > /dev/null if [[ $? -ne 0 ]]; then echo -e "\033[31mfail\033[0m" exit 1 @@ -146,7 +148,7 @@ print_error_info() { start_client() { echo -n "Starting client ... " nohup /cfs/bin/cfs-client -c /cfs/conf/client.json >/cfs/log/cfs.out 2>&1 & - sleep 10 + sleep 5 res=$( stat $MntPoint | grep -q "Inode: 1" ; echo $? ) if [[ $res -ne 0 ]] ; then echo -e "\033[31mfail\033[0m" @@ -269,6 +271,6 @@ add_data_partitions ; sleep 3 show_cluster_info start_client ; sleep 2 run_ltptest -run_s3_test +#run_s3_test stop_client delete_volume diff --git a/docker/script/start_client.sh b/docker/script/start_client.sh index beab6dfd6d..3ca606d50c 100755 --- a/docker/script/start_client.sh +++ b/docker/script/start_client.sh @@ -77,7 +77,7 @@ ensure_node_writable() { for i in $(seq 1 300) ; do ${cli} ${node} list &> /tmp/cli_${node}_list; res=`cat /tmp/cli_${node}_list | grep "Yes" | grep "Active" | wc -l` - if [[ ${res} -eq 4 ]]; then + if [[ ${res} -ge 3 ]]; then echo -e "\033[32mdone\033[0m" return fi @@ -96,7 +96,7 @@ create_volume() { echo -e "\033[32mdone\033[0m" return fi - ${cli} volume create ${VolName} ${Owner} --capacity=30 -y > /dev/null + ${cli} volume create ${VolName} ${Owner} --zonename=zone-01 --capacity=30 -y > /dev/null if [[ $? 
-ne 0 ]]; then echo -e "\033[31mfail\033[0m" exit 1 diff --git a/master/api_service.go b/master/api_service.go index ba0c0e4551..a6561f5a2c 100644 --- a/master/api_service.go +++ b/master/api_service.go @@ -202,19 +202,23 @@ func (m *Server) clusterStat(w http.ResponseWriter, r *http.Request) { func (m *Server) getCluster(w http.ResponseWriter, r *http.Request) { cv := &proto.ClusterView{ - Name: m.cluster.Name, - LeaderAddr: m.leaderInfo.addr, - DisableAutoAlloc: m.cluster.DisableAutoAllocate, - MetaNodeThreshold: m.cluster.cfg.MetaNodeThreshold, - Applied: m.fsm.applied, - MaxDataPartitionID: m.cluster.idAlloc.dataPartitionID, - MaxMetaNodeID: m.cluster.idAlloc.commonID, - MaxMetaPartitionID: m.cluster.idAlloc.metaPartitionID, - MetaNodes: make([]proto.NodeView, 0), - DataNodes: make([]proto.NodeView, 0), - VolStatInfo: make([]*proto.VolStatInfo, 0), - BadPartitionIDs: make([]proto.BadPartitionView, 0), - BadMetaPartitionIDs: make([]proto.BadPartitionView, 0), + Name: m.cluster.Name, + LeaderAddr: m.leaderInfo.addr, + DisableAutoAlloc: m.cluster.DisableAutoAllocate, + MetaNodeThreshold: m.cluster.cfg.MetaNodeThreshold, + DpRecoverPool: m.cluster.cfg.DataPartitionsRecoverPoolSize, + MpRecoverPool: m.cluster.cfg.MetaPartitionsRecoverPoolSize, + Applied: m.fsm.applied, + MaxDataPartitionID: m.cluster.idAlloc.dataPartitionID, + MaxMetaNodeID: m.cluster.idAlloc.commonID, + MaxMetaPartitionID: m.cluster.idAlloc.metaPartitionID, + MetaNodes: make([]proto.NodeView, 0), + DataNodes: make([]proto.NodeView, 0), + VolStatInfo: make([]*proto.VolStatInfo, 0), + BadPartitionIDs: make([]proto.BadPartitionView, 0), + BadMetaPartitionIDs: make([]proto.BadPartitionView, 0), + MigratedDataPartitions: make([]proto.BadPartitionView, 0), + MigratedMetaPartitions: make([]proto.BadPartitionView, 0), } vols := m.cluster.allVolNames() @@ -230,8 +234,35 @@ func (m *Server) getCluster(w http.ResponseWriter, r *http.Request) { } cv.VolStatInfo = append(cv.VolStatInfo, stat.(*volStatInfo)) } - cv.BadPartitionIDs = m.cluster.getBadDataPartitionsView() - cv.BadMetaPartitionIDs = m.cluster.getBadMetaPartitionsView() + + m.cluster.BadDataPartitionIds.Range(func(key, value interface{}) bool { + badDataPartitionIds := value.([]uint64) + path := key.(string) + bpv := badPartitionView{Path: path, PartitionIDs: badDataPartitionIds} + cv.BadPartitionIDs = append(cv.BadPartitionIDs, bpv) + return true + }) + m.cluster.BadMetaPartitionIds.Range(func(key, value interface{}) bool { + badPartitionIds := value.([]uint64) + path := key.(string) + bpv := badPartitionView{Path: path, PartitionIDs: badPartitionIds} + cv.BadMetaPartitionIDs = append(cv.BadMetaPartitionIDs, bpv) + return true + }) + m.cluster.MigratedDataPartitionIds.Range(func(key, value interface{}) bool { + badPartitionIds := value.([]uint64) + path := key.(string) + bpv := badPartitionView{Path: path, PartitionIDs: badPartitionIds} + cv.MigratedDataPartitions = append(cv.MigratedDataPartitions, bpv) + return true + }) + m.cluster.MigratedMetaPartitionIds.Range(func(key, value interface{}) bool { + badPartitionIds := value.([]uint64) + path := key.(string) + bpv := badPartitionView{Path: path, PartitionIDs: badPartitionIds} + cv.MigratedMetaPartitions = append(cv.MigratedMetaPartitions, bpv) + return true + }) sendOkReply(w, r, newSuccessHTTPReply(cv)) } @@ -392,6 +423,47 @@ func (m *Server) addDataReplica(w http.ResponseWriter, r *http.Request) { sendOkReply(w, r, newSuccessHTTPReply(msg)) } +func (m *Server) resetDataPartitionHosts(w http.ResponseWriter, r 
*http.Request) { + var ( + msg string + addr string + dp *DataPartition + partitionID uint64 + err error + ) + if partitionID, addr, err = parseRequestToRemoveDataReplica(r); err != nil { + sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()}) + return + } + + if dp, err = m.cluster.getDataPartitionByID(partitionID); err != nil { + sendErrReply(w, r, newErrHTTPReply(proto.ErrDataPartitionNotExists)) + return + } + + hosts := make([]string, 0) + peers := make([]proto.Peer, 0) + + for _, host := range dp.Hosts { + if host == addr { + continue + } + hosts = append(hosts, host) + } + for _, peer := range dp.Peers { + if peer.Addr == addr { + continue + } + peers = append(peers, peer) + } + + if err = dp.update("resetDataPartitionHosts", dp.VolName, peers, hosts, m.cluster); err != nil { + return + } + msg = fmt.Sprintf("data partitionID :%v reset hosts [%v] successfully", partitionID, addr) + sendOkReply(w, r, newSuccessHTTPReply(msg)) +} + func (m *Server) deleteDataReplica(w http.ResponseWriter, r *http.Request) { var ( msg string @@ -410,8 +482,9 @@ func (m *Server) deleteDataReplica(w http.ResponseWriter, r *http.Request) { sendErrReply(w, r, newErrHTTPReply(proto.ErrDataPartitionNotExists)) return } - - if err = m.cluster.removeDataReplica(dp, addr, true); err != nil { + dp.offlineMutex.Lock() + defer dp.offlineMutex.Unlock() + if err = m.cluster.removeDataReplica(dp, addr, true, false); err != nil { sendErrReply(w, r, newErrHTTPReply(err)) return } @@ -466,8 +539,9 @@ func (m *Server) deleteMetaReplica(w http.ResponseWriter, r *http.Request) { sendErrReply(w, r, newErrHTTPReply(proto.ErrMetaPartitionNotExists)) return } - - if err = m.cluster.deleteMetaReplica(mp, addr, true); err != nil { + mp.offlineMutex.Lock() + defer mp.offlineMutex.Unlock() + if err = m.cluster.deleteMetaReplica(mp, addr, true, false); err != nil { sendErrReply(w, r, newErrHTTPReply(err)) return } @@ -494,7 +568,7 @@ func (m *Server) decommissionDataPartition(w http.ResponseWriter, r *http.Reques sendErrReply(w, r, newErrHTTPReply(proto.ErrDataPartitionNotExists)) return } - if err = m.cluster.decommissionDataPartition(addr, dp, handleDataPartitionOfflineErr); err != nil { + if err = m.cluster.decommissionDataPartition(addr, dp, getTargetAddressForDataPartitionDecommission, handleDataPartitionOfflineErr, "", false); err != nil { sendErrReply(w, r, newErrHTTPReply(err)) return } @@ -502,6 +576,63 @@ func (m *Server) decommissionDataPartition(w http.ResponseWriter, r *http.Reques sendOkReply(w, r, newSuccessHTTPReply(rstMsg)) } +func (m *Server) setNodeToOfflineState(w http.ResponseWriter, r *http.Request) { + var ( + err error + startID uint64 + endID uint64 + nodeType string + zoneName string + state bool + ) + if startID, endID, nodeType, zoneName, state, err = parseRequestToSetNodeToOfflineState(r); err != nil { + sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()}) + return + } + + if nodeType == nodeTypeAll { + m.cluster.setDataNodeToOfflineState(startID, endID, state, zoneName) + m.cluster.setMetaNodeToOfflineState(startID, endID, state, zoneName) + } else { + if nodeType == nodeTypeDataNode { + m.cluster.setDataNodeToOfflineState(startID, endID, state, zoneName) + } else { + m.cluster.setMetaNodeToOfflineState(startID, endID, state, zoneName) + } + } + sendOkReply(w, r, newSuccessHTTPReply("success")) +} + +func parseRequestToSetNodeToOfflineState(r *http.Request) (startID, endID uint64, nodeType, zoneName string, state bool, err error) { + var 
+	if value = r.FormValue(startKey); value == "" {
+		err = keyNotFound(startKey)
+		return
+	}
+	startID, err = strconv.ParseUint(value, 10, 64)
+	if err != nil {
+		return
+	}
+	if value = r.FormValue(endKey); value == "" {
+		err = keyNotFound(endKey)
+		return
+	}
+	endID, err = strconv.ParseUint(value, 10, 64)
+	if err != nil {
+		return
+	}
+	nodeType = r.FormValue(nodeTypeKey)
+	if !(nodeType == nodeTypeDataNode || nodeType == nodeTypeMetaNode || nodeType == nodeTypeAll) {
+		err = fmt.Errorf("nodeType must be dataNode, metaNode or all")
+		return
+	}
+	if zoneName, err = extractZoneName(r); err != nil {
+		return
+	}
+	state, err = strconv.ParseBool(r.FormValue(stateKey))
+	return
+}
+
 func (m *Server) diagnoseDataPartition(w http.ResponseWriter, r *http.Request) {
 	var (
 		err error
@@ -569,23 +700,26 @@ func (m *Server) markDeleteVol(w http.ResponseWriter, r *http.Request) {
 
 func (m *Server) updateVol(w http.ResponseWriter, r *http.Request) {
 	var (
-		name           string
-		authKey        string
-		err            error
-		msg            string
-		capacity       uint64
-		replicaNum     int
-		followerRead   bool
-		authenticate   bool
-		enableToken    bool
-		zoneName       string
-		description    string
-		dpSelectorName string
-		dpSelectorParm string
-		vol            *Vol
+		name         string
+		authKey      string
+		err          error
+		msg          string
+		capacity     int
+		replicaNum   int
+		followerRead bool
+		authenticate bool
+		enableToken  bool
+		autoRepair   bool
+		zoneName     string
+		description  string
+		vol          *Vol
 	)
-
-	if name, authKey, description, err = parseRequestToUpdateVol(r); err != nil {
+	if name, authKey, replicaNum, err = parseRequestToUpdateVol(r); err != nil {
+		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
+		return
+	}
+	if replicaNum != 0 && !(replicaNum == 2 || replicaNum == 3) {
+		err = fmt.Errorf("replicaNum can only be 2 or 3, received replicaNum is[%v]", replicaNum)
 		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
 		return
 	}
@@ -593,34 +727,18 @@ func (m *Server) updateVol(w http.ResponseWriter, r *http.Request) {
 		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeVolNotExists, Msg: err.Error()})
 		return
 	}
-	if zoneName, capacity, replicaNum, enableToken, dpSelectorName, dpSelectorParm, err =
-		parseDefaultInfoToUpdateVol(r, vol); err != nil {
+	if zoneName, capacity, description, err = parseDefaultInfoToUpdateVol(r, vol); err != nil {
 		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
 		return
 	}
-	if replicaNum != 0 && !(replicaNum == 2 || replicaNum == 3) {
-		err = fmt.Errorf("replicaNum can only be 2 and 3,received replicaNum is[%v]", replicaNum)
-		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
-		return
+	if replicaNum == 0 {
+		replicaNum = int(vol.dpReplicaNum)
 	}
-
-	if followerRead, authenticate, err = parseBoolFieldToUpdateVol(r, vol); err != nil {
+	if followerRead, authenticate, enableToken, autoRepair, err = parseBoolFieldToUpdateVol(r, vol); err != nil {
 		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
 		return
 	}
-
-	newArgs := getVolVarargs(vol)
-
-	newArgs.zoneName = zoneName
-	newArgs.description = description
-	newArgs.capacity = capacity
-	newArgs.followerRead = followerRead
-	newArgs.authenticate = authenticate
-	newArgs.enableToken = enableToken
-	newArgs.dpSelectorName = dpSelectorName
-	newArgs.dpSelectorParm = dpSelectorParm
-
-	if err = m.cluster.updateVol(name, authKey, newArgs); err != nil {
+	if err = m.cluster.updateVol(name, authKey, zoneName, description, uint64(capacity), uint8(replicaNum), followerRead, authenticate, enableToken, autoRepair); err != nil {
 		sendErrReply(w, r, newErrHTTPReply(err))
 		return
 	}
@@ -651,14 +769,12 @@ func (m *Server) volExpand(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	newArgs := getVolVarargs(vol)
-	newArgs.capacity = uint64(capacity)
-
-	if err = m.cluster.updateVol(name, authKey, newArgs); err != nil {
+	if err = m.cluster.updateVol(name, authKey, vol.zoneName, vol.description, uint64(capacity),
+		vol.dpReplicaNum, vol.FollowerRead, vol.authenticate, vol.enableToken, vol.autoRepair); err != nil {
 		sendErrReply(w, r, newErrHTTPReply(err))
 		return
 	}
-	msg = fmt.Sprintf("update vol[%v] successfully\n", name)
+	msg = fmt.Sprintf("expand vol[%v] successfully\n", name)
 	sendOkReply(w, r, newSuccessHTTPReply(msg))
 }
 
@@ -684,18 +800,14 @@ func (m *Server) volShrink(w http.ResponseWriter, r *http.Request) {
 		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
 		return
 	}
-
-	newArgs := getVolVarargs(vol)
-	newArgs.capacity = uint64(capacity)
-
-	if err = m.cluster.updateVol(name, authKey, newArgs); err != nil {
+	if err = m.cluster.updateVol(name, authKey, vol.zoneName, vol.description, uint64(capacity),
+		vol.dpReplicaNum, vol.FollowerRead, vol.authenticate, vol.enableToken, vol.autoRepair); err != nil {
 		sendErrReply(w, r, newErrHTTPReply(err))
 		return
 	}
-	msg = fmt.Sprintf("update vol[%v] successfully\n", name)
+	msg = fmt.Sprintf("shrink vol[%v] successfully\n", name)
 	sendOkReply(w, r, newSuccessHTTPReply(msg))
 }
-
 func (m *Server) createVol(w http.ResponseWriter, r *http.Request) {
 	var (
 		name         string
@@ -709,13 +821,13 @@ func (m *Server) createVol(w http.ResponseWriter, r *http.Request) {
 		vol          *Vol
 		followerRead bool
 		authenticate bool
-		crossZone    bool
 		enableToken  bool
+		autoRepair   bool
 		zoneName     string
 		description  string
 	)
-	if name, owner, zoneName, description, mpCount, dpReplicaNum, size, capacity, followerRead, authenticate, crossZone, enableToken, err = parseRequestToCreateVol(r); err != nil {
+	if name, owner, zoneName, description, mpCount, dpReplicaNum, size, capacity, followerRead, authenticate, enableToken, autoRepair, err = parseRequestToCreateVol(r); err != nil {
 		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
 		return
 	}
@@ -724,7 +836,7 @@ func (m *Server) createVol(w http.ResponseWriter, r *http.Request) {
 		sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()})
 		return
 	}
-	if vol, err = m.cluster.createVol(name, owner, zoneName, description, mpCount, dpReplicaNum, size, capacity, followerRead, authenticate, crossZone, enableToken); err != nil {
+	if vol, err = m.cluster.createVol(name, owner, zoneName, description, mpCount, dpReplicaNum, size, capacity, followerRead, authenticate, enableToken, autoRepair); err != nil {
 		sendErrReply(w, r, newErrHTTPReply(err))
 		return
 	}
@@ -781,8 +893,9 @@ func newSimpleView(vol *Vol) *proto.SimpleVolView {
 		FollowerRead:       vol.FollowerRead,
 		NeedToLowerReplica: vol.NeedToLowerReplica,
 		Authenticate:       vol.authenticate,
-		CrossZone:          vol.crossZone,
 		EnableToken:        vol.enableToken,
+		CrossZone:          vol.crossZone,
+		AutoRepair:         vol.autoRepair,
 		Tokens:             vol.tokens,
 		RwDpCnt:            vol.dataPartitions.readableAndWritableCnt,
 		MpCnt:              len(vol.MetaPartitions),
@@ -847,6 +960,8 @@ func (m *Server) getDataNode(w http.ResponseWriter, r *http.Request) {
 		NodeSetID:                 dataNode.NodeSetID,
 		PersistenceDataPartitions: dataNode.PersistenceDataPartitions,
 		BadDisks:                  dataNode.BadDisks,
+		ToBeOffline:               dataNode.ToBeOffline,
+
ToBeMigrated: dataNode.ToBeMigrated, } sendOkReply(w, r, newSuccessHTTPReply(dataNodeInfo)) @@ -855,13 +970,15 @@ func (m *Server) getDataNode(w http.ResponseWriter, r *http.Request) { // Decommission a data node. This will decommission all the data partition on that node. func (m *Server) decommissionDataNode(w http.ResponseWriter, r *http.Request) { var ( - node *DataNode - rstMsg string - offLineAddr string - err error + node *DataNode + rstMsg string + offLineAddr string + destZoneName string + strictFlag bool + err error ) - if offLineAddr, err = parseAndExtractNodeAddr(r); err != nil { + if offLineAddr, destZoneName, err = parseRequestForDecommissionDataNode(r); err != nil { sendErrReply(w, r, &proto.HTTPReply{Code: proto.ErrCodeParamError, Msg: err.Error()}) return } @@ -870,7 +987,13 @@ func (m *Server) decommissionDataNode(w http.ResponseWriter, r *http.Request) { sendErrReply(w, r, newErrHTTPReply(proto.ErrDataNodeNotExists)) return } - if err = m.cluster.decommissionDataNode(node); err != nil { + + if strictFlag, err = extractStrictFlag(r); err != nil { + sendErrReply(w, r, newErrHTTPReply(err)) + return + } + + if err = m.cluster.decommissionDataNode(node, destZoneName, strictFlag); err != nil { sendErrReply(w, r, newErrHTTPReply(err)) return } @@ -922,6 +1045,23 @@ func (m *Server) setNodeInfoHandler(w http.ResponseWriter, r *http.Request) { } } } + if val, ok := params[dpRecoverPoolSizeKey]; ok { + if v, ok := val.(int64); ok { + if err = m.cluster.setDpRecoverPoolSize(int32(v)); err != nil { + sendErrReply(w, r, newErrHTTPReply(err)) + return + } + } + } + if val, ok := params[mpRecoverPoolSizeKey]; ok { + if v, ok := val.(int64); ok { + if err = m.cluster.setMpRecoverPoolSize(int32(v)); err != nil { + sendErrReply(w, r, newErrHTTPReply(err)) + return + } + } + } + sendOkReply(w, r, newSuccessHTTPReply(fmt.Sprintf("set nodeinfo params %v successfully", params))) } @@ -1115,6 +1255,8 @@ func (m *Server) getMetaNode(w http.ResponseWriter, r *http.Request) { MetaPartitionCount: metaNode.MetaPartitionCount, NodeSetID: metaNode.NodeSetID, PersistenceMetaPartitions: metaNode.PersistenceMetaPartitions, + ToBeOffline: metaNode.ToBeOffline, + ToBeMigrated: metaNode.ToBeMigrated, } sendOkReply(w, r, newSuccessHTTPReply(metaNodeInfo)) } @@ -1135,7 +1277,7 @@ func (m *Server) decommissionMetaPartition(w http.ResponseWriter, r *http.Reques sendErrReply(w, r, newErrHTTPReply(proto.ErrMetaPartitionNotExists)) return } - if err = m.cluster.decommissionMetaPartition(nodeAddr, mp); err != nil { + if err = m.cluster.decommissionMetaPartition(nodeAddr, mp, getTargetAddressForMetaPartitionDecommission, false); err != nil { sendErrReply(w, r, newErrHTTPReply(err)) return } @@ -1171,6 +1313,7 @@ func (m *Server) decommissionMetaNode(w http.ResponseWriter, r *http.Request) { metaNode *MetaNode rstMsg string offLineAddr string + strictFlag bool err error ) @@ -1183,7 +1326,13 @@ func (m *Server) decommissionMetaNode(w http.ResponseWriter, r *http.Request) { sendErrReply(w, r, newErrHTTPReply(proto.ErrMetaNodeNotExists)) return } - if err = m.cluster.decommissionMetaNode(metaNode); err != nil { + + if strictFlag, err = extractStrictFlag(r); err != nil { + sendErrReply(w, r, newErrHTTPReply(err)) + return + } + + if err = m.cluster.decommissionMetaNode(metaNode, strictFlag); err != nil { sendErrReply(w, r, newErrHTTPReply(err)) return } @@ -1288,6 +1437,17 @@ func parseRequestForAddNode(r *http.Request) (nodeAddr, zoneName string, err err return } +func parseRequestForDecommissionDataNode(r 
*http.Request) (nodeAddr, zoneName string, err error) { + if err = r.ParseForm(); err != nil { + return + } + if nodeAddr, err = extractNodeAddr(r); err != nil { + return + } + zoneName = r.FormValue(zoneNameKey) + return +} + func parseAndExtractNodeAddr(r *http.Request) (nodeAddr string, err error) { if err = r.ParseForm(); err != nil { return @@ -1295,6 +1455,15 @@ func parseAndExtractNodeAddr(r *http.Request) (nodeAddr string, err error) { return extractNodeAddr(r) } +func extractStrictFlag(r *http.Request) (strict bool, err error) { + var strictStr string + if strictStr = r.FormValue(strictFlagKey); strictStr == "" { + strictStr = "false" + return + } + return strconv.ParseBool(strictStr) +} + func parseRequestToDecommissionNode(r *http.Request) (nodeAddr, diskPath string, err error) { if err = r.ParseForm(); err != nil { return @@ -1380,7 +1549,7 @@ func parseRequestToDeleteVol(r *http.Request) (name, authKey string, err error) } -func parseRequestToUpdateVol(r *http.Request) (name, authKey, description string, err error) { +func parseRequestToUpdateVol(r *http.Request) (name, authKey string, replicaNum int, err error) { if err = r.ParseForm(); err != nil { return } @@ -1390,12 +1559,10 @@ func parseRequestToUpdateVol(r *http.Request) (name, authKey, description string if authKey, err = extractAuthKey(r); err != nil { return } - description = r.FormValue(descriptionKey) return } -func parseDefaultInfoToUpdateVol(r *http.Request, vol *Vol) (zoneName string, capacity uint64, replicaNum int, - enableToken bool, dpSelectorName string, dpSelectorParm string, err error) { +func parseDefaultInfoToUpdateVol(r *http.Request, vol *Vol) (zoneName string, capacity int, description string, err error) { if err = r.ParseForm(); err != nil { return } @@ -1403,45 +1570,20 @@ func parseDefaultInfoToUpdateVol(r *http.Request, vol *Vol) (zoneName string, ca zoneName = vol.zoneName } if capacityStr := r.FormValue(volCapacityKey); capacityStr != "" { - var capacityInt int - if capacityInt, err = strconv.Atoi(capacityStr); err != nil { + if capacity, err = strconv.Atoi(capacityStr); err != nil { err = unmatchedKey(volCapacityKey) return } - capacity = uint64(capacityInt) - } else { - capacity = vol.Capacity - } - if replicaNumStr := r.FormValue(replicaNumKey); replicaNumStr != "" { - if replicaNum, err = strconv.Atoi(replicaNumStr); err != nil { - err = unmatchedKey(replicaNumKey) - return - } - } else { - replicaNum = int(vol.dpReplicaNum) - } - if enableTokenStr := r.FormValue(enableTokenKey); enableTokenStr != "" { - if enableToken, err = strconv.ParseBool(enableTokenStr); err != nil { - err = unmatchedKey(enableTokenKey) - return - } } else { - enableToken = vol.enableToken + capacity = int(vol.Capacity) } - dpSelectorName = r.FormValue(dpSelectorNameKey) - dpSelectorParm = r.FormValue(dpSelectorParmKey) - if (dpSelectorName == "") || (dpSelectorParm == "") { - if (dpSelectorName != "") || (dpSelectorParm != "") { - err = keyNotFound(dpSelectorNameKey + " or " + dpSelectorParmKey) - return - } - dpSelectorName = vol.dpSelectorName - dpSelectorParm = vol.dpSelectorParm + if description = r.FormValue(descriptionKey); description == "" { + description = vol.description } return } -func parseBoolFieldToUpdateVol(r *http.Request, vol *Vol) (followerRead, authenticate bool, err error) { +func parseBoolFieldToUpdateVol(r *http.Request, vol *Vol) (followerRead, authenticate, enableToken, autoRepair bool, err error) { if followerReadStr := r.FormValue(followerReadKey); followerReadStr != "" { if followerRead, 
err = strconv.ParseBool(followerReadStr); err != nil { err = unmatchedKey(followerReadKey) @@ -1458,6 +1600,22 @@ func parseBoolFieldToUpdateVol(r *http.Request, vol *Vol) (followerRead, authent } else { authenticate = vol.authenticate } + if enableTokenStr := r.FormValue(enableTokenKey); enableTokenStr != "" { + if enableToken, err = strconv.ParseBool(enableTokenStr); err != nil { + err = unmatchedKey(enableTokenKey) + return + } + } else { + enableToken = vol.enableToken + } + if autoRepairStr := r.FormValue(autoRepairKey); autoRepairStr != "" { + if autoRepair, err = strconv.ParseBool(autoRepairStr); err != nil { + err = unmatchedKey(autoRepairKey) + return + } + } else { + autoRepair = vol.autoRepair + } return } @@ -1477,7 +1635,7 @@ func parseRequestToSetVolCapacity(r *http.Request) (name, authKey string, capaci return } -func parseRequestToCreateVol(r *http.Request) (name, owner, zoneName, description string, mpCount, dpReplicaNum, size, capacity int, followerRead, authenticate, crossZone, enableToken bool, err error) { +func parseRequestToCreateVol(r *http.Request) (name, owner, zoneName, description string, mpCount, dpReplicaNum, size, capacity int, followerRead, authenticate, enableToken, autoRepair bool, err error) { if err = r.ParseForm(); err != nil { return } @@ -1519,11 +1677,12 @@ func parseRequestToCreateVol(r *http.Request) (name, owner, zoneName, descriptio if authenticate, err = extractAuthenticate(r); err != nil { return } - - if crossZone, err = extractCrossZone(r); err != nil { + if autoRepair, err = extractAutoRepair(r); err != nil { return } - zoneName = r.FormValue(zoneNameKey) + if zoneName = r.FormValue(zoneNameKey); zoneName == "" { + zoneName = DefaultZoneName + } enableToken = extractEnableToken(r) description = r.FormValue(descriptionKey) return @@ -1713,13 +1872,13 @@ func extractAuthenticate(r *http.Request) (authenticate bool, err error) { return } -func extractCrossZone(r *http.Request) (crossZone bool, err error) { +func extractAutoRepair(r *http.Request) (autoRepair bool, err error) { var value string - if value = r.FormValue(crossZoneKey); value == "" { - crossZone = false + if value = r.FormValue(autoRepairKey); value == "" { + autoRepair = false return } - if crossZone, err = strconv.ParseBool(value); err != nil { + if autoRepair, err = strconv.ParseBool(value); err != nil { return } return @@ -1744,28 +1903,45 @@ func parseAndExtractSetNodeInfoParams(r *http.Request) (params map[string]interf if err = r.ParseForm(); err != nil { return } - var value string noParams := true params = make(map[string]interface{}) - if value = r.FormValue(nodeDeleteBatchCountKey); value != "" { - noParams = false - var batchCount = uint64(0) - batchCount, err = strconv.ParseUint(value, 10, 64) - if err != nil { - err = unmatchedKey(nodeDeleteBatchCountKey) - return - } - params[nodeDeleteBatchCountKey] = batchCount + if noParams, err = parseNodeInfoKey(params, nodeDeleteBatchCountKey, noParams, r); err != nil { + return + } + + if noParams, err = parseNodeInfoKey(params, nodeMarkDeleteRateKey, noParams, r); err != nil { + return + } + if noParams, err = parseNodeInfoKey(params, nodeDeleteWorkerSleepMs, noParams, r); err != nil { + return + } + if noParams, err = parseNodeInfoIntKey(params, dpRecoverPoolSizeKey, noParams, r); err != nil { + return + } + if noParams, err = parseNodeInfoIntKey(params, mpRecoverPoolSizeKey, noParams, r); err != nil { + return } - if value = r.FormValue(nodeMarkDeleteRateKey); value != "" { + if noParams { + err = 
keyNotFound(nodeDeleteBatchCountKey) + return + } + return +} + +func parseNodeInfoKey(params map[string]interface{}, key string, noParams bool, r *http.Request) (noPara bool, err error) { + var value string + defer func() { + noPara = noParams + }() + if value = r.FormValue(key); value != "" { noParams = false var val = uint64(0) val, err = strconv.ParseUint(value, 10, 64) if err != nil { - err = unmatchedKey(nodeMarkDeleteRateKey) + err = unmatchedKey(key) return } - params[nodeMarkDeleteRateKey] = val + params[key] = val } if value = r.FormValue(nodeAutoRepairRateKey); value != "" { @@ -1781,17 +1957,31 @@ func parseAndExtractSetNodeInfoParams(r *http.Request) (params map[string]interf if value = r.FormValue(nodeDeleteWorkerSleepMs); value != "" { noParams = false - var val = uint64(0) - val, err = strconv.ParseUint(value, 10, 64) + var val = int64(0) + val, err = strconv.ParseInt(value, 10, 64) if err != nil { - err = unmatchedKey(nodeMarkDeleteRateKey) + err = unmatchedKey(key) return } - params[nodeDeleteWorkerSleepMs] = val + params[key] = val } - if noParams { - err = keyNotFound(nodeDeleteBatchCountKey) - return + return +} + +func parseNodeInfoIntKey(params map[string]interface{}, key string, noParams bool, r *http.Request) (noPara bool, err error) { + var value string + defer func() { + noPara = noParams + }() + if value = r.FormValue(key); value != "" { + noParams = false + var val = int64(0) + val, err = strconv.ParseInt(value, 10, 64) + if err != nil { + err = unmatchedKey(key) + return + } + params[key] = val } return } @@ -2052,10 +2242,12 @@ func (m *Server) getMetaPartition(w http.ResponseWriter, r *http.Request) { } for i := 0; i < len(replicas); i++ { replicas[i] = &proto.MetaReplicaInfo{ - Addr: mp.Replicas[i].Addr, - ReportTime: mp.Replicas[i].ReportTime, - Status: mp.Replicas[i].Status, - IsLeader: mp.Replicas[i].IsLeader, + Addr: mp.Replicas[i].Addr, + ReportTime: mp.Replicas[i].ReportTime, + Status: mp.Replicas[i].Status, + IsLeader: mp.Replicas[i].IsLeader, + DentryCount: mp.Replicas[i].DentryCount, + InodeCount: mp.Replicas[i].InodeCount, } } var mpInfo = &proto.MetaPartitionInfo{ @@ -2155,6 +2347,14 @@ func parseAndExtractName(r *http.Request) (name string, err error) { return extractName(r) } +func extractZoneName(r *http.Request) (name string, err error) { + if name = r.FormValue(zoneNameKey); name == "" { + err = keyNotFound(zoneNameKey) + return + } + return +} + func extractName(r *http.Request) (name string, err error) { if name = r.FormValue(nameKey); name == "" { err = keyNotFound(nameKey) diff --git a/master/api_service_test.go b/master/api_service_test.go index 8ed93c1900..0c47729113 100644 --- a/master/api_service_test.go +++ b/master/api_service_test.go @@ -43,6 +43,10 @@ const ( mds3Addr = "127.0.0.1:9103" mds4Addr = "127.0.0.1:9104" mds5Addr = "127.0.0.1:9105" + mds6Addr = "127.0.0.1:9106" + mds7Addr = "127.0.0.1:9107" + mds8Addr = "127.0.0.1:9108" + mds9Addr = "127.0.0.1:9109" mms1Addr = "127.0.0.1:8101" mms2Addr = "127.0.0.1:8102" @@ -50,9 +54,13 @@ const ( mms4Addr = "127.0.0.1:8104" mms5Addr = "127.0.0.1:8105" mms6Addr = "127.0.0.1:8106" + mms7Addr = "127.0.0.1:8107" + mms8Addr = "127.0.0.1:8108" + mms9Addr = "127.0.0.1:8109" commonVolName = "commonVol" testZone1 = "zone1" testZone2 = "zone2" + testZone3 = "zone3" testUserID = "testUser" ak = "0123456789123456" @@ -85,6 +93,7 @@ func createDefaultMasterServerForTest() *Server { if err != nil { panic(err) } + testServer.config.nodeSetCapacity = defaultNodeSetCapacity //add data node 
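The mock topology assembled below is what exercises the new multi-zone paths: fake data and meta servers are registered per zone, then two heartbeat rounds let the master mark them active. A hedged sketch of that bootstrap pattern, assuming only the mocktest API already used in this file (the map literal and loop are illustrative, not part of the patch); the actual registration calls follow:

	zoneHosts := map[string][]string{
		testZone1: {mds1Addr, mds2Addr},
		testZone2: {mds3Addr, mds4Addr, mds5Addr},
	}
	for zone, addrs := range zoneHosts {
		for _, addr := range addrs {
			mds := mocktest.NewMockDataServer(addr, zone) // fake data node in that zone
			mds.Start()                                   // starts answering master RPCs
		}
	}
	server.cluster.checkDataNodeHeartbeat() // one heartbeat round registers the nodes
	time.Sleep(5 * time.Second)             // give the responses time to land
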
addDataServer(mds1Addr, testZone1) addDataServer(mds2Addr, testZone1) @@ -98,6 +107,9 @@ func createDefaultMasterServerForTest() *Server { addMetaServer(mms4Addr, testZone2) addMetaServer(mms5Addr, testZone2) time.Sleep(5 * time.Second) + testServer.cluster.cfg = newClusterConfig() + testServer.cluster.cfg.DataPartitionsRecoverPoolSize = maxDataPartitionsRecoverPoolSize + testServer.cluster.cfg.MetaPartitionsRecoverPoolSize = maxMetaPartitionsRecoverPoolSize testServer.cluster.checkDataNodeHeartbeat() testServer.cluster.checkMetaNodeHeartbeat() time.Sleep(5 * time.Second) @@ -180,14 +192,28 @@ func createMasterServer(cfgJSON string) (server *Server, err error) { return server, nil } -func addDataServer(addr, zoneName string) { - mds := mocktest.NewMockDataServer(addr, zoneName) +func addDataServer(addr, zoneName string) (mds *mocktest.MockDataServer) { + mds = mocktest.NewMockDataServer(addr, zoneName) mds.Start() + return mds } -func addMetaServer(addr, zoneName string) { - mms := mocktest.NewMockMetaServer(addr, zoneName) +func stopDataServer(mds *mocktest.MockDataServer) { + dn, _ := server.cluster.dataNode(mds.TcpAddr) + server.cluster.delDataNodeFromCache(dn) + mds.Stop() +} + +func addMetaServer(addr, zoneName string) (mms *mocktest.MockMetaServer) { + mms = mocktest.NewMockMetaServer(addr, zoneName) mms.Start() + return mms +} + +func stopMetaServer(mms *mocktest.MockMetaServer) { + mn, _ := server.cluster.metaNode(mms.TcpAddr) + server.cluster.deleteMetaNodeFromCache(mn) + mms.Stop() } func TestSetMetaNodeThreshold(t *testing.T) { @@ -294,7 +320,7 @@ func decommissionDisk(addr, path string, t *testing.T) { func TestMarkDeleteVol(t *testing.T) { name := "delVol" - createVol(name, t) + createVol(name, testZone2, t) reqURL := fmt.Sprintf("%v%v?name=%v&authKey=%v", hostAddr, proto.AdminDeleteVol, name, buildAuthKey("cfs")) process(reqURL, t) userInfo, err := server.user.getUserInfo("cfs") @@ -472,7 +498,7 @@ func TestGetMetaNode(t *testing.T) { func TestAddDataReplica(t *testing.T) { partition := commonVol.dataPartitions.partitions[0] - dsAddr := "127.0.0.1:9106" + dsAddr := mds6Addr addDataServer(dsAddr, "zone2") reqURL := fmt.Sprintf("%v%v?id=%v&addr=%v", hostAddr, proto.AdminAddDataReplica, partition.PartitionID, dsAddr) process(reqURL, t) @@ -499,7 +525,7 @@ func TestAddDataReplica(t *testing.T) { func TestRemoveDataReplica(t *testing.T) { partition := commonVol.dataPartitions.partitions[0] partition.isRecover = false - dsAddr := "127.0.0.1:9106" + dsAddr := mds6Addr reqURL := fmt.Sprintf("%v%v?id=%v&addr=%v", hostAddr, proto.AdminDeleteDataReplica, partition.PartitionID, dsAddr) process(reqURL, t) partition.RLock() @@ -519,7 +545,7 @@ func TestAddMetaReplica(t *testing.T) { t.Error("no meta partition") return } - msAddr := "127.0.0.1:8009" + msAddr := mms9Addr addMetaServer(msAddr, testZone2) server.cluster.checkMetaNodeHeartbeat() time.Sleep(2 * time.Second) @@ -542,7 +568,7 @@ func TestRemoveMetaReplica(t *testing.T) { return } partition.IsRecover = false - msAddr := "127.0.0.1:8009" + msAddr := mms9Addr reqURL := fmt.Sprintf("%v%v?id=%v&addr=%v", hostAddr, proto.AdminDeleteMetaReplica, partition.PartitionID, msAddr) process(reqURL, t) partition.RLock() diff --git a/master/cluster.go b/master/cluster.go index 98dc8801a0..daee6619f8 100644 --- a/master/cluster.go +++ b/master/cluster.go @@ -16,6 +16,8 @@ package master import ( "fmt" + "sort" + "strings" "sync" "sync/atomic" "time" @@ -49,13 +51,36 @@ type Cluster struct { volStatInfo sync.Map BadDataPartitionIds *sync.Map 
BadMetaPartitionIds *sync.Map
+	MigratedMetaPartitionIds *sync.Map
+	MigratedDataPartitionIds *sync.Map
 	DisableAutoAllocate bool
 	fsm                 *MetadataFsm
 	partition           raftstore.Partition
 	MasterSecretKey     []byte
 	lastMasterZoneForDataNode string
 	lastMasterZoneForMetaNode string
+	lastPermutationsForZone   uint8
+	dpRepairChan chan *RepairTask
+	mpRepairChan chan *RepairTask
 }
 
+type (
+	RepairType uint8
+)
+
+const (
+	BalanceMetaZone RepairType = iota
+	BalanceDataZone
+	RepairMetaDecommission
+	RepairDataDecommission
+	RepairAddReplica
+)
+
+type RepairTask struct {
+	RType       RepairType
+	Pid         uint64
+	OfflineAddr string
+}
+type ChooseDataHostFunc func(c *Cluster, offlineAddr string, dp *DataPartition, excludeNodeSets []uint64, zoneName string, destZoneName string) (oldAddr, newAddr string, err error)
 
 func newCluster(name string, leaderInfo *LeaderInfo, fsm *MetadataFsm, partition raftstore.Partition, cfg *clusterConfig) (c *Cluster) {
 	c = new(Cluster)
@@ -66,12 +91,16 @@
 	c.t = newTopology()
 	c.BadDataPartitionIds = new(sync.Map)
 	c.BadMetaPartitionIds = new(sync.Map)
+	c.MigratedDataPartitionIds = new(sync.Map)
+	c.MigratedMetaPartitionIds = new(sync.Map)
 	c.dataNodeStatInfo = new(nodeStatInfo)
 	c.metaNodeStatInfo = new(nodeStatInfo)
 	c.zoneStatInfos = make(map[string]*proto.ZoneStat)
 	c.fsm = fsm
 	c.partition = partition
 	c.idAlloc = newIDAllocator(c.fsm.store, c.partition)
+	c.initDpRepairChan()
+	c.initMpRepairChan()
 	return
 }
 
@@ -88,6 +117,9 @@ func (c *Cluster) scheduleTask() {
 	c.scheduleToCheckMetaPartitionRecoveryProgress()
 	c.scheduleToLoadMetaPartitions()
 	c.scheduleToReduceReplicaNum()
+	c.scheduleToRepairMultiZoneMetaPartitions()
+	c.scheduleToRepairMultiZoneDataPartitions()
+
 }
 
 func (c *Cluster) masterAddr() (addr string) {
@@ -319,6 +351,196 @@ func (c *Cluster) checkVolReduceReplicaNum() {
 		vol.checkReplicaNum(c)
 	}
 }
+func (c *Cluster) repairDataPartition(wg *sync.WaitGroup) {
+	for i := 0; i < cap(c.dpRepairChan); i++ {
+		select {
+		case task := <-c.dpRepairChan:
+			wg.Add(1)
+			go func(c *Cluster, task *RepairTask) {
+				var err error
+				defer func() {
+					wg.Done()
+					if err != nil {
+						log.LogErrorf("ClusterID[%v], Action[repairDataPartition], err[%v]", c.Name, err)
+					}
+				}()
+				var dp *DataPartition
+				if dp, err = c.getDataPartitionByID(task.Pid); err != nil {
+					return
+				}
+				switch task.RType {
+				case BalanceDataZone:
+					if err = c.decommissionDataPartition("", dp, getTargetAddressForBalanceDataPartitionZone, balanceDataPartitionZoneErr, "", false); err != nil {
+						return
+					}
+					Warn(c.Name, fmt.Sprintf("action[repairDataPartition] clusterID[%v] vol[%v] data partition[%v] "+
+						"Repair success, type[%v]", c.Name, dp.VolName, dp.PartitionID, task.RType))
+				default:
+					err = fmt.Errorf("action[repairDataPartition] unknown repair task type")
+					return
+				}
+			}(c, task)
+		default:
+			time.Sleep(time.Second * 2)
+		}
+	}
+}
+
+func (c *Cluster) repairMetaPartition(wg *sync.WaitGroup) {
+	for i := 0; i < cap(c.mpRepairChan); i++ {
+		select {
+		case task := <-c.mpRepairChan:
+			wg.Add(1)
+			go func(c *Cluster, task *RepairTask) {
+				var err error
+				defer func() {
+					wg.Done()
+					if err != nil {
+						log.LogErrorf("ClusterID[%v], Action[repairMetaPartition], err[%v]", c.Name, err)
+					}
+				}()
+				var mp *MetaPartition
+				if mp, err = c.getMetaPartitionByID(task.Pid); err != nil {
+					return
+				}
+				switch task.RType {
+				case BalanceMetaZone:
+					if err = c.decommissionMetaPartition("", mp, getTargetAddressForRepairMetaZone, false); err != nil {
+						return
+					}
+					Warn(c.Name, fmt.Sprintf("action[repairMetaPartition] clusterID[%v] vol[%v] meta partition[%v] "+
+						"Repair success, task type[%v]", c.Name, mp.volName, mp.PartitionID, task.RType))
+				default:
+					err = fmt.Errorf("action[repairMetaPartition] unknown repair task type")
+					return
+				}
+			}(c, task)
+		default:
+			time.Sleep(time.Second * 2)
+		}
+	}
+}
+func (c *Cluster) dataPartitionInRecovering() (num int) {
+	c.BadDataPartitionIds.Range(func(key, value interface{}) bool {
+		badDataPartitionIds := value.([]uint64)
+		num = num + len(badDataPartitionIds)
+		return true
+	})
+
+	return
+}
+
+func (c *Cluster) metaPartitionInRecovering() (num int) {
+	c.BadMetaPartitionIds.Range(func(key, value interface{}) bool {
+		badMetaPartitionIds := value.([]uint64)
+		num = num + len(badMetaPartitionIds)
+		return true
+	})
+	return
+}
+func (c *Cluster) scheduleToRepairMultiZoneMetaPartitions() {
+	//consumer
+	go func() {
+		for {
+			var wg sync.WaitGroup
+			c.repairMetaPartition(&wg)
+			wg.Wait()
+			time.Sleep(time.Second * defaultIntervalToCheckDataPartition)
+		}
+	}()
+	//producer
+	go func() {
+		for {
+			if c.partition != nil && c.partition.IsRaftLeader() && !c.t.isSingleZone() {
+				c.checkVolRepairMetaPartitions()
+			}
+			time.Sleep(time.Second * defaultIntervalToCheckDataPartition)
+		}
+	}()
+}
+
+func (c *Cluster) checkVolRepairMetaPartitions() {
+	defer func() {
+		if r := recover(); r != nil {
+			log.LogWarnf("checkVolRepairMetaPartitions occurred panic,err[%v]", r)
+			WarnBySpecialKey(fmt.Sprintf("%v_%v_scheduling_job_panic", c.Name, ModuleName),
+				"checkVolRepairMetaPartitions occurred panic")
+		}
+	}()
+	var mpInRecover uint64
+	if c.DisableAutoAllocate || c.cfg.MetaPartitionsRecoverPoolSize == defaultRecoverPoolSize {
+		return
+	}
+	mpInRecover = uint64(c.metaPartitionInRecovering())
+	if int32(mpInRecover) > c.cfg.MetaPartitionsRecoverPoolSize {
+		log.LogInfof("action[checkVolRepairMetaPartitions] clusterID[%v] Recover pool is full, recover partition[%v], pool size[%v]", c.Name, mpInRecover, c.cfg.MetaPartitionsRecoverPoolSize)
+		return
+	}
+	vols := c.allVols()
+	for _, vol := range vols {
+		if !vol.autoRepair {
+			continue
+		}
+		if isValid, _ := c.isValidZone(vol.zoneName); !isValid {
+			log.LogWarnf("checkVolRepairMetaPartitions, vol[%v], zoneName[%v] not valid, skip repair", vol.Name, vol.zoneName)
+			continue
+		}
+		vol.checkRepairMetaPartitions(c)
+	}
+}
+
+func (c *Cluster) scheduleToRepairMultiZoneDataPartitions() {
+	//consumer
+	go func() {
+		for {
+			var wg sync.WaitGroup
+			c.repairDataPartition(&wg)
+			wg.Wait()
+			time.Sleep(time.Second * defaultIntervalToCheckDataPartition)
+		}
+	}()
+	//producer
+	go func() {
+		for {
+			if c.partition != nil && c.partition.IsRaftLeader() && !c.t.isSingleZone() {
+				c.checkVolRepairDataPartitions()
+			}
+			time.Sleep(time.Second * defaultIntervalToCheckDataPartition)
+		}
+	}()
+}
+
+func (c *Cluster) checkVolRepairDataPartitions() {
+	defer func() {
+		if r := recover(); r != nil {
+			log.LogWarnf("checkVolRepairDataPartitions occurred panic,err[%v]", r)
+			WarnBySpecialKey(fmt.Sprintf("%v_%v_scheduling_job_panic", c.Name, ModuleName),
+				"checkVolRepairDataPartitions occurred panic")
+		}
+	}()
+	var dpInRecover int
+	if c.DisableAutoAllocate || c.cfg.DataPartitionsRecoverPoolSize == defaultRecoverPoolSize {
+		return
+	}
+	dpInRecover = c.dataPartitionInRecovering()
+	if int32(dpInRecover) >= c.cfg.DataPartitionsRecoverPoolSize {
+		log.LogInfof("action[checkVolRepairDataPartitions] clusterID[%v] Recover pool is full, recover partition[%v], pool size[%v]", c.Name, dpInRecover,
c.cfg.DataPartitionsRecoverPoolSize) + return + } + + vols := c.allVols() + for _, vol := range vols { + if !vol.autoRepair { + continue + } + if isValid, _ := c.isValidZone(vol.zoneName); !isValid { + log.LogWarnf("checkVolRepairDataPartitions, vol[%v], zoneName[%v] not valid, skip repair", vol.Name, vol.zoneName) + continue + } + vol.checkRepairDataPartitions(c) + } +} + func (c *Cluster) getInvalidIDNodes() (nodes []*InvalidNodeView) { metaNodes := c.getNotConsistentIDMetaNodes() @@ -638,17 +860,11 @@ func (c *Cluster) markDeleteVol(name, authKey string) (err error) { } func (c *Cluster) batchCreateDataPartition(vol *Vol, reqCount int) (err error) { - var zoneNum int for i := 0; i < reqCount; i++ { if c.DisableAutoAllocate { return } - zoneNum = c.decideZoneNum(vol.crossZone) - //most of partitions are replicated across 3 zones,but a few partitions are replicated across 2 zones - if vol.crossZone && i%5 == 0 { - zoneNum = 2 - } - if _, err = c.createDataPartition(vol.Name, zoneNum); err != nil { + if _, err = c.createDataPartition(vol.Name); err != nil { log.LogErrorf("action[batchCreateDataPartition] after create [%v] data partition,occurred error,err[%v]", i, err) break } @@ -662,7 +878,7 @@ func (c *Cluster) batchCreateDataPartition(vol *Vol, reqCount int) (err error) { // 3. Communicate with the data node to synchronously create a data partition. // - If succeeded, replicate the data through raft and persist it to RocksDB. // - Otherwise, throw errors -func (c *Cluster) createDataPartition(volName string, zoneNum int) (dp *DataPartition, err error) { +func (c *Cluster) createDataPartition(volName string) (dp *DataPartition, err error) { var ( vol *Vol partitionID uint64 @@ -677,7 +893,7 @@ func (c *Cluster) createDataPartition(volName string, zoneNum int) (dp *DataPart vol.createDpMutex.Lock() defer vol.createDpMutex.Unlock() errChannel := make(chan error, vol.dpReplicaNum) - if targetHosts, targetPeers, err = c.chooseTargetDataNodes("", nil, nil, int(vol.dpReplicaNum), zoneNum, vol.zoneName); err != nil { + if targetHosts, targetPeers, err = c.chooseTargetDataNodes("", nil, nil, int(vol.dpReplicaNum), vol.zoneName); err != nil { goto errHandler } if partitionID, err = c.idAlloc.allocateDataPartitionID(); err != nil { @@ -769,117 +985,236 @@ func (c *Cluster) syncCreateMetaPartitionToMetaNode(host string, mp *MetaPartiti return } -//decideZoneNum -//if vol is not cross zone, return 1 -//if vol enable cross zone and the zone number of cluster less than defaultReplicaNum return 2 -//otherwise, return defaultReplicaNum -func (c *Cluster) decideZoneNum(crossZone bool) (zoneNum int) { - if !crossZone { - return 1 +func (c *Cluster) isValidZone(zoneName string) (isValid bool, err error) { + isValid = true + if zoneName == "" { + isValid = false + return } - zoneLen := c.t.zoneLen() - if zoneLen < defaultReplicaNum { - zoneNum = 2 - } else { - zoneNum = defaultReplicaNum + zoneList := strings.Split(zoneName, ",") + for _, name := range zoneList { + if _, err = c.t.getZone(name); err != nil { + isValid = false + return + } } - return zoneNum + return } -func (c *Cluster) chooseTargetDataNodes(excludeZone string, excludeNodeSets []uint64, excludeHosts []string, replicaNum int, zoneNum int, specifiedZone string) (hosts []string, peers []proto.Peer, err error) { - var ( - masterZone *Zone - zones []*Zone - ) - excludeZones := make([]string, 0) - if excludeZone != "" { - excludeZones = append(excludeZones, excludeZone) +//valid zone name +//if zone name duplicate, return error +//if vol enable 
cross zone and the zone number of cluster less than defaultReplicaNum return error +func (c *Cluster) validZone(zoneName string, replicaNum int) (err error) { + var crossZone bool + if zoneName == "" { + err = fmt.Errorf("zone name empty") + return } - if replicaNum <= zoneNum { - zoneNum = replicaNum + + zoneList := strings.Split(zoneName, ",") + sort.Strings(zoneList) + if len(zoneList) > 1 { + crossZone = true } - // when creating vol,user specified a zone,we reset zoneNum to 1,to be created partition with specified zone, - //if specified zone is not writable,we choose a zone randomly - if specifiedZone != "" { - zoneNum = 1 - zone, err := c.t.getZone(specifiedZone) - if err != nil { - Warn(c.Name, fmt.Sprintf("cluster[%v],specified zone[%v]is not writable", c.Name, specifiedZone)) - } else { - zones = make([]*Zone, 0) - zones = append(zones, zone) - } + if crossZone && c.t.zoneLen() <= 1 { + return fmt.Errorf("cluster has one zone,can't cross zone") } - if zones == nil || specifiedZone == "" { - if zones, err = c.t.allocZonesForDataNode(zoneNum, replicaNum, excludeZones); err != nil { + for _, name := range zoneList { + if _, err = c.t.getZone(name); err != nil { return } } - //if vol enable cross zone,available zone less than 2,can't create partition - if zoneNum >= 2 && len(zones) < 2 { - return nil, nil, fmt.Errorf("no enough zones[%v] to be selected,crossNum[%v]", len(zones), zoneNum) + if len(zoneList) == 1 { + return + } + if len(zoneList) > replicaNum { + err = fmt.Errorf("can not specify zone number[%v] more than replica number[%v]", len(zoneList), replicaNum) } - if len(zones) == 1 { - if hosts, peers, err = zones[0].getAvailDataNodeHosts(excludeNodeSets, excludeHosts, replicaNum); err != nil { - log.LogErrorf("action[chooseTargetDataNodes],err[%v]", err) + if len(zoneList) > defaultReplicaNum { + err = fmt.Errorf("can not specify zone number[%v] more than %v", len(zoneList), defaultReplicaNum) + } + //if length of zoneList more than 1, there should not be duplicate zone names + for i := 0; i < len(zoneList)-1; i++ { + if zoneList[i] == zoneList[i+1] { + err = fmt.Errorf("duplicate zone:[%v]", zoneList[i]) return } - goto result } + return +} + +func (c *Cluster) chooseTargetDataNodes(excludeZone string, excludeNodeSets []uint64, excludeHosts []string, replicaNum int, zoneName string) (hosts []string, peers []proto.Peer, err error) { + + var ( + zones []*Zone + ) + allocateZoneMap := make(map[*Zone][]string, 0) + hasAllocateNum := 0 + excludeZones := make([]string, 0) hosts = make([]string, 0) peers = make([]proto.Peer, 0) if excludeHosts == nil { excludeHosts = make([]string, 0) } - //replicaNum is equal with the number of allocated zones - if replicaNum == len(zones) { + + if excludeZone != "" { + excludeZones = append(excludeZones, excludeZone) + } + zoneList := strings.Split(zoneName, ",") + if zones, err = c.t.allocZonesForDataNode(zoneName, replicaNum, excludeZones); err != nil { + return + } + + if len(zones) == 1 && len(zoneList) == 1 { + if hosts, peers, err = zones[0].getAvailDataNodeHosts(excludeNodeSets, excludeHosts, replicaNum); err != nil { + log.LogErrorf("action[chooseTargetDataNodes],err[%v]", err) + return + } + goto result + } + // Different from the meta partition whose replicas fully fills the 3 zones, + // each data partition just fills 2 zones to decrease data transfer across zones. + // Loop through the 3-zones permutation according to the lastPermutationsForZone + // to choose 2 zones for each partition. 
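A hedged sketch of the rotation the comment above describes, pulled out as a standalone helper for readability (illustrative only; the patch inlines the same index arithmetic in the two switch blocks below, and the e.g. line that follows enumerates this cycle for idx = 0 through 5):

	// permuteZones returns the 3-slot zone layout for cycle position idx (0..5):
	// the first half doubles zones[idx]; the second half mirrors the pairing.
	func permuteZones(zones []*Zone, idx uint8) []*Zone {
		if idx < 3 {
			return []*Zone{zones[idx], zones[idx], zones[(idx+1)%3]}
		}
		i := idx - 3
		return []*Zone{zones[(i+1)%3], zones[(i+1)%3], zones[i]}
	}
	// callers then advance the cursor: c.lastPermutationsForZone = (idx + 1) % 6
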
+ // e.g.[zone0, zone0, zone1] -> [zone1, zone1, zone2] -> [zone2, zone2, zone0] + // -> [zone1, zone1, zone0] -> [zone2, zone2, zone1] -> [zone0, zone0, zone2] + // If [zone0, zone1] is chosen for a partition with 3 replicas, 2 replicas will be allocated to zone0, + // the rest one will be allocated to zone1. + if len(zones) == 2 { + switch c.lastPermutationsForZone % 2 { + case 0: + zones = append(make([]*Zone, 0), zones[0], zones[1]) + c.lastPermutationsForZone = (c.lastPermutationsForZone + 1) % 6 + default: + zones = append(make([]*Zone, 0), zones[1], zones[0]) + c.lastPermutationsForZone = (c.lastPermutationsForZone + 1) % 6 + } + } + if len(zones) == 3 { + switch c.lastPermutationsForZone < 3 { + case true: + index := c.lastPermutationsForZone + zones = append(make([]*Zone, 0), zones[index], zones[index], zones[(index+1)%3]) + c.lastPermutationsForZone = (c.lastPermutationsForZone + 1) % 6 + default: + index := c.lastPermutationsForZone - 3 + zones = append(make([]*Zone, 0), zones[(index+1)%3], zones[(index+1)%3], zones[index]) + c.lastPermutationsForZone = (c.lastPermutationsForZone + 1) % 6 + } + } + for hasAllocateNum < replicaNum { + localExcludeHosts := excludeHosts for _, zone := range zones { - selectedHosts, selectedPeers, e := zone.getAvailDataNodeHosts(excludeNodeSets, excludeHosts, 1) + localExcludeHosts = append(localExcludeHosts, allocateZoneMap[zone]...) + selectedHosts, selectedPeers, e := zone.getAvailDataNodeHosts(excludeNodeSets, localExcludeHosts, 1) if e != nil { return nil, nil, errors.NewError(e) } hosts = append(hosts, selectedHosts...) peers = append(peers, selectedPeers...) + allocateZoneMap[zone] = append(allocateZoneMap[zone], selectedHosts...) + hasAllocateNum = hasAllocateNum + 1 + if hasAllocateNum == replicaNum { + break + } } - goto result } - - // replicaNum larger than the number of allocated zones - for _, zone := range zones { - if zone.name != c.lastMasterZoneForDataNode { - masterZone = zone - c.lastMasterZoneForDataNode = zone.name - break + goto result +result: + log.LogInfof("action[chooseTargetDataNodes] replicaNum[%v],zoneName[%v],selectedZones[%v],hosts[%v]", replicaNum, zoneName, len(zones), hosts) + if len(hosts) != replicaNum { + log.LogErrorf("action[chooseTargetDataNodes] replicaNum[%v],zoneName[%v],selectedZones[%v],hosts[%v]", replicaNum, zoneName, len(zones), hosts) + return nil, nil, errors.Trace(proto.ErrNoDataNodeToCreateDataPartition, "hosts len[%v],replicaNum[%v],zoneName[%v],selectedZones[%v]", + len(hosts), replicaNum, zoneName, len(zones)) + } + return +} +func (c *Cluster) chooseTargetDataNodesForDecommission(excludeZone string, dp *DataPartition, excludeHosts []string, replicaNum int, zoneName string) (hosts []string, peers []proto.Peer, err error) { + var zones []*Zone + var targetZone *Zone + zones = make([]*Zone, 0) + zoneList := strings.Split(zoneName, ",") + for _, z := range zoneList { + var zone *Zone + if zone, err = c.t.getZone(z); err != nil { + return } + zones = append(zones, zone) } - if masterZone == nil { - masterZone = zones[0] + //if not cross zone, choose a zone from all zones + if len(zoneList) <= 1 { + zones = c.t.getAllZones() } - for _, zone := range zones { - if zone.name == masterZone.name { - rNum := replicaNum - len(zones) + 1 - selectedHosts, selectedPeers, e := zone.getAvailDataNodeHosts(excludeNodeSets, excludeHosts, rNum) - if e != nil { - return nil, nil, errors.NewError(e) + demandWriteNodes := 1 + candidateZones := make([]*Zone, 0) + for _, z := range zones { + if z.status == 
unavailableZone { + continue + } + if excludeZone == z.name { + continue + } + if z.canWriteForDataNode(uint8(demandWriteNodes)) { + candidateZones = append(candidateZones, z) + } + } + //must have a candidate zone + if len(candidateZones) < 1 { + log.LogError(fmt.Sprintf("action[allocZonesForDataNode],there are no candidateZones, demandWriteNodes[%v], err:%v", + demandWriteNodes, proto.ErrNoZoneToCreateDataPartition)) + return nil, nil, proto.ErrNoZoneToCreateDataPartition + } + //choose target zone for single zone partition + if len(zoneList) == 1 { + for index, zone := range candidateZones { + if c.lastMasterZoneForDataNode == "" { + targetZone = zone + c.lastMasterZoneForDataNode = targetZone.name + break } - hosts = append(hosts, selectedHosts...) - peers = append(peers, selectedPeers...) - } else { - selectedHosts, selectedPeers, e := zone.getAvailDataNodeHosts(excludeNodeSets, excludeHosts, 1) - if e != nil { - return nil, nil, errors.NewError(e) + if zone.name == c.lastMasterZoneForDataNode { + if index == len(candidateZones)-1 { + targetZone = candidateZones[0] + } else { + targetZone = candidateZones[index+1] + } + c.lastMasterZoneForDataNode = targetZone.name + break } - hosts = append(hosts, selectedHosts...) - peers = append(peers, selectedPeers...) + } + if targetZone == nil { + targetZone = candidateZones[0] + c.lastMasterZoneForDataNode = targetZone.name } } -result: - log.LogInfof("action[chooseTargetDataNodes] replicaNum[%v],zoneNum[%v],selectedZones[%v],hosts[%v]", replicaNum, zoneNum, len(zones), hosts) - if len(hosts) != replicaNum { - log.LogErrorf("action[chooseTargetDataNodes] replicaNum[%v],zoneNum[%v],selectedZones[%v],hosts[%v]", replicaNum, zoneNum, len(zones), hosts) - return nil, nil, errors.Trace(proto.ErrNoDataNodeToCreateDataPartition, "hosts len[%v],replicaNum[%v],zoneNum[%v],selectedZones[%v]", - len(hosts), replicaNum, zoneNum, len(zones)) + //choose target zone for cross zone partition + if len(zoneList) > 1 { + var curZonesMap map[string]uint8 + if curZonesMap, err = dp.getDataZoneMap(c); err != nil { + return + } + //avoid change from 2 zones to 1 zone after decommission + if len(curZonesMap) == 2 && curZonesMap[excludeZone] == 1 { + for k := range curZonesMap { + if k == excludeZone { + continue + } + for _, z := range candidateZones { + if z.name == k { + continue + } + targetZone = z + } + } + } else { + targetZone = candidateZones[0] + } } + if targetZone == nil { + err = fmt.Errorf("no candidate zones available") + return + } + hosts, peers, err = targetZone.getAvailDataNodeHosts(nil, excludeHosts, 1) return } @@ -987,8 +1322,9 @@ func (c *Cluster) getAllMetaPartitionsByMetaNode(addr string) (partitions []*Met return } -func (c *Cluster) decommissionDataNode(dataNode *DataNode) (err error) { - msg := fmt.Sprintf("action[decommissionDataNode], Node[%v] OffLine", dataNode.Addr) + +func (c *Cluster) decommissionDataNode(dataNode *DataNode, destZoneName string, strictFlag bool) (err error) { + msg := fmt.Sprintf("action[decommissionDataNode], Node[%v],strictMode[%v] OffLine", dataNode.Addr, strictFlag) log.LogWarn(msg) var wg sync.WaitGroup dataNode.ToBeOffline = true @@ -996,14 +1332,16 @@ func (c *Cluster) decommissionDataNode(dataNode *DataNode) (err error) { partitions := c.getAllDataPartitionByDataNode(dataNode.Addr) errChannel := make(chan error, len(partitions)) defer func() { - dataNode.ToBeOffline = false + if err != nil { + dataNode.ToBeOffline = false + } close(errChannel) }() for _, dp := range partitions { wg.Add(1) go func(dp 
*DataPartition) { defer wg.Done() - if err1 := c.decommissionDataPartition(dataNode.Addr, dp, dataNodeOfflineErr); err1 != nil { + if err1 := c.decommissionDataPartition(dataNode.Addr, dp, getTargetAddressForDataPartitionDecommission, dataNodeOfflineErr, destZoneName, strictFlag); err1 != nil { errChannel <- err1 } }(dp) @@ -1033,7 +1371,9 @@ func (c *Cluster) delDataNodeFromCache(dataNode *DataNode) { go dataNode.clean() } -// Decommission a data partition. +// Decommission a data partition.In strict mode, only if the size of the replica is equal, +// or the number of files is equal, the recovery is considered complete. when it is triggered by migrated dataNode, +// the strict mode is true,otherwise is false. // 1. Check if we can decommission a data partition. In the following cases, we are not allowed to do so: // - (a) a replica is not in the latest host list; // - (b) there is already a replica been taken offline; @@ -1043,96 +1383,170 @@ func (c *Cluster) delDataNodeFromCache(dataNode *DataNode) { // 4. synchronized create a new data partition // 5. Set the data partition as readOnly. // 6. persistent the new host list -func (c *Cluster) decommissionDataPartition(offlineAddr string, dp *DataPartition, errMsg string) (err error) { +func (c *Cluster) decommissionDataPartition(offlineAddr string, dp *DataPartition, chooseDataHostFunc ChooseDataHostFunc, errMsg, destZoneName string, strictMode bool) (err error) { var ( - targetHosts []string - newAddr string - msg string - dataNode *DataNode - zone *Zone - replica *DataReplica - ns *nodeSet + oldAddr string + addAddr string + dpReplica *DataReplica excludeNodeSets []uint64 - zones []string - excludeZone string + msg string + vol *Vol ) - dp.RLock() - if ok := dp.hasHost(offlineAddr); !ok { - dp.RUnlock() - return - } - replica, _ = dp.getReplica(offlineAddr) - dp.RUnlock() - if err = c.validateDecommissionDataPartition(dp, offlineAddr); err != nil { - goto errHandler - } - - if dataNode, err = c.dataNode(offlineAddr); err != nil { - goto errHandler - } - - if dataNode.ZoneName == "" { - err = fmt.Errorf("dataNode[%v] zone is nil", dataNode.Addr) + dp.offlineMutex.Lock() + defer dp.offlineMutex.Unlock() + excludeNodeSets = make([]uint64, 0) + if vol, err = c.getVol(dp.VolName); err != nil { goto errHandler } - if zone, err = c.t.getZone(dataNode.ZoneName); err != nil { + if oldAddr, addAddr, err = chooseDataHostFunc(c, offlineAddr, dp, excludeNodeSets, vol.zoneName, destZoneName); err != nil { goto errHandler } - if ns, err = zone.getNodeSet(dataNode.NodeSetID); err != nil { + if dpReplica, err = dp.getReplica(oldAddr); err != nil { goto errHandler } - if targetHosts, _, err = ns.getAvailDataNodeHosts(dp.Hosts, 1); err != nil { - // select data nodes from the other node set in same zone - excludeNodeSets = append(excludeNodeSets, ns.ID) - if targetHosts, _, err = zone.getAvailDataNodeHosts(excludeNodeSets, dp.Hosts, 1); err != nil { - // select data nodes from the other zone - zones = dp.getLiveZones(offlineAddr) - if len(zones) == 0 { - excludeZone = zone.name - } else { - excludeZone = zones[0] - } - if targetHosts, _, err = c.chooseTargetDataNodes(excludeZone, excludeNodeSets, dp.Hosts, 1, 1, ""); err != nil { - goto errHandler - } - } - } - if err = c.removeDataReplica(dp, offlineAddr, false); err != nil { - goto errHandler + if err = c.removeDataReplica(dp, oldAddr, false, strictMode); err != nil { + return } - newAddr = targetHosts[0] - if err = c.addDataReplica(dp, newAddr); err != nil { - goto errHandler + if err = 
c.addDataReplica(dp, addAddr); err != nil { + return } + dp.Lock() dp.Status = proto.ReadOnly dp.isRecover = true - c.putBadDataPartitionIDs(replica, offlineAddr, dp.PartitionID) - dp.RLock() c.syncUpdateDataPartition(dp) - dp.RUnlock() - log.LogWarnf("clusterID[%v] partitionID:%v on Node:%v offline success,newHost[%v],PersistenceHosts:[%v]", - c.Name, dp.PartitionID, offlineAddr, newAddr, dp.Hosts) + dp.Unlock() + if strictMode { + c.putMigratedDataPartitionIDs(dpReplica, oldAddr, dp.PartitionID) + } else { + c.putBadDataPartitionIDs(dpReplica, oldAddr, dp.PartitionID) + } return errHandler: - msg = fmt.Sprintf(errMsg+" clusterID[%v] partitionID:%v on Node:%v "+ + msg = errMsg + fmt.Sprintf("clusterID[%v] partitionID:%v on Node:%v "+ "Then Fix It on newHost:%v Err:%v , PersistenceHosts:%v ", - c.Name, dp.PartitionID, offlineAddr, newAddr, err, dp.Hosts) + c.Name, dp.PartitionID, oldAddr, addAddr, err, dp.Hosts) if err != nil { Warn(c.Name, msg) err = fmt.Errorf("vol[%v],partition[%v],err[%v]", dp.VolName, dp.PartitionID, err) } return } +func (partition *DataPartition) RepairZone(vol *Vol, c *Cluster) (err error) { + var ( + zoneList []string + isNeedBalance bool + ) + partition.RLock() + defer partition.RUnlock() + var isValidZone bool + if isValidZone, err = c.isValidZone(vol.zoneName); err != nil { + return + } + if !isValidZone { + log.LogWarnf("action[RepairZone], vol[%v], zoneName[%v], dpReplicaNum[%v] can not be automatically repaired", vol.Name, vol.zoneName, vol.dpReplicaNum) + return + } + rps := partition.liveReplicas(defaultDataPartitionTimeOutSec) + if len(rps) < int(vol.dpReplicaNum) { + log.LogWarnf("action[RepairZone], vol[%v], zoneName[%v], live Replicas [%v] less than dpReplicaNum[%v], can not be automatically repaired", vol.Name, vol.zoneName, len(rps), vol.dpReplicaNum) + return + } + zoneList = strings.Split(vol.zoneName, ",") + if len(partition.Replicas) != int(vol.dpReplicaNum) { + log.LogWarnf("action[RepairZone], data replica length[%v] not equal to dpReplicaNum[%v]", len(partition.Replicas), vol.dpReplicaNum) + return + } + if partition.isRecover { + log.LogWarnf("action[RepairZone], data partition[%v] is recovering", partition.PartitionID) + return + } + var dpInRecover int + dpInRecover = c.dataPartitionInRecovering() + if int32(dpInRecover) >= c.cfg.DataPartitionsRecoverPoolSize { + log.LogWarnf("action[repairDataPartition] clusterID[%v] Recover pool is full, recover partition[%v], pool size[%v]", c.Name, dpInRecover, c.cfg.DataPartitionsRecoverPoolSize) + return + } + if isNeedBalance, err = partition.needToRebalanceZone(c, zoneList); err != nil { + return + } + if !isNeedBalance { + return + } + if err = c.sendRepairDataPartitionTask(partition, BalanceDataZone); err != nil { + return + } + return +} + +var getTargetAddressForDataPartitionDecommission = func(c *Cluster, offlineAddr string, dp *DataPartition, excludeNodeSets []uint64, zoneName string, destZoneName string) (oldAddr, newAddr string, err error) { + var ( + dataNode *DataNode + zone *Zone + zones []string + ns *nodeSet + excludeZone string + targetHosts []string + ) + if err = c.validateDecommissionDataPartition(dp, offlineAddr); err != nil { + return + } + if dataNode, err = c.dataNode(offlineAddr); err != nil { + return + } + if destZoneName != "" { + if zone, err = c.t.getZone(destZoneName); err != nil { + return + } + if targetHosts, _, err = zone.getAvailDataNodeHosts(excludeNodeSets, dp.Hosts, 1); err != nil { + return + } + } else { + if dataNode.ZoneName == "" { + err = 
fmt.Errorf("dataNode[%v] zone is nil", dataNode.Addr) + return + } + if zone, err = c.t.getZone(dataNode.ZoneName); err != nil { + return + } + if ns, err = zone.getNodeSet(dataNode.NodeSetID); err != nil { + return + } + if targetHosts, _, err = ns.getAvailDataNodeHosts(dp.Hosts, 1); err != nil { + // select data nodes from the other node set in same zone + excludeNodeSets = append(excludeNodeSets, ns.ID) + if targetHosts, _, err = zone.getAvailDataNodeHosts(excludeNodeSets, dp.Hosts, 1); err != nil { + // select data nodes from the other zone + zones = dp.getLiveZones(dataNode.Addr) + if len(zones) == 0 { + excludeZone = zone.name + } else { + excludeZone = zones[0] + } + if targetHosts, _, err = c.chooseTargetDataNodes(excludeZone, excludeNodeSets, dp.Hosts, 1, zoneName); err != nil { + return + } + } + } + } + newAddr = targetHosts[0] + oldAddr = offlineAddr + return +} func (c *Cluster) validateDecommissionDataPartition(dp *DataPartition, offlineAddr string) (err error) { dp.RLock() defer dp.RUnlock() + if ok := dp.hasHost(offlineAddr); !ok { + err = fmt.Errorf("offline address:[%v] is not in data partition hosts:%v", offlineAddr, dp.Hosts) + return + } + var vol *Vol if vol, err = c.getVol(dp.VolName); err != nil { return } - if err = dp.hasMissingOneReplica(int(vol.dpReplicaNum)); err != nil { + if err = dp.hasMissingOneReplica(offlineAddr, int(vol.dpReplicaNum)); err != nil { return } @@ -1141,7 +1555,7 @@ func (c *Cluster) validateDecommissionDataPartition(dp *DataPartition, offlineAd return } - if dp.isRecover { + if dp.isRecover && !dp.isLatestReplica(offlineAddr) { err = fmt.Errorf("vol[%v],data partition[%v] is recovering,[%v] can't be decommissioned", vol.Name, dp.PartitionID, offlineAddr) return } @@ -1196,29 +1610,12 @@ func (c *Cluster) buildAddDataPartitionRaftMemberTaskAndSyncSendTask(dp *DataPar } func (c *Cluster) addDataPartitionRaftMember(dp *DataPartition, addPeer proto.Peer) (err error) { - dp.Lock() - defer dp.Unlock() - if contains(dp.Hosts, addPeer.Addr) { - err = fmt.Errorf("vol[%v],data partition[%v] has contains host[%v]", dp.VolName, dp.PartitionID, addPeer.Addr) - return - } - var ( - candidateAddrs []string leaderAddr string + candidateAddrs []string ) - candidateAddrs = make([]string, 0, len(dp.Hosts)) - leaderAddr = dp.getLeaderAddr() - if leaderAddr != "" && contains(dp.Hosts, leaderAddr) { - candidateAddrs = append(candidateAddrs, leaderAddr) - } else { - leaderAddr = "" - } - for _, host := range dp.Hosts { - if host == leaderAddr { - continue - } - candidateAddrs = append(candidateAddrs, host) + if leaderAddr, candidateAddrs, err = dp.prepareAddRaftMember(addPeer); err != nil { + return } //send task to leader addr first,if need to retry,then send to other addr for index, host := range candidateAddrs { @@ -1236,13 +1633,16 @@ func (c *Cluster) addDataPartitionRaftMember(dp *DataPartition, addPeer proto.Pe if err != nil { return } + dp.Lock() newHosts := make([]string, 0, len(dp.Hosts)+1) newPeers := make([]proto.Peer, 0, len(dp.Peers)+1) newHosts = append(dp.Hosts, addPeer.Addr) newPeers = append(dp.Peers, addPeer) if err = dp.update("addDataPartitionRaftMember", dp.VolName, newPeers, newHosts, c); err != nil { + dp.Unlock() return } + dp.Unlock() return } @@ -1272,7 +1672,7 @@ func (c *Cluster) createDataReplica(dp *DataPartition, addPeer proto.Peer) (err return } -func (c *Cluster) removeDataReplica(dp *DataPartition, addr string, validate bool) (err error) { +func (c *Cluster) removeDataReplica(dp *DataPartition, addr string, validate, 
migrationMode bool) (err error) { defer func() { if err != nil { log.LogErrorf("action[removeDataReplica],vol[%v],data partition[%v],err[%v]", dp.VolName, dp.PartitionID, err) @@ -1283,7 +1683,7 @@ func (c *Cluster) removeDataReplica(dp *DataPartition, addr string, validate boo return } } - ok := c.isRecovering(dp, addr) + ok := c.isRecovering(dp, addr) && !dp.isLatestReplica(addr) if ok { err = fmt.Errorf("vol[%v],data partition[%v] can't decommision until it has recovered", dp.VolName, dp.PartitionID) return @@ -1293,10 +1693,11 @@ func (c *Cluster) removeDataReplica(dp *DataPartition, addr string, validate boo return } removePeer := proto.Peer{ID: dataNode.ID, Addr: addr} - if err = c.removeDataPartitionRaftMember(dp, removePeer); err != nil { + + if err = c.removeDataPartitionRaftMember(dp, removePeer, migrationMode); err != nil { return } - if err = c.deleteDataReplica(dp, dataNode); err != nil { + if err = c.deleteDataReplica(dp, dataNode, migrationMode); err != nil { return } leaderAddr := dp.getLeaderAddrWithLock() @@ -1335,12 +1736,12 @@ func (c *Cluster) isRecovering(dp *DataPartition, addr string) (isRecover bool) return } -func (c *Cluster) removeDataPartitionRaftMember(dp *DataPartition, removePeer proto.Peer) (err error) { - dp.offlineMutex.Lock() - defer dp.offlineMutex.Unlock() + +func (c *Cluster) removeDataPartitionRaftMember(dp *DataPartition, removePeer proto.Peer, migrationMode bool) (err error) { defer func() { if err1 := c.updateDataPartitionOfflinePeerIDWithLock(dp, 0); err1 != nil { - err = errors.Trace(err, "updateDataPartitionOfflinePeerIDWithLock failed, err[%v]", err1) } + err = errors.Trace(err, "updateDataPartitionOfflinePeerIDWithLock failed, err[%v]", err1) + } }() if err = c.updateDataPartitionOfflinePeerIDWithLock(dp, removePeer.ID); err != nil { log.LogErrorf("action[removeDataPartitionRaftMember] vol[%v],data partition[%v],err[%v]", dp.VolName, dp.PartitionID, err) @@ -1350,6 +1751,7 @@ func (c *Cluster) removeDataPartitionRaftMember(dp *DataPartition, removePeer pr if err != nil { return } + task.ReserveResource = migrationMode leaderAddr := dp.getLeaderAddr() leaderDataNode, err := c.dataNode(leaderAddr) if _, err = leaderDataNode.TaskManager.syncSendAdminTask(task); err != nil { @@ -1370,12 +1772,14 @@ func (c *Cluster) removeDataPartitionRaftMember(dp *DataPartition, removePeer pr } newPeers = append(newPeers, peer) } + dp.Lock() if err = dp.update("removeDataPartitionRaftMember", dp.VolName, newPeers, newHosts, c); err != nil { + dp.Unlock() return } + dp.Unlock() return } - func (c *Cluster) updateDataPartitionOfflinePeerIDWithLock(dp *DataPartition, peerID uint64) (err error) { dp.Lock() defer dp.Unlock() @@ -1385,7 +1789,10 @@ func (c *Cluster) updateDataPartitionOfflinePeerIDWithLock(dp *DataPartition, pe } return } -func (c *Cluster) deleteDataReplica(dp *DataPartition, dataNode *DataNode) (err error) { + + + +func (c *Cluster) deleteDataReplica(dp *DataPartition, dataNode *DataNode, migrationMode bool) (err error) { dp.Lock() // in case dataNode is unreachable,update meta first. 
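Worth noting how the new migrationMode flag threads through this path: the raft member is removed and master metadata updated, but the node-side delete task is skipped, so a migrated replica keeps its data. A hedged sketch of the two call shapes, assuming the signatures introduced in this diff:

	// normal decommission: raft member removed, then replica data deleted on the node
	err := c.removeDataReplica(dp, addr, true /* validate */, false /* migrationMode */)

	// migration: metadata updated and task.ReserveResource set, delete task not sent
	err = c.removeDataReplica(dp, addr, true /* validate */, true /* migrationMode */)

The body of deleteDataReplica resumes below: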
dp.removeReplicaByAddr(dataNode.Addr) @@ -1396,6 +1803,9 @@ func (c *Cluster) deleteDataReplica(dp *DataPartition, dataNode *DataNode) (err } task := dp.createTaskToDeleteDataPartition(dataNode.Addr) dp.Unlock() + if migrationMode { + return + } _, err = dataNode.TaskManager.syncSendAdminTask(task) if err != nil { log.LogErrorf("action[deleteDataReplica] vol[%v],data partition[%v],err[%v]", dp.VolName, dp.PartitionID, err) @@ -1453,7 +1863,8 @@ func (c *Cluster) getBadDataPartitionsView() (bpvs []badPartitionView) { return } -func (c *Cluster) decommissionMetaNode(metaNode *MetaNode) (err error) { + +func (c *Cluster) decommissionMetaNode(metaNode *MetaNode, strictMode bool) (err error) { msg := fmt.Sprintf("action[decommissionMetaNode],clusterID[%v] Node[%v] begin", c.Name, metaNode.Addr) log.LogWarn(msg) var wg sync.WaitGroup @@ -1469,7 +1880,7 @@ func (c *Cluster) decommissionMetaNode(metaNode *MetaNode) (err error) { wg.Add(1) go func(mp *MetaPartition) { defer wg.Done() - if err1 := c.decommissionMetaPartition(metaNode.Addr, mp); err1 != nil { + if err1 := c.decommissionMetaPartition(metaNode.Addr, mp, getTargetAddressForMetaPartitionDecommission, strictMode); err1 != nil { errChannel <- err1 } }(mp) @@ -1498,20 +1909,21 @@ func (c *Cluster) deleteMetaNodeFromCache(metaNode *MetaNode) { go metaNode.clean() } -func (c *Cluster) updateVol(name, authKey string, newArgs *VolVarargs) (err error) { + +func (c *Cluster) updateVol(name, authKey, zoneName, description string, capacity uint64, replicaNum uint8, followerRead, authenticate, enableToken, autoRepair bool) (err error) { var ( - vol *Vol - serverAuthKey string - oldDpReplicaNum uint8 - oldCapacity uint64 - oldFollowerRead bool - oldAuthenticate bool - oldEnableToken bool - oldZoneName string - oldDescription string - oldDpSelectorName string - oldDpSelectorParm string - volUsedSpace uint64 + vol *Vol + serverAuthKey string + oldDpReplicaNum uint8 + oldCapacity uint64 + oldFollowerRead bool + oldAuthenticate bool + oldEnableToken bool + oldAutoRepair bool + oldZoneName string + oldDescription string + oldCrossZone bool + zoneList []string ) if vol, err = c.getVol(name); err != nil { log.LogErrorf("action[updateVol] err[%v]", err) @@ -1524,18 +1936,15 @@ func (c *Cluster) updateVol(name, authKey string, newArgs *VolVarargs) (err erro if !matchKey(serverAuthKey, authKey) { return proto.ErrVolAuthKeyNotMatch } - volUsedSpace = vol.totalUsedSpace() - if float64(newArgs.capacity*util.GB) < float64(volUsedSpace)*1.2 { - err = fmt.Errorf("capacity[%v] has to be 20 percent larger than the used space[%v]", newArgs.capacity, - volUsedSpace/util.GB) - goto errHandler - } - if newArgs.dpReplicaNum > vol.dpReplicaNum { - err = fmt.Errorf("don't support new replicaNum[%v] larger than old dpReplicaNum[%v]", newArgs.dpReplicaNum, - vol.dpReplicaNum) + //if capacity < vol.Capacity { + // err = fmt.Errorf("capacity[%v] less than old capacity[%v]", capacity, vol.Capacity) + // goto errHandler + //} + if replicaNum > vol.dpReplicaNum { + err = fmt.Errorf("don't support new replicaNum[%v] larger than old dpReplicaNum[%v]", replicaNum, vol.dpReplicaNum) goto errHandler } - if newArgs.enableToken == true && len(vol.tokens) == 0 { + if enableToken == true && len(vol.tokens) == 0 { if err = c.createToken(vol, proto.ReadOnlyToken); err != nil { goto errHandler } @@ -1543,43 +1952,42 @@ func (c *Cluster) updateVol(name, authKey string, newArgs *VolVarargs) (err erro goto errHandler } } - - if vol.crossZone && newArgs.zoneName != "" { - err = fmt.Errorf("only 
the vol which don't across zones,can specified zoneName") - goto errHandler - } - if newArgs.zoneName != "" { - _, err = c.t.getZone(newArgs.zoneName) - if err != nil { + oldZoneName = vol.zoneName + if zoneName != "" { + if err = c.validZone(zoneName, int(replicaNum)); err != nil { goto errHandler } + if err = c.validZone(zoneName, int(vol.mpReplicaNum)); err != nil { + goto errHandler + } + vol.zoneName = zoneName + } + oldCrossZone = vol.crossZone + zoneList = strings.Split(vol.zoneName, ",") + if len(zoneList) > 1 { + vol.crossZone = true + } else { + vol.crossZone = false } - oldCapacity = vol.Capacity oldDpReplicaNum = vol.dpReplicaNum oldFollowerRead = vol.FollowerRead oldAuthenticate = vol.authenticate oldEnableToken = vol.enableToken - oldZoneName = vol.zoneName + oldAutoRepair = vol.autoRepair oldDescription = vol.description - oldDpSelectorName = vol.dpSelectorName - oldDpSelectorParm = vol.dpSelectorParm - - vol.zoneName = newArgs.zoneName - vol.Capacity = newArgs.capacity - vol.FollowerRead = newArgs.followerRead - vol.authenticate = newArgs.authenticate - vol.enableToken = newArgs.enableToken - if newArgs.description != "" { - vol.description = newArgs.description + vol.Capacity = capacity + vol.FollowerRead = followerRead + vol.authenticate = authenticate + vol.enableToken = enableToken + vol.autoRepair = autoRepair + if description != "" { + vol.description = description } //only reduced replica num is supported - if newArgs.dpReplicaNum != 0 && newArgs.dpReplicaNum < vol.dpReplicaNum { - vol.dpReplicaNum = newArgs.dpReplicaNum + if replicaNum != 0 && replicaNum < vol.dpReplicaNum { + vol.dpReplicaNum = replicaNum } - vol.dpSelectorName = newArgs.dpSelectorName - vol.dpSelectorParm = newArgs.dpSelectorParm - if err = c.syncUpdateVol(vol); err != nil { vol.Capacity = oldCapacity vol.dpReplicaNum = oldDpReplicaNum @@ -1587,10 +1995,9 @@ func (c *Cluster) updateVol(name, authKey string, newArgs *VolVarargs) (err erro vol.authenticate = oldAuthenticate vol.enableToken = oldEnableToken vol.zoneName = oldZoneName + vol.crossZone = oldCrossZone + vol.autoRepair = oldAutoRepair vol.description = oldDescription - vol.dpSelectorName = oldDpSelectorName - vol.dpSelectorParm = oldDpSelectorParm - log.LogErrorf("action[updateVol] vol[%v] err[%v]", name, err) err = proto.ErrPersistenceByRaft goto errHandler @@ -1605,7 +2012,7 @@ errHandler: // Create a new volume. // By default we create 3 meta partitions and 10 data partitions during initialization. 
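A note on the updateVol rewrite above: it follows a snapshot/mutate/persist/rollback discipline, and crossZone is now derived from the zone list rather than passed in. A minimal, self-contained sketch of that pattern (illustrative names only; the real Vol carries many more fields):

package main

import (
	"fmt"
	"strings"
)

// volume models only the fields this sketch needs.
type volume struct {
	Capacity  uint64
	zoneName  string
	crossZone bool
}

// updateWithRollback mirrors updateVol: snapshot the old values, mutate in
// place, persist, and restore every mutated field if persistence fails.
func updateWithRollback(v *volume, zoneName string, capacity uint64, persist func(*volume) error) error {
	oldCap, oldZone, oldCross := v.Capacity, v.zoneName, v.crossZone
	v.Capacity = capacity
	if zoneName != "" {
		v.zoneName = zoneName
	}
	// crossZone is derived: more than one zone in the list implies cross-zone.
	v.crossZone = len(strings.Split(v.zoneName, ",")) > 1
	if err := persist(v); err != nil {
		v.Capacity, v.zoneName, v.crossZone = oldCap, oldZone, oldCross
		return fmt.Errorf("persist failed, volume restored: %v", err)
	}
	return nil
}

func main() {
	v := &volume{Capacity: 100, zoneName: "zone1"}
	err := updateWithRollback(v, "zone1,zone2", 200, func(*volume) error { return nil })
	fmt.Println(v.crossZone, v.Capacity, err) // true 200 <nil>
}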
-func (c *Cluster) createVol(name, owner, zoneName, description string, mpCount, dpReplicaNum, size, capacity int, followerRead, authenticate, crossZone, enableToken bool) (vol *Vol, err error) { +func (c *Cluster) createVol(name, owner, zoneName, description string, mpCount, dpReplicaNum, size, capacity int, followerRead, authenticate, enableToken, autoRepair bool) (vol *Vol, err error) { var ( dataPartitionSize uint64 readWriteDataPartitions int @@ -1615,21 +2022,13 @@ func (c *Cluster) createVol(name, owner, zoneName, description string, mpCount, } else { dataPartitionSize = uint64(size) * util.GB } - - if crossZone && c.t.zoneLen() <= 1 { - return nil, fmt.Errorf("cluster has one zone,can't cross zone") - } - if crossZone && zoneName != "" { - return nil, fmt.Errorf("only the vol which don't across zones,can specified zoneName") + if err = c.validZone(zoneName, dpReplicaNum); err != nil { + goto errHandler } - if zoneName != "" { - if _, err = c.t.getZone(zoneName); err != nil { - return - } - } else if !crossZone { - zoneName = DefaultZoneName + if vol, err = c.doCreateVol(name, owner, zoneName, description, dataPartitionSize, uint64(capacity), dpReplicaNum, followerRead, authenticate, enableToken, autoRepair); err != nil { + goto errHandler } - if vol, err = c.doCreateVol(name, owner, zoneName, description, dataPartitionSize, uint64(capacity), dpReplicaNum, followerRead, authenticate, crossZone, enableToken); err != nil { + if err = c.validZone(zoneName, int(vol.mpReplicaNum)); err != nil { goto errHandler } if err = vol.initMetaPartitions(c, mpCount); err != nil { @@ -1657,7 +2056,7 @@ errHandler: return } -func (c *Cluster) doCreateVol(name, owner, zoneName, description string, dpSize, capacity uint64, dpReplicaNum int, followerRead, authenticate, crossZone, enableToken bool) (vol *Vol, err error) { +func (c *Cluster) doCreateVol(name, owner, zoneName, description string, dpSize, capacity uint64, dpReplicaNum int, followerRead, authenticate, enableToken, autoRepair bool) (vol *Vol, err error) { var id uint64 c.createVolMutex.Lock() defer c.createVolMutex.Unlock() @@ -1670,7 +2069,8 @@ func (c *Cluster) doCreateVol(name, owner, zoneName, description string, dpSize, if err != nil { goto errHandler } - vol = newVol(id, name, owner, zoneName, dpSize, capacity, uint8(dpReplicaNum), defaultReplicaNum, followerRead, authenticate, crossZone, enableToken, createTime, description) + vol = newVol(id, name, owner, zoneName, dpSize, capacity, uint8(dpReplicaNum), defaultReplicaNum, followerRead, authenticate, enableToken, autoRepair, createTime, description) + // refresh oss secure vol.refreshOSSSecure() if err = c.syncAddVol(vol); err != nil { @@ -1727,100 +2127,153 @@ func (c *Cluster) updateInodeIDRange(volName string, start uint64) (err error) { } // Choose the target hosts from the available zones and meta nodes. 
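The chooseTargetMetaHosts rewrite below replaces the crossZone/specifiedZone special cases with round-robin passes over the zones parsed from zoneName. A self-contained sketch of that allocation loop follows (pickOne stands in for zone.getAvailMetaNodeHosts with a count of 1); one detail worth double-checking in the patch itself is that localExcludeHosts is built by appending to the caller's excludeHosts slice, which can alias its backing array, so the sketch copies defensively:

package main

import "fmt"

// pickOneFunc stands in for zone.getAvailMetaNodeHosts(excludeNodeSets, hosts, 1).
type pickOneFunc func(zone string, exclude []string) (string, error)

// pickHostsRoundRobin walks the candidate zones in order, taking one host per
// zone per pass until replicaNum hosts are chosen; hosts already taken in a
// zone are excluded on later passes via a per-zone map.
func pickHostsRoundRobin(zones, exclude []string, replicaNum int, pickOne pickOneFunc) ([]string, error) {
	if len(zones) == 0 {
		return nil, fmt.Errorf("no candidate zones")
	}
	hosts := make([]string, 0, replicaNum)
	taken := make(map[string][]string) // zone -> hosts already selected there
	for len(hosts) < replicaNum {
		for _, z := range zones {
			// Copy so appends never write into the caller's exclude slice.
			local := append(append([]string{}, exclude...), taken[z]...)
			local = append(local, hosts...)
			h, err := pickOne(z, local)
			if err != nil {
				return nil, err
			}
			hosts = append(hosts, h)
			taken[z] = append(taken[z], h)
			if len(hosts) == replicaNum {
				break
			}
		}
	}
	return hosts, nil
}

func main() {
	pick := func(zone string, exclude []string) (string, error) {
		return fmt.Sprintf("%s-host%d", zone, len(exclude)), nil
	}
	hosts, _ := pickHostsRoundRobin([]string{"z1", "z2"}, nil, 3, pick)
	fmt.Println(hosts) // [z1-host0 z2-host1 z1-host3]
}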
-func (c *Cluster) chooseTargetMetaHosts(excludeZone string, excludeNodeSets []uint64, excludeHosts []string, replicaNum int, crossZone bool, specifiedZone string) (hosts []string, peers []proto.Peer, err error) { +func (c *Cluster) chooseTargetMetaHosts(excludeZone string, excludeNodeSets []uint64, excludeHosts []string, replicaNum int, zoneName string) (hosts []string, peers []proto.Peer, err error) { var ( - zones []*Zone - masterZone *Zone + zones []*Zone ) + allocateZoneMap := make(map[*Zone][]string, 0) + hasAllocateNum := 0 excludeZones := make([]string, 0) + hosts = make([]string, 0) + peers = make([]proto.Peer, 0) + if excludeHosts == nil { + excludeHosts = make([]string, 0) + } if excludeZone != "" { excludeZones = append(excludeZones, excludeZone) } - zoneNum := c.decideZoneNum(crossZone) - if replicaNum < zoneNum { - zoneNum = replicaNum - } - // when creating vol,user specified a zone,we reset zoneNum to 1,to be created partition with specified zone, - //if specified zone is not writable,we choose a zone randomly - if specifiedZone != "" { - zoneNum = 1 - zone, err := c.t.getZone(specifiedZone) - if err != nil { - Warn(c.Name, fmt.Sprintf("cluster[%v],specified zone[%v]is not writable", c.Name, specifiedZone)) - } else { - zones = make([]*Zone, 0) - zones = append(zones, zone) - } - } - if zones == nil || specifiedZone == "" { - if zones, err = c.t.allocZonesForMetaNode(zoneNum, replicaNum, excludeZones); err != nil { - return - } - } - - if crossZone && len(zones) < 2 { - log.LogWarn(fmt.Sprintf("action[chooseTargetMetaNodes] ,no enough zones [%v] to be selected, expect select [%v] zones", len(zones), zoneNum)) - return nil, nil, fmt.Errorf("action[chooseTargetMetaNodes] no enough zones [%v] to be selected, expect select [%v] zones", len(zones), zoneNum) + if zones, err = c.t.allocZonesForMetaNode(zoneName, replicaNum, excludeZones); err != nil { + return } - if len(zones) == 1 { + zoneList := strings.Split(zoneName, ",") + if len(zones) == 1 && len(zoneList) == 1 { if hosts, peers, err = zones[0].getAvailMetaNodeHosts(excludeNodeSets, excludeHosts, replicaNum); err != nil { log.LogErrorf("action[chooseTargetMetaNodes],err[%v]", err) return } - return + goto result } - hosts = make([]string, 0) - peers = make([]proto.Peer, 0) - if excludeHosts == nil { - excludeHosts = make([]string, 0) + if len(zones) == 2 { + switch c.lastPermutationsForZone % 2 { + case 0: + zones = append(make([]*Zone, 0), zones[0], zones[1]) + c.lastPermutationsForZone = (c.lastPermutationsForZone + 1) % 6 + default: + zones = append(make([]*Zone, 0), zones[1], zones[0]) + c.lastPermutationsForZone = (c.lastPermutationsForZone + 1) % 6 + } } - //replicaNum is equal with the number of allocated zones - if replicaNum == len(zones) { + for hasAllocateNum < replicaNum { + localExcludeHosts := excludeHosts for _, zone := range zones { - selectedHosts, selectedPeers, e := zone.getAvailMetaNodeHosts(excludeNodeSets, excludeHosts, 1) + localExcludeHosts = append(localExcludeHosts, allocateZoneMap[zone]...) + selectedHosts, selectedPeers, e := zone.getAvailMetaNodeHosts(excludeNodeSets, localExcludeHosts, 1) if e != nil { return nil, nil, errors.NewError(e) } hosts = append(hosts, selectedHosts...) peers = append(peers, selectedPeers...) + allocateZoneMap[zone] = append(allocateZoneMap[zone], selectedHosts...) 
+ hasAllocateNum = hasAllocateNum + 1 + if hasAllocateNum == replicaNum { + break + } } - goto result } + goto result +result: + log.LogInfof("action[chooseTargetMetaHosts] replicaNum[%v],zoneName[%v],selectedZones[%v],hosts[%v]", replicaNum, zoneName, zones, hosts) + if len(hosts) != replicaNum { + return nil, nil, errors.Trace(proto.ErrNoMetaNodeToCreateMetaPartition, "hosts len[%v],replicaNum[%v]", len(hosts), replicaNum) + } + return +} - // replicaNum larger than with the number of allocated zones - for _, zone := range zones { - if zone.name != c.lastMasterZoneForMetaNode { - masterZone = zone - c.lastMasterZoneForMetaNode = zone.name - break +func (c *Cluster) chooseTargetMetaHostForDecommission(excludeZone string, mp *MetaPartition, excludeHosts []string, replicaNum int, zoneName string) (hosts []string, peers []proto.Peer, err error) { + var zones []*Zone + var targetZone *Zone + zones = make([]*Zone, 0) + zoneList := strings.Split(zoneName, ",") + for _, z := range zoneList { + var zone *Zone + if zone, err = c.t.getZone(z); err != nil { + return } + zones = append(zones, zone) + } - if masterZone == nil { - masterZone = zones[0] + //if not cross zone, choose a zone from all zones + if len(zoneList) == 1 { + zones = c.t.getAllZones() } - for _, zone := range zones { - if zone.name == masterZone.name { - rNum := replicaNum - len(zones) + 1 - selectedHosts, selectedPeers, e := zone.getAvailMetaNodeHosts(excludeNodeSets, excludeHosts, rNum) - if e != nil { - return nil, nil, errors.NewError(e) + demandWriteNodes := 1 + candidateZones := make([]*Zone, 0) + for _, z := range zones { + if z.status == unavailableZone { + continue + } + if excludeZone == z.name { + continue + } + if z.canWriteForMetaNode(uint8(demandWriteNodes)) { + candidateZones = append(candidateZones, z) + } + } + //must have a candidate zone + if len(candidateZones) < 1 { + log.LogError(fmt.Sprintf("action[allocZonesForMetaNode],there are no candidateZones, demandWriteNodes[%v], err:%v", + demandWriteNodes, proto.ErrNoZoneToCreateMetaPartition)) + return nil, nil, proto.ErrNoZoneToCreateMetaPartition + } + if len(zoneList) == 1 { + for index, zone := range candidateZones { + if c.lastMasterZoneForMetaNode == "" { + targetZone = zone + c.lastMasterZoneForMetaNode = targetZone.name + break } - hosts = append(hosts, selectedHosts...) - peers = append(peers, selectedPeers...) - } else { - selectedHosts, selectedPeers, e := zone.getAvailMetaNodeHosts(excludeNodeSets, excludeHosts, 1) - if e != nil { - return nil, nil, errors.NewError(e) + if zone.name == c.lastMasterZoneForMetaNode { + if index == len(candidateZones)-1 { + targetZone = candidateZones[0] + } else { + targetZone = candidateZones[index+1] + } + c.lastMasterZoneForMetaNode = targetZone.name + break } - hosts = append(hosts, selectedHosts...) - peers = append(peers, selectedPeers...) 
+ } + if targetZone == nil { + targetZone = candidateZones[0] + c.lastMasterZoneForMetaNode = targetZone.name } } -result: - log.LogInfof("action[chooseTargetMetaHosts] replicaNum[%v],zoneNum[%v],selectedZones[%v],hosts[%v]", replicaNum, zoneNum, len(zones), hosts) - if len(hosts) != replicaNum { - return nil, nil, errors.Trace(proto.ErrNoMetaNodeToCreateMetaPartition, "hosts len[%v],replicaNum[%v]", len(hosts), replicaNum) + if len(zoneList) > 1 { + var curZonesMap map[string]uint8 + if curZonesMap, err = mp.getMetaZoneMap(c); err != nil { + return + } + //avoid change from 2 zones to 1 zone after decommission + if len(curZonesMap) == 2 && curZonesMap[excludeZone] == 1 { + for k := range curZonesMap { + if k == excludeZone { + continue + } + for _, z := range candidateZones { + if z.name == k { + continue + } + targetZone = z + } + } + } else { + targetZone = candidateZones[0] + } + } + if targetZone == nil { + err = fmt.Errorf("no candidate zones available") + return } + hosts, peers, err = targetZone.getAvailMetaNodeHosts(nil, excludeHosts, 1) return } @@ -2009,3 +2462,127 @@ func (c *Cluster) clearMetaNodes() { return true }) } + +func (c *Cluster) setDataNodeToOfflineState(startID, endID uint64, state bool, zoneName string) { + c.dataNodes.Range(func(key, value interface{}) bool { + node, ok := value.(*DataNode) + if !ok { + return true + } + if node.ID < startID || node.ID > endID { + return true + } + if node.ZoneName != zoneName { + return true + } + node.Lock() + node.ToBeMigrated = state + node.Unlock() + return true + }) +} + +func (c *Cluster) setMetaNodeToOfflineState(startID, endID uint64, state bool, zoneName string) { + c.metaNodes.Range(func(key, value interface{}) bool { + node, ok := value.(*MetaNode) + if !ok { + return true + } + if node.ID < startID || node.ID > endID { + return true + } + if node.ZoneName != zoneName { + return true + } + node.Lock() + node.ToBeMigrated = state + node.Unlock() + return true + }) +} +func (c *Cluster) setDpRecoverPoolSize(dpRecoverPool int32) (err error) { + oldDpPool := atomic.LoadInt32(&c.cfg.DataPartitionsRecoverPoolSize) + atomic.StoreInt32(&c.cfg.DataPartitionsRecoverPoolSize, dpRecoverPool) + + if err = c.syncPutCluster(); err != nil { + log.LogErrorf("action[setDpRecoverPoolSize] err[%v]", err) + atomic.StoreInt32(&c.cfg.DataPartitionsRecoverPoolSize, oldDpPool) + err = proto.ErrPersistenceByRaft + return + } + c.initDpRepairChan() + return +} + +func (c *Cluster) setMpRecoverPoolSize(mpRecoverPool int32) (err error) { + oldMpPool := atomic.LoadInt32(&c.cfg.MetaPartitionsRecoverPoolSize) + atomic.StoreInt32(&c.cfg.MetaPartitionsRecoverPoolSize, mpRecoverPool) + + if err = c.syncPutCluster(); err != nil { + log.LogErrorf("action[setMpRecoverPoolSize] err[%v]", err) + atomic.StoreInt32(&c.cfg.MetaPartitionsRecoverPoolSize, oldMpPool) + err = proto.ErrPersistenceByRaft + return + } + c.initMpRepairChan() + return +} + +func (c *Cluster) initDpRepairChan() { + var chanCapacity int32 + chanCapacity = c.cfg.DataPartitionsRecoverPoolSize + if chanCapacity > maxDataPartitionsRecoverPoolSize { + chanCapacity = maxDataPartitionsRecoverPoolSize + } + if chanCapacity < 1 { + chanCapacity = 1 + } + c.dpRepairChan = make(chan *RepairTask, chanCapacity) +} + +func (c *Cluster) initMpRepairChan() { + var chanCapacity int32 + chanCapacity = c.cfg.MetaPartitionsRecoverPoolSize + if chanCapacity > maxMetaPartitionsRecoverPoolSize { + chanCapacity = maxMetaPartitionsRecoverPoolSize + } + if chanCapacity < 1 { + chanCapacity = 1 + } + 
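// Reviewer note: the clamp above bounds the repair channel capacity to
// [1, maxMetaPartitionsRecoverPoolSize]. defaultRecoverPoolSize is -1, so an
// unconfigured cluster still gets a usable capacity of 1 instead of a runtime
// panic from make(chan, <negative>). The send side (sendRepair*PartitionTask
// below) pairs this with a select/default, so a full channel only drops and
// logs the task rather than blocking the checker.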
c.mpRepairChan = make(chan *RepairTask, chanCapacity) +} + +func (c *Cluster) sendRepairMetaPartitionTask(mp *MetaPartition, rType RepairType) (err error) { + var repairTask *RepairTask + repairTask = &RepairTask{ + RType: rType, + Pid: mp.PartitionID, + } + select { + case c.mpRepairChan <- repairTask: + Warn(c.Name, fmt.Sprintf("action[sendRepairMetaPartitionTask] clusterID[%v] vol[%v] meta partition[%v] "+ + "task type[%v]", c.Name, mp.volName, mp.PartitionID, rType)) + default: + Warn(c.Name, fmt.Sprintf("action[sendRepairMetaPartitionTask] clusterID[%v] vol[%v] meta partition[%v] "+ + "task type[%v], mpRepairChan has been full", c.Name, mp.volName, mp.PartitionID, rType)) + } + return +} + +func (c *Cluster) sendRepairDataPartitionTask(dp *DataPartition, rType RepairType) (err error) { + var repairTask *RepairTask + repairTask = &RepairTask{ + RType: rType, + Pid: dp.PartitionID, + } + select { + case c.dpRepairChan <- repairTask: + Warn(c.Name, fmt.Sprintf("action[sendRepairDataPartitionTask] clusterID[%v] vol[%v] data partition[%v] "+ + "task type[%v]", c.Name, dp.VolName, dp.PartitionID, rType)) + default: + Warn(c.Name, fmt.Sprintf("action[sendRepairDataPartitionTask] clusterID[%v] vol[%v] data partition[%v] "+ + "task type[%v], chanLength[%v], chanCapacity[%v], dpRepairChan has been full", c.Name, dp.VolName, dp.PartitionID, rType, len(c.dpRepairChan), + cap(c.dpRepairChan))) + } + return +} diff --git a/master/cluster_task.go b/master/cluster_task.go index 3e4955b59f..71b7c7031d 100644 --- a/master/cluster_task.go +++ b/master/cluster_task.go @@ -26,6 +26,8 @@ import ( "github.com/chubaofs/chubaofs/util/log" ) +type ChooseMetaHostFunc func(c *Cluster, nodeAddr string, mp *MetaPartition, oldHosts []string, excludeNodeSets []uint64, zoneName string) (oldAddr, addAddr string, err error) + func (c *Cluster) addDataNodeTasks(tasks []*proto.AdminTask) { for _, t := range tasks { c.addDataNodeTask(t) @@ -84,7 +86,9 @@ func (c *Cluster) loadDataPartition(dp *DataPartition) { }() } -// taking the given mata partition offline. +// taking the given mata partition offline. In strict mode, only if the size of the replica is equal, +// or the number of files is equal, the recovery is considered complete. when it is triggered by migrated metaNode, +// the strict mode is true,otherwise is false. // 1. checking if the meta partition can be offline. // There are two cases where the partition is not allowed to be offline: // (1) the replica is not in the latest host list @@ -93,36 +97,73 @@ func (c *Cluster) loadDataPartition(dp *DataPartition) { // 3. synchronized decommission meta partition // 4. synchronized create a new meta partition // 5. 
persistent the new host list -func (c *Cluster) decommissionMetaPartition(nodeAddr string, mp *MetaPartition) (err error) { +func (c *Cluster) decommissionMetaPartition(nodeAddr string, mp *MetaPartition, chooseMetaHostFunc ChooseMetaHostFunc, strictMode bool) (err error) { var ( - newPeers []proto.Peer - metaNode *MetaNode - zone *Zone - ns *nodeSet + addAddr string excludeNodeSets []uint64 oldHosts []string - zones []string - excludeZone string + vol *Vol ) + mp.offlineMutex.Lock() + defer mp.offlineMutex.Unlock() + oldHosts = mp.Hosts + if vol, err = c.getVol(mp.volName); err != nil { + goto errHandler + } + if nodeAddr, addAddr, err = chooseMetaHostFunc(c, nodeAddr, mp, oldHosts, excludeNodeSets, vol.zoneName); err != nil { + goto errHandler + } + log.LogWarnf("action[decommissionMetaPartition],volName[%v],nodeAddr[%v],partitionID[%v] begin", mp.volName, nodeAddr, mp.PartitionID) - mp.RLock() - if !contains(mp.Hosts, nodeAddr) { - mp.RUnlock() - return + if err = c.deleteMetaReplica(mp, nodeAddr, false, strictMode); err != nil { + goto errHandler } - oldHosts = mp.Hosts + if err = c.addMetaReplica(mp, addAddr); err != nil { + goto errHandler + } + mp.IsRecover = true + if strictMode { + c.putMigratedMetaPartitions(nodeAddr, mp.PartitionID) + } else { + c.putBadMetaPartitions(nodeAddr, mp.PartitionID) + } + mp.RLock() + c.syncUpdateMetaPartition(mp) mp.RUnlock() + return +errHandler: + log.LogError(fmt.Sprintf("action[decommissionMetaPartition],volName: %v,partitionID: %v,err: %v", + mp.volName, mp.PartitionID, errors.Stack(err))) + Warn(c.Name, fmt.Sprintf("clusterID[%v] meta partition[%v] offline addr[%v] failed,err:%v", + c.Name, mp.PartitionID, nodeAddr, err)) + if err != nil { + err = fmt.Errorf("vol[%v],partition[%v],err[%v]", mp.volName, mp.PartitionID, err) + } + return +} + +var getTargetAddressForMetaPartitionDecommission = func(c *Cluster, nodeAddr string, mp *MetaPartition, oldHosts []string, excludeNodeSets []uint64, zoneName string) (oldAddr, addAddr string, err error) { + var ( + metaNode *MetaNode + zone *Zone + zones []string + ns *nodeSet + newPeers []proto.Peer + excludeZone string + ) + oldAddr = nodeAddr + if err = c.validateDecommissionMetaPartition(mp, nodeAddr); err != nil { - goto errHandler + return } if metaNode, err = c.metaNode(nodeAddr); err != nil { - goto errHandler + return } if zone, err = c.t.getZone(metaNode.ZoneName); err != nil { - goto errHandler + return } if ns, err = zone.getNodeSet(metaNode.NodeSetID); err != nil { - goto errHandler + return } if _, newPeers, err = ns.getAvailMetaNodeHosts(oldHosts, 1); err != nil { // choose a meta node in other node set in the same zone @@ -135,33 +176,13 @@ func (c *Cluster) decommissionMetaPartition(nodeAddr string, mp *MetaPartition) excludeZone = zones[0] } // choose a meta node in other zone - if _, newPeers, err = c.chooseTargetMetaHosts(excludeZone, excludeNodeSets, oldHosts, 1, false, ""); err != nil { - goto errHandler + if _, newPeers, err = c.chooseTargetMetaHostForDecommission(excludeZone, mp, oldHosts, 1, zoneName); err != nil { + return } } } - if err = c.deleteMetaReplica(mp, nodeAddr, false); err != nil { - goto errHandler - } - if err = c.addMetaReplica(mp, newPeers[0].Addr); err != nil { - goto errHandler - } - mp.IsRecover = true - c.putBadMetaPartitions(nodeAddr, mp.PartitionID) - mp.RLock() - c.syncUpdateMetaPartition(mp) - mp.RUnlock() - Warn(c.Name, fmt.Sprintf("action[decommissionMetaPartition] clusterID[%v] vol[%v] meta partition[%v] "+ - "offline addr[%v] success,new addr[%v]", 
c.Name, mp.volName, mp.PartitionID, nodeAddr, newPeers[0].Addr)) - return - -errHandler: - log.LogError(fmt.Sprintf("action[decommissionMetaPartition],volName: %v,partitionID: %v,err: %v", - mp.volName, mp.PartitionID, errors.Stack(err))) - Warn(c.Name, fmt.Sprintf("clusterID[%v] meta partition[%v] offline addr[%v] failed,err:%v", - c.Name, mp.PartitionID, nodeAddr, err)) - if err != nil { - err = fmt.Errorf("vol[%v],partition[%v],err[%v]", mp.volName, mp.PartitionID, err) + if len(newPeers) > 0 { + addAddr = newPeers[0].Addr } return } @@ -170,6 +191,11 @@ func (c *Cluster) validateDecommissionMetaPartition(mp *MetaPartition, nodeAddr mp.RLock() defer mp.RUnlock() var vol *Vol + if !contains(mp.Hosts, nodeAddr) { + err = fmt.Errorf("offline address:[%v] is not in meta partition hosts:%v", nodeAddr, mp.Hosts) + return + } + if vol, err = c.getVol(mp.volName); err != nil { return } @@ -177,11 +203,11 @@ func (c *Cluster) validateDecommissionMetaPartition(mp *MetaPartition, nodeAddr return } - if err = mp.hasMissingOneReplica(int(vol.mpReplicaNum)); err != nil { + if err = mp.hasMissingOneReplica(nodeAddr, int(vol.mpReplicaNum)); err != nil { return } - if mp.IsRecover { + if mp.IsRecover && !mp.isLatestReplica(nodeAddr) { err = fmt.Errorf("vol[%v],meta partition[%v] is recovering,[%v] can't be decommissioned", vol.Name, mp.PartitionID, nodeAddr) return } @@ -270,7 +296,7 @@ func (c *Cluster) checkLackReplicaMetaPartitions() (lackReplicaMetaPartitions [] return } -func (c *Cluster) deleteMetaReplica(partition *MetaPartition, addr string, validate bool) (err error) { +func (c *Cluster) deleteMetaReplica(partition *MetaPartition, addr string, validate, migrationMode bool) (err error) { defer func() { if err != nil { log.LogErrorf("action[deleteMetaReplica],vol[%v],data partition[%v],err[%v]", partition.volName, partition.PartitionID, err) @@ -286,16 +312,16 @@ func (c *Cluster) deleteMetaReplica(partition *MetaPartition, addr string, valid return } removePeer := proto.Peer{ID: metaNode.ID, Addr: addr} - if err = c.removeMetaPartitionRaftMember(partition, removePeer); err != nil { + if err = c.removeMetaPartitionRaftMember(partition, removePeer, migrationMode); err != nil { return } - if err = c.deleteMetaPartition(partition, metaNode); err != nil { + if err = c.deleteMetaPartition(partition, metaNode, migrationMode); err != nil { return } return } -func (c *Cluster) deleteMetaPartition(partition *MetaPartition, removeMetaNode *MetaNode) (err error) { +func (c *Cluster) deleteMetaPartition(partition *MetaPartition, removeMetaNode *MetaNode, migrationMode bool) (err error) { partition.Lock() mr, err := partition.getMetaReplica(removeMetaNode.Addr) if err != nil { @@ -306,6 +332,9 @@ func (c *Cluster) deleteMetaPartition(partition *MetaPartition, removeMetaNode * partition.removeReplicaByAddr(removeMetaNode.Addr) partition.removeMissingReplica(removeMetaNode.Addr) partition.Unlock() + if migrationMode { + return + } _, err = removeMetaNode.Sender.syncSendAdminTask(task) if err != nil { log.LogErrorf("action[deleteMetaPartition] vol[%v],data partition[%v],err[%v]", partition.volName, partition.PartitionID, err) @@ -313,10 +342,9 @@ func (c *Cluster) deleteMetaPartition(partition *MetaPartition, removeMetaNode * return nil } -func (c *Cluster) removeMetaPartitionRaftMember(partition *MetaPartition, removePeer proto.Peer) (err error) { - partition.offlineMutex.Lock() - defer partition.offlineMutex.Unlock() - defer func(){ + +func (c *Cluster) removeMetaPartitionRaftMember(partition *MetaPartition, 
removePeer proto.Peer, migrationMode bool) (err error) { + defer func() { if err1 := c.updateMetaPartitionOfflinePeerIDWithLock(partition, 0); err1 != nil { err = errors.Trace(err, "updateMetaPartitionOfflinePeerIDWithLock failed, err[%v]", err1) } @@ -332,6 +360,7 @@ func (c *Cluster) removeMetaPartitionRaftMember(partition *MetaPartition, remove if err != nil { return } + t.ReserveResource = migrationMode var leaderMetaNode *MetaNode leaderMetaNode = mr.metaNode if leaderMetaNode == nil { @@ -357,9 +386,12 @@ func (c *Cluster) removeMetaPartitionRaftMember(partition *MetaPartition, remove } newPeers = append(newPeers, peer) } + partition.Lock() if err = partition.persistToRocksDB("removeMetaPartitionRaftMember", partition.volName, newHosts, newPeers, c); err != nil { + partition.Unlock() return } + partition.Unlock() if mr.Addr != removePeer.Addr { return } @@ -373,7 +405,7 @@ func (c *Cluster) removeMetaPartitionRaftMember(partition *MetaPartition, remove return } -func (c *Cluster) updateMetaPartitionOfflinePeerIDWithLock(mp *MetaPartition, peerID uint64) (err error){ +func (c *Cluster) updateMetaPartitionOfflinePeerIDWithLock(mp *MetaPartition, peerID uint64) (err error) { mp.Lock() defer mp.Unlock() mp.OfflinePeerID = peerID @@ -569,7 +601,7 @@ func (c *Cluster) doLoadDataPartition(dp *DataPartition) { dp.getFileCount() dp.validateCRC(c.Name) - dp.checkReplicaSize(c.Name,c.cfg.diffSpaceUsage) + dp.checkReplicaSize(c.Name, c.cfg.diffSpaceUsage) dp.setToNormal() } diff --git a/master/cluster_test.go b/master/cluster_test.go index 763e640f77..7c28f1cf73 100644 --- a/master/cluster_test.go +++ b/master/cluster_test.go @@ -20,8 +20,9 @@ func buildPanicVol() *Vol { return nil } var createTime = time.Now().Unix() // record create time of this volume - vol := newVol(id, commonVol.Name, commonVol.Owner, "", commonVol.dataPartitionSize, commonVol.Capacity, - defaultReplicaNum, defaultReplicaNum, false, false, false, false, createTime, "") + vol := newVol(id, commonVol.Name, commonVol.Owner, testZone1+","+testZone2, commonVol.dataPartitionSize, commonVol.Capacity, + defaultReplicaNum, defaultReplicaNum, false, false, true, false, createTime, "") + vol.dataPartitions = nil return vol } @@ -116,6 +117,39 @@ func TestPanicCheckBadDiskRecovery(t *testing.T) { c.scheduleToCheckDiskRecoveryProgress() } +func TestPanicCheckMigratedDataPartitionsRecovery(t *testing.T) { + c := buildPanicCluster() + vol, err := c.getVol(commonVolName) + if err != nil { + t.Error(err) + } + partitionID, err := server.cluster.idAlloc.allocateDataPartitionID() + if err != nil { + t.Error(err) + } + dp := newDataPartition(partitionID, vol.dpReplicaNum, vol.Name, vol.ID) + c.MigratedDataPartitionIds.Store(fmt.Sprintf("%v", dp.PartitionID), dp) + c.checkMigratedDataPartitionsRecoveryProgress() +} + +func TestPanicCheckMigratedMetaPartitionsRecovery(t *testing.T) { + c := buildPanicCluster() + vol, err := c.getVol(commonVolName) + if err != nil { + t.Error(err) + } + partitionID, err := server.cluster.idAlloc.allocateMetaPartitionID() + if err != nil { + t.Error(err) + } + mp := newMetaPartition(partitionID, 1, defaultMaxMetaPartitionInodeID, vol.mpReplicaNum, vol.Name, vol.ID) + vol.addMetaPartition(mp) + c.MigratedMetaPartitionIds.Store(fmt.Sprintf("%v", mp.PartitionID), mp) + mp = nil + c.checkMigratedMetaPartitionRecoveryProgress() + t.Logf("catched panic") +} + func TestCheckBadDiskRecovery(t *testing.T) { server.cluster.checkDataNodeHeartbeat() time.Sleep(5 * time.Second) diff --git a/master/config.go b/master/config.go 
index ec8a081e82..cbc574abf8 100644 --- a/master/config.go +++ b/master/config.go @@ -64,7 +64,8 @@ const ( defaultMetaPartitionMemUsageThreshold float32 = 0.75 // memory usage threshold on a meta partition defaultMaxMetaPartitionCountOnEachNode = 10000 defaultReplicaNum = 3 - defaultDiffSpaceUsage = 1024 * 1024 * 1024 + defaultDiffSpaceUsage = 10 * 1024 * 1024 * 1024 + defaultCrossZoneNum = 3 ) // AddrDatabase is a map that stores the address of a given host (e.g., the leader) @@ -92,6 +93,8 @@ type clusterConfig struct { heartbeatPort int64 replicaPort int64 diffSpaceUsage uint64 + DataPartitionsRecoverPoolSize int32 + MetaPartitionsRecoverPoolSize int32 } func newClusterConfig() (cfg *clusterConfig) { @@ -108,6 +111,8 @@ func newClusterConfig() (cfg *clusterConfig) { cfg.MetaNodeThreshold = defaultMetaPartitionMemUsageThreshold cfg.metaNodeReservedMem = defaultMetaNodeReservedMem cfg.diffSpaceUsage = defaultDiffSpaceUsage + cfg.DataPartitionsRecoverPoolSize = defaultRecoverPoolSize + cfg.MetaPartitionsRecoverPoolSize = defaultRecoverPoolSize return } diff --git a/master/const.go b/master/const.go index d87b6687a0..7d21df6d8a 100644 --- a/master/const.go +++ b/master/const.go @@ -29,6 +29,10 @@ const ( idKey = "id" countKey = "count" startKey = "start" + endKey = "end" + nodeTypeKey = "nodeType" + strictFlagKey = "strict" + stateKey = "state" enableKey = "enable" thresholdKey = "threshold" dataPartitionSizeKey = "size" @@ -42,7 +46,7 @@ const ( akKey = "ak" keywordsKey = "keywords" zoneNameKey = "zoneName" - crossZoneKey = "crossZone" + autoRepairKey = "autoRepair" tokenKey = "token" tokenTypeKey = "tokenType" enableTokenKey = "enableToken" @@ -55,6 +59,14 @@ const ( descriptionKey = "description" dpSelectorNameKey = "dpSelectorName" dpSelectorParmKey = "dpSelectorParm" + dpRecoverPoolSizeKey = "dpRecoverPool" + mpRecoverPoolSizeKey = "mpRecoverPool" +) + +const ( + nodeTypeDataNode = "dataNode" + nodeTypeMetaNode = "metaNode" + nodeTypeAll = "all" ) const ( @@ -64,6 +76,7 @@ const ( dataNodeOfflineErr = "dataNodeOfflineErr " diskOfflineErr = "diskOfflineErr " handleDataPartitionOfflineErr = "handleDataPartitionOffLineErr " + balanceDataPartitionZoneErr = "balanceDataPartitionZoneErr " ) const ( @@ -96,6 +109,10 @@ const ( retrySendSyncTaskInternal = 3 * time.Second defaultRangeOfCountDifferencesAllowed = 50 defaultMinusOfMaxInodeID = 1000 + defaultPercentMinusOfInodeCount = 0.20 + defaultRecoverPoolSize = -1 + maxDataPartitionsRecoverPoolSize = 50 + maxMetaPartitionsRecoverPoolSize = 30 ) const ( diff --git a/master/data_node.go b/master/data_node.go index 463e82ab1d..57a627f8db 100644 --- a/master/data_node.go +++ b/master/data_node.go @@ -25,18 +25,18 @@ import ( // DataNode stores all the information about a data node type DataNode struct { - Total uint64 `json:"TotalWeight"` - Used uint64 `json:"UsedWeight"` + Total uint64 `json:"TotalWeight"` + Used uint64 `json:"UsedWeight"` AvailableSpace uint64 ID uint64 - ZoneName string `json:"Zone"` + ZoneName string `json:"Zone"` Addr string ReportTime time.Time isActive bool - sync.RWMutex `graphql:"-"` - UsageRatio float64 // used / total space - SelectedTimes uint64 // number times that this datanode has been selected as the location for a data partition. - Carry float64 // carry is a factor used in cacluate the node's weight + sync.RWMutex `graphql:"-"` + UsageRatio float64 // used / total space + SelectedTimes uint64 // number times that this datanode has been selected as the location for a data partition. 
+ Carry float64 // carry is a factor used in cacluate the node's weight TaskManager *AdminTaskManager `graphql:"-"` DataPartitionReports []*proto.PartitionReport DataPartitionCount uint32 @@ -44,6 +44,7 @@ type DataNode struct { PersistenceDataPartitions []uint64 BadDisks []string ToBeOffline bool + ToBeMigrated bool } func newDataNode(addr, zoneName, clusterID string) (dataNode *DataNode) { @@ -102,7 +103,8 @@ func (dataNode *DataNode) isWriteAble() (ok bool) { dataNode.RLock() defer dataNode.RUnlock() - if dataNode.isActive == true && dataNode.AvailableSpace > 10*util.GB { + if dataNode.isActive == true && dataNode.AvailableSpace > 10*util.GB && + dataNode.ToBeOffline == false && dataNode.ToBeMigrated == false { ok = true } diff --git a/master/data_partition.go b/master/data_partition.go index 3a83143cae..99a1bed123 100644 --- a/master/data_partition.go +++ b/master/data_partition.go @@ -98,6 +98,29 @@ func (partition *DataPartition) tryToChangeLeader(c *Cluster, dataNode *DataNode return } +func (partition *DataPartition) prepareAddRaftMember(addPeer proto.Peer) (leaderAddr string, candidateAddrs []string, err error) { + partition.RLock() + defer partition.RUnlock() + if contains(partition.Hosts, addPeer.Addr) { + err = fmt.Errorf("vol[%v],data partition[%v] has contains host[%v]", partition.VolName, partition.PartitionID, addPeer.Addr) + return + } + candidateAddrs = make([]string, 0, len(partition.Hosts)) + leaderAddr = partition.getLeaderAddr() + if leaderAddr != "" && contains(partition.Hosts, leaderAddr) { + candidateAddrs = append(candidateAddrs, leaderAddr) + } else { + leaderAddr = "" + } + for _, host := range partition.Hosts { + if host == leaderAddr { + continue + } + candidateAddrs = append(candidateAddrs, host) + } + return +} + func (partition *DataPartition) createTaskToTryToChangeLeader(addr string) (task *proto.AdminTask, err error) { task = proto.NewAdminTask(proto.OpDataPartitionTryToLeader, addr, nil) partition.resetTaskID(task) @@ -141,9 +164,20 @@ func (partition *DataPartition) resetTaskID(t *proto.AdminTask) { } // Check if there is a replica missing or not. 
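The reworked hasMissingOneReplica below counts hosts and replicas while excluding the address being taken offline, so a healthy partition is not rejected merely because one of its members is the decommission target. An equivalent, self-contained form of that counting logic (hypothetical helper names, for illustration):

package main

import "fmt"

// countExcluding returns how many addresses remain once offlineAddr is ignored.
func countExcluding(addrs []string, offlineAddr string) int {
	n := 0
	for _, a := range addrs {
		if a != offlineAddr {
			n++
		}
	}
	return n
}

// hasMissingOneReplica mirrors the new check: a partition is considered to be
// missing a replica only if, excluding the host being decommissioned, either
// the host list or the replica list falls below replicaNum-1.
func hasMissingOneReplica(hosts, replicaAddrs []string, offlineAddr string, replicaNum int) bool {
	return countExcluding(hosts, offlineAddr) < replicaNum-1 ||
		countExcluding(replicaAddrs, offlineAddr) < replicaNum-1
}

func main() {
	hosts := []string{"n1", "n2", "n3"}
	// Decommissioning n3 from a healthy 3-replica partition: not missing.
	fmt.Println(hasMissingOneReplica(hosts, hosts, "n3", 3)) // false
	// Same partition already down to two live replicas: missing.
	fmt.Println(hasMissingOneReplica(hosts, []string{"n1", "n2"}, "n2", 3)) // true
}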
-func (partition *DataPartition) hasMissingOneReplica(replicaNum int) (err error) { - hostNum := len(partition.Replicas) - if hostNum <= replicaNum-1 { +func (partition *DataPartition) hasMissingOneReplica(offlineAddr string, replicaNum int) (err error) { + curHostCount := len(partition.Hosts) + for _, host := range partition.Hosts { + if host == offlineAddr { + curHostCount = curHostCount - 1 + } + } + curReplicaCount := len(partition.Replicas) + for _, r := range partition.Replicas { + if r.Addr == offlineAddr { + curReplicaCount = curReplicaCount - 1 + } + } + if curReplicaCount < replicaNum-1 || curHostCount < replicaNum-1 { log.LogError(fmt.Sprintf("action[%v],partitionID:%v,err:%v", "hasMissingOneReplica", partition.PartitionID, proto.ErrHasOneMissingReplica)) err = proto.ErrHasOneMissingReplica @@ -596,8 +630,9 @@ func (partition *DataPartition) containsBadDisk(diskPath string, nodeAddr string } func (partition *DataPartition) getMinus() (minus float64) { - partition.RLock() - defer partition.RUnlock() + if len(partition.Replicas) == 0 { + return + } used := partition.Replicas[0].Used for _, replica := range partition.Replicas { if math.Abs(float64(replica.Used)-float64(used)) > minus { @@ -607,6 +642,23 @@ func (partition *DataPartition) getMinus() (minus float64) { return minus } +func (partition *DataPartition) getMinusOfFileCount() (minus float64) { + partition.RLock() + defer partition.RUnlock() + var sentry float64 + for index, replica := range partition.Replicas { + if index == 0 { + sentry = float64(replica.FileCount) + continue + } + diff := math.Abs(float64(replica.FileCount) - sentry) + if diff > minus { + minus = diff + } + } + return +} + func (partition *DataPartition) getToBeDecommissionHost(replicaNum int) (host string) { partition.RLock() defer partition.RUnlock() @@ -619,7 +671,9 @@ func (partition *DataPartition) getToBeDecommissionHost(replicaNum int) (host st } func (partition *DataPartition) removeOneReplicaByHost(c *Cluster, host string) (err error) { - if err = c.removeDataReplica(partition, host, false); err != nil { + partition.offlineMutex.Lock() + defer partition.offlineMutex.Unlock() + if err = c.removeDataReplica(partition, host, false, false); err != nil { return } partition.RLock() @@ -683,3 +737,246 @@ func (partition *DataPartition) ToProto(c *Cluster) *proto.DataPartitionInfo { FilesWithMissingReplica: partition.FilesWithMissingReplica, } } + +func (partition *DataPartition) isLatestReplica(addr string) (ok bool) { + hostsLen := len(partition.Hosts) + if hostsLen <= 1 { + return + } + latestAddr := partition.Hosts[hostsLen-1] + return latestAddr == addr +} + +func (partition *DataPartition) isDataCatchUp() (ok bool) { + partition.RLock() + defer partition.RUnlock() + minus := partition.getMinus() + return minus < util.GB +} + +func (partition *DataPartition) isDataCatchUpInStrictMode() (ok bool) { + partition.RLock() + defer partition.RUnlock() + minus := partition.getMinus() + if partition.used > 10*util.GB { + if minus < util.GB { + return true + } + } else if partition.used > util.GB { + if minus < 500*util.MB { + return true + } + } else { + if partition.used == 0 { + return true + } + percent := minus / float64(partition.used) + if partition.used > util.MB { + if percent < 0.5 { + return true + } + } else { + if percent < 0.7 { + return true + } + } + } + return false +} + +//check if the data partition needs to rebalance zone +func (partition *DataPartition) needToRebalanceZone(c *Cluster, zoneList []string) (isNeed bool, err error) { + var 
curZoneMap map[string]uint8 + var curZoneList []string + curZoneList = make([]string, 0) + curZoneMap = make(map[string]uint8, 0) + if curZoneMap, err = partition.getDataZoneMap(c); err != nil { + return + } + for k := range curZoneMap { + curZoneList = append(curZoneList, k) + } + log.LogDebugf("action[needToRebalanceZone],data partitionID:%v,zone name:%v,current zones[%v]", + partition.PartitionID, zoneList, curZoneList) + if (len(zoneList) == 1 && len(curZoneMap) == 1) || (len(curZoneMap) == 2 && (len(zoneList) == 2 || len(zoneList) == 3)) { + isNeed = false + for zone := range curZoneMap { + if !contains(zoneList, zone) { + isNeed = true + return + } + } + return + } + isNeed = true + return +} + +var getTargetAddressForBalanceDataPartitionZone = func(c *Cluster, offlineAddr string, dp *DataPartition, excludeNodeSets []uint64, zoneName string, destZone string) (oldAddr, newAddr string, err error) { + var ( + offlineZoneName string + targetZoneName string + targetZone *Zone + nodesetInTargetZone *nodeSet + addrInTargetZone string + targetHosts []string + ) + if offlineZoneName, targetZoneName, err = dp.getOfflineAndTargetZone(c, zoneName); err != nil { + return + } + if offlineZoneName == "" || targetZoneName == "" { + err = fmt.Errorf("getOfflineAndTargetZone error, offlineZone[%v], targetZone[%v]", offlineZoneName, targetZoneName) + return + } + if targetZone, err = c.t.getZone(targetZoneName); err != nil { + return + } + if oldAddr, err = dp.getAddressByZoneName(c, offlineZoneName); err != nil { + return + } + if oldAddr == "" { + err = fmt.Errorf("can not find address to decommission") + return + } + if err = c.validateDecommissionDataPartition(dp, oldAddr); err != nil { + return + } + if addrInTargetZone, err = dp.getAddressByZoneName(c, targetZone.name); err != nil { + return + } + //if there is no replica in target zone, choose random nodeset in target zone + if addrInTargetZone == "" { + if targetHosts, _, err = targetZone.getAvailDataNodeHosts(nil, dp.Hosts, 1); err != nil { + return + } + if len(targetHosts) == 0 { + err = fmt.Errorf("no available space to find a target address") + return + } + newAddr = targetHosts[0] + return + } + //if there is a replica in target zone, choose the same nodeset with this replica + var targetNode *DataNode + if targetNode, err = c.dataNode(addrInTargetZone); err != nil { + return + } + if nodesetInTargetZone, err = targetZone.getNodeSet(targetNode.NodeSetID); err != nil { + return + } + if targetHosts, _, err = nodesetInTargetZone.getAvailDataNodeHosts(dp.Hosts, 1); err != nil { + // select data nodes from the other node set in same zone + excludeNodeSets = append(excludeNodeSets, nodesetInTargetZone.ID) + if targetHosts, _, err = targetZone.getAvailDataNodeHosts(excludeNodeSets, dp.Hosts, 1); err != nil { + return + } + } + if len(targetHosts) == 0 { + err = fmt.Errorf("no available space to find a target address") + return + } + newAddr = targetHosts[0] + log.LogInfof("action[balanceZone],data partitionID:%v,zone name:[%v],old address:[%v], new address:[%v]", + dp.PartitionID, zoneName, oldAddr, newAddr) + return +} + +// +func (partition *DataPartition) getOfflineAndTargetZone(c *Cluster, zoneName string) (offlineZone, targetZone string, err error) { + zoneList := strings.Split(zoneName, ",") + var currentZoneList []string + switch len(zoneList) { + case 1: + zoneList = append(make([]string, 0), zoneList[0], zoneList[0], zoneList[0]) + case 2: + switch partition.PartitionID % 2 { + case 0: + zoneList = append(make([]string, 0), 
zoneList[0], zoneList[0], zoneList[1]) + default: + zoneList = append(make([]string, 0), zoneList[1], zoneList[1], zoneList[0]) + } + log.LogInfof("action[getSourceAndTargetZone],data partitionID:%v,zone name:[%v],chosen zoneList:%v", + partition.PartitionID, zoneName, zoneList) + case 3: + index := partition.PartitionID % 6 + switch partition.PartitionID%6 < 3 { + case true: + zoneList = append(make([]string, 0), zoneList[index], zoneList[index], zoneList[(index+1)%3]) + default: + zoneList = append(make([]string, 0), zoneList[(index+1)%3], zoneList[(index+1)%3], zoneList[index%3]) + } + log.LogInfof("action[getSourceAndTargetZone],data partitionID:%v,zone name:[%v],chosen zoneList:%v", + partition.PartitionID, zoneName, zoneList) + default: + err = fmt.Errorf("partition zone num must be 1, 2 or 3") + return + } + + if currentZoneList, err = partition.getZoneList(c); err != nil { + return + } + intersect := util.Intersect(zoneList, currentZoneList) + projectiveToZoneList := util.Projective(zoneList, intersect) + projectiveToCurZoneList := util.Projective(currentZoneList, intersect) + log.LogInfof("Current replica zoneList:%v, volume zoneName:%v ", currentZoneList, zoneList) + if len(projectiveToZoneList) == 0 || len(projectiveToCurZoneList) == 0 { + err = fmt.Errorf("action[getSourceAndTargetZone], Current replica zoneList:%v is consistent with the volume zoneName:%v, do not need to balance", currentZoneList, zoneList) + return + } + offlineZone = projectiveToCurZoneList[0] + targetZone = projectiveToZoneList[0] + return +} + +func (partition *DataPartition) getAddressByZoneName(c *Cluster, zone string) (addr string, err error) { + for _, host := range partition.Hosts { + var dataNode *DataNode + var z *Zone + if dataNode, err = c.dataNode(host); err != nil { + return + } + if z, err = c.t.getZoneByDataNode(dataNode); err != nil { + return + } + if zone == z.name { + addr = host + } + } + return +} + +func (partition *DataPartition) getZoneList(c *Cluster) (zoneList []string, err error) { + zoneList = make([]string, 0) + for _, host := range partition.Hosts { + var dataNode *DataNode + var zone *Zone + if dataNode, err = c.dataNode(host); err != nil { + return + } + if zone, err = c.t.getZoneByDataNode(dataNode); err != nil { + return + } + zoneList = append(zoneList, zone.name) + } + return +} + +func (partition *DataPartition) getDataZoneMap(c *Cluster) (curZonesMap map[string]uint8, err error) { + curZonesMap = make(map[string]uint8, 0) + for _, host := range partition.Hosts { + var dataNode *DataNode + var zone *Zone + if dataNode, err = c.dataNode(host); err != nil { + return + } + if zone, err = c.t.getZoneByDataNode(dataNode); err != nil { + return + } + if _, ok := curZonesMap[zone.name]; !ok { + curZonesMap[zone.name] = 1 + } else { + curZonesMap[zone.name] = curZonesMap[zone.name] + 1 + } + } + return +} diff --git a/master/data_partition_check.go b/master/data_partition_check.go index 5aaa20a78f..97a7311268 100644 --- a/master/data_partition_check.go +++ b/master/data_partition_check.go @@ -180,17 +180,17 @@ func (partition *DataPartition) checkDiskError(clusterID, leaderAddr string) { return } -func (partition *DataPartition) checkReplicationTask(clusterID string, dataPartitionSize uint64) (tasks []*proto.AdminTask) { +func (partition *DataPartition) checkReplicationTask(c *Cluster, dataPartitionSize uint64) { var msg string - tasks = make([]*proto.AdminTask, 0) if excessAddr, excessErr := partition.deleteIllegalReplica(); excessErr != nil { msg = fmt.Sprintf("action[%v], 
partitionID:%v Excess Replication"+ " On :%v Err:%v rocksDBRecords:%v", deleteIllegalReplicaErr, partition.PartitionID, excessAddr, excessErr.Error(), partition.Hosts) - Warn(clusterID, msg) - partition.Lock() - partition.removeReplicaByAddr(excessAddr) - partition.Unlock() + Warn(c.Name, msg) + dn, _ := c.dataNode(excessAddr) + if dn != nil { + c.deleteDataReplica(partition, dn, false) + } } if partition.Status == proto.ReadWrite { return @@ -199,7 +199,7 @@ func (partition *DataPartition) checkReplicationTask(clusterID string, dataParti msg = fmt.Sprintf("action[%v], partitionID:%v Lack Replication"+ " On :%v Err:%v Hosts:%v new task to create DataReplica", addMissingReplicaErr, partition.PartitionID, lackAddr, lackErr.Error(), partition.Hosts) - Warn(clusterID, msg) + Warn(c.Name, msg) } else { partition.setToNormal() } diff --git a/master/data_partition_test.go b/master/data_partition_test.go index e157d3a019..ab3c2b73e1 100644 --- a/master/data_partition_test.go +++ b/master/data_partition_test.go @@ -24,6 +24,8 @@ func TestDataPartition(t *testing.T) { getDataPartition(partition.PartitionID, t) loadDataPartitionTest(partition, t) decommissionDataPartition(partition, t) + partition2 := commonVol.dataPartitions.partitions[1] + delDataReplicaTest(partition2, t) } func createDataPartition(vol *Vol, count int, t *testing.T) { @@ -89,3 +91,32 @@ func loadDataPartitionTest(dp *DataPartition, t *testing.T) { dp.validateCRC(server.cluster.Name) dp.setToNormal() } +func delDataReplicaTest(dp *DataPartition, t *testing.T) { + t.Logf("dpID[%v],hosts[%v],replica length[%v]", dp.PartitionID, dp.Hosts, len(dp.Replicas)) + testAddr := mds9Addr + extraReplica := proto.DataReplica{ + Status: 2, + Addr: testAddr, + } + addDataServer(testAddr, testZone1) + dn, _ := server.cluster.dataNode(testAddr) + extraDataReplica := &DataReplica{ + DataReplica: extraReplica, + dataNode: dn, + } + dp.Replicas = append(dp.Replicas, extraDataReplica) + err := server.cluster.deleteDataReplica(dp, dn, false) + if err != nil { + t.Errorf("delete replica failed, err[%v]", err) + } + server.cluster.checkDataPartitions() + if len(dp.Replicas) != 3 { + t.Errorf("delete replica failed, expect replica length[%v], but is[%v]", 3, len(dp.Replicas)) + } + for _, r := range dp.Replicas { + if testAddr == r.Addr { + t.Errorf("delete replica [%v] failed", testAddr) + return + } + } +} diff --git a/master/disk_manager.go b/master/disk_manager.go index 9bb71ca471..e3fc313cde 100644 --- a/master/disk_manager.go +++ b/master/disk_manager.go @@ -16,8 +16,8 @@ package master import ( "fmt" - "github.com/chubaofs/chubaofs/util" "github.com/chubaofs/chubaofs/util/log" + "sync" "time" ) @@ -27,6 +27,7 @@ func (c *Cluster) scheduleToCheckDiskRecoveryProgress() { if c.partition != nil && c.partition.IsRaftLeader() { if c.vols != nil { c.checkDiskRecoveryProgress() + c.checkMigratedDataPartitionsRecoveryProgress() } } time.Sleep(time.Second * defaultIntervalToCheckDataPartition) @@ -42,7 +43,6 @@ func (c *Cluster) checkDiskRecoveryProgress() { "checkDiskRecoveryProgress occurred panic") } }() - var diff float64 c.BadDataPartitionIds.Range(func(key, value interface{}) bool { badDataPartitionIds := value.([]uint64) newBadDpIds := make([]uint64, 0) @@ -58,20 +58,19 @@ func (c *Cluster) checkDiskRecoveryProgress() { if len(partition.Replicas) == 0 || len(partition.Replicas) < int(vol.dpReplicaNum) { continue } - diff = partition.getMinus() - if diff < util.GB { + if partition.isDataCatchUp() { partition.isRecover = false partition.RLock() 
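// Reviewer note on the decommissionDisk hunk below: inside the goroutine the
// send is guarded by `err != nil` (the outer captured variable) while the
// value sent is err1; this looks like it was meant to be `err1 != nil`,
// otherwise per-partition decommission failures are silently dropped.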
c.syncUpdateDataPartition(partition) partition.RUnlock() - Warn(c.Name, fmt.Sprintf("clusterID[%v],partitionID[%v] has recovered success", c.Name, partitionID)) + Warn(c.Name, fmt.Sprintf("action[checkDiskRecoveryProgress] clusterID[%v],partitionID[%v] has recovered success", c.Name, partitionID)) } else { newBadDpIds = append(newBadDpIds, partitionID) } } if len(newBadDpIds) == 0 { - Warn(c.Name, fmt.Sprintf("clusterID[%v],node:disk[%v] has recovered success", c.Name, key)) + Warn(c.Name, fmt.Sprintf("action[checkDiskRecoveryProgress] clusterID[%v],node:disk[%v] has recovered success", c.Name, key)) c.BadDataPartitionIds.Delete(key) } else { c.BadDataPartitionIds.Store(key, newBadDpIds) @@ -84,11 +83,25 @@ func (c *Cluster) checkDiskRecoveryProgress() { func (c *Cluster) decommissionDisk(dataNode *DataNode, badDiskPath string, badPartitions []*DataPartition) (err error) { msg := fmt.Sprintf("action[decommissionDisk], Node[%v] OffLine,disk[%v]", dataNode.Addr, badDiskPath) log.LogWarn(msg) - + var wg sync.WaitGroup + errChannel := make(chan error, len(badPartitions)) + defer func() { + close(errChannel) + }() for _, dp := range badPartitions { - if err = c.decommissionDataPartition(dataNode.Addr, dp, diskOfflineErr); err != nil { - return - } + wg.Add(1) + go func(dp *DataPartition) { + defer wg.Done() + if err1 := c.decommissionDataPartition(dataNode.Addr, dp, getTargetAddressForDataPartitionDecommission, diskOfflineErr, "", false); err != nil { + errChannel <- err1 + } + }(dp) + } + wg.Wait() + select { + case err = <-errChannel: + return + default: } msg = fmt.Sprintf("action[decommissionDisk],clusterID[%v] Node[%v] OffLine success", c.Name, dataNode.Addr) diff --git a/master/gapi_cluster.go b/master/gapi_cluster.go index fd8b29c37e..ee4a515488 100644 --- a/master/gapi_cluster.go +++ b/master/gapi_cluster.go @@ -218,7 +218,7 @@ func (m *ClusterService) decommissionDataNode(ctx context.Context, args struct { if err != nil { return nil, err } - if err := m.cluster.decommissionDataNode(node); err != nil { + if err := m.cluster.decommissionDataNode(node, "", false); err != nil { return nil, err } rstMsg := fmt.Sprintf("decommission data node [%v] successfully", args.OffLineAddr) @@ -236,7 +236,7 @@ func (m *ClusterService) decommissionMetaNode(ctx context.Context, args struct { if err != nil { return nil, err } - if err = m.cluster.decommissionMetaNode(metaNode); err != nil { + if err = m.cluster.decommissionMetaNode(metaNode, false); err != nil { return nil, err } log.LogInfof("decommissionMetaNode metaNode [%v] has offline successfully", args.OffLineAddr) @@ -270,7 +270,7 @@ func (m *ClusterService) decommissionMetaPartition(ctx context.Context, args str if err != nil { return nil, err } - if err := m.cluster.decommissionMetaPartition(args.NodeAddr, mp); err != nil { + if err := m.cluster.decommissionMetaPartition(args.NodeAddr, mp, getTargetAddressForMetaPartitionDecommission, false); err != nil { return nil, err } log.LogInfof(proto.AdminDecommissionMetaPartition+" partitionID :%v decommissionMetaPartition successfully", args.PartitionID) @@ -611,6 +611,8 @@ func (m *ClusterService) makeClusterView() *proto.ClusterView { LeaderAddr: m.cluster.leaderInfo.addr, DisableAutoAlloc: m.cluster.DisableAutoAllocate, MetaNodeThreshold: m.cluster.cfg.MetaNodeThreshold, + DpRecoverPool: m.cluster.cfg.DataPartitionsRecoverPoolSize, + MpRecoverPool: m.cluster.cfg.MetaPartitionsRecoverPoolSize, Applied: m.cluster.fsm.applied, MaxDataPartitionID: m.cluster.idAlloc.dataPartitionID, MaxMetaNodeID: 
m.cluster.idAlloc.commonID, diff --git a/master/gapi_volume.go b/master/gapi_volume.go index ee5ec613e9..fba9116698 100644 --- a/master/gapi_volume.go +++ b/master/gapi_volume.go @@ -65,8 +65,9 @@ func (s *VolumeService) registerObject(schema *schemabuilder.Schema) { FollowerRead: vol.FollowerRead, NeedToLowerReplica: vol.NeedToLowerReplica, Authenticate: vol.authenticate, - CrossZone: vol.crossZone, EnableToken: vol.enableToken, + CrossZone: vol.crossZone, + AutoRepair: vol.autoRepair, Tokens: vol.tokens, RwDpCnt: vol.dataPartitions.readableAndWritableCnt, MpCnt: len(vol.MetaPartitions), @@ -205,7 +206,7 @@ func (s *VolumeService) createVolume(ctx context.Context, args struct { return nil, fmt.Errorf("[%s] not has permission to create volume for [%s]", uid, args.Owner) } - vol, err := s.cluster.createVol(args.Name, args.Owner, args.ZoneName, args.Description, int(args.MpCount), int(args.DpReplicaNum), int(args.DataPartitionSize), int(args.Capacity), args.FollowerRead, args.Authenticate, args.CrossZone, args.EnableToken) + vol, err := s.cluster.createVol(args.Name, args.Owner, args.ZoneName, args.Description, int(args.MpCount), int(args.DpReplicaNum), int(args.DataPartitionSize), int(args.Capacity), args.FollowerRead, args.Authenticate, args.EnableToken, false) if err != nil { return nil, err } @@ -266,11 +267,11 @@ func (s *VolumeService) markDeleteVol(ctx context.Context, args struct { } func (s *VolumeService) updateVolume(ctx context.Context, args struct { - Name, AuthKey string - ZoneName, Description *string - Capacity, ReplicaNum *uint64 - EnableToken *bool - FollowerRead, Authenticate *bool + Name, AuthKey string + ZoneName, Description *string + Capacity, ReplicaNum *uint64 + EnableToken *bool + FollowerRead, Authenticate, AutoRepair *bool }) (*Vol, error) { uid, perm, err := permissions(ctx, ADMIN|USER) if err != nil { @@ -326,7 +327,15 @@ func (s *VolumeService) updateVolume(ctx context.Context, args struct { newArgs.description = *args.Description } - if err = s.cluster.updateVol(args.Name, args.AuthKey, newArgs); err != nil { + if args.AutoRepair == nil { + args.AutoRepair = &vol.autoRepair + } + + if args.Description == nil { + args.Description = &vol.description + } + + if err = s.cluster.updateVol(args.Name, args.AuthKey, *args.ZoneName, *args.Description, *args.Capacity, uint8(*args.ReplicaNum), *args.FollowerRead, *args.Authenticate, *args.EnableToken, *args.AutoRepair); err != nil { return nil, err } diff --git a/master/http_server.go b/master/http_server.go index 987dd4354d..3d8bdaf2f3 100644 --- a/master/http_server.go +++ b/master/http_server.go @@ -244,6 +244,9 @@ func (m *Server) registerAPIRoutes(router *mux.Router) { router.NewRoute().Methods(http.MethodGet, http.MethodPost). Path(proto.AdminGetNodeInfo). HandlerFunc(m.getNodeInfoHandler) + router.NewRoute().Methods(http.MethodGet, http.MethodPost). + Path(proto.AdminSetNodeState). + HandlerFunc(m.setNodeToOfflineState) // user management APIs router.NewRoute().Methods(http.MethodPost). 
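On the updateVolume resolver above: optional GraphQL arguments arrive as pointers, nil meaning "not supplied", and the patch backfills AutoRepair and Description from the current volume before dereferencing. It is worth double-checking that every pointer passed to updateVol gets the same treatment. A generic helper in that spirit (hypothetical, not part of the patch):

package main

import "fmt"

// orDefault returns the argument's value when it was supplied, otherwise the
// current setting; nil means the field was absent from the mutation.
func orDefault(arg *bool, current bool) bool {
	if arg == nil {
		return current
	}
	return *arg
}

func main() {
	current := true // e.g. vol.autoRepair
	var notSent *bool
	sent := false
	fmt.Println(orDefault(notSent, current)) // true  (keep current value)
	fmt.Println(orDefault(&sent, current))   // false (caller override)
}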
diff --git a/master/meta_node.go b/master/meta_node.go
index 1661285526..d185b46f85 100644
--- a/master/meta_node.go
+++ b/master/meta_node.go
@@ -40,8 +40,9 @@ type MetaNode struct {
 	metaPartitionInfos        []*proto.MetaPartitionReport
 	MetaPartitionCount        int
 	NodeSetID                 uint64
-	sync.RWMutex `graphql:"-"`
+	sync.RWMutex              `graphql:"-"`
 	ToBeOffline               bool
+	ToBeMigrated              bool
 	PersistenceMetaPartitions []uint64
 }
 
@@ -89,7 +90,8 @@ func (metaNode *MetaNode) isWritable() (ok bool) {
 	metaNode.RLock()
 	defer metaNode.RUnlock()
 	if metaNode.IsActive && metaNode.MaxMemAvailWeight > gConfig.metaNodeReservedMem &&
-		!metaNode.reachesThreshold() && metaNode.MetaPartitionCount < defaultMaxMetaPartitionCountOnEachNode {
+		!metaNode.reachesThreshold() && metaNode.MetaPartitionCount < defaultMaxMetaPartitionCountOnEachNode &&
+		!metaNode.ToBeOffline && !metaNode.ToBeMigrated {
 		ok = true
 	}
 	return
diff --git a/master/meta_partition.go b/master/meta_partition.go
index d1da322491..8cc7e3adc9 100644
--- a/master/meta_partition.go
+++ b/master/meta_partition.go
@@ -15,6 +15,7 @@
 package master
 
 import (
+	"github.com/chubaofs/chubaofs/util"
 	"sync"
 
 	"fmt"
@@ -57,8 +58,8 @@ type MetaPartition struct {
 	volName       string
 	Hosts         []string
 	Peers         []proto.Peer
-	OfflinePeerID uint64
 	MissNodes     map[string]int64
+	OfflinePeerID uint64
 	LoadResponse  []*proto.MetaPartitionLoadResponse
 	offlineMutex  sync.RWMutex
 	sync.RWMutex
@@ -185,13 +186,15 @@ func (mp *MetaPartition) checkEnd(c *Cluster, maxPartitionID uint64) {
 		log.LogWarnf("action[checkEnd] vol[%v] not exist", mp.volName)
 		return
 	}
-	mp.Lock()
-	defer mp.Unlock()
+	vol.createMpMutex.RLock()
+	defer vol.createMpMutex.RUnlock()
 	curMaxPartitionID := vol.maxPartitionID()
 	if mp.PartitionID != curMaxPartitionID {
 		log.LogWarnf("action[checkEnd] partition[%v] not max partition[%v]", mp.PartitionID, curMaxPartitionID)
 		return
 	}
+	mp.Lock()
+	defer mp.Unlock()
 	if _, err = mp.getMetaReplicaLeader(); err != nil {
 		log.LogWarnf("action[checkEnd] partition[%v] no leader", mp.PartitionID)
 		return
@@ -299,12 +302,12 @@ func (mp *MetaPartition) checkReplicaNum(c *Cluster, volName string, replicaNum
 	}
 }
 
-func (mp *MetaPartition) removeIllegalReplica() (excessAddr string, t *proto.AdminTask, err error) {
-	mp.RLock()
-	defer mp.RUnlock()
+func (mp *MetaPartition) removeIllegalReplica() (excessAddr string, err error) {
+	mp.Lock()
+	defer mp.Unlock()
 	for _, mr := range mp.Replicas {
 		if !contains(mp.Hosts, mr.Addr) {
-			t = mr.createTaskToDeleteReplica(mp.PartitionID)
+			excessAddr = mr.Addr
 			err = proto.ErrIllegalMetaReplica
 			break
 		}
@@ -362,9 +365,20 @@ func (mp *MetaPartition) canBeOffline(nodeAddr string, replicaNum int) (err erro
 }
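With the isWritable change above, a meta node flagged ToBeOffline or ToBeMigrated stops being a placement target even while it is healthy and has free memory. A condensed sketch of the predicate, written as a free function with illustrative parameter names rather than the project's method:

```go
package main

import "fmt"

// writable mirrors the amended condition: every health check must hold AND
// the node must not be scheduled for offline or migration.
func writable(active bool, availMem, reservedMem uint64, reachedThreshold bool,
	mpCount, maxMpCount int, toBeOffline, toBeMigrated bool) bool {
	return active &&
		availMem > reservedMem &&
		!reachedThreshold &&
		mpCount < maxMpCount &&
		!toBeOffline && !toBeMigrated
}

func main() {
	// A healthy node that is being migrated is no longer a placement target.
	fmt.Println(writable(true, 8<<30, 1<<30, false, 100, 10000, false, true)) // false
}
```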
 // Check if there is a replica missing or not.
-func (mp *MetaPartition) hasMissingOneReplica(replicaNum int) (err error) {
-	hostNum := len(mp.Replicas)
-	if hostNum <= replicaNum-1 {
+func (mp *MetaPartition) hasMissingOneReplica(offlineAddr string, replicaNum int) (err error) {
+	curHostCount := len(mp.Hosts)
+	for _, host := range mp.Hosts {
+		if host == offlineAddr {
+			curHostCount = curHostCount - 1
+		}
+	}
+	curReplicaCount := len(mp.Replicas)
+	for _, r := range mp.Replicas {
+		if r.Addr == offlineAddr {
+			curReplicaCount = curReplicaCount - 1
+		}
+	}
+	if curHostCount < replicaNum-1 || curReplicaCount < replicaNum-1 {
 		log.LogError(fmt.Sprintf("action[%v],partitionID:%v,err:%v",
 			"hasMissingOneReplica", mp.PartitionID, proto.ErrHasOneMissingReplica))
 		err = proto.ErrHasOneMissingReplica
@@ -470,20 +484,22 @@ func (mp *MetaPartition) reportMissingReplicas(clusterID, leaderAddr string, sec
 	}
 }
 
-func (mp *MetaPartition) replicaCreationTasks(clusterID, volName string) (tasks []*proto.AdminTask) {
+func (mp *MetaPartition) replicaCreationTasks(c *Cluster, volName string) {
 	var msg string
-	tasks = make([]*proto.AdminTask, 0)
-	if addr, _, err := mp.removeIllegalReplica(); err != nil {
+	mp.offlineMutex.Lock()
+	defer mp.offlineMutex.Unlock()
+	if addr, err := mp.removeIllegalReplica(); err != nil {
 		msg = fmt.Sprintf("action[%v],clusterID[%v] metaPartition:%v excess replication"+
 			" on :%v err:%v persistenceHosts:%v",
-			deleteIllegalReplicaErr, clusterID, mp.PartitionID, addr, err.Error(), mp.Hosts)
+			deleteIllegalReplicaErr, c.Name, mp.PartitionID, addr, err.Error(), mp.Hosts)
 		log.LogWarn(msg)
+		c.deleteMetaReplica(mp, addr, true, false)
 	}
 	if addrs := mp.missingReplicaAddrs(); addrs != nil {
 		msg = fmt.Sprintf("action[missingReplicaAddrs],clusterID[%v] metaPartition:%v lack replication"+
 			" on :%v Hosts:%v",
-			clusterID, mp.PartitionID, addrs, mp.Hosts)
-		Warn(clusterID, msg)
+			c.Name, mp.PartitionID, addrs, mp.Hosts)
+		Warn(c.Name, msg)
 	}
 
 	return
@@ -670,6 +686,81 @@ func (mp *MetaPartition) getMinusOfMaxInodeID() (minus float64) {
 	return
 }
 
+func (mp *MetaPartition) getPercentMinusOfInodeCount() (minus float64) {
+	mp.RLock()
+	defer mp.RUnlock()
+	var sentry float64
+	for index, replica := range mp.Replicas {
+		if index == 0 {
+			sentry = float64(replica.InodeCount)
+			continue
+		}
+		diff := math.Abs(float64(replica.InodeCount) - sentry)
+		if diff > minus {
+			minus = diff
+		}
+	}
+	// guard the normalization: with no replicas, or a zero inode count on
+	// the first replica, dividing would yield NaN
+	if sentry != 0 {
+		minus = minus / sentry
+	}
+	return
+}
+
+func (mp *MetaPartition) getMinusOfInodeCount() (minus float64) {
+	mp.RLock()
+	defer mp.RUnlock()
+	var sentry float64
+	for index, replica := range mp.Replicas {
+		if index == 0 {
+			sentry = float64(replica.InodeCount)
+			continue
+		}
+		diff := math.Abs(float64(replica.InodeCount) - sentry)
+		if diff > minus {
+			minus = diff
+		}
+	}
+	return
+}
+
+func (mp *MetaPartition) getMinusOfDentryCount() (minus float64) {
+	mp.RLock()
+	defer mp.RUnlock()
+	if len(mp.Replicas) == 0 {
+		return 1
+	}
+	var sentry float64
+	for index, replica := range mp.Replicas {
+		if index == 0 {
+			sentry = float64(replica.DentryCount)
+			continue
+		}
+		diff := math.Abs(float64(replica.DentryCount) - sentry)
+		if diff > minus {
+			minus = diff
+		}
+	}
+	return
+}
+
+func (mp *MetaPartition) getMinusOfApplyID() (minus float64) {
+	mp.RLock()
+	defer mp.RUnlock()
+	if len(mp.LoadResponse) == 0 {
+		return 1
+	}
+	var sentry float64
+	for index, resp := range mp.LoadResponse {
+		if index == 0 {
+			sentry = float64(resp.ApplyID)
+			continue
+		}
+		diff := math.Abs(float64(resp.ApplyID) - sentry)
+		if diff > minus {
+			minus = diff
+		}
+	}
+	return
+}
+
 func (mp *MetaPartition) setMaxInodeID() {
 	var maxUsed uint64
 	for _, r := range mp.Replicas {
@@ -729,3 +820,250 @@ func (mp *MetaPartition) getLiveZones(offlineAddr string) (zones []string) {
 	}
 	return
 }
+
+func (mp *MetaPartition) isLatestReplica(addr string) (ok bool) {
+	hostsLen := len(mp.Hosts)
+	if hostsLen <= 1 {
+		return
+	}
+	latestAddr := mp.Hosts[hostsLen-1]
+	return latestAddr == addr
+}
+
+func (mp *MetaPartition) RepairZone(vol *Vol, c *Cluster) (err error) {
+	var (
+		zoneList        []string
+		isNeedRebalance bool
+	)
+	mp.RLock()
+	defer mp.RUnlock()
+	var isValidZone bool
+	if isValidZone, err = c.isValidZone(vol.zoneName); err != nil {
+		return
+	}
+	if !isValidZone {
+		log.LogWarnf("action[RepairZone], vol[%v], zoneName[%v], mpReplicaNum[%v] can not be automatically repaired", vol.Name, vol.zoneName, vol.mpReplicaNum)
+		return
+	}
+	zoneList = strings.Split(vol.zoneName, ",")
+	if len(mp.Replicas) != int(vol.mpReplicaNum) {
+		log.LogWarnf("action[RepairZone], meta replica length[%v] not equal to mpReplicaNum[%v]", len(mp.Replicas), vol.mpReplicaNum)
+		return
+	}
+	if mp.IsRecover {
+		log.LogWarnf("action[RepairZone], meta partition[%v] is recovering", mp.PartitionID)
+		return
+	}
+
+	var mpInRecover uint64
+	mpInRecover = uint64(c.metaPartitionInRecovering())
+	if int32(mpInRecover) > c.cfg.MetaPartitionsRecoverPoolSize {
+		log.LogWarnf("action[RepairZone] clusterID[%v] recover pool is full, partitions in recovery[%v], pool size[%v]", c.Name, mpInRecover, c.cfg.MetaPartitionsRecoverPoolSize)
+		return
+	}
+	rps := mp.getLiveReplicas()
+	if len(rps) < int(vol.mpReplicaNum) {
+		log.LogWarnf("action[RepairZone], vol[%v], zoneName[%v], live Replicas [%v] less than mpReplicaNum[%v], can not be automatically repaired", vol.Name, vol.zoneName, len(rps), vol.mpReplicaNum)
+		return
+	}
+
+	if isNeedRebalance, err = mp.needToRebalanceZone(c, zoneList); err != nil {
+		return
+	}
+	if !isNeedRebalance {
+		return
+	}
+
+	if err = c.sendRepairMetaPartitionTask(mp, BalanceMetaZone); err != nil {
+		log.LogErrorf("action[RepairZone] clusterID[%v] vol[%v] meta partition[%v] err[%v]", c.Name, vol.Name, mp.PartitionID, err)
+		return
+	}
+	return
+}
+
+var getTargetAddressForRepairMetaZone = func(c *Cluster, nodeAddr string, mp *MetaPartition, oldHosts []string, excludeNodeSets []uint64, zoneName string) (oldAddr, addAddr string, err error) {
+	var (
+		offlineZoneName     string
+		targetZoneName      string
+		addrInTargetZone    string
+		targetZone          *Zone
+		nodesetInTargetZone *nodeSet
+		targetHosts         []string
+	)
+	if offlineZoneName, targetZoneName, err = mp.getOfflineAndTargetZone(c, zoneName); err != nil {
+		return
+	}
+	if offlineZoneName == "" || targetZoneName == "" {
+		return
+	}
+	if targetZone, err = c.t.getZone(targetZoneName); err != nil {
+		return
+	}
+	if oldAddr, err = mp.getAddressByZoneName(c, offlineZoneName); err != nil {
+		return
+	}
+	if oldAddr == "" {
+		err = fmt.Errorf("can not find address to decommission")
+		return
+	}
+	if err = c.validateDecommissionMetaPartition(mp, oldAddr); err != nil {
+		return
+	}
+	if addrInTargetZone, err = mp.getAddressByZoneName(c, targetZone.name); err != nil {
+		return
+	}
+	//if there is no replica in the target zone, choose a random node set in the target zone
+	if addrInTargetZone == "" {
+		if targetHosts, _, err = targetZone.getAvailMetaNodeHosts(nil, mp.Hosts, 1); err != nil {
+			return
+		}
+		if len(targetHosts) == 0 {
+			err = fmt.Errorf("no available space to find a target address")
+			return
+		}
+		addAddr = targetHosts[0]
+		return
+	}
+	var targetNode *MetaNode
+	//if there is a replica in the target zone, choose the same node set as this replica
+	if targetNode, err = c.metaNode(addrInTargetZone); err != nil {
+		err = fmt.Errorf("action[getTargetAddressForRepairMetaZone] partitionID[%v], addr[%v] metaNode not exist", mp.PartitionID, addrInTargetZone)
+		return
+	}
+	if nodesetInTargetZone, err = targetZone.getNodeSet(targetNode.NodeSetID); err != nil {
+		return
+	}
+	if targetHosts, _, err = nodesetInTargetZone.getAvailMetaNodeHosts(mp.Hosts, 1); err != nil {
+		// select meta nodes from the other node sets in the same zone
+		excludeNodeSets = append(excludeNodeSets, nodesetInTargetZone.ID)
+		if targetHosts, _, err = targetZone.getAvailMetaNodeHosts(excludeNodeSets, mp.Hosts, 1); err != nil {
+			return
+		}
+	}
+	if len(targetHosts) == 0 {
+		err = fmt.Errorf("no available space to find a target address")
+		return
+	}
+	addAddr = targetHosts[0]
+	log.LogInfof("action[getTargetAddressForRepairMetaZone],meta partitionID:%v,zone name:[%v],old address:[%v], new address:[%v]",
+		mp.PartitionID, zoneName, oldAddr, addAddr)
+	return
+}
+
+//check whether the meta partition needs zone rebalancing
+func (mp *MetaPartition) needToRebalanceZone(c *Cluster, zoneList []string) (isNeed bool, err error) {
+	var curZoneMap map[string]uint8
+	var curZoneList []string
+	curZoneMap = make(map[string]uint8, 0)
+	curZoneList = make([]string, 0)
+	if curZoneMap, err = mp.getMetaZoneMap(c); err != nil {
+		return
+	}
+	for k := range curZoneMap {
+		curZoneList = append(curZoneList, k)
+	}
+
+	log.LogInfof("action[needToRebalanceZone],meta partitionID:%v,zone name:%v,current zones[%v]",
+		mp.PartitionID, zoneList, curZoneList)
+	if len(curZoneMap) == len(zoneList) {
+		isNeed = false
+		for _, zone := range zoneList {
+			if _, ok := curZoneMap[zone]; !ok {
+				isNeed = true
+			}
+		}
+		return
+	}
+	isNeed = true
+	return
+}
+
+func (mp *MetaPartition) getOfflineAndTargetZone(c *Cluster, volZoneName string) (offlineZone, targetZone string, err error) {
+	zoneList := strings.Split(volZoneName, ",")
+	switch len(zoneList) {
+	case 1:
+		zoneList = append(make([]string, 0), zoneList[0], zoneList[0], zoneList[0])
+	case 2:
+		switch mp.PartitionID % 2 {
+		case 0:
+			zoneList = append(make([]string, 0), zoneList[0], zoneList[0], zoneList[1])
+		default:
+			zoneList = append(make([]string, 0), zoneList[1], zoneList[1], zoneList[0])
+		}
+		log.LogInfof("action[getOfflineAndTargetZone],meta partitionID:%v,zone name:[%v],chosen zoneList:%v",
+			mp.PartitionID, volZoneName, zoneList)
+	case 3:
+		log.LogInfof("action[getOfflineAndTargetZone],meta partitionID:%v,zone name:[%v],chosen zoneList:%v",
+			mp.PartitionID, volZoneName, zoneList)
+	default:
+		err = fmt.Errorf("partition zone num must be 1, 2 or 3")
+		return
+	}
+	var currentZoneList []string
+	if currentZoneList, err = mp.getZoneList(c); err != nil {
+		return
+	}
+	intersect := util.Intersect(zoneList, currentZoneList)
+	projectiveToZoneList := util.Projective(zoneList, intersect)
+	projectiveToCurZoneList := util.Projective(currentZoneList, intersect)
+	log.LogInfof("Current replica zoneList:%v, volume zoneName:%v ", currentZoneList, zoneList)
+	if len(projectiveToZoneList) == 0 || len(projectiveToCurZoneList) == 0 {
+		err = fmt.Errorf("action[getOfflineAndTargetZone], current replica zoneList:%v is consistent with the volume zoneName:%v, no rebalance needed", currentZoneList, zoneList)
+		return
+	}
+	offlineZone = projectiveToCurZoneList[0]
+	targetZone = projectiveToZoneList[0]
+	return
+}
+
+func (mp *MetaPartition) getAddressByZoneName(c *Cluster, zone
string) (addr string, err error) { + for _, host := range mp.Hosts { + var metaNode *MetaNode + var z *Zone + if metaNode, err = c.metaNode(host); err != nil { + return + } + if z, err = c.t.getZoneByMetaNode(metaNode); err != nil { + return + } + if zone == z.name { + addr = host + } + } + return +} + +func (mp *MetaPartition) getZoneList(c *Cluster) (zoneList []string, err error) { + zoneList = make([]string, 0) + for _, host := range mp.Hosts { + var metaNode *MetaNode + var zone *Zone + if metaNode, err = c.metaNode(host); err != nil { + return + } + if zone, err = c.t.getZoneByMetaNode(metaNode); err != nil { + return + } + zoneList = append(zoneList, zone.name) + } + return +} + +func (mp *MetaPartition) getMetaZoneMap(c *Cluster) (curZonesMap map[string]uint8, err error) { + curZonesMap = make(map[string]uint8, 0) + for _, host := range mp.Hosts { + var metaNode *MetaNode + var zone *Zone + if metaNode, err = c.metaNode(host); err != nil { + return + } + if zone, err = c.t.getZoneByMetaNode(metaNode); err != nil { + return + } + if _, ok := curZonesMap[zone.name]; !ok { + curZonesMap[zone.name] = 1 + } else { + curZonesMap[zone.name] = curZonesMap[zone.name] + 1 + } + } + return +} diff --git a/master/meta_partition_manager.go b/master/meta_partition_manager.go index b98d6a2920..16560dc75c 100644 --- a/master/meta_partition_manager.go +++ b/master/meta_partition_manager.go @@ -134,9 +134,10 @@ func (c *Cluster) scheduleToCheckMetaPartitionRecoveryProgress() { if c.partition != nil && c.partition.IsRaftLeader() { if c.vols != nil { c.checkMetaPartitionRecoveryProgress() + c.checkMigratedMetaPartitionRecoveryProgress() } } - time.Sleep(time.Second * defaultIntervalToCheckDataPartition) + time.Sleep(3 * time.Second * defaultIntervalToCheckDataPartition) } }() } @@ -179,7 +180,7 @@ func (c *Cluster) checkMetaPartitionRecoveryProgress() { } if len(newBadMpIds) == 0 { - Warn(c.Name, fmt.Sprintf("clusterID[%v],node[%v] has recovered success", c.Name, key)) + Warn(c.Name, fmt.Sprintf("action[checkMetaPartitionRecoveryProgress] clusterID[%v],node[%v] has recovered success", c.Name, key)) c.BadMetaPartitionIds.Delete(key) } else { c.BadMetaPartitionIds.Store(key, newBadMpIds) diff --git a/master/metadata_fsm.go b/master/metadata_fsm.go index d42771ef85..90ea2ba32c 100644 --- a/master/metadata_fsm.go +++ b/master/metadata_fsm.go @@ -77,7 +77,9 @@ func (mf *MetadataFsm) restore() { } func (mf *MetadataFsm) restoreApplied() { - + defer func() { + log.LogInfof("action[restoreApplied],applyID[%v]", mf.applied) + }() value, err := mf.store.Get(applied) if err != nil { panic(fmt.Sprintf("Failed to restore applied err:%v", err.Error())) diff --git a/master/metadata_fsm_op.go b/master/metadata_fsm_op.go index 9506ff1b49..2cc2224dd8 100644 --- a/master/metadata_fsm_op.go +++ b/master/metadata_fsm_op.go @@ -32,24 +32,28 @@ import ( transferred over the network. 
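getOfflineAndTargetZone above leans on util.Intersect and util.Projective, whose definitions are not part of this patch. From the call sites they appear to behave like multiset intersection and difference over zone-name slices, since a volume's zone list may repeat a zone (e.g. ["z1","z1","z2"]). A standalone sketch under that assumption; the names are illustrative equivalents, not the project's util API:

```go
package main

import "fmt"

// intersect keeps the elements of a that also occur in b, respecting
// multiplicity (assumed semantics of util.Intersect).
func intersect(a, b []string) (out []string) {
	counts := make(map[string]int, len(b))
	for _, s := range b {
		counts[s]++
	}
	for _, s := range a {
		if counts[s] > 0 {
			counts[s]--
			out = append(out, s)
		}
	}
	return
}

// projective returns what is left of a after removing drop once each,
// respecting multiplicity (assumed semantics of util.Projective).
func projective(a, drop []string) (out []string) {
	counts := make(map[string]int, len(drop))
	for _, s := range drop {
		counts[s]++
	}
	for _, s := range a {
		if counts[s] > 0 {
			counts[s]--
			continue
		}
		out = append(out, s)
	}
	return
}

func main() {
	want := []string{"z1", "z1", "z2"}    // zones the volume asks for
	have := []string{"z1", "z2", "z2"}    // zones the replicas currently sit in
	common := intersect(want, have)       // [z1 z2]
	fmt.Println(projective(have, common)) // [z2] -> offline candidate
	fmt.Println(projective(want, common)) // [z1] -> target zone
}
```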
*/ type clusterValue struct { - Name string - Threshold float32 - DisableAutoAllocate bool - DataNodeDeleteLimitRate uint64 - MetaNodeDeleteBatchCount uint64 - MetaNodeDeleteWorkerSleepMs uint64 DataNodeAutoRepairLimitRate uint64 + Name string + Threshold float32 + DisableAutoAllocate bool + DataNodeDeleteLimitRate uint64 + MetaNodeDeleteBatchCount uint64 + MetaNodeDeleteWorkerSleepMs uint64 + PoolSizeOfDataPartitionsInRecover int32 + PoolSizeOfMetaPartitionsInRecover int32 } func newClusterValue(c *Cluster) (cv *clusterValue) { cv = &clusterValue{ - Name: c.Name, - Threshold: c.cfg.MetaNodeThreshold, - DataNodeDeleteLimitRate: c.cfg.DataNodeDeleteLimitRate, - MetaNodeDeleteBatchCount: c.cfg.MetaNodeDeleteBatchCount, - MetaNodeDeleteWorkerSleepMs: c.cfg.MetaNodeDeleteWorkerSleepMs, DataNodeAutoRepairLimitRate: c.cfg.DataNodeAutoRepairLimitRate, - DisableAutoAllocate: c.DisableAutoAllocate, + Name: c.Name, + Threshold: c.cfg.MetaNodeThreshold, + DataNodeDeleteLimitRate: c.cfg.DataNodeDeleteLimitRate, + MetaNodeDeleteBatchCount: c.cfg.MetaNodeDeleteBatchCount, + MetaNodeDeleteWorkerSleepMs: c.cfg.MetaNodeDeleteWorkerSleepMs, + DisableAutoAllocate: c.DisableAutoAllocate, + PoolSizeOfDataPartitionsInRecover: c.cfg.DataPartitionsRecoverPoolSize, + PoolSizeOfMetaPartitionsInRecover: c.cfg.MetaPartitionsRecoverPoolSize, } return cv } @@ -134,8 +138,9 @@ type volValue struct { Owner string FollowerRead bool Authenticate bool - CrossZone bool EnableToken bool + CrossZone bool + AutoRepair bool ZoneName string OSSAccessKey string OSSSecretKey string @@ -162,8 +167,9 @@ func newVolValue(vol *Vol) (vv *volValue) { Owner: vol.Owner, FollowerRead: vol.FollowerRead, Authenticate: vol.authenticate, - CrossZone: vol.crossZone, + AutoRepair: vol.autoRepair, ZoneName: vol.zoneName, + CrossZone: vol.crossZone, EnableToken: vol.enableToken, OSSAccessKey: vol.OSSAccessKey, OSSSecretKey: vol.OSSSecretKey, @@ -527,7 +533,16 @@ func (c *Cluster) updateMetaNodeDeleteBatchCount(val uint64) { func (c *Cluster) updateMetaNodeDeleteWorkerSleepMs(val uint64) { atomic.StoreUint64(&c.cfg.MetaNodeDeleteWorkerSleepMs, val) } - +func (c *Cluster) updateRecoverPoolSize(dpPoolSize, mpPoolSize int32) { + if dpPoolSize == 0 { + dpPoolSize = defaultRecoverPoolSize + } + if mpPoolSize == 0 { + mpPoolSize = defaultRecoverPoolSize + } + atomic.StoreInt32(&c.cfg.DataPartitionsRecoverPoolSize, dpPoolSize) + atomic.StoreInt32(&c.cfg.MetaPartitionsRecoverPoolSize, mpPoolSize) +} func (c *Cluster) updateDataNodeAutoRepairLimit(val uint64) { atomic.StoreUint64(&c.cfg.DataNodeAutoRepairLimitRate, val) } @@ -549,11 +564,13 @@ func (c *Cluster) loadClusterValue() (err error) { return err } c.cfg.MetaNodeThreshold = cv.Threshold + c.cfg.nodeSetCapacity = defaultNodeSetCapacity c.DisableAutoAllocate = cv.DisableAutoAllocate c.updateMetaNodeDeleteBatchCount(cv.MetaNodeDeleteBatchCount) c.updateMetaNodeDeleteWorkerSleepMs(cv.MetaNodeDeleteWorkerSleepMs) c.updateDataNodeDeleteLimitRate(cv.DataNodeDeleteLimitRate) c.updateDataNodeAutoRepairLimit(cv.DataNodeAutoRepairLimitRate) + c.updateRecoverPoolSize(cv.PoolSizeOfDataPartitionsInRecover, cv.PoolSizeOfMetaPartitionsInRecover) log.LogInfof("action[loadClusterValue], metaNodeThreshold[%v]", cv.Threshold) } return @@ -643,7 +660,7 @@ func (c *Cluster) loadMetaNodes() (err error) { } } c.metaNodes.Store(metaNode.Addr, metaNode) - log.LogInfof("action[loadMetaNodes],metaNode[%v], metaNodeID[%v],zone[%v],ns[%v]", metaNode.Addr, metaNode.ID, mnv.ZoneName, mnv.NodeSetID) + 
log.LogInfof("action[loadMetaNodes],metaNode[%v],id[%v],zone[%v],ns[%v]", metaNode.Addr, mnv.ID, mnv.ZoneName, mnv.NodeSetID) } return } @@ -660,10 +677,13 @@ func (c *Cluster) loadVols() (err error) { err = fmt.Errorf("action[loadVols],value:%v,unmarshal err:%v", string(value), err) return err } + if !vv.CrossZone && vv.ZoneName == "" { + vv.ZoneName = DefaultZoneName + } vol := newVolFromVolValue(vv) vol.Status = vv.Status c.putVol(vol) - log.LogInfof("action[loadVols],vol[%v]", vol.Name) + log.LogInfof("action[loadVols],vol[%v],id[%v],status[%v]", vol.Name, vv.ID, vv.Status) } return } @@ -701,6 +721,9 @@ func (c *Cluster) loadMetaPartitions() (err error) { mp.setPeers(mpv.Peers) mp.OfflinePeerID = mpv.OfflinePeerID mp.IsRecover = mpv.IsRecover + if mp.IsRecover { + c.putMigratedMetaPartitions("history", mp.PartitionID) + } vol.addMetaPartition(mp) log.LogInfof("action[loadMetaPartitions],vol[%v],mp[%v]", vol.Name, mp.PartitionID) } @@ -746,6 +769,9 @@ func (c *Cluster) loadDataPartitions() (err error) { } dp.afterCreation(rv.Addr, rv.DiskPath, c) } + if dp.isRecover { + c.putMigratedDataPartitionIDs(nil, "history", dp.PartitionID) + } vol.dataPartitions.put(dp) log.LogInfof("action[loadDataPartitions],vol[%v],dp[%v]", vol.Name, dp.PartitionID) } diff --git a/master/migration.go b/master/migration.go new file mode 100644 index 0000000000..635d51919c --- /dev/null +++ b/master/migration.go @@ -0,0 +1,142 @@ +package master + +import ( + "fmt" + "github.com/chubaofs/chubaofs/util/log" +) + +func (c *Cluster) checkMigratedDataPartitionsRecoveryProgress() { + defer func() { + if r := recover(); r != nil { + log.LogWarnf("checkMigratedDataPartitionsRecoveryProgress occurred panic,err[%v]", r) + WarnBySpecialKey(fmt.Sprintf("%v_%v_scheduling_job_panic", c.Name, ModuleName), + "checkMigratedDataPartitionsRecoveryProgress occurred panic") + } + }() + + c.MigratedDataPartitionIds.Range(func(key, value interface{}) bool { + badDataPartitionIds := value.([]uint64) + newBadDpIds := make([]uint64, 0) + for _, partitionID := range badDataPartitionIds { + partition, err := c.getDataPartitionByID(partitionID) + if err != nil { + continue + } + vol, err := c.getVol(partition.VolName) + if err != nil { + continue + } + if len(partition.Replicas) == 0 || len(partition.Replicas) < int(vol.dpReplicaNum) { + continue + } + if partition.isDataCatchUpInStrictMode() { + partition.isRecover = false + partition.RLock() + c.syncUpdateDataPartition(partition) + partition.RUnlock() + } else { + newBadDpIds = append(newBadDpIds, partitionID) + } + } + + if len(newBadDpIds) == 0 { + Warn(c.Name, fmt.Sprintf("action[checkMigratedDpRecoveryProgress] clusterID[%v],node:disk[%v] has recovered success", c.Name, key)) + c.MigratedDataPartitionIds.Delete(key) + } else { + c.MigratedDataPartitionIds.Store(key, newBadDpIds) + } + + return true + }) +} + +func (c *Cluster) putMigratedDataPartitionIDs(replica *DataReplica, addr string, partitionID uint64) { + var key string + newMigratedPartitionIDs := make([]uint64, 0) + if replica != nil { + key = fmt.Sprintf("%s:%s", addr, replica.DiskPath) + } else { + key = fmt.Sprintf("%s:%s", addr, "") + } + migratedPartitionIDs, ok := c.MigratedDataPartitionIds.Load(key) + if ok { + newMigratedPartitionIDs = migratedPartitionIDs.([]uint64) + } + newMigratedPartitionIDs = append(newMigratedPartitionIDs, partitionID) + c.MigratedDataPartitionIds.Store(key, newMigratedPartitionIDs) +} + +func (c *Cluster) putMigratedMetaPartitions(addr string, partitionID uint64) { + newMigratedPartitionIDs 
:= make([]uint64, 0) + migratedPartitionIDs, ok := c.MigratedMetaPartitionIds.Load(addr) + if ok { + newMigratedPartitionIDs = migratedPartitionIDs.([]uint64) + } + newMigratedPartitionIDs = append(newMigratedPartitionIDs, partitionID) + c.MigratedMetaPartitionIds.Store(addr, newMigratedPartitionIDs) +} + +func (c *Cluster) checkMigratedMetaPartitionRecoveryProgress() { + defer func() { + if r := recover(); r != nil { + log.LogWarnf("checkMigratedMetaPartitionRecoveryProgress occurred panic,err[%v]", r) + WarnBySpecialKey(fmt.Sprintf("%v_%v_scheduling_job_panic", c.Name, ModuleName), + "checkMigratedMetaPartitionRecoveryProgress occurred panic") + } + }() + + c.MigratedMetaPartitionIds.Range(func(key, value interface{}) bool { + badMetaPartitionIds := value.([]uint64) + for _, partitionID := range badMetaPartitionIds { + partition, err := c.getMetaPartitionByID(partitionID) + if err != nil { + continue + } + c.doLoadMetaPartition(partition) + } + return true + }) + + var ( + dentryDiff float64 + applyIDDiff float64 + ) + c.MigratedMetaPartitionIds.Range(func(key, value interface{}) bool { + badMetaPartitionIds := value.([]uint64) + newBadMpIds := make([]uint64, 0) + for _, partitionID := range badMetaPartitionIds { + partition, err := c.getMetaPartitionByID(partitionID) + if err != nil { + continue + } + vol, err := c.getVol(partition.volName) + if err != nil { + continue + } + if len(partition.Replicas) == 0 || len(partition.Replicas) < int(vol.mpReplicaNum) { + continue + } + dentryDiff = partition.getMinusOfDentryCount() + //inodeDiff = partition.getMinusOfInodeCount() + //inodeDiff = partition.getPercentMinusOfInodeCount() + applyIDDiff = partition.getMinusOfApplyID() + if dentryDiff == 0 && applyIDDiff == 0 { + partition.IsRecover = false + partition.RLock() + c.syncUpdateMetaPartition(partition) + partition.RUnlock() + } else { + newBadMpIds = append(newBadMpIds, partitionID) + } + } + + if len(newBadMpIds) == 0 { + Warn(c.Name, fmt.Sprintf("action[checkMigratedMpRecoveryProgress] clusterID[%v],node[%v] has recovered success", c.Name, key)) + c.MigratedMetaPartitionIds.Delete(key) + } else { + c.MigratedMetaPartitionIds.Store(key, newBadMpIds) + } + + return true + }) +} diff --git a/master/mocktest/data_server.go b/master/mocktest/data_server.go index 57dc96c161..4da69b740c 100644 --- a/master/mocktest/data_server.go +++ b/master/mocktest/data_server.go @@ -45,6 +45,7 @@ type MockDataServer struct { partitions []*MockDataPartition zoneName string mc *master.MasterClient + stopC chan bool } func NewMockDataServer(addr string, zoneName string) *MockDataServer { @@ -53,6 +54,7 @@ func NewMockDataServer(addr string, zoneName string) *MockDataServer { zoneName: zoneName, partitions: make([]*MockDataPartition, 0), mc: master.NewMasterClient([]string{hostAddr}, false), + stopC: make(chan bool), } return mds @@ -63,6 +65,10 @@ func (mds *MockDataServer) Start() { go mds.start() } +func (mds *MockDataServer) Stop() { + close(mds.stopC) +} + func (mds *MockDataServer) register() { var err error var nodeID uint64 @@ -86,6 +92,16 @@ func (mds *MockDataServer) start() { if err != nil { panic(err) } + defer listener.Close() + go func() { + for { + select { + case <-mds.stopC: + return + default: + } + } + }() for { conn, err := listener.Accept() if err != nil { diff --git a/master/mocktest/meta_server.go b/master/mocktest/meta_server.go index 514b2a2122..57486556e9 100644 --- a/master/mocktest/meta_server.go +++ b/master/mocktest/meta_server.go @@ -35,6 +35,7 @@ type MockMetaServer struct { mc 
*master.MasterClient partitions map[uint64]*MockMetaPartition // Key: metaRangeId, Val: metaPartition sync.RWMutex + stopC chan bool } func NewMockMetaServer(addr string, zoneName string) *MockMetaServer { @@ -42,6 +43,7 @@ func NewMockMetaServer(addr string, zoneName string) *MockMetaServer { TcpAddr: addr, partitions: make(map[uint64]*MockMetaPartition, 0), ZoneName: zoneName, mc: master.NewMasterClient([]string{hostAddr}, false), + stopC: make(chan bool), } return mms } @@ -51,6 +53,10 @@ func (mms *MockMetaServer) Start() { go mms.start() } +func (mms *MockMetaServer) Stop() { + close(mms.stopC) +} + func (mms *MockMetaServer) register() { var err error var nodeID uint64 @@ -75,6 +81,16 @@ func (mms *MockMetaServer) start() { if err != nil { panic(err) } + defer listener.Close() + go func() { + for { + select { + case <-mms.stopC: + return + default: + } + } + }() for { conn, err := listener.Accept() if err != nil { diff --git a/master/server.go b/master/server.go index e6eabca631..9af4860e45 100644 --- a/master/server.go +++ b/master/server.go @@ -117,6 +117,7 @@ func (m *Server) Start(cfg *config.Config) (err error) { if m.cluster.MasterSecretKey, err = cryptoutil.Base64Decode(MasterSecretKey); err != nil { return fmt.Errorf("action[Start] failed %v, err: master service Key invalid = %s", proto.ErrInvalidCfg, MasterSecretKey) } + m.cluster.scheduleTask() m.startHTTPService(ModuleName, cfg) exporter.RegistConsul(m.clusterName, ModuleName, cfg) diff --git a/master/topology.go b/master/topology.go index 0ef8c379e9..12c16967a2 100644 --- a/master/topology.go +++ b/master/topology.go @@ -20,6 +20,7 @@ import ( "github.com/chubaofs/chubaofs/util/errors" "github.com/chubaofs/chubaofs/util/log" "sort" + "strings" "sync" ) @@ -84,6 +85,9 @@ func (t *topology) putZoneIfAbsent(zone *Zone) (beStoredZone *Zone) { } func (t *topology) getZone(name string) (zone *Zone, err error) { + if name == "" { + return nil, fmt.Errorf("zone name is empty") + } t.zoneMap.Range(func(zoneName, value interface{}) bool { if zoneName != name { return true @@ -134,6 +138,15 @@ func (t *topology) getZoneByDataNode(dataNode *DataNode) (zone *Zone, err error) return t.getZone(dataNode.ZoneName) } +func (t *topology) getZoneByMetaNode(metaNode *MetaNode) (zone *Zone, err error) { + _, ok := t.metaNodes.Load(metaNode.Addr) + if !ok { + return nil, errors.Trace(metaNodeNotFound(metaNode.Addr), "%v not found", metaNode.Addr) + } + + return t.getZone(metaNode.ZoneName) +} + func (t *topology) putMetaNode(metaNode *MetaNode) (err error) { if _, ok := t.metaNodes.Load(metaNode.Addr); ok { return @@ -222,10 +235,15 @@ func (ns *nodeSet) deleteMetaNode(metaNode *MetaNode) { ns.metaNodes.Delete(metaNode.Addr) } -func (ns *nodeSet) canWriteForDataNode(replicaNum int) bool { +// can Write For DataNode With Exclude Hosts +func (ns *nodeSet) canWriteForDataNode(excludeHosts []string, replicaNum int) bool { var count int ns.dataNodes.Range(func(key, value interface{}) bool { node := value.(*DataNode) + if contains(excludeHosts, node.Addr) == true { + log.LogDebugf("contains return") + return true + } if node.isWriteAble() { count++ } @@ -306,64 +324,78 @@ func calculateDemandWriteNodes(zoneNum, replicaNum int) (demandWriteNodes int) { return } -func (t *topology) allocZonesForMetaNode(zoneNum, replicaNum int, excludeZone []string) (zones []*Zone, err error) { - zones = t.getAllZones() +func (t *topology) allocZonesForMetaNode(zoneName string, replicaNum int, excludeZone []string) (candidateZones []*Zone, err error) { + var 
initCandidateZones []*Zone
+	initCandidateZones = make([]*Zone, 0)
+	zoneList := strings.Split(zoneName, ",")
 	if t.isSingleZone() {
-		return zones, nil
+		return t.getAllZones(), nil
 	}
 	if excludeZone == nil {
 		excludeZone = make([]string, 0)
 	}
-	candidateZones := make([]*Zone, 0)
-	demandWriteNodes := calculateDemandWriteNodes(zoneNum, replicaNum)
-	for i := 0; i < len(zones); i++ {
-		if t.zoneIndexForMetaNode >= len(zones) {
-			t.zoneIndexForMetaNode = 0
+	for _, z := range zoneList {
+		var zone *Zone
+		if zone, err = t.getZone(z); err != nil {
+			return
 		}
-		zone := t.getZoneByIndex(t.zoneIndexForMetaNode)
-		t.zoneIndexForMetaNode++
+		initCandidateZones = append(initCandidateZones, zone)
+	}
+	demandWriteNodes := calculateDemandWriteNodes(len(zoneList), replicaNum)
+	candidateZones = make([]*Zone, 0)
+	for _, zone := range initCandidateZones {
 		if zone.status == unavailableZone {
 			continue
 		}
-		if contains(excludeZone, zone.name) {
-			continue
-		}
 		if zone.canWriteForMetaNode(uint8(demandWriteNodes)) {
 			candidateZones = append(candidateZones, zone)
 		}
-		if len(candidateZones) >= zoneNum {
+		if len(candidateZones) >= len(zoneList) {
 			break
 		}
 	}
+	//if there is no space in the zone for a single-zone partition, randomly choose another zone
+	if len(candidateZones) < 1 && len(zoneList) == 1 {
+		initCandidateZones = t.getAllZones()
+		for _, zone := range initCandidateZones {
+			if zone.status == unavailableZone {
+				continue
+			}
+			if zone.canWriteForMetaNode(uint8(demandWriteNodes)) {
+				candidateZones = append(candidateZones, zone)
+			}
+		}
+	}
 	//if across zone,candidateZones must be larger than or equal with 2,otherwise,must have a candidate zone
-	if (zoneNum >= 2 && len(candidateZones) < 2) || len(candidateZones) < 1 {
+	if (replicaNum == 3 && len(zoneList) >= 2 && len(candidateZones) < 2) || len(candidateZones) < 1 {
 		log.LogError(fmt.Sprintf("action[allocZonesForMetaNode],reqZoneNum[%v],candidateZones[%v],demandWriteNodes[%v],err:%v",
-			zoneNum, len(candidateZones), demandWriteNodes, proto.ErrNoZoneToCreateMetaPartition))
+			len(zoneList), len(candidateZones), demandWriteNodes, proto.ErrNoZoneToCreateMetaPartition))
 		return nil, proto.ErrNoZoneToCreateMetaPartition
 	}
-	zones = candidateZones
 	err = nil
 	return
 }
 
-func (t *topology) allocZonesForDataNode(zoneNum, replicaNum int, excludeZone []string) (zones []*Zone, err error) {
-	zones = t.getAllZones()
-	log.LogInfof("len(zones) = %v \n", len(zones))
+
+//allocate zones according to the specified zoneName and replicaNum
+func (t *topology) allocZonesForDataNode(zoneName string, replicaNum int, excludeZone []string) (candidateZones []*Zone, err error) {
+	var initCandidateZones []*Zone
+	initCandidateZones = make([]*Zone, 0)
+	zoneList := strings.Split(zoneName, ",")
 	if t.isSingleZone() {
-		return zones, nil
+		return t.getAllZones(), nil
 	}
-	if excludeZone == nil {
-		excludeZone = make([]string, 0)
-	}
-	demandWriteNodes := calculateDemandWriteNodes(zoneNum, replicaNum)
-	candidateZones := make([]*Zone, 0)
-	for i := 0; i < len(zones); i++ {
-		if t.zoneIndexForDataNode >= len(zones) {
-			t.zoneIndexForDataNode = 0
+	for _, z := range zoneList {
+		var zone *Zone
+		if zone, err = t.getZone(z); err != nil {
+			return
 		}
-		zone := t.getZoneByIndex(t.zoneIndexForDataNode)
-		t.zoneIndexForDataNode++
+		initCandidateZones = append(initCandidateZones, zone)
+	}
+	demandWriteNodes := calculateDemandWriteNodes(len(zoneList), replicaNum)
+	candidateZones = make([]*Zone, 0)
+	for _, zone := range initCandidateZones {
 		if zone.status == unavailableZone {
 			continue
 		}
@@ -373,17
+405,31 @@ func (t *topology) allocZonesForDataNode(zoneNum, replicaNum int, excludeZone [] if zone.canWriteForDataNode(uint8(demandWriteNodes)) { candidateZones = append(candidateZones, zone) } - if len(candidateZones) >= zoneNum { + if len(candidateZones) >= len(zoneList) { break } } - //if across zone,candidateZones must be larger than or equal with 2,otherwise,must have one candidate zone - if (zoneNum >= 2 && len(candidateZones) < 2) || len(candidateZones) < 1 { + //if there is no space in the zone for single zone partition, randomly choose a zone from all zones + if len(candidateZones) < 1 && len(zoneList) == 1 { + initCandidateZones = t.getAllZones() + for _, zone := range initCandidateZones { + if zone.status == unavailableZone { + continue + } + if contains(excludeZone, zone.name) { + continue + } + if zone.canWriteForDataNode(uint8(demandWriteNodes)) { + candidateZones = append(candidateZones, zone) + } + } + } + //if across zone,candidateZones must be larger than or equal with 2, if not across zone, must have one candidate zone + if (replicaNum == 3 && len(zoneList) >= 2 && len(candidateZones) < 2) || len(candidateZones) < 1 { log.LogError(fmt.Sprintf("action[allocZonesForDataNode],reqZoneNum[%v],candidateZones[%v],demandWriteNodes[%v],err:%v", - zoneNum, len(candidateZones), demandWriteNodes, proto.ErrNoZoneToCreateDataPartition)) + len(zoneList), len(candidateZones), demandWriteNodes, proto.ErrNoZoneToCreateDataPartition)) return nil, errors.NewError(proto.ErrNoZoneToCreateDataPartition) } - zones = candidateZones err = nil return } @@ -564,7 +610,7 @@ func (zone *Zone) deleteMetaNode(metaNode *MetaNode) (err error) { return } -func (zone *Zone) allocNodeSetForDataNode(excludeNodeSets []uint64, replicaNum uint8) (ns *nodeSet, err error) { +func (zone *Zone) allocNodeSetForDataNode(excludeNodeSets []uint64, excludeHosts []string, replicaNum uint8) (ns *nodeSet, err error) { nset := zone.getAllNodeSet() if nset == nil { return nil, errors.NewError(proto.ErrNoNodeSetToCreateDataPartition) @@ -580,7 +626,7 @@ func (zone *Zone) allocNodeSetForDataNode(excludeNodeSets []uint64, replicaNum u if containsID(excludeNodeSets, ns.ID) { continue } - if ns.canWriteForDataNode(int(replicaNum)) { + if ns.canWriteForDataNode(excludeHosts, int(replicaNum)) { return } } @@ -666,7 +712,7 @@ func (zone *Zone) getAvailDataNodeHosts(excludeNodeSets []uint64, excludeHosts [ if replicaNum == 0 { return } - ns, err := zone.allocNodeSetForDataNode(excludeNodeSets, uint8(replicaNum)) + ns, err := zone.allocNodeSetForDataNode(excludeNodeSets, excludeHosts, uint8(replicaNum)) if err != nil { return nil, nil, errors.Trace(err, "zone[%v] alloc node set,replicaNum[%v]", zone.name, replicaNum) } diff --git a/master/topology_test.go b/master/topology_test.go index 65dd844146..ffc3a17af7 100644 --- a/master/topology_test.go +++ b/master/topology_test.go @@ -41,7 +41,7 @@ func TestSingleZone(t *testing.T) { //single zone exclude,if it is a single zone excludeZones don't take effect excludeZones := make([]string, 0) excludeZones = append(excludeZones, zoneName) - zones, err := topo.allocZonesForDataNode(replicaNum, replicaNum, excludeZones) + zones, err := topo.allocZonesForDataNode(zoneName, replicaNum, excludeZones) if err != nil { t.Error(err) return @@ -52,7 +52,7 @@ func TestSingleZone(t *testing.T) { } //single zone normal - zones, err = topo.allocZonesForDataNode(replicaNum, replicaNum, nil) + zones, err = topo.allocZonesForDataNode(zoneName, replicaNum, nil) if err != nil { t.Error(err) return @@ -63,6 
+63,15 @@ func TestSingleZone(t *testing.T) { return } fmt.Println(newHosts) + + // single zone with exclude hosts + excludeHosts := []string{mds1Addr, mds2Addr, mds3Addr} + newHosts, _, err = zones[0].getAvailDataNodeHosts(nil, excludeHosts, replicaNum) + if err != nil { + t.Error(err) + return + } + fmt.Println(newHosts) topo.deleteDataNode(createDataNodeForTopo(mds1Addr, zoneName, nodeSet)) } @@ -98,7 +107,7 @@ func TestAllocZones(t *testing.T) { } //only pass replica num replicaNum := 2 - zones, err := topo.allocZonesForDataNode(replicaNum, replicaNum, nil) + zones, err := topo.allocZonesForDataNode(zoneName3, replicaNum, nil) if err != nil { t.Error(err) return @@ -110,14 +119,17 @@ func TestAllocZones(t *testing.T) { cluster := new(Cluster) cluster.t = topo cluster.cfg = newClusterConfig() + cluster.cfg.DataPartitionsRecoverPoolSize = maxDataPartitionsRecoverPoolSize + cluster.cfg.MetaPartitionsRecoverPoolSize = maxMetaPartitionsRecoverPoolSize + //don't cross zone - hosts, _, err := cluster.chooseTargetDataNodes("", nil, nil, replicaNum, 1, "") + hosts, _, err := cluster.chooseTargetDataNodes("", nil, nil, replicaNum, "zone1") if err != nil { t.Error(err) return } //cross zone - hosts, _, err = cluster.chooseTargetDataNodes("", nil, nil, replicaNum, 2, "") + hosts, _, err = cluster.chooseTargetDataNodes("", nil, nil, replicaNum, "zone1,zone2,zone3") if err != nil { t.Error(err) return @@ -126,7 +138,7 @@ func TestAllocZones(t *testing.T) { // after excluding zone3, alloc zones will be success excludeZones := make([]string, 0) excludeZones = append(excludeZones, zoneName3) - zones, err = topo.allocZonesForDataNode(2, replicaNum, excludeZones) + zones, err = topo.allocZonesForDataNode(zoneName3, replicaNum, excludeZones) if err != nil { t.Logf("allocZonesForDataNode failed,err[%v]", err) } diff --git a/master/vol.go b/master/vol.go index 912393caf2..b8e818e239 100644 --- a/master/vol.go +++ b/master/vol.go @@ -17,6 +17,7 @@ package master import ( "encoding/json" "fmt" + "strings" "sync" "github.com/chubaofs/chubaofs/proto" @@ -53,8 +54,9 @@ type Vol struct { NeedToLowerReplica bool FollowerRead bool authenticate bool - crossZone bool + autoRepair bool zoneName string + crossZone bool enableToken bool tokens map[string]*proto.Token tokensLock sync.RWMutex @@ -72,7 +74,7 @@ type Vol struct { sync.RWMutex } -func newVol(id uint64, name, owner, zoneName string, dpSize, capacity uint64, dpReplicaNum, mpReplicaNum uint8, followerRead, authenticate, crossZone bool, enableToken bool, createTime int64, description string) (vol *Vol) { +func newVol(id uint64, name, owner, zoneName string, dpSize, capacity uint64, dpReplicaNum, mpReplicaNum uint8, followerRead, authenticate, enableToken, autoRepair bool, createTime int64, description string) (vol *Vol) { vol = &Vol{ID: id, Name: name, MetaPartitions: make(map[uint64]*MetaPartition, 0)} vol.dataPartitions = newDataPartitionMap(name) if dpReplicaNum < defaultReplicaNum { @@ -91,16 +93,20 @@ func newVol(id uint64, name, owner, zoneName string, dpSize, capacity uint64, dp if dpSize < util.GB { dpSize = util.DefaultDataPartitionSize } + zoneList := strings.Split(zoneName, ",") + if len(zoneList) > 1 { + vol.crossZone = true + } vol.dataPartitionSize = dpSize vol.Capacity = capacity vol.FollowerRead = followerRead vol.authenticate = authenticate - vol.crossZone = crossZone vol.zoneName = zoneName vol.viewCache = make([]byte, 0) vol.mpsCache = make([]byte, 0) vol.createTime = createTime vol.enableToken = enableToken + vol.autoRepair = autoRepair 
vol.tokens = make(map[string]*proto.Token, 0) vol.description = description return @@ -118,8 +124,8 @@ func newVolFromVolValue(vv *volValue) (vol *Vol) { vv.ReplicaNum, vv.FollowerRead, vv.Authenticate, - vv.CrossZone, vv.EnableToken, + vv.AutoRepair, vv.CreateTime, vv.Description) // overwrite oss secure @@ -127,6 +133,8 @@ func newVolFromVolValue(vv *volValue) (vol *Vol) { vol.Status = vv.Status vol.dpSelectorName = vv.DpSelectorName vol.dpSelectorParm = vv.DpSelectorParm + vol.crossZone = vv.CrossZone + return vol } @@ -253,10 +261,7 @@ func (vol *Vol) checkDataPartitions(c *Cluster) (cnt int) { cnt++ } dp.checkDiskError(c.Name, c.leaderInfo.addr) - tasks := dp.checkReplicationTask(c.Name, vol.dataPartitionSize) - if len(tasks) != 0 { - c.addDataNodeTasks(tasks) - } + dp.checkReplicationTask(c, vol.dataPartitionSize) } return } @@ -301,9 +306,29 @@ func (vol *Vol) checkReplicaNum(c *Cluster) { } vol.NeedToLowerReplica = false } +func (vol *Vol) checkRepairMetaPartitions(c *Cluster) { + var err error + mps := vol.cloneMetaPartitionMap() + for _, mp := range mps { + if err = mp.RepairZone(vol, c); err != nil { + log.LogErrorf("action[checkRepairMetaPartitions],vol[%v],partitionID[%v],err[%v]", vol.Name, mp.PartitionID, err) + continue + } + } +} + +func (vol *Vol) checkRepairDataPartitions(c *Cluster) { + var err error + dps := vol.cloneDataPartitionMap() + for _, dp := range dps { + if err = dp.RepairZone(vol, c); err != nil { + log.LogErrorf("action[checkRepairDataPartitions],vol[%v],partitionID[%v],err[%v]", vol.Name, dp.PartitionID, err) + continue + } + } +} func (vol *Vol) checkMetaPartitions(c *Cluster) { - var tasks []*proto.AdminTask vol.checkSplitMetaPartition(c) maxPartitionID := vol.maxPartitionID() mps := vol.cloneMetaPartitionMap() @@ -324,9 +349,8 @@ func (vol *Vol) checkMetaPartitions(c *Cluster) { mp.checkReplicaNum(c, vol.Name, vol.mpReplicaNum) mp.checkEnd(c, maxPartitionID) mp.reportMissingReplicas(c.Name, c.leaderInfo.addr, defaultMetaPartitionTimeOutSec, defaultIntervalToAlarmMissingMetaPartition) - tasks = append(tasks, mp.replicaCreationTasks(c.Name, vol.Name)...) 
+ mp.replicaCreationTasks(c, vol.Name) } - c.addMetaNodeTasks(tasks) } func (vol *Vol) checkSplitMetaPartition(c *Cluster) { @@ -755,7 +779,7 @@ func (vol *Vol) doCreateMetaPartition(c *Cluster, start, end uint64) (mp *MetaPa wg sync.WaitGroup ) errChannel := make(chan error, vol.mpReplicaNum) - if hosts, peers, err = c.chooseTargetMetaHosts("", nil, nil, int(vol.mpReplicaNum), vol.crossZone, vol.zoneName); err != nil { + if hosts, peers, err = c.chooseTargetMetaHosts("", nil, nil, int(vol.mpReplicaNum), vol.zoneName); err != nil { log.LogErrorf("action[doCreateMetaPartition] chooseTargetMetaHosts err[%v]", err) return nil, errors.NewError(err) } diff --git a/master/vol_test.go b/master/vol_test.go index 660a7a5fe6..094b0d198b 100644 --- a/master/vol_test.go +++ b/master/vol_test.go @@ -5,6 +5,7 @@ import ( "github.com/chubaofs/chubaofs/proto" "github.com/chubaofs/chubaofs/util" "github.com/chubaofs/chubaofs/util/log" + "strings" "testing" "time" ) @@ -39,7 +40,7 @@ func TestCheckVol(t *testing.T) { func TestVol(t *testing.T) { capacity := 300 name := "test1" - createVol(name, t) + createVol(name, testZone2, t) //report mp/dp info to master server.cluster.checkDataNodeHeartbeat() server.cluster.checkDataNodeHeartbeat() @@ -56,7 +57,7 @@ func TestVol(t *testing.T) { } vol.checkStatus(server.cluster) getVol(name, t) - updateVol(name, capacity, t) + updateVol(name, "", capacity, t) statVol(name, t) markDeleteVol(name, t) getSimpleVol(name, t) @@ -64,8 +65,9 @@ func TestVol(t *testing.T) { vol.deleteVolFromStore(server.cluster) } -func createVol(name string, t *testing.T) { - reqURL := fmt.Sprintf("%v%v?name=%v&replicas=3&type=extent&capacity=100&owner=cfs&mpCount=2&zoneName=%v", hostAddr, proto.AdminCreateVol, name, testZone2) + +func createVol(name, zone string, t *testing.T) { + reqURL := fmt.Sprintf("%v%v?name=%v&replicas=3&type=extent&capacity=100&owner=cfs&mpCount=2&zoneName=%v", hostAddr, proto.AdminCreateVol, name, zone) fmt.Println(reqURL) process(reqURL, t) vol, err := server.cluster.getVol(name) @@ -77,6 +79,158 @@ func createVol(name string, t *testing.T) { checkMetaPartitionsWritableTest(vol, t) } +func TestVolMultiZoneDowngrade(t *testing.T) { + var vol *Vol + var err error + testMultiZone := "multiZoneDowngrade" + zoneList := []string{testZone1, testZone2, testZone3} + zone := strings.Join(zoneList, ",") + fmt.Printf(strings.Join(zoneList, ",")) + server.cluster.t.putZoneIfAbsent(newZone(testZone3)) + createVol(testMultiZone, zone, t) + //report mp/dp info to master + server.cluster.checkDataNodeHeartbeat() + server.cluster.checkDataNodeHeartbeat() + time.Sleep(3 * time.Second) + //check status + server.cluster.checkMetaPartitions() + server.cluster.checkDataPartitions() + server.cluster.checkLoadMetaPartitions() + server.cluster.doLoadDataPartitions() + vol, err = server.cluster.getVol(testMultiZone) + if err != nil { + t.Errorf("err is %v", err) + return + } + + vol.checkStatus(server.cluster) + getVol(testMultiZone, t) + updateVol(testMultiZone, zone, 200, t) + statVol(testMultiZone, t) + + // add meta node + addMetaServer(mms7Addr, testZone3) + addMetaServer(mms8Addr, testZone3) + // add data node + addDataServer(mds7Addr, testZone3) + addDataServer(mds8Addr, testZone3) + time.Sleep(3 * time.Second) + server.cluster.cfg = newClusterConfig() + + server.cluster.checkDataNodeHeartbeat() + server.cluster.checkMetaNodeHeartbeat() + + server.cluster.checkVolRepairDataPartitions() + server.cluster.checkVolRepairMetaPartitions() + + /*time.Sleep(time.Second * 10) + var mps 
map[uint64]*MetaPartition + mps = vol.cloneMetaPartitionMap() + var isRecover bool + if isRecover, err = checkZoneRecover(mps, zoneList, t); err != nil { + t.Errorf("err is %v", err) + } + if isRecover { + t.Errorf("checkVolRepairMetaPartition is forbidden when recover pool size equals -1") + }*/ + //test normal recover + server.cluster.cfg.MetaPartitionsRecoverPoolSize = maxMetaPartitionsRecoverPoolSize + server.cluster.cfg.DataPartitionsRecoverPoolSize = maxDataPartitionsRecoverPoolSize + server.cluster.checkVolRepairDataPartitions() + server.cluster.checkVolRepairMetaPartitions() + //wait for the partitions to be repaired + /*time.Sleep(time.Second * 10) + mps = vol.cloneMetaPartitionMap() + if isRecover, err = checkZoneRecover(mps, zoneList, t); err != nil { + t.Errorf("err is %v", err) + } + if !isRecover { + t.Errorf("checkVolRepairMetaPartition recover failed") + }*/ + markDeleteVol(testMultiZone, t) + getSimpleVol(testMultiZone, t) + vol.checkStatus(server.cluster) + vol.deleteVolFromStore(server.cluster) +} + +func checkZoneRecover(mps map[uint64]*MetaPartition, zoneList []string, t *testing.T) (isRecover bool, err error) { + var curZone []string + isRecover = true + for _, mp := range mps { + curZone = make([]string, 0) + for _, host := range mp.Hosts { + var mn *MetaNode + if mn, err = server.cluster.metaNode(host); err != nil { + return + } + if !contains(curZone, mn.ZoneName) { + curZone = append(curZone, mn.ZoneName) + } + } + if len(curZone) != len(zoneList) { + t.Logf("vol[%v], meta partition[%v] recover from downgrade failed, curZone:%v, zoneList:%v", mp.volName, mp.PartitionID, curZone, zoneList) + isRecover = false + continue + } + t.Logf("vol[%v], meta partition[%v] recover from downgrade successfully!", mp.volName, mp.PartitionID) + } + return +} +func TestVolMultiZone(t *testing.T) { + var vol *Vol + var err error + testMultiZone := "multiZone" + zoneList := []string{testZone1, testZone2, testZone3} + zone := strings.Join(zoneList, ",") + fmt.Printf(strings.Join(zoneList, ",")) + + createVol(testMultiZone, zone, t) + //report mp/dp info to master + server.cluster.checkDataNodeHeartbeat() + server.cluster.checkMetaNodeHeartbeat() + time.Sleep(3 * time.Second) + //check status + server.cluster.checkMetaPartitions() + server.cluster.checkDataPartitions() + server.cluster.checkLoadMetaPartitions() + server.cluster.doLoadDataPartitions() + vol, err = server.cluster.getVol(testMultiZone) + if err != nil { + t.Errorf("err is %v", err) + return + } + vol.checkStatus(server.cluster) + getVol(testMultiZone, t) + updateVol(testMultiZone, testZone1+","+testZone2, 200, t) + statVol(testMultiZone, t) + //check repair the first replica + server.cluster.checkVolRepairDataPartitions() + server.cluster.checkVolRepairMetaPartitions() + //set partition isRecovering to false + server.cluster.checkDiskRecoveryProgress() + server.cluster.checkMigratedDataPartitionsRecoveryProgress() + server.cluster.checkMetaPartitionRecoveryProgress() + server.cluster.checkMigratedMetaPartitionRecoveryProgress() + //check repair the second replica, so all replicas should have been repaired + server.cluster.checkVolRepairDataPartitions() + server.cluster.checkVolRepairMetaPartitions() + //wait for the partitions to be repaired + /*time.Sleep(time.Second * 5) + mps := vol.cloneMetaPartitionMap() + var isRecover bool + if isRecover, err = checkZoneRecover(mps, []string{testZone1, testZone2}, t); err != nil { + t.Errorf("err is %v", err) + } + if !isRecover { + t.Errorf("checkVolRepairMetaPartition recover 
failed") + }*/ + + markDeleteVol(testMultiZone, t) + getSimpleVol(testMultiZone, t) + vol.checkStatus(server.cluster) + vol.deleteVolFromStore(server.cluster) +} + func checkDataPartitionsWritableTest(vol *Vol, t *testing.T) { if len(vol.dataPartitions.partitions) == 0 { return @@ -130,9 +284,9 @@ func getVol(name string, t *testing.T) { process(reqURL, t) } -func updateVol(name string, capacity int, t *testing.T) { - reqURL := fmt.Sprintf("%v%v?name=%v&capacity=%v&authKey=%v", - hostAddr, proto.AdminUpdateVol, name, capacity, buildAuthKey("cfs")) +func updateVol(name, zone string, capacity int, t *testing.T) { + reqURL := fmt.Sprintf("%v%v?name=%v&capacity=%v&authKey=%v&zoneName=%v", + hostAddr, proto.AdminUpdateVol, name, capacity, buildAuthKey("cfs"), zone) fmt.Println(reqURL) process(reqURL, t) vol, err := server.cluster.getVol(name) @@ -144,6 +298,13 @@ func updateVol(name string, capacity int, t *testing.T) { t.Errorf("update vol failed,expect[%v],real[%v]", capacity, vol.Capacity) return } + if zone == "" { + return + } + if vol.zoneName != zone { + t.Errorf("update vol failed,expect[%v],real[%v]", zone, vol.zoneName) + return + } } func statVol(name string, t *testing.T) { @@ -213,7 +374,7 @@ func TestConcurrentReadWriteDataPartitionMap(t *testing.T) { var volID uint64 = 1 var createTime = time.Now().Unix() vol := newVol(volID, name, name, "", util.DefaultDataPartitionSize, 100, defaultReplicaNum, - defaultReplicaNum, false, false, false, false, createTime, "") + defaultReplicaNum, false, false, false, true, createTime, "") // unavailable mp mp1 := newMetaPartition(1, 1, defaultMaxMetaPartitionInodeID, 3, name, volID) vol.addMetaPartition(mp1) diff --git a/metanode/api_handler.go b/metanode/api_handler.go index 948bab072b..43d76c8f89 100644 --- a/metanode/api_handler.go +++ b/metanode/api_handler.go @@ -59,9 +59,21 @@ func (m *MetaNode) registerAPIHandler() (err error) { http.HandleFunc("/getDirectory", m.getDirectoryHandler) http.HandleFunc("/getAllDentry", m.getAllDentriesHandler) http.HandleFunc("/getParams", m.getParamsHandler) + http.HandleFunc("/getDiskStat", m.getDiskStatHandler) + return } +func (m *MetaNode) getDiskStatHandler(w http.ResponseWriter, + r *http.Request) { + resp := NewAPIResponse(http.StatusOK, http.StatusText(http.StatusOK)) + resp.Data = m.getDiskStat() + data, _ := resp.Marshal() + if _, err := w.Write(data); err != nil { + log.LogErrorf("[getPartitionsHandler] response %s", err) + } +} + func (m *MetaNode) getParamsHandler(w http.ResponseWriter, r *http.Request) { resp := NewAPIResponse(http.StatusOK, http.StatusText(http.StatusOK)) diff --git a/metanode/const.go b/metanode/const.go index 1dd7b034dd..f5e9ae046e 100644 --- a/metanode/const.go +++ b/metanode/const.go @@ -147,6 +147,7 @@ const ( cfgDeleteBatchCount = "deleteBatchCount" cfgTotalMem = "totalMem" cfgZoneName = "zoneName" + cfgTickIntervalMs = "tickIntervalMs" metaNodeDeleteBatchCountKey = "batchCount" ) diff --git a/metanode/disk.go b/metanode/disk.go new file mode 100644 index 0000000000..0b51167362 --- /dev/null +++ b/metanode/disk.go @@ -0,0 +1,106 @@ +// Copyright 2018 The Chubao Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package metanode + +import ( + "sync" + "syscall" + "time" + + "github.com/chubaofs/chubaofs/util/log" +) + +// Disk represents the structure of the disk +type Disk struct { + sync.RWMutex + Path string + Total float64 + Used float64 + Available float64 + + stopCh chan struct{} +} + +func NewDisk(path string) (d *Disk) { + d = new(Disk) + d.Path = path + d.stopCh = make(chan struct{}, 1) + d.computeUsage() + d.startScheduleToUpdateSpaceInfo() + return +} + +// Compute the disk usage +func (d *Disk) computeUsage() (err error) { + d.RLock() + defer d.RUnlock() + fs := syscall.Statfs_t{} + err = syscall.Statfs(d.Path, &fs) + if err != nil { + return + } + + d.Total = float64(fs.Blocks) * float64(fs.Bsize) + d.Available = float64(fs.Bavail) * float64(fs.Bsize) + d.Used = d.Total - d.Available + + log.LogDebugf("action[computeUsage] disk(%v) all(%v) available(%v) used(%v)", d.Path, d.Total, d.Available, d.Used) + + return +} + +func (d *Disk) startScheduleToUpdateSpaceInfo() { + go func() { + updateSpaceInfoTicker := time.NewTicker(10 * time.Second) + defer func() { + updateSpaceInfoTicker.Stop() + }() + for { + select { + case <-d.stopCh: + log.LogInfof("[MetaNode]stop disk: %v stat \n", d.Path) + break + case <-updateSpaceInfoTicker.C: + d.computeUsage() + } + } + }() +} + +func (d *Disk) stopScheduleToUpdateSpaceInfo() { + d.stopCh <- struct{}{} +} + +func (m *MetaNode) startDiskStat() error { + m.disks = make(map[string]*Disk) + m.disks[m.metadataDir] = NewDisk(m.metadataDir) + m.disks[m.raftDir] = NewDisk(m.raftDir) + return nil +} + +func (m *MetaNode) stopDiskStat() { + for _, d := range m.disks { + d.stopScheduleToUpdateSpaceInfo() + } +} + +func (m *MetaNode) getDiskStat() []*Disk { + ds := make([]*Disk, 0) + for _, d := range m.disks { + ds = append(ds, d) + } + + return ds +} diff --git a/metanode/manager.go b/metanode/manager.go index 8c56f65b85..9f653c1045 100644 --- a/metanode/manager.go +++ b/metanode/manager.go @@ -17,6 +17,13 @@ package metanode import ( "encoding/json" "fmt" + "github.com/chubaofs/chubaofs/cmd/common" + "github.com/chubaofs/chubaofs/proto" + "github.com/chubaofs/chubaofs/raftstore" + "github.com/chubaofs/chubaofs/util" + "github.com/chubaofs/chubaofs/util/errors" + "github.com/chubaofs/chubaofs/util/exporter" + "github.com/chubaofs/chubaofs/util/log" "io/ioutil" "net" syslog "log" @@ -27,14 +34,7 @@ import ( "strings" "sync" "sync/atomic" - - "github.com/chubaofs/chubaofs/cmd/common" - "github.com/chubaofs/chubaofs/proto" - "github.com/chubaofs/chubaofs/raftstore" - "github.com/chubaofs/chubaofs/util" - "github.com/chubaofs/chubaofs/util/errors" - "github.com/chubaofs/chubaofs/util/exporter" - "github.com/chubaofs/chubaofs/util/log" + "time" ) const partitionPrefix = "partition_" @@ -120,7 +120,7 @@ func (m *metadataManager) HandleMetadataOperation(conn net.Conn, p *Packet, case proto.OpMetaLookup: err = m.opMetaLookup(conn, p, remoteAddr) case proto.OpDeleteMetaPartition: - err = m.opDeleteMetaPartition(conn, p, remoteAddr) + err = m.opExpiredMetaPartition(conn, p, remoteAddr) case proto.OpUpdateMetaPartition: err = 
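Disk.computeUsage above derives capacity from syscall.Statfs. A standalone sketch of the same arithmetic; the Statfs_t field types are Linux-specific, so this is illustrative rather than portable:

```go
package main

import (
	"fmt"
	"syscall"
)

func main() {
	var fs syscall.Statfs_t
	if err := syscall.Statfs("/tmp", &fs); err != nil {
		fmt.Println("statfs:", err)
		return
	}
	total := float64(fs.Blocks) * float64(fs.Bsize)
	available := float64(fs.Bavail) * float64(fs.Bsize)
	// Used mirrors computeUsage: total minus what unprivileged callers can use.
	fmt.Printf("total=%.0f available=%.0f used=%.0f\n", total, available, total-available)
}
```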
m.opUpdateMetaPartition(conn, p, remoteAddr) case proto.OpLoadMetaPartition: @@ -246,9 +246,10 @@ func (m *metadataManager) loadPartitions() (err error) { // Check metadataDir directory fileInfo, err := os.Stat(m.rootDir) if err != nil { - os.MkdirAll(m.rootDir, 0755) - err = nil - return + if os.IsNotExist(err) { + err = os.MkdirAll(m.rootDir, 0755) + } + return err } if !fileInfo.IsDir() { err = errors.New("metadataDir must be directory") @@ -257,7 +259,7 @@ func (m *metadataManager) loadPartitions() (err error) { // scan the data directory fileInfoList, err := ioutil.ReadDir(m.rootDir) if err != nil { - return + return err } var wg sync.WaitGroup for _, fileInfo := range fileInfoList { @@ -267,8 +269,27 @@ func (m *metadataManager) loadPartitions() (err error) { log.LogErrorf("loadPartitions: find expired partition[%s], rename it and you can delete him manually", fileInfo.Name()) oldName := path.Join(m.rootDir, fileInfo.Name()) - newName := path.Join(m.rootDir, ExpiredPartitionPrefix+fileInfo.Name()) - os.Rename(oldName, newName) + newName := path.Join(m.rootDir, ExpiredPartitionPrefix+fileInfo.Name()+"_"+strconv.FormatInt(time.Now().Unix(), 10)) + if tempErr := os.Rename(oldName, newName); tempErr != nil { + log.LogErrorf("rename file has err:[%s]", tempErr.Error()) + } + + if len(fileInfo.Name()) > 10 && strings.HasPrefix(fileInfo.Name(), partitionPrefix) { + log.LogErrorf("loadPartitions: find expired partition[%s], rename raft file", + fileInfo.Name()) + partitionId := fileInfo.Name()[len(partitionPrefix):] + oldRaftName := path.Join(m.metaNode.raftDir, partitionId) + newRaftName := path.Join(m.metaNode.raftDir, ExpiredPartitionPrefix+partitionId+"_"+strconv.FormatInt(time.Now().Unix(), 10)) + log.LogErrorf("loadPartitions: rename expired raft file [%s] -> [%s]", oldRaftName, newRaftName) + if _, tempErr := os.Stat(oldRaftName); tempErr != nil { + log.LogWarnf("stat file [%s] has err:[%s]", oldRaftName, tempErr.Error()) + } else { + if tempErr := os.Rename(oldRaftName, newRaftName); tempErr != nil { + log.LogErrorf("rename file has err:[%s]", tempErr.Error()) + } + } + } + continue } @@ -421,6 +442,18 @@ func (m *metadataManager) deletePartition(id uint64) (err error) { return } +func (m *metadataManager) expiredPartition(id uint64) (err error) { + m.mu.Lock() + defer m.mu.Unlock() + mp, has := m.partitions[id] + if !has { + return + } + mp.Expired() + delete(m.partitions, id) + return +} + // Range scans all the meta partitions. func (m *metadataManager) Range(f func(i uint64, p MetaPartition) bool) { m.mu.RLock() diff --git a/metanode/manager_op.go b/metanode/manager_op.go index f464b1b3c6..e6ebfcbcf1 100644 --- a/metanode/manager_op.go +++ b/metanode/manager_op.go @@ -18,15 +18,14 @@ import ( "bytes" "encoding/json" "fmt" - "net" - "os" - "runtime" - "github.com/chubaofs/chubaofs/proto" "github.com/chubaofs/chubaofs/util" "github.com/chubaofs/chubaofs/util/errors" "github.com/chubaofs/chubaofs/util/log" raftProto "github.com/tiglabs/raft/proto" + "net" + "os" + "runtime" ) const ( @@ -622,7 +621,40 @@ func (m *metadataManager) opMetaExtentsTruncate(conn net.Conn, p *Packet, } -// Delete a meta partition. +// Expire a meta partition instead of deleting it; the old delete handler is kept below for reference.
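Note on the hunks above: deletion is now deferred. Both loadPartitions and the partition-removal path rename directories with the "expired_" prefix plus a Unix-timestamp suffix instead of calling os.RemoveAll, leaving the final cleanup to the operator. A minimal sketch of the kind of out-of-band sweep an operator might run; the metadata root path and the retention window are assumptions, only the "expired_partition_<id>_<ts>" naming scheme comes from this patch:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"
)

func main() {
	const root = "/export/metanode/metadata" // assumed metadataDir; adjust per deployment
	const retention = 7 * 24 * 3600          // example only: keep expired partitions for a week
	matches, err := filepath.Glob(filepath.Join(root, "expired_partition_*"))
	if err != nil {
		panic(err)
	}
	for _, dir := range matches {
		// the timestamp is the final "_"-separated token appended at expire time
		parts := strings.Split(filepath.Base(dir), "_")
		ts, err := strconv.ParseInt(parts[len(parts)-1], 10, 64)
		if err != nil {
			continue // no timestamp suffix; leave it for manual review
		}
		if time.Now().Unix()-ts > retention {
			fmt.Println("removing", dir)
			_ = os.RemoveAll(dir)
		}
	}
}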
-func (m *metadataManager) opDeleteMetaPartition(conn net.Conn, +//func (m *metadataManager) opDeleteMetaPartition(conn net.Conn, +// p *Packet, remoteAddr string) (err error) { +// req := &proto.DeleteMetaPartitionRequest{} +// adminTask := &proto.AdminTask{ +// Request: req, +// } +// decode := json.NewDecoder(bytes.NewBuffer(p.Data)) +// decode.UseNumber() +// if err = decode.Decode(adminTask); err != nil { +// p.PacketErrorWithBody(proto.OpErr, ([]byte)(err.Error())) +// m.respondToClient(conn, p) +// return +// } +// mp, err := m.getPartition(req.PartitionID) +// if err != nil { +// p.PacketOkReply() +// m.respondToClient(conn, p) +// return +// } +// // Ack the master request +// conf := mp.GetBaseConfig() +// mp.Stop() +// mp.DeleteRaft() +// m.deletePartition(mp.GetBaseConfig().PartitionId) +// os.RemoveAll(conf.RootDir) +// p.PacketOkReply() +// m.respondToClient(conn, p) +// runtime.GC() +// log.LogInfof("%s [opDeleteMetaPartition] req: %d - %v, resp: %v", +// remoteAddr, p.GetReqID(), req, err) +// return +//} + +func (m *metadataManager) opExpiredMetaPartition(conn net.Conn, p *Packet, remoteAddr string) (err error) { req := &proto.DeleteMetaPartitionRequest{} adminTask := &proto.AdminTask{ @@ -643,11 +675,8 @@ func (m *metadataManager) opDeleteMetaPartition(conn net.Conn, return } // Ack the master request - conf := mp.GetBaseConfig() - mp.Stop() - mp.DeleteRaft() - m.deletePartition(mp.GetBaseConfig().PartitionId) - os.RemoveAll(conf.RootDir) + mp.ExpiredRaft() + m.expiredPartition(mp.GetBaseConfig().PartitionId) p.PacketOkReply() m.respondToClient(conn, p) runtime.GC() @@ -806,6 +835,7 @@ func (m *metadataManager) opAddMetaPartitionRaftMember(conn net.Conn, } mp, err := m.getPartition(req.PartitionId) if err != nil { + log.LogErrorf("get partition has err by id:[%d] err:[%s]", req.PartitionId, err.Error()) p.PacketErrorWithBody(proto.OpTryOtherAddr, ([]byte)(proto.ErrMetaPartitionNotExists.Error())) m.respondToClient(conn, p) return err @@ -862,6 +892,7 @@ func (m *metadataManager) opRemoveMetaPartitionRaftMember(conn net.Conn, m.respondToClient(conn, p) return err } + req.ReserveResource = adminTask.ReserveResource mp, err := m.getPartition(req.PartitionId) if err != nil { p.PacketErrorWithBody(proto.OpErr, ([]byte)(err.Error())) diff --git a/metanode/metanode.go b/metanode/metanode.go index f0e0076d3e..ea7576f76e 100644 --- a/metanode/metanode.go +++ b/metanode/metanode.go @@ -55,8 +55,10 @@ type MetaNode struct { raftStore raftstore.RaftStore raftHeartbeatPort string raftReplicatePort string + tickInterval int zoneName string httpStopC chan uint8 + disks map[string]*Disk control common.Control } @@ -110,6 +112,9 @@ func doStart(s common.Server, cfg *config.Config) (err error) { if err = m.parseConfig(cfg); err != nil { return } + if err = m.startDiskStat(); err != nil { + return + } if err = m.register(); err != nil { return } @@ -151,6 +156,7 @@ func doShutdown(s common.Server) { m.stopServer() m.stopMetaManager() m.stopRaftServer() + m.stopDiskStat() } // Sync blocks the invoker's goroutine until the meta node shuts down.
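With startDiskStat wired into doStart above, each metanode refreshes Statfs-based usage for its metadata and raft directories every 10 seconds and serves them on the new /getDiskStat route. A quick probe might look like the sketch below; the prof port is a placeholder for whatever the deployment uses, and the response envelope mirrors the Code/Msg/Data fields of the APIResponse type used by the other handlers:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// 17220 is an assumed metanode prof port; substitute your own.
	resp, err := http.Get("http://127.0.0.1:17220/getDiskStat")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	var body struct {
		Code int
		Msg  string
		Data []struct {
			Path      string
			Total     float64
			Used      float64
			Available float64
		}
	}
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		panic(err)
	}
	for _, d := range body.Data {
		fmt.Printf("%s: used %.0f of %.0f bytes\n", d.Path, d.Used, d.Total)
	}
}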
@@ -173,6 +179,12 @@ func (m *MetaNode) parseConfig(cfg *config.Config) (err error) { m.zoneName = cfg.GetString(cfgZoneName) configTotalMem, _ = strconv.ParseUint(cfg.GetString(cfgTotalMem), 10, 64) + m.tickInterval = int(cfg.GetFloat(cfgTickIntervalMs)) + if m.tickInterval <= 300 { + log.LogWarnf("config [%s]=[%v] is missing or 300ms or less, using the 500ms default", cfgTickIntervalMs, cfg.GetString(cfgTickIntervalMs)) + m.tickInterval = 500 + } + if configTotalMem == 0 { return fmt.Errorf("bad totalMem config,Recommended to be configured as 80 percent of physical machine memory") } diff --git a/metanode/partition.go b/metanode/partition.go index 6b06968b63..e30889cea7 100644 --- a/metanode/partition.go +++ b/metanode/partition.go @@ -21,6 +21,7 @@ import ( "strconv" "strings" "sync/atomic" + "time" "fmt" "io/ioutil" @@ -175,8 +176,10 @@ type OpPartition interface { PersistMetadata() (err error) ChangeMember(changeType raftproto.ConfChangeType, peer raftproto.Peer, context []byte) (resp interface{}, err error) Reset() (err error) + Expired() error UpdatePartition(req *UpdatePartitionReq, resp *UpdatePartitionResp) (err error) DeleteRaft() error + ExpiredRaft() error IsExsitPeer(peer proto.Peer) bool TryToLeader(groupID uint64) error CanRemoveRaftMember(peer proto.Peer) error @@ -549,6 +552,12 @@ func (mp *metaPartition) DeleteRaft() (err error) { return } +// ExpiredRaft marks the raft partition as expired. +func (mp *metaPartition) ExpiredRaft() (err error) { + err = mp.raftPartition.Expired() + return +} + // Return a new inode ID and update the offset. func (mp *metaPartition) nextInodeID() (inodeId uint64, err error) { for { @@ -668,6 +677,34 @@ func (mp *metaPartition) Reset() (err error) { return } + +func (mp *metaPartition) Expired() (err error) { + mp.stop() + if mp.delInodeFp != nil { + // TODO Unhandled errors + mp.delInodeFp.Sync() + mp.delInodeFp.Close() + } + + mp.inodeTree.Reset() + mp.dentryTree.Reset() + mp.config.Cursor = 0 + mp.applyID = 0 + + currentPath := path.Clean(mp.config.RootDir) + + var newPath = path.Join(path.Dir(currentPath), + ExpiredPartitionPrefix+path.Base(currentPath)+"_"+strconv.FormatInt(time.Now().Unix(), 10)) + + if err := os.Rename(currentPath, newPath); err != nil { + log.LogErrorf("ExpiredPartition: mark expired partition fail: partitionID(%v) path(%v) newPath(%v) err(%v)", mp.config.PartitionId, currentPath, newPath, err) + return err + } + log.LogInfof("ExpiredPartition: mark expired partition: partitionID(%v) path(%v) newPath(%v)", + mp.config.PartitionId, currentPath, newPath) + return nil +} + // func (mp *metaPartition) canRemoveSelf() (canRemove bool, err error) { var partition *proto.MetaPartitionInfo diff --git a/metanode/partition_fsm.go b/metanode/partition_fsm.go index d6d01d8d98..e65c395e08 100644 --- a/metanode/partition_fsm.go +++ b/metanode/partition_fsm.go @@ -257,7 +257,9 @@ func (mp *metaPartition) ApplySnapshot(peers []raftproto.Peer, iter raftproto.Sn mp.dentryTree = dentryTree mp.extendTree = extendTree mp.multipartTree = multipartTree - mp.config.Cursor = cursor + if cursor != 0 { + mp.config.Cursor = cursor + } err = nil // store message mp.storeChan <- &storeMsg{ @@ -269,7 +271,7 @@ func (mp *metaPartition) ApplySnapshot(peers []raftproto.Peer, iter raftproto.Sn multipartTree: mp.multipartTree, } mp.extReset <- struct{}{} - log.LogDebugf("ApplySnapshot: finish with EOF: partitionID(%v) applyID(%v)", mp.config.PartitionId, mp.applyID) + log.LogDebugf("ApplySnapshot: finish with EOF: partitionID(%v) applyID(%v), cursor(%v)",
mp.config.PartitionId, mp.applyID, mp.config.Cursor) return } log.LogErrorf("ApplySnapshot: stop with error: partitionID(%v) err(%v)", mp.config.PartitionId, err) diff --git a/metanode/partition_fsmop.go b/metanode/partition_fsmop.go index 2f530806e6..5800c0cb54 100644 --- a/metanode/partition_fsmop.go +++ b/metanode/partition_fsmop.go @@ -130,14 +130,18 @@ func (mp *metaPartition) confRemoveNode(req *proto.RemoveMetaPartitionRaftMember } mp.config.Peers = append(mp.config.Peers[:peerIndex], mp.config.Peers[peerIndex+1:]...) if mp.config.NodeId == req.RemovePeer.ID && !mp.isLoadingMetaPartition && canRemoveSelf { - mp.Stop() - mp.DeleteRaft() - mp.manager.deletePartition(mp.GetBaseConfig().PartitionId) - os.RemoveAll(mp.config.RootDir) + mp.ExpiredRaft() + mp.manager.expiredPartition(mp.GetBaseConfig().PartitionId) updated = false } log.LogInfof("Fininsh RemoveRaftNode PartitionID(%v) nodeID(%v) do RaftLog (%v) ", req.PartitionId, mp.config.NodeId, string(data)) + + return +} + +func (mp *metaPartition) confUpdateNode(req *proto.MetaPartitionDecommissionRequest, + index uint64) (updated bool, err error) { return } diff --git a/metanode/raft_server.go b/metanode/raft_server.go index 3970c26e46..c9a2abe27a 100644 --- a/metanode/raft_server.go +++ b/metanode/raft_server.go @@ -37,6 +37,7 @@ func (m *MetaNode) startRaftServer() (err error) { raftConf := &raftstore.Config{ NodeID: m.nodeId, RaftPath: m.raftDir, + TickInterval: m.tickInterval, IPAddr: m.localAddr, HeartbeatPort: heartbeatPort, ReplicaPort: replicaPort, diff --git a/proto/admin_proto.go b/proto/admin_proto.go index 5960008d43..db283d4b9a 100644 --- a/proto/admin_proto.go +++ b/proto/admin_proto.go @@ -39,6 +39,7 @@ const ( AdminListVols = "/vol/list" AdminSetNodeInfo = "/admin/setNodeInfo" AdminGetNodeInfo = "/admin/getNodeInfo" + AdminSetNodeState = "/admin/setNodeState" //graphql master api AdminClusterAPI = "/api/cluster" @@ -201,8 +202,9 @@ type AddDataPartitionRaftMemberRequest struct { // RemoveDataPartitionRaftMemberRequest defines the request of add raftMember a data partition. type RemoveDataPartitionRaftMemberRequest struct { - PartitionId uint64 - RemovePeer Peer + PartitionId uint64 + RemovePeer Peer + ReserveResource bool } // AddMetaPartitionRaftMemberRequest defines the request of add raftMember a meta partition. @@ -213,8 +215,9 @@ type AddMetaPartitionRaftMemberRequest struct { // RemoveMetaPartitionRaftMemberRequest defines the request of add raftMember a meta partition. type RemoveMetaPartitionRaftMemberRequest struct { - PartitionId uint64 - RemovePeer Peer + PartitionId uint64 + RemovePeer Peer + ReserveResource bool } // LoadDataPartitionRequest defines the request of loading a data partition. @@ -492,6 +495,7 @@ type SimpleVolView struct { NeedToLowerReplica bool Authenticate bool CrossZone bool + AutoRepair bool CreateTime string EnableToken bool Tokens map[string]*Token `graphql:"-"` diff --git a/proto/admin_task.go b/proto/admin_task.go index 6ac0a3964d..6892fee29a 100644 --- a/proto/admin_task.go +++ b/proto/admin_task.go @@ -31,16 +31,17 @@ const ( // AdminTask defines the administration task. 
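The tickIntervalMs plumbing ends with the raft_server.go hunk above: parseConfig clamps the value and startRaftServer hands it to raftstore.Config. Boiled down, the effective value is computed as in this sketch; the key name and the 500ms fallback come from the patch, and treating a missing key as 0 follows cfg.GetFloat's zero value:

package sketch

import "github.com/chubaofs/chubaofs/util/config"

// tickIntervalFromConfig mirrors the clamp in parseConfig: GetFloat yields 0
// for a missing key, so absent or too-aggressive settings fall back to 500ms.
func tickIntervalFromConfig(cfg *config.Config) int {
	tick := int(cfg.GetFloat("tickIntervalMs"))
	if tick <= 300 {
		return 500
	}
	return tick
}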
type AdminTask struct { - ID string - PartitionID uint64 - OpCode uint8 - OperatorAddr string - Status int8 - SendTime int64 - CreateTime int64 - SendCount uint8 - Request interface{} - Response interface{} + ID string + PartitionID uint64 + OpCode uint8 + OperatorAddr string + Status int8 + SendTime int64 + CreateTime int64 + SendCount uint8 + ReserveResource bool + Request interface{} + Response interface{} } // ToString returns the string format of the task. diff --git a/proto/model.go b/proto/model.go index d502e7daa9..82f74e9350 100644 --- a/proto/model.go +++ b/proto/model.go @@ -39,6 +39,8 @@ type MetaNodeInfo struct { MetaPartitionCount int NodeSetID uint64 PersistenceMetaPartitions []uint64 + ToBeOffline bool + ToBeMigrated bool } // DataNode stores all the information about a data node @@ -59,6 +61,8 @@ type DataNodeInfo struct { NodeSetID uint64 PersistenceDataPartitions []uint64 BadDisks []string + ToBeOffline bool + ToBeMigrated bool } // MetaPartition defines the structure of a meta partition @@ -84,29 +88,35 @@ type MetaPartitionInfo struct { // MetaReplica defines the replica of a meta partition type MetaReplicaInfo struct { - Addr string - ReportTime int64 - Status int8 // unavailable, readOnly, readWrite - IsLeader bool + Addr string + ReportTime int64 + Status int8 // unavailable, readOnly, readWrite + IsLeader bool + InodeCount uint64 + DentryCount uint64 } // ClusterView provides the view of a cluster. type ClusterView struct { - Name string - LeaderAddr string - DisableAutoAlloc bool - MetaNodeThreshold float32 - Applied uint64 - MaxDataPartitionID uint64 - MaxMetaNodeID uint64 - MaxMetaPartitionID uint64 - DataNodeStatInfo *NodeStatInfo - MetaNodeStatInfo *NodeStatInfo - VolStatInfo []*VolStatInfo - BadPartitionIDs []BadPartitionView - BadMetaPartitionIDs []BadPartitionView - MetaNodes []NodeView - DataNodes []NodeView + Name string + LeaderAddr string + DisableAutoAlloc bool + MetaNodeThreshold float32 + DpRecoverPool int32 + MpRecoverPool int32 + Applied uint64 + MaxDataPartitionID uint64 + MaxMetaNodeID uint64 + MaxMetaPartitionID uint64 + DataNodeStatInfo *NodeStatInfo + MetaNodeStatInfo *NodeStatInfo + VolStatInfo []*VolStatInfo + BadPartitionIDs []BadPartitionView + BadMetaPartitionIDs []BadPartitionView + MigratedDataPartitions []BadPartitionView + MigratedMetaPartitions []BadPartitionView + MetaNodes []NodeView + DataNodes []NodeView } // NodeView provides the view of the data or meta node. 
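The AdminTask change above is the master-to-node half of the new ReserveResource flag; manager_op.go earlier copies adminTask.ReserveResource into the removal request before deciding between delete and expire. A hypothetical master-side constructor shows the intended flow; buildRemoveTask itself is illustrative, only the struct fields come from this patch:

package sketch

import "github.com/chubaofs/chubaofs/proto"

// buildRemoveTask threads ReserveResource through both the task and the
// embedded request, matching the copy manager_op.go performs on the metanode.
func buildRemoveTask(operatorAddr string, pid uint64, peer proto.Peer, reserve bool) *proto.AdminTask {
	req := &proto.RemoveMetaPartitionRaftMemberRequest{
		PartitionId:     pid,
		RemovePeer:      peer,
		ReserveResource: reserve, // true keeps the replica's data on disk as expired_*
	}
	return &proto.AdminTask{
		PartitionID:     pid,
		OperatorAddr:    operatorAddr,
		ReserveResource: reserve,
		Request:         req,
	}
}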
@@ -217,3 +227,66 @@ type MetaPartitionDiagnosis struct { LackReplicaMetaPartitionIDs []uint64 BadMetaPartitionIDs []BadPartitionView } + +type ExtentInfo struct { + FileID uint64 `json:"fileId"` + Size uint64 `json:"size"` + Crc uint32 `json:"Crc"` + IsDeleted bool `json:"deleted"` + ModifyTime int64 `json:"modTime"` + Source string `json:"src"` +} + +// Status raft status +type Status struct { + ID uint64 + NodeID uint64 + Leader uint64 + Term uint64 + Index uint64 + Commit uint64 + Applied uint64 + Vote uint64 + PendQueue int + RecvQueue int + AppQueue int + Stopped bool + RestoringSnapshot bool + State string // leader, follower, candidate + Replicas map[uint64]*ReplicaStatus +} + +// ReplicaStatus replica status +type ReplicaStatus struct { + Match uint64 // copy progress + Commit uint64 // commit position + Next uint64 + State string + Snapshoting bool + Paused bool + Active bool + LastActive time.Time + Inflight int +} + +type DNDataPartitionInfo struct { + VolName string `json:"volName"` + ID uint64 `json:"id"` + Size int `json:"size"` + Used int `json:"used"` + Status int `json:"status"` + Path string `json:"path"` + Files []*ExtentInfo `json:"extents"` + FileCount int `json:"fileCount"` + Replicas []string `json:"replicas"` + TinyDeleteRecordSize int64 `json:"tinyDeleteRecordSize"` + RaftStatus *Status `json:"raftStatus"` + Peers []*Peer `json:"peers"` +} + +type MNMetaPartitionInfo struct { + LeaderAddr string `json:"leaderAddr"` + Peers []*Peer `json:"peers"` + NodeId uint64 `json:"nodeId"` + Cursor uint64 `json:"cursor"` +} diff --git a/proto/packet.go b/proto/packet.go index aafa4d36af..2d9f96be24 100644 --- a/proto/packet.go +++ b/proto/packet.go @@ -156,12 +156,13 @@ const ( ) const ( - WriteDeadlineTime = 5 - ReadDeadlineTime = 5 - SyncSendTaskDeadlineTime = 20 - NoReadDeadlineTime = -1 BatchDeleteExtentReadDeadLineTime = 120 - GetAllWatermarksDeadLineTime = 60 + WriteDeadlineTime = 5 + ReadDeadlineTime = 5 + SyncSendTaskDeadlineTime = 20 + NoReadDeadlineTime = -1 + MaxWaitFollowerRepairTime = 60 * 30 + GetAllWatermarksDeadLineTime = 60 ) const ( diff --git a/raftstore/partition.go b/raftstore/partition.go index 5098ecfca2..ca1e2bfb92 100644 --- a/raftstore/partition.go +++ b/raftstore/partition.go @@ -16,11 +16,20 @@ package raftstore import ( "os" + "path" + "strconv" + "time" + + "github.com/chubaofs/chubaofs/util/log" "github.com/tiglabs/raft" "github.com/tiglabs/raft/proto" ) +const ( + ExpiredPartitionPrefix = "expired_" +) + // PartitionStatus is a type alias of raft.Status type PartitionStatus = raft.Status @@ -46,6 +55,9 @@ type Partition interface { // Delete stops and deletes the partition. Delete() error + // Expired stops the partition and marks it as expired. + Expired() error + // Status returns the current raft status. Status() (status *PartitionStatus) @@ -110,8 +122,31 @@ func (p *partition) Delete() (err error) { return } +// Expired stops the partition and marks it as expired. +// It renames the data path with an 'expired_' prefix and the operation timestamp as a suffix. +// (e.g.
'/path/1' to '/path/expired_1_1600054521') +func (p *partition) Expired() (err error) { + if err = p.Stop(); err != nil { + return + } + var currentPath = path.Clean(p.walPath) + var newPath = path.Join(path.Dir(currentPath), + ExpiredPartitionPrefix+path.Base(currentPath)+"_"+strconv.FormatInt(time.Now().Unix(), 10)) + if err = os.Rename(currentPath, newPath); err != nil { + log.LogErrorf("Expired: mark expired partition fail: partitionID(%v) path(%v) newPath(%v) err(%v)", + p.id, p.walPath, newPath, err) + return + } + log.LogInfof("ExpiredPartition: mark expired partition: partitionID(%v) path(%v) newPath(%v)", + p.id, p.walPath, newPath) + return +} + // Status returns the current raft status. func (p *partition) Status() (status *PartitionStatus) { + if p == nil || p.raft == nil { + return nil + } status = p.raft.Status(p.id) return } diff --git a/sdk/master/api_admin.go b/sdk/master/api_admin.go index 68151a0059..e6ce5b377f 100644 --- a/sdk/master/api_admin.go +++ b/sdk/master/api_admin.go @@ -207,7 +207,7 @@ func (api *AdminAPI) DeleteVolume(volName, authKey string) (err error) { return } -func (api *AdminAPI) UpdateVolume(volName string, capacity uint64, replicas int, followerRead, authenticate, enableToken bool, authKey, zoneName string) (err error) { +func (api *AdminAPI) UpdateVolume(volName string, capacity uint64, replicas int, followerRead, authenticate, enableToken, autoRepair bool, authKey, zoneName string) (err error) { var request = newAPIRequest(http.MethodGet, proto.AdminUpdateVol) request.addParam("name", volName) request.addParam("authKey", authKey) @@ -216,6 +216,7 @@ func (api *AdminAPI) UpdateVolume(volName string, capacity uint64, replicas int, request.addParam("followerRead", strconv.FormatBool(followerRead)) request.addParam("enableToken", strconv.FormatBool(enableToken)) request.addParam("authenticate", strconv.FormatBool(authenticate)) + request.addParam("autoRepair", strconv.FormatBool(autoRepair)) request.addParam("zoneName", zoneName) if _, err = api.mc.serveRequest(request); err != nil { return @@ -246,7 +247,7 @@ func (api *AdminAPI) VolExpand(volName string, capacity uint64, authKey string) } func (api *AdminAPI) CreateVolume(volName, owner string, mpCount int, - dpSize uint64, capacity uint64, replicas int, followerRead bool, zoneName string) (err error) { + dpSize uint64, capacity uint64, replicas int, followerRead bool, autoRepair bool, zoneName string) (err error) { var request = newAPIRequest(http.MethodGet, proto.AdminCreateVol) request.addParam("name", volName) request.addParam("owner", owner) @@ -254,6 +255,7 @@ func (api *AdminAPI) CreateVolume(volName, owner string, mpCount int, request.addParam("size", strconv.FormatUint(dpSize, 10)) request.addParam("capacity", strconv.FormatUint(capacity, 10)) request.addParam("followerRead", strconv.FormatBool(followerRead)) + request.addParam("autoRepair", strconv.FormatBool(autoRepair)) request.addParam("zoneName", zoneName) if _, err = api.mc.serveRequest(request); err != nil { return diff --git a/sdk/master/api_node.go b/sdk/master/api_node.go index c91f456c0c..832827502d 100644 --- a/sdk/master/api_node.go +++ b/sdk/master/api_node.go @@ -16,6 +16,7 @@ package master import ( "encoding/json" + "fmt" "net/http" "strconv" @@ -124,3 +125,37 @@ func (api *NodeAPI) MetaNodeDecommission(nodeAddr string) (err error) { } return } + +func (api *NodeAPI) DataNodeGetPartition(addr string, id uint64) (node *proto.DNDataPartitionInfo, err error) { + var request = newAPIRequest(http.MethodGet, "/partition") + var 
buf []byte + nodeClient := NewNodeClient(fmt.Sprintf("%v:%v", addr, api.mc.DataNodeProfPort), false, DATANODE) + nodeClient.DataNodeProfPort = api.mc.DataNodeProfPort + request.addParam("id", strconv.FormatUint(id, 10)) + request.addHeader("isTimeOut", "false") + if buf, err = nodeClient.serveRequest(request); err != nil { + return + } + node = &proto.DNDataPartitionInfo{} + if err = json.Unmarshal(buf, &node); err != nil { + return + } + return +} + +func (api *NodeAPI) MetaNodeGetPartition(addr string, id uint64) (node *proto.MNMetaPartitionInfo, err error) { + var request = newAPIRequest(http.MethodGet, "/getPartitionById") + var buf []byte + nodeClient := NewNodeClient(fmt.Sprintf("%v:%v", addr, api.mc.MetaNodeProfPort), false, METANODE) + nodeClient.MetaNodeProfPort = api.mc.MetaNodeProfPort + request.addParam("pid", strconv.FormatUint(id, 10)) + request.addHeader("isTimeOut", "false") + if buf, err = nodeClient.serveRequest(request); err != nil { + return + } + node = &proto.MNMetaPartitionInfo{} + if err = json.Unmarshal(buf, &node); err != nil { + return + } + return +} diff --git a/sdk/master/client.go b/sdk/master/client.go index d56e844215..87a2cb5066 100644 --- a/sdk/master/client.go +++ b/sdk/master/client.go @@ -19,6 +19,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/chubaofs/chubaofs/proto" "io/ioutil" "net/http" "strconv" @@ -26,7 +27,6 @@ import ( "sync" "time" - "github.com/chubaofs/chubaofs/proto" "github.com/chubaofs/chubaofs/util/log" ) @@ -38,12 +38,24 @@ var ( ErrNoValidMaster = errors.New("no valid master") ) +type ClientType int + +const ( + MASTER ClientType = iota + DATANODE + METANODE +) + type MasterClient struct { sync.RWMutex - masters []string - useSSL bool - leaderAddr string timeout time.Duration + masters []string + useSSL bool + leaderAddr string + nodeAddr string + ClientType ClientType + DataNodeProfPort uint16 + MetaNodeProfPort uint16 adminAPI *AdminAPI clientAPI *ClientAPI @@ -97,8 +109,8 @@ func (c *MasterClient) SetTimeout(timeout uint16) { } func (c *MasterClient) serveRequest(r *request) (repsData []byte, err error) { - leaderAddr, nodes := c.prepareRequest() - host := leaderAddr + requestAddr, nodes := c.prepareRequest() + host := requestAddr for i := -1; i < len(nodes); i++ { if i == -1 { if host == "" { @@ -141,7 +153,7 @@ func (c *MasterClient) serveRequest(r *request) (repsData []byte, err error) { repsData, err = c.serveRequest(r) return case http.StatusOK: - if leaderAddr != host { + if requestAddr != host { c.setLeader(host) } var body = &struct { @@ -158,7 +170,19 @@ func (c *MasterClient) serveRequest(r *request) (repsData []byte, err error) { if body.Code != 0 { log.LogWarnf("serveRequest: code[%v], msg[%v], data[%v] ", body.Code, body.Msg, body.Data) - return nil, proto.ParseErrorCode(body.Code) + switch c.ClientType { + case MASTER: + // 0 represents proto.ErrCodeSuccess + if body.Code != 0 { + return nil, proto.ParseErrorCode(body.Code) + } + case DATANODE, METANODE: + // the data and meta node profiling APIs report 200 on success + if body.Code != 200 { + return nil, proto.ParseErrorCode(body.Code) + } + } } + return []byte(body.Data), nil default: log.LogErrorf("serveRequest: unknown status: host(%v) uri(%v) status(%v) body(%s).", @@ -180,10 +205,16 @@ func (c *MasterClient) Nodes() (nodes []string) { // prepareRequest returns the leader address and all master addresses.
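Taken together, the NodeAPI getters and the MasterClient changes above let one client type talk to the master as well as to data node and meta node prof ports. A usage sketch follows; the addresses and prof ports are placeholders, and NodeAPI() is assumed to be the existing accessor for the client's nodeAPI field:

package main

import (
	"fmt"

	"github.com/chubaofs/chubaofs/sdk/master"
)

func main() {
	mc := master.NewMasterClient([]string{"192.168.0.11:17010"}, false)
	mc.DataNodeProfPort = 17320 // placeholder prof ports
	mc.MetaNodeProfPort = 17220
	if dp, err := mc.NodeAPI().DataNodeGetPartition("192.168.0.21", 1017); err == nil {
		fmt.Printf("dp %v on %v: used %v of %v bytes\n", dp.ID, dp.Path, dp.Used, dp.Size)
	}
	if mp, err := mc.NodeAPI().MetaNodeGetPartition("192.168.0.31", 20); err == nil {
		fmt.Printf("mp leader %v cursor %v\n", mp.LeaderAddr, mp.Cursor)
	}
}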
func (c *MasterClient) prepareRequest() (addr string, nodes []string) { - c.RLock() - addr = c.leaderAddr - nodes = c.masters - c.RUnlock() + c.Lock() + switch c.ClientType { + case MASTER: + addr = c.leaderAddr + nodes = c.masters + case DATANODE, METANODE: + addr = c.nodeAddr + nodes = []string{addr} + } + c.Unlock() return } @@ -253,6 +284,18 @@ func (c *MasterClient) mergeRequestUrl(url string, params map[string]string) str // NewMasterHelper returns a new MasterClient instance. func NewMasterClient(masters []string, useSSL bool) *MasterClient { var mc = &MasterClient{masters: masters, useSSL: useSSL, timeout: requestTimeout} + mc.ClientType = MASTER + mc.adminAPI = &AdminAPI{mc: mc} + mc.clientAPI = &ClientAPI{mc: mc} + mc.nodeAPI = &NodeAPI{mc: mc} + mc.userAPI = &UserAPI{mc: mc} + return mc +} + +// NewNodeClient returns a new MasterClient instance bound to a single data or meta node. +func NewNodeClient(node string, useSSL bool, clientType ClientType) *MasterClient { + var mc = &MasterClient{nodeAddr: node, useSSL: useSSL} + mc.ClientType = clientType mc.adminAPI = &AdminAPI{mc: mc} mc.clientAPI = &ClientAPI{mc: mc} mc.nodeAPI = &NodeAPI{mc: mc} diff --git a/storage/extent_store.go b/storage/extent_store.go index cb835e414d..ea63dfcfcc 100644 --- a/storage/extent_store.go +++ b/storage/extent_store.go @@ -421,6 +421,10 @@ func (s *ExtentStore) PutNormalExtentToDeleteCache(extentID uint64) { func (s *ExtentStore) IsDeletedNormalExtent(extentID uint64) (ok bool) { _, ok = s.hasDeleteNormalExtentsCache.Load(extentID) + // drop the cached extent info as well once the deletion has been observed + s.eiMutex.Lock() + delete(s.extentInfoMap, extentID) + s.eiMutex.Unlock() return } @@ -473,35 +476,7 @@ func (s *ExtentStore) GetTinyExtentOffset(extentID uint64) (watermark int64, err return } -// Sector size -const ( - DiskSectorSize = 512 -) -func (s *ExtentStore) GetStoreUsedSize() (used int64) { - extentInfoSlice := make([]*ExtentInfo, 0, s.GetExtentCount()) - s.eiMutex.RLock() - for _, extentID := range s.extentInfoMap { - extentInfoSlice = append(extentInfoSlice, extentID) - } - s.eiMutex.RUnlock() - for _, einfo := range extentInfoSlice { - if einfo.IsDeleted { - continue - } - if IsTinyExtent(einfo.FileID) { - stat := new(syscall.Stat_t) - err := syscall.Stat(fmt.Sprintf("%v/%v", s.dataPath, einfo.FileID), stat) - if err != nil { - continue - } - used += (stat.Blocks * DiskSectorSize) - } else { - used += int64(einfo.Size) - } - } - return -} // GetAllWatermarks returns all the watermarks.
func (s *ExtentStore) GetAllWatermarks(filter ExtentFilter) (extents []*ExtentInfo, tinyDeleteFileSize int64, err error) { @@ -999,3 +974,32 @@ func (s *ExtentStore) TinyExtentAvaliOffset(extentID uint64, offset int64) (newO return } + +const ( + DiskSectorSize = 512 +) + +func (s *ExtentStore) GetStoreUsedSize() (used int64) { + extentInfoSlice := make([]*ExtentInfo, 0, s.GetExtentCount()) + s.eiMutex.RLock() + for _, extentInfo := range s.extentInfoMap { + extentInfoSlice = append(extentInfoSlice, extentInfo) + } + s.eiMutex.RUnlock() + for _, einfo := range extentInfoSlice { + if einfo.IsDeleted { + continue + } + if IsTinyExtent(einfo.FileID) { + stat := new(syscall.Stat_t) + err := syscall.Stat(fmt.Sprintf("%v/%v", s.dataPath, einfo.FileID), stat) + if err != nil { + continue + } + used += stat.Blocks * DiskSectorSize + } else { + used += int64(einfo.Size) + } + } + return +} \ No newline at end of file diff --git a/util/string.go b/util/string.go index 8ae4aaa765..ae1e158557 100644 --- a/util/string.go +++ b/util/string.go @@ -65,3 +65,43 @@ func RandomString(length int, seed RandomSeed) string { } return result } + +// Intersect returns the multiset intersection of string1 and string2. +func Intersect(string1, string2 []string) (inter []string) { + m := make(map[string]int) + for _, v := range string1 { + m[v]++ + } + + for _, v := range string2 { + times, ok := m[v] + if ok && times > 0 { + inter = append(inter, v) + m[v]-- + } + } + return +} + +// Projective returns the elements of long left over after removing one occurrence of each element of short; it returns an empty slice when short is not fully contained in long. +func Projective(long, short []string) (result []string) { + if len(short) == 0 { + return long + } + if len(Intersect(long, short)) < len(short) { + return make([]string, 0) + } + m := make(map[string]int) + for _, v := range short { + m[v]++ + } + for _, s := range long { + times, ok := m[s] + if times > 0 && ok { + m[s]-- + } else { + result = append(result, s) + } + } + return result +} diff --git a/vendor/github.com/tiglabs/raft/raft.go b/vendor/github.com/tiglabs/raft/raft.go index 74f3292acd..ef5a4cb63f 100644 --- a/vendor/github.com/tiglabs/raft/raft.go +++ b/vendor/github.com/tiglabs/raft/raft.go @@ -109,6 +109,7 @@ type raft struct { prevHardSt proto.HardState peerState peerState pending map[uint64]*Future + pendingCmd map[uint64]proto.EntryType snapping map[uint64]*snapshotStatus mStatus *monitorStatus propc chan *proposal @@ -149,6 +150,7 @@ func newRaft(config *Config, raftConfig *RaftConfig) (*raft, error) { raftConfig: raftConfig, mStatus: mStatus, pending: make(map[uint64]*Future), + pendingCmd: make(map[uint64]proto.EntryType), snapping: make(map[uint64]*snapshotStatus), recvc: make(chan *proto.Message, config.ReqBufferSize), applyc: make(chan *apply, config.AppBufferSize), @@ -230,7 +232,9 @@ func (s *raft) runApply() { ) switch cmd := apply.command.(type) { case *proto.ConfChange: + logger.Error("raft[%v] invoke ApplyMemberChange: cmd(%v) index(%v) future(%v)", s.raftFsm.id, cmd, apply.index, apply.future) resp, err = s.raftConfig.StateMachine.ApplyMemberChange(cmd, apply.index) + logger.Error("raft[%v] finish ApplyMemberChange: cmd(%v) index(%v) future(%v)", s.raftFsm.id, cmd, apply.index, apply.future) case []byte: resp, err = s.raftConfig.StateMachine.Apply(cmd, apply.index) } @@ -290,6 +294,7 @@ func (s *raft) run() { msg.From = s.config.NodeID starti := s.raftFsm.raftLog.lastIndex() + 1 s.pending[starti] = pr.future + s.pendingCmd[starti] = pr.cmdType msg.Entries = append(msg.Entries, &proto.Entry{Term: s.raftFsm.term, Index: starti, Type: pr.cmdType, Data: pr.data}) pool.returnProposal(pr) @@ -299,6 +304,7 @@ func (s *raft) run() { select { case pr := <-s.propc: s.pending[starti] = pr.future +
s.pendingCmd[starti] = pr.cmdType msg.Entries = append(msg.Entries, &proto.Entry{Term: s.raftFsm.term, Index: starti, Type: pr.cmdType, Data: pr.data}) pool.returnProposal(pr) default: @@ -308,6 +314,11 @@ func (s *raft) run() { break } } + for _, entry := range msg.Entries { + if entry.Type == proto.EntryConfChange { + logger.Error("raft[%v] step EntryConfChange: index(%v) term(%v)", s.raftFsm.id, entry.Index, entry.Term) + } + } s.raftFsm.Step(msg) case m := <-s.recvc: @@ -679,6 +690,7 @@ func (s *raft) apply() { if future, ok := s.pending[entry.Index]; ok { apply.future = future delete(s.pending, entry.Index) + delete(s.pendingCmd, entry.Index) } apply.readIndexes = s.raftFsm.readOnly.getReady(entry.Index) @@ -731,6 +743,7 @@ func (s *raft) resetPending(err error) { for k, v := range s.pending { v.respond(nil, err) delete(s.pending, k) + delete(s.pendingCmd, k) } } } @@ -770,6 +783,11 @@ func (s *raft) getStatus() *Status { default: } + pendingCmd := make(map[uint64]proto.EntryType) + for k, v := range s.pendingCmd { + pendingCmd[k] = v + } + st := &Status{ ID: s.raftFsm.id, NodeID: s.config.NodeID, @@ -782,6 +800,7 @@ func (s *raft) getStatus() *Status { State: s.raftFsm.state.String(), RestoringSnapshot: s.restoringSnapshot.Get(), PendQueue: len(s.pending), + PendCmd: pendingCmd, RecvQueue: len(s.recvc), AppQueue: len(s.applyc), Stopped: stopped, diff --git a/vendor/github.com/tiglabs/raft/raft_fsm.go b/vendor/github.com/tiglabs/raft/raft_fsm.go index 12e6cfad16..54a03ef9a9 100644 --- a/vendor/github.com/tiglabs/raft/raft_fsm.go +++ b/vendor/github.com/tiglabs/raft/raft_fsm.go @@ -20,9 +20,10 @@ import ( "math/rand" "strings" + "time" + "github.com/tiglabs/raft/logger" "github.com/tiglabs/raft/proto" - "time" ) // NoLeader is a placeholder nodeID used when there is no leader. diff --git a/vendor/github.com/tiglabs/raft/status.go b/vendor/github.com/tiglabs/raft/status.go index b13d50744b..45f0e44719 100644 --- a/vendor/github.com/tiglabs/raft/status.go +++ b/vendor/github.com/tiglabs/raft/status.go @@ -17,6 +17,8 @@ package raft import ( "fmt" "time" + + "github.com/tiglabs/raft/proto" ) // DownReplica down replica @@ -49,6 +51,7 @@ type Status struct { Applied uint64 Vote uint64 PendQueue int + PendCmd map[uint64]proto.EntryType RecvQueue int AppQueue int Stopped bool
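Closing note on the vendored raft changes: pendingCmd shadows the pending-futures map with each proposal's entry type, and getStatus snapshots it into Status.PendCmd, so a stuck proposal can be identified as a membership change without guesswork. A debugging sketch along those lines; the dump helper is hypothetical, while Status.PendCmd and proto.EntryConfChange come from the patch:

package sketch

import (
	"fmt"

	"github.com/tiglabs/raft"
	"github.com/tiglabs/raft/proto"
)

// dumpPendingCmds flags in-flight proposals that are raft membership changes.
func dumpPendingCmds(st *raft.Status) {
	for idx, typ := range st.PendCmd {
		if typ == proto.EntryConfChange {
			fmt.Printf("index %d: pending member change\n", idx)
		} else {
			fmt.Printf("index %d: pending normal entry\n", idx)
		}
	}
}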