Skip to content

Commit

Permalink
attempt to real resolve when there is a quorum failure on reads (mini…
Browse files Browse the repository at this point in the history
  • Loading branch information
harshavardhana authored Apr 20, 2022
1 parent 73a6a60 commit 507f993
Show file tree
Hide file tree
Showing 28 changed files with 595 additions and 49 deletions.
1 change: 1 addition & 0 deletions .github/workflows/go-healing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ jobs:
sudo sysctl net.ipv6.conf.all.disable_ipv6=0
sudo sysctl net.ipv6.conf.default.disable_ipv6=0
make verify-healing
make verify-healing-inconsistent-versions
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ verify-healing: ## verify healing and replacing disks with minio binary
@(env bash $(PWD)/buildscripts/verify-healing.sh)
@(env bash $(PWD)/buildscripts/unaligned-healing.sh)

# Builds ./minio with the Go race detector enabled (CGO_ENABLED=1, -race) and
# then runs buildscripts/resolve-right-versions.sh, which expects that binary
# at $(PWD)/minio.
verify-healing-inconsistent-versions: ## verify resolving inconsistent versions
@echo "Verify resolving inconsistent versions build with race"
@CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
@(env bash $(PWD)/buildscripts/resolve-right-versions.sh)

build: checks ## builds minio to $(PWD)
@echo "Building minio binary to './minio'"
@CGO_ENABLED=0 go build -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
72 changes: 72 additions & 0 deletions buildscripts/resolve-right-versions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash -e

# Shell options: inherit ERR traps in functions/subshells, fail a pipeline when
# any stage fails, and trace every command as it runs.
set -E
set -o pipefail
set -x

# Scratch directory for this run, the MinIO config dir inside it, and the
# server command line prefix shared by every server start.
WORK_DIR="${PWD}/.verify-${RANDOM}"
MINIO_CONFIG_DIR="${WORK_DIR}/.minio"
MINIO=("${PWD}/minio" --config-dir "${MINIO_CONFIG_DIR}" server)

# The binary under test must already be built in the current directory.
if ! [ -x "${PWD}/minio" ]; then
	echo "minio executable binary not found in current directory"
	exit 1
fi

# start_minio_5drive PORT
#
# Builds a throwaway `mc` client from source, copies the checked-in
# `buildscripts/cicd-corpus/` drives into $WORK_DIR, starts one MinIO server
# on PORT over those five drives and checks that `minio/bucket/testobj` can be
# stat'ed through it.
#
# Returns the exit status of the `mc stat` check. Exits 1 (after purging
# $WORK_DIR) when the setup itself fails. Every step is checked explicitly so
# the result does not depend on whether errexit happens to be active: the
# `-e` on the shebang line is not applied when the script is started as
# `bash <script>`.
function start_minio_5drive() {
	local start_port=$1
	local pid
	local rv=0

	export MINIO_ROOT_USER=minio
	export MINIO_ROOT_PASSWORD=minio123
	# mc picks up the "minio" alias from this variable; credentials must match
	# the root user/password exported above.
	export MC_HOST_minio="http://${MINIO_ROOT_USER}:${MINIO_ROOT_PASSWORD}@127.0.0.1:${start_port}/"
	unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
	export MINIO_CI_CD=1

	MC_BUILD_DIR="mc-$RANDOM"
	if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
		echo "failed to download https://github.com/minio/mc"
		purge "${MC_BUILD_DIR}"
		exit 1
	fi

	if ! (cd "${MC_BUILD_DIR}" && go build -o "$WORK_DIR/mc"); then
		echo "failed to build mc"
		purge "${MC_BUILD_DIR}"
		purge "$WORK_DIR"
		exit 1
	fi

	# remove mc source.
	purge "${MC_BUILD_DIR}"

	if ! "${WORK_DIR}/mc" cp --quiet -r "buildscripts/cicd-corpus/" "${WORK_DIR}/cicd-corpus/"; then
		echo "failed to copy buildscripts/cicd-corpus/"
		purge "$WORK_DIR"
		exit 1
	fi

	# The drive pattern is quoted on purpose: the `{1...5}` ellipsis is
	# expanded by minio itself, not by the shell.
	"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/cicd-corpus/disk{1...5}" >"${WORK_DIR}/server1.log" 2>&1 &
	pid=$!
	disown $pid
	sleep 30

	# The server must still be alive after the startup grace period.
	if ! ps -p "${pid}" >/dev/null; then
		echo "server1 log:"
		cat "${WORK_DIR}/server1.log"
		echo "FAILED"
		purge "$WORK_DIR"
		exit 1
	fi

	# Remember the verdict instead of aborting here, so the server is always
	# stopped before the function returns.
	"${WORK_DIR}/mc" stat minio/bucket/testobj || rv=$?

	pkill minio
	sleep 3

	return "${rv}"
}

# main picks a random port in the 10000-65000 range and runs the five-drive
# verification against a server listening on it.
function main() {
	local start_port
	start_port="$(shuf -i 10000-65000 -n 1)"

	start_minio_5drive "${start_port}"
}

# purge PATH
#
# Recursively and forcibly removes PATH.
function purge() {
	local target="$1"
	rm -rf "${target}"
}

# Remove the scratch directory on every exit path. With errexit active
# (`bash -e`) a failing `( main )` subshell terminates the script right at
# that line, so cleanup placed after it would be skipped exactly when the
# run fails. The EXIT trap is not inherited by the subshell, so it fires once,
# when this top-level shell exits.
trap 'purge "$WORK_DIR"' EXIT

( main "$@" )
rv=$?
exit "$rv"
2 changes: 1 addition & 1 deletion cmd/erasure-healing-common.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func commonTimeAndOccurence(times []time.Time, group time.Duration) (maxTime tim
groupNano := group.Nanoseconds()
// Ignore the time sentinel and zero times, and count the rest.
for _, t := range times {
if t.Equal(timeSentinel) {
if t.Equal(timeSentinel) || t.IsZero() {
continue
}
nano := t.UnixNano()
Expand Down
12 changes: 10 additions & 2 deletions cmd/erasure-healing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,8 +758,10 @@ func TestHealObjectCorruptedPools(t *testing.T) {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}

fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing")
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
}

err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
Expand All @@ -784,8 +786,10 @@ func TestHealObjectCorruptedPools(t *testing.T) {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}

fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing")
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
}

// Test 4: checks if HealObject returns an error when xl.meta is not found
Expand Down Expand Up @@ -904,6 +908,8 @@ func TestHealObjectCorruptedXLMeta(t *testing.T) {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}

fi.DiskMTime = time.Time{}
nfi1.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi1) {
t.Fatalf("FileInfo not equal after healing")
}
Expand All @@ -925,6 +931,8 @@ func TestHealObjectCorruptedXLMeta(t *testing.T) {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}

fi.DiskMTime = time.Time{}
nfi2.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi2) {
t.Fatalf("FileInfo not equal after healing")
}
Expand Down
131 changes: 125 additions & 6 deletions cmd/erasure-object.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,26 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d
}
// Read metadata associated with the object from all disks.
storageDisks := er.getDisks()
metaArr, errs := readAllFileInfo(ctx, storageDisks, srcBucket, srcObject, srcOpts.VersionID, true)

// get Quorum for this object
var metaArr []FileInfo
var errs []error

// Read metadata associated with the object from all disks.
if srcOpts.VersionID != "" {
metaArr, errs = readAllFileInfo(ctx, storageDisks, srcBucket, srcObject, srcOpts.VersionID, true)
} else {
metaArr, errs = readAllXL(ctx, storageDisks, srcBucket, srcObject, true)
}

readQuorum, writeQuorum, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
if err != nil {
return oi, toObjectErr(err, srcBucket, srcObject)
if errors.Is(err, errErasureReadQuorum) && !strings.HasPrefix(srcBucket, minioMetaBucket) {
_, derr := er.deleteIfDangling(ctx, srcBucket, srcObject, metaArr, errs, nil, srcOpts)
if derr != nil {
err = derr
}
}
return ObjectInfo{}, toObjectErr(err, srcBucket, srcObject)
}

// List all online disks.
Expand Down Expand Up @@ -436,11 +450,90 @@ func (er erasureObjects) deleteIfDangling(ctx context.Context, bucket, object st
return m, err
}

// readAllXL reads the raw `xl.meta` of bucket/object from every disk in
// parallel, merges the per-disk version lists with mergeXLV2Versions using a
// simple majority of the disks as quorum, and then converts each disk's
// metadata (now carrying the merged versions) into a FileInfo by calling
// ToFileInfo with an empty version ID.
//
// The returned slices are indexed like disks: entry i holds the FileInfo and
// the error for disks[i]. A nil disk yields errDiskNotFound. For successfully
// converted entries the inline data for the resolved version and the on-disk
// mtime reported by ReadXL are attached to the FileInfo.
func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, readData bool) ([]FileInfo, []error) {
	numDisks := len(disks)
	xlMetas := make([]*xlMetaV2, numDisks)
	fileInfos := make([]FileInfo, numDisks)
	shallowVersions := make([][]xlMetaV2ShallowVersion, numDisks)

	// Errors in this list are returned to the caller but not logged.
	quietErrs := []error{
		errFileNotFound,
		errVolumeNotFound,
		errFileVersionNotFound,
		errDiskNotFound,
	}

	g := errgroup.WithNErrs(numDisks)
	// Read `xl.meta` in parallel across disks.
	for i := range disks {
		i := i
		g.Go(func() error {
			disk := disks[i]
			if disk == nil {
				return errDiskNotFound
			}

			raw, err := disk.ReadXL(ctx, bucket, object, readData)
			if err != nil {
				if !IsErr(err, quietErrs...) {
					logger.LogOnceIf(ctx, fmt.Errorf("Drive %s, path (%s/%s) returned an error (%w)",
						disk, bucket, object, err),
						disk.String())
				}
				return err
			}

			meta := new(xlMetaV2)
			if err := meta.LoadOrConvert(raw.Buf); err != nil {
				return err
			}

			xlMetas[i] = meta
			// Only the mtime is known at this point; the remaining fields
			// are filled in after the versions have been merged.
			fileInfos[i] = FileInfo{DiskMTime: raw.DiskMTime}
			return nil
		}, i)
	}

	errs := g.Wait()

	// Collect the version list of every disk that was read successfully.
	for i, meta := range xlMetas {
		if meta != nil {
			shallowVersions[i] = meta.versions
		}
	}

	quorum := (len(disks) + 1) / 2
	merged := mergeXLV2Versions(quorum, false, 1, shallowVersions...)

	for i, meta := range xlMetas {
		if meta == nil {
			continue
		}

		// Every disk resolves against the same merged view of the versions.
		meta.versions = merged

		// make sure to preserve this for diskmtime based healing bugfix.
		mtime := fileInfos[i].DiskMTime

		fi, err := meta.ToFileInfo(bucket, object, "")
		fileInfos[i], errs[i] = fi, err
		if err != nil {
			continue
		}

		vid := fi.VersionID
		if vid == "" {
			vid = nullVersionID
		}
		fileInfos[i].Data = meta.data.find(vid)
		fileInfos[i].DiskMTime = mtime
	}

	// Return all the metadata.
	return fileInfos, errs
}

func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object string, opts ObjectOptions, readData bool) (fi FileInfo, metaArr []FileInfo, onlineDisks []StorageAPI, err error) {
disks := er.getDisks()

var errs []error

// Read metadata associated with the object from all disks.
metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, readData)
if opts.VersionID != "" {
metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, readData)
} else {
metaArr, errs = readAllXL(ctx, disks, bucket, object, readData)
}

readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
if err != nil {
Expand Down Expand Up @@ -1453,11 +1546,24 @@ func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object s

disks := er.getDisks()

var metaArr []FileInfo
var errs []error

// Read metadata associated with the object from all disks.
metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
if opts.VersionID != "" {
metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
} else {
metaArr, errs = readAllXL(ctx, disks, bucket, object, false)
}

readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
if err != nil {
if errors.Is(err, errErasureReadQuorum) && !strings.HasPrefix(bucket, minioMetaBucket) {
_, derr := er.deleteIfDangling(ctx, bucket, object, metaArr, errs, nil, opts)
if derr != nil {
err = derr
}
}
return ObjectInfo{}, toObjectErr(err, bucket, object)
}

Expand Down Expand Up @@ -1513,11 +1619,24 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin

disks := er.getDisks()

var metaArr []FileInfo
var errs []error

// Read metadata associated with the object from all disks.
metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
if opts.VersionID != "" {
metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
} else {
metaArr, errs = readAllXL(ctx, disks, bucket, object, false)
}

readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
if err != nil {
if errors.Is(err, errErasureReadQuorum) && !strings.HasPrefix(bucket, minioMetaBucket) {
_, derr := er.deleteIfDangling(ctx, bucket, object, metaArr, errs, nil, opts)
if derr != nil {
err = derr
}
}
return ObjectInfo{}, toObjectErr(err, bucket, object)
}

Expand Down
7 changes: 7 additions & 0 deletions cmd/naughty-disk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,13 @@ func (d *naughtyDisk) ReadAll(ctx context.Context, volume string, path string) (
return d.disk.ReadAll(ctx, volume, path)
}

// ReadXL returns the error produced by calcError when there is one, and
// otherwise forwards the call unchanged to the wrapped disk.
func (d *naughtyDisk) ReadXL(ctx context.Context, volume string, path string, readData bool) (rf RawFileInfo, err error) {
	injected := d.calcError()
	if injected != nil {
		return rf, injected
	}
	return d.disk.ReadXL(ctx, volume, path, readData)
}

func (d *naughtyDisk) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error {
if err := d.calcError(); err != nil {
return err
Expand Down
14 changes: 14 additions & 0 deletions cmd/storage-datatypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,23 @@ func (f *FileInfoVersions) findVersionIndex(v string) int {
return -1
}

// RawFileInfo - represents raw file stat information as byte array,
// together with the on-disk modification time of the file it was read from.
// Any added/deleted fields are incompatible.
// Make sure to bump the internode version at storage-rest-common.go
// NOTE(review): the earlier wording "The above means ..." matches the comment
// on FileInfo, where it follows a `//msgp:tuple` directive; no such directive
// is present on this type - confirm whether one is intended.
type RawFileInfo struct {
// Content of entire xl.meta (may contain data depending on what was requested by the caller).
Buf []byte `msg:"b"`

// DiskMTime indicates the mtime of the xl.meta on disk
// This is mainly used for detecting a particular issue
// reported in https://github.com/minio/minio/pull/13803
DiskMTime time.Time `msg:"dmt"`
}

// FileInfo - represents file stat information.
//msgp:tuple FileInfo
// The above means that any added/deleted fields are incompatible.
// Make sure to bump the internode version at storage-rest-common.go
type FileInfo struct {
// Name of the volume.
Volume string `msg:"v,omitempty"`
Expand Down
Loading

0 comments on commit 507f993

Please sign in to comment.