Skip to content

Commit

Permalink
add hash-set debugging tool, currently supports only SIPMOD (minio#13911)
Browse files Browse the repository at this point in the history
Also fixes a bug in s3-check-md5 for multipart objects
that were uploaded as a single part.
  • Loading branch information
harshavardhana authored Dec 15, 2021
1 parent b42cfce commit 4fa250a
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 3 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,7 @@ xl-meta*
healing-bin*
inspect*
200M*
hash-set
minio.RELEASE*
mc
nancy
99 changes: 99 additions & 0 deletions docs/debugging/hash-set/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package main

import (
"encoding/binary"
"flag"
"fmt"
"hash/crc32"
"log"

"github.com/dchest/siphash"
"github.com/google/uuid"
)

// sipHashMod maps key onto a bucket index in the range [0, cardinality)
// using SipHash (the SIPMOD scheme). The 16-byte id supplies the 128-bit
// SipHash key: its first and last 8 bytes are each decoded as a
// little-endian uint64. The 64-bit digest of key is then reduced modulo
// cardinality.
//
// It returns -1 when cardinality is zero or negative.
func sipHashMod(key string, cardinality int, id [16]byte) int {
	if cardinality < 1 {
		return -1
	}
	// Passing the key as two uint64 halves is the faster form of the API
	// as per the siphash docs: https://github.com/dchest/siphash#usage
	lo := binary.LittleEndian.Uint64(id[:8])
	hi := binary.LittleEndian.Uint64(id[8:])
	digest := siphash.Hash(lo, hi, []byte(key))
	return int(digest % uint64(cardinality))
}

// hashOrder returns a deterministic ordering of the integers 1..cardinality
// derived from key. The CRC32 (IEEE) checksum of key, reduced modulo
// cardinality, picks a starting offset; the result is the sequence
// 1..cardinality rotated by that offset, so the same key always yields the
// same order.
//
// NOTE: different keys may produce the same order; uniqueness across keys
// is not a goal.
//
// It returns nil when cardinality is zero or negative.
func hashOrder(key string, cardinality int) []int {
	if cardinality < 1 {
		return nil
	}

	order := make([]int, 0, cardinality)
	offset := int(crc32.ChecksumIEEE([]byte(key)) % uint32(cardinality))

	for pos := 1; pos <= cardinality; pos++ {
		// Values are 1-based, hence the +1 after wrapping.
		order = append(order, (offset+pos)%cardinality+1)
	}
	return order
}

// Command-line flag destinations, registered and populated in main.
var (
// object is set by -object; deploymentID is set by -deployment-id.
object, deploymentID string
// setCount is set by -set-count; shards is set by -shards.
setCount, shards int
)

// main parses and validates the command-line flags, then prints the erasure
// distribution (hashOrder over -shards) and the 1-based erasure set number
// (sipHashMod over -set-count, keyed by -deployment-id) for the given object.
// Any invalid or missing flag terminates the program with a fatal log line.
func main() {
	flag.StringVar(&object, "object", "", "Select an object")
	flag.StringVar(&deploymentID, "deployment-id", "", "MinIO deployment ID, obtained from 'format.json'")
	flag.IntVar(&setCount, "set-count", 0, "Total set count")
	flag.IntVar(&shards, "shards", 0, "Total shards count")

	flag.Parse()

	if object == "" {
		log.Fatalln("object name is mandatory")
	}

	if deploymentID == "" {
		log.Fatalln("deployment ID is mandatory")
	}

	// Reject negative values as well as zero: sipHashMod returns -1 and
	// hashOrder returns nil for non-positive input, which would otherwise
	// be printed as a bogus set number 0 and an empty distribution.
	if setCount <= 0 {
		log.Fatalln("set count must be greater than zero")
	}

	if shards <= 0 {
		log.Fatalln("total shards must be greater than zero")
	}

	// The deployment ID is user input; report a malformed value cleanly
	// instead of panicking as uuid.MustParse would.
	id, err := uuid.Parse(deploymentID)
	if err != nil {
		log.Fatalln("invalid deployment ID:", err)
	}

	fmt.Println("Erasure distribution for the object", hashOrder(object, shards))
	// sipHashMod is 0-based; set numbers are reported 1-based.
	fmt.Println("Erasure setNumber for the object", sipHashMod(object, setCount, id)+1)
}
7 changes: 4 additions & 3 deletions docs/debugging/s3-check-md5/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ func main() {
continue
}
parts := 1
multipart := false
s := strings.Split(object.ETag, "-")
switch len(s) {
case 1:
Expand All @@ -124,6 +125,7 @@ func main() {
log.Println("ETAG: wrong format:", err)
continue
}
multipart = true
default:
log.Println("Unexpected ETAG format", object.ETag)
continue
Expand All @@ -148,13 +150,12 @@ func main() {

corrupted := false

switch parts {
case 1:
if !multipart {
md5sum := fmt.Sprintf("%x", partsMD5Sum[0])
if md5sum != object.ETag {
corrupted = true
}
default:
} else {
var totalMD5SumBytes []byte
for _, sum := range partsMD5Sum {
totalMD5SumBytes = append(totalMD5SumBytes, sum...)
Expand Down

0 comments on commit 4fa250a

Please sign in to comment.