Skip to content

Commit

Permalink
check: Add --download flag to check all the data, not just hashes
Browse files Browse the repository at this point in the history
  • Loading branch information
ncw committed Feb 13, 2017
1 parent 370f242 commit 5419292
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 34 deletions.
23 changes: 19 additions & 4 deletions cmd/check/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,39 @@ import (
"github.com/spf13/cobra"
)

// Globals
var (
// download holds the value of the --download flag. When it is true
// the check command calls fs.CheckDownload rather than fs.Check.
download = false
)

// init adds the check command to the root command and defines its
// --download flag, whose value is stored in the download global.
func init() {
cmd.Root.AddCommand(commandDefintion)
// No shorthand letter is given for the flag, so only the long form --download exists.
commandDefintion.Flags().BoolVarP(&download, "download", "", download, "Check by downloading rather than with hash.")
}

var commandDefintion = &cobra.Command{
Use: "check source:path dest:path",
Short: `Checks the files in the source and destination match.`,
Long: `
Checks the files in the source and destination match. It
compares sizes and MD5SUMs and prints a report of files which
don't match. It doesn't alter the source or destination.
Checks the files in the source and destination match. It compares
sizes and hashes (MD5 or SHA1) and logs a report of files which don't
match. It doesn't alter the source or destination.
If you supply the --size-only flag, it will only compare the sizes not
the hashes as well. Use this for a quick check.
` + "`" + `--size-only` + "`" + ` may be used to only compare the sizes, not the MD5SUMs.
If you supply the --download flag, it will download the data from
both remotes and check them against each other on the fly. This can
be useful for remotes that don't support hashes or if you really want
to check all the data.
`,
Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(2, 2, command, args)
fsrc, fdst := cmd.NewFsSrcDst(args)
cmd.Run(false, false, command, func() error {
if download {
return fs.CheckDownload(fdst, fsrc)
}
return fs.Check(fdst, fsrc)
})
},
Expand Down
7 changes: 0 additions & 7 deletions cmd/cryptcheck/cryptcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,6 @@ func cryptCheck(fdst, fsrc fs.Fs) error {
// it returns true if differences were found
// it also returns whether it couldn't be hashed
checkIdentical := func(dst, src fs.Object) (differ bool, noHash bool) {
fs.Stats.Checking(src.Remote())
defer fs.Stats.DoneChecking(src.Remote())
if src.Size() != dst.Size() {
fs.Stats.Error()
fs.Errorf(src, "Sizes differ")
return true, false
}
cryptDst := dst.(*crypt.Object)
underlyingDst := cryptDst.UnWrap()
underlyingHash, err := underlyingDst.Hash(hashType)
Expand Down
131 changes: 110 additions & 21 deletions fs/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package fs

import (
"bytes"
"fmt"
"io"
"log"
Expand Down Expand Up @@ -669,29 +670,19 @@ func Overlapping(fdst, fsrc Info) bool {
// it returns true if differences were found
// it also returns whether it couldn't be hashed
func checkIdentical(dst, src Object) (differ bool, noHash bool) {
Stats.Checking(src.Remote())
defer Stats.DoneChecking(src.Remote())
if src.Size() != dst.Size() {
Stats.Error()
Errorf(src, "Sizes differ")
same, hash, err := CheckHashes(src, dst)
if err != nil {
// CheckHashes will log and count errors
return true, false
}
if !Config.SizeOnly {
same, hash, err := CheckHashes(src, dst)
if err != nil {
// CheckHashes will log and count errors
return true, false
}
if hash == HashNone {
return false, true
}
if !same {
Stats.Error()
Errorf(src, "%v differ", hash)
return true, false
}
if hash == HashNone {
return false, true
}
if !same {
Stats.Error()
Errorf(src, "%v differ", hash)
return true, false
}
Debugf(src, "OK")
return false, false
}

Expand Down Expand Up @@ -746,15 +737,31 @@ func CheckFn(fdst, fsrc Fs, checkFunction func(a, b Object) (differ bool, noHash
close(checks)
}()

// checkIdentical wraps checkFunction with the accounting and the size
// comparison which are common to every kind of check.
//
// it returns true if differences were found
// it also returns whether it couldn't be hashed
checkIdentical := func(dst, src Object) (differ bool, noHash bool) {
	Stats.Checking(src.Remote())
	defer Stats.DoneChecking(src.Remote())
	switch {
	case src.Size() != dst.Size():
		// A size mismatch is conclusive so checkFunction is not called
		Stats.Error()
		Errorf(src, "Sizes differ")
		return true, false
	case Config.SizeOnly:
		// The sizes match and that is all we were asked to compare
		return false, false
	}
	return checkFunction(dst, src)
}

var checkerWg sync.WaitGroup
checkerWg.Add(Config.Checkers)
for i := 0; i < Config.Checkers; i++ {
go func() {
defer checkerWg.Done()
for check := range checks {
differ, noHash := checkFunction(check[0], check[1])
differ, noHash := checkIdentical(check[0], check[1])
if differ {
atomic.AddInt32(&differences, 1)
} else {
Debugf(check[0], "OK")
}
if noHash {
atomic.AddInt32(&noHashes, 1)
Expand All @@ -780,6 +787,88 @@ func Check(fdst, fsrc Fs) error {
return CheckFn(fdst, fsrc, checkIdentical)
}

// ReadFill fills buf with data read from r and returns the number of
// bytes it stored there.
//
// Unlike io.ReadFull a short read is not an error in itself - reading
// carries on until buf is full or r.Read returns an error, in which
// case that error (io.EOF included) is returned unchanged along with
// the count of bytes read so far.
func ReadFill(r io.Reader, buf []byte) (n int, err error) {
	for n < len(buf) {
		var got int
		got, err = r.Read(buf[n:])
		n += got
		if err != nil {
			// Keep whatever arrived with the error
			return n, err
		}
	}
	return n, nil
}

// CheckEqualReaders compares the content of in1 and in2 a block at a
// time, stopping at the first difference or read error.
//
// it returns true if differences were found
//
// A read error other than io.EOF from either reader is returned
// together with differ set to true.
func CheckEqualReaders(in1, in2 io.Reader) (differ bool, err error) {
	const blockSize = 64 * 1024
	var (
		block1 = make([]byte, blockSize)
		block2 = make([]byte, blockSize)
	)
	for done := false; !done; {
		n1, err1 := ReadFill(in1, block1)
		n2, err2 := ReadFill(in2, block2)
		// Real errors are reported before any data is compared, with
		// an error from in1 taking priority over one from in2.
		switch {
		case err1 != nil && err1 != io.EOF:
			return true, err1
		case err2 != nil && err2 != io.EOF:
			return true, err2
		}
		// err1 and err2 are each either nil or io.EOF from here on.
		//
		// ReadFill only comes back with a short block when its reader
		// ran out, so streams of different length show up as n1 != n2.
		if n1 != n2 || !bytes.Equal(block1[:n1], block2[:n2]) {
			return true, nil
		}
		// if both streams have finished then we have finished
		done = err1 == io.EOF && err2 == io.EOF
	}
	return false, nil
}

// CheckIdentical opens dst and src and compares the data read from
// each of them with CheckEqualReaders.
//
// it returns true if differences were found
//
// If either object can't be opened it returns true and the wrapped
// error from Open.
func CheckIdentical(dst, src Object) (differ bool, err error) {
	dstIn, err := dst.Open()
	if err != nil {
		return true, errors.Wrapf(err, "failed to open %q", dst)
	}
	dstIn = NewAccountWithBuffer(dstIn, dst) // account and buffer the transfer
	// NOTE(review): CheckClose is handed the named result err,
	// presumably so a failure to close can be reported - confirm
	// against CheckClose.
	defer CheckClose(dstIn, &err)

	srcIn, err := src.Open()
	if err != nil {
		return true, errors.Wrapf(err, "failed to open %q", src)
	}
	srcIn = NewAccountWithBuffer(srcIn, src) // account and buffer the transfer
	defer CheckClose(srcIn, &err)

	differ, err = CheckEqualReaders(dstIn, srcIn)
	return differ, err
}

// CheckDownload checks the files in fsrc and fdst according to Size
// and the actual contents of the files.
//
// It runs CheckFn with a check function which compares each pair of
// objects using CheckIdentical.
func CheckDownload(fdst, fsrc Fs) error {
	return CheckFn(fdst, fsrc, func(dst, src Object) (differ bool, noHash bool) {
		differ, err := CheckIdentical(dst, src)
		if err == nil {
			return differ, false
		}
		// The comparison could not be completed, so count the error
		// and report the pair as both differing and not checked.
		Stats.Error()
		Errorf(dst, "Failed to download: %v", err)
		return true, true
	})
}

// ListFn lists the Fs to the supplied function
//
// Lists in parallel which may get them out of order
Expand Down
118 changes: 116 additions & 2 deletions fs/operations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ package fs_test

import (
"bytes"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
Expand Down Expand Up @@ -469,14 +471,14 @@ func TestDelete(t *testing.T) {
fstest.CheckItems(t, r.fremote, file3)
}

func TestCheck(t *testing.T) {
func testCheck(t *testing.T, checkFunction func(fdst, fsrc fs.Fs) error) {
r := NewRun(t)
defer r.Finalise()

check := func(i int, wantErrors int64) {
fs.Debugf(r.fremote, "%d: Starting check test", i)
oldErrors := fs.Stats.GetErrors()
err := fs.Check(r.flocal, r.fremote)
err := checkFunction(r.flocal, r.fremote)
gotErrors := fs.Stats.GetErrors() - oldErrors
if wantErrors == 0 && err != nil {
t.Errorf("%d: Got error when not expecting one: %v", i, err)
Expand Down Expand Up @@ -517,6 +519,14 @@ func TestCheck(t *testing.T) {
check(5, 0)
}

// TestCheck runs the shared scenarios in testCheck against fs.Check.
func TestCheck(t *testing.T) {
testCheck(t, fs.Check)
}

// TestCheckDownload runs the shared scenarios in testCheck against
// fs.CheckDownload.
func TestCheckDownload(t *testing.T) {
testCheck(t, fs.CheckDownload)
}

func TestCheckSizeOnly(t *testing.T) {
fs.Config.SizeOnly = true
defer func() { fs.Config.SizeOnly = false }()
Expand Down Expand Up @@ -954,3 +964,107 @@ func TestListDirSorted(t *testing.T) {
require.Len(t, items, 1)
assert.Equal(t, "sub dir/sub sub dir/", str(0))
}

// byteReader is an io.Reader for tests which hands out a single byte
// per Read call, counting down from its starting value of c to 1, and
// returns io.EOF once c has reached 0.
type byteReader struct {
	c byte // next byte to hand out, 0 when exhausted
}

// Read stores the current value of c in p[0] and then decrements c.
//
// It returns 0, io.EOF when c is 0 and 0, nil when p has no room.
func (br *byteReader) Read(p []byte) (n int, err error) {
	switch {
	case br.c == 0:
		return 0, io.EOF
	case len(p) == 0:
		return 0, nil
	}
	p[0] = br.c
	br.c--
	return 1, nil
}

// TestReadFill checks fs.ReadFill against a reader which is already
// exhausted, one which runs out before the buffer is full and one
// which has more data than the buffer can hold.
func TestReadFill(t *testing.T) {
	// buf is shared by all the cases, so any bytes a read doesn't
	// overwrite keep the value the previous case left there.
	buf := []byte{9, 9, 9, 9, 9}

	for _, tc := range []struct {
		start   byte // initial value for the byteReader
		wantErr error
		wantN   int
		wantBuf []byte
	}{
		{start: 0, wantErr: io.EOF, wantN: 0, wantBuf: []byte{9, 9, 9, 9, 9}},
		{start: 3, wantErr: io.EOF, wantN: 3, wantBuf: []byte{3, 2, 1, 9, 9}},
		{start: 8, wantErr: nil, wantN: 5, wantBuf: []byte{8, 7, 6, 5, 4}},
	} {
		n, err := fs.ReadFill(&byteReader{tc.start}, buf)
		assert.Equal(t, tc.wantErr, err)
		assert.Equal(t, tc.wantN, n)
		assert.Equal(t, tc.wantBuf, buf)
	}
}

// errorReader is an io.Reader for tests which never produces any data
// and fails every Read with err.
type errorReader struct {
	err error // returned by every call to Read
}

// Read ignores p and returns 0 bytes along with the configured error.
func (er errorReader) Read(p []byte) (n int, err error) {
	err = er.err
	return n, err
}

// TestCheckEqualReaders checks fs.CheckEqualReaders with streams which
// are equal, which differ in their last byte and which differ in
// length, then repeats those combinations with a read error appended
// to the first or the second stream.
func TestCheckEqualReaders(t *testing.T) {
	// All the inputs are longer than 64k, with the only differing byte
	// of b65b placed right at the end.
	b65a := make([]byte, 65*1024)
	b65b := make([]byte, 65*1024)
	b65b[len(b65b)-1] = 1
	b66 := make([]byte, 66*1024)

	myErr := errors.New("sentinel")

	// plain returns a reader over b which finishes with io.EOF
	plain := func(b []byte) io.Reader {
		return bytes.NewBuffer(b)
	}
	// wrap returns a reader over b which fails with myErr once all
	// of b has been read
	wrap := func(b []byte) io.Reader {
		return io.MultiReader(bytes.NewBuffer(b), errorReader{myErr})
	}

	for _, tc := range []struct {
		in1, in2   io.Reader
		wantErr    error // nil means no error is expected
		wantDiffer bool
	}{
		// no read errors
		{in1: plain(b65a), in2: plain(b65a), wantDiffer: false},
		{in1: plain(b65a), in2: plain(b65b), wantDiffer: true},
		{in1: plain(b65a), in2: plain(b66), wantDiffer: true},
		{in1: plain(b66), in2: plain(b65a), wantDiffer: true},
		// error at the end of the first stream
		{in1: wrap(b65a), in2: plain(b65a), wantErr: myErr, wantDiffer: true},
		{in1: wrap(b65a), in2: plain(b65b), wantErr: myErr, wantDiffer: true},
		{in1: wrap(b65a), in2: plain(b66), wantErr: myErr, wantDiffer: true},
		{in1: wrap(b66), in2: plain(b65a), wantErr: myErr, wantDiffer: true},
		// error at the end of the second stream
		{in1: plain(b65a), in2: wrap(b65a), wantErr: myErr, wantDiffer: true},
		{in1: plain(b65a), in2: wrap(b65b), wantErr: myErr, wantDiffer: true},
		{in1: plain(b65a), in2: wrap(b66), wantErr: myErr, wantDiffer: true},
		{in1: plain(b66), in2: wrap(b65a), wantErr: myErr, wantDiffer: true},
	} {
		differ, err := fs.CheckEqualReaders(tc.in1, tc.in2)
		if tc.wantErr == nil {
			assert.NoError(t, err)
		} else {
			assert.Equal(t, tc.wantErr, err)
		}
		assert.Equal(t, differ, tc.wantDiffer)
	}
}

0 comments on commit 5419292

Please sign in to comment.