check: Add --download flag to check all the data, not just hashes

Milly · Feb 13, 2017 · 5419292 · 5419292
1 parent 370f242
commit 5419292
Show file tree

Hide file tree

Showing 4 changed files with 245 additions and 34 deletions.
diff --git a/cmd/check/check.go b/cmd/check/check.go
@@ -6,24 +6,39 @@ import (
 	"github.com/spf13/cobra"
 )
 
+// Globals
+var (
+	download = false
+)
+
 func init() {
 	cmd.Root.AddCommand(commandDefintion)
+	commandDefintion.Flags().BoolVarP(&download, "download", "", download, "Check by downloading rather than with hash.")
 }
 
 var commandDefintion = &cobra.Command{
 	Use:   "check source:path dest:path",
 	Short: `Checks the files in the source and destination match.`,
 	Long: `
-Checks the files in the source and destination match.  It
-compares sizes and MD5SUMs and prints a report of files which
-don't match.  It doesn't alter the source or destination.
+Checks the files in the source and destination match.  It compares
+sizes and hashes (MD5 or SHA1) and logs a report of files which don't
+match.  It doesn't alter the source or destination.
+
+If you supply the --size-only flag, it will only compare the sizes not
+the hashes as well.  Use this for a quick check.
 
-` + "`" + `--size-only` + "`" + ` may be used to only compare the sizes, not the MD5SUMs.
+If you supply the --download flag, it will download the data from
+both remotes and check them against each other on the fly.  This can
+be useful for remotes that don't support hashes or if you really want
+to check all the data.
 `,
 	Run: func(command *cobra.Command, args []string) {
 		cmd.CheckArgs(2, 2, command, args)
 		fsrc, fdst := cmd.NewFsSrcDst(args)
 		cmd.Run(false, false, command, func() error {
+			if download {
+				return fs.CheckDownload(fdst, fsrc)
+			}
 			return fs.Check(fdst, fsrc)
 		})
 	},

diff --git a/cmd/cryptcheck/cryptcheck.go b/cmd/cryptcheck/cryptcheck.go
@@ -68,13 +68,6 @@ func cryptCheck(fdst, fsrc fs.Fs) error {
 	// it returns true if differences were found
 	// it also returns whether it couldn't be hashed
 	checkIdentical := func(dst, src fs.Object) (differ bool, noHash bool) {
-		fs.Stats.Checking(src.Remote())
-		defer fs.Stats.DoneChecking(src.Remote())
-		if src.Size() != dst.Size() {
-			fs.Stats.Error()
-			fs.Errorf(src, "Sizes differ")
-			return true, false
-		}
 		cryptDst := dst.(*crypt.Object)
 		underlyingDst := cryptDst.UnWrap()
 		underlyingHash, err := underlyingDst.Hash(hashType)

diff --git a/fs/operations.go b/fs/operations.go
@@ -3,6 +3,7 @@
 package fs
 
 import (
+	"bytes"
 	"fmt"
 	"io"
 	"log"
@@ -669,29 +670,19 @@ func Overlapping(fdst, fsrc Info) bool {
 // it returns true if differences were found
 // it also returns whether it couldn't be hashed
 func checkIdentical(dst, src Object) (differ bool, noHash bool) {
-	Stats.Checking(src.Remote())
-	defer Stats.DoneChecking(src.Remote())
-	if src.Size() != dst.Size() {
-		Stats.Error()
-		Errorf(src, "Sizes differ")
+	same, hash, err := CheckHashes(src, dst)
+	if err != nil {
+		// CheckHashes will log and count errors
 		return true, false
 	}
-	if !Config.SizeOnly {
-		same, hash, err := CheckHashes(src, dst)
-		if err != nil {
-			// CheckHashes will log and count errors
-			return true, false
-		}
-		if hash == HashNone {
-			return false, true
-		}
-		if !same {
-			Stats.Error()
-			Errorf(src, "%v differ", hash)
-			return true, false
-		}
+	if hash == HashNone {
+		return false, true
+	}
+	if !same {
+		Stats.Error()
+		Errorf(src, "%v differ", hash)
+		return true, false
 	}
-	Debugf(src, "OK")
 	return false, false
 }
 
@@ -746,15 +737,31 @@ func CheckFn(fdst, fsrc Fs, checkFunction func(a, b Object) (differ bool, noHash
 		close(checks)
 	}()
 
+	checkIdentical := func(dst, src Object) (differ bool, noHash bool) {
+		Stats.Checking(src.Remote())
+		defer Stats.DoneChecking(src.Remote())
+		if src.Size() != dst.Size() {
+			Stats.Error()
+			Errorf(src, "Sizes differ")
+			return true, false
+		}
+		if Config.SizeOnly {
+			return false, false
+		}
+		return checkFunction(dst, src)
+	}
+
 	var checkerWg sync.WaitGroup
 	checkerWg.Add(Config.Checkers)
 	for i := 0; i < Config.Checkers; i++ {
 		go func() {
 			defer checkerWg.Done()
 			for check := range checks {
-				differ, noHash := checkFunction(check[0], check[1])
+				differ, noHash := checkIdentical(check[0], check[1])
 				if differ {
 					atomic.AddInt32(&differences, 1)
+				} else {
+					Debugf(check[0], "OK")
 				}
 				if noHash {
 					atomic.AddInt32(&noHashes, 1)
@@ -780,6 +787,88 @@ func Check(fdst, fsrc Fs) error {
 	return CheckFn(fdst, fsrc, checkIdentical)
 }
 
+// ReadFill reads as much data from r into buf as it can
+//
+// It reads until the buffer is full or r.Read returned an error.
+//
+// This is like io.ReadFull but for when you just want as much data
+// as possible, not an exact size of block.
+func ReadFill(r io.Reader, buf []byte) (n int, err error) {
+	var nn int
+	for n < len(buf) && err == nil {
+		nn, err = r.Read(buf[n:])
+		n += nn
+	}
+	return n, err
+}
+
+// CheckEqualReaders checks to see if in1 and in2 have the same
+// content when read.
+//
+// it returns true if differences were found
+func CheckEqualReaders(in1, in2 io.Reader) (differ bool, err error) {
+	const bufSize = 64 * 1024
+	buf1 := make([]byte, bufSize)
+	buf2 := make([]byte, bufSize)
+	for {
+		n1, err1 := ReadFill(in1, buf1)
+		n2, err2 := ReadFill(in2, buf2)
+		// check errors
+		if err1 != nil && err1 != io.EOF {
+			return true, err1
+		} else if err2 != nil && err2 != io.EOF {
+			return true, err2
+		}
+		// err1 && err2 are nil or io.EOF here
+		// process the data
+		if n1 != n2 || !bytes.Equal(buf1[:n1], buf2[:n2]) {
+			return true, nil
+		}
+		// if both streams finished then we have finished
+		if err1 == io.EOF && err2 == io.EOF {
+			break
+		}
+	}
+	return false, nil
+}
+
+// CheckIdentical checks to see if dst and src are identical by
+// reading all their bytes if necessary.
+//
+// it returns true if differences were found
+func CheckIdentical(dst, src Object) (differ bool, err error) {
+	in1, err := dst.Open()
+	if err != nil {
+		return true, errors.Wrapf(err, "failed to open %q", dst)
+	}
+	in1 = NewAccountWithBuffer(in1, dst) // account and buffer the transfer
+	defer CheckClose(in1, &err)
+
+	in2, err := src.Open()
+	if err != nil {
+		return true, errors.Wrapf(err, "failed to open %q", src)
+	}
+	in2 = NewAccountWithBuffer(in2, src) // account and buffer the transfer
+	defer CheckClose(in2, &err)
+
+	return CheckEqualReaders(in1, in2)
+}
+
+// CheckDownload checks the files in fsrc and fdst according to Size
+// and the actual contents of the files.
+func CheckDownload(fdst, fsrc Fs) error {
+	check := func(a, b Object) (differ bool, noHash bool) {
+		differ, err := CheckIdentical(a, b)
+		if err != nil {
+			Stats.Error()
+			Errorf(a, "Failed to download: %v", err)
+			return true, true
+		}
+		return differ, false
+	}
+	return CheckFn(fdst, fsrc, check)
+}
+
 // ListFn lists the Fs to the supplied function
 //
 // Lists in parallel which may get them out of order

diff --git a/fs/operations_test.go b/fs/operations_test.go
@@ -21,8 +21,10 @@ package fs_test
 
 import (
 	"bytes"
+	"errors"
 	"flag"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"log"
 	"os"
@@ -469,14 +471,14 @@ func TestDelete(t *testing.T) {
 	fstest.CheckItems(t, r.fremote, file3)
 }
 
-func TestCheck(t *testing.T) {
+func testCheck(t *testing.T, checkFunction func(fdst, fsrc fs.Fs) error) {
 	r := NewRun(t)
 	defer r.Finalise()
 
 	check := func(i int, wantErrors int64) {
 		fs.Debugf(r.fremote, "%d: Starting check test", i)
 		oldErrors := fs.Stats.GetErrors()
-		err := fs.Check(r.flocal, r.fremote)
+		err := checkFunction(r.flocal, r.fremote)
 		gotErrors := fs.Stats.GetErrors() - oldErrors
 		if wantErrors == 0 && err != nil {
 			t.Errorf("%d: Got error when not expecting one: %v", i, err)
@@ -517,6 +519,14 @@ func TestCheck(t *testing.T) {
 	check(5, 0)
 }
 
+func TestCheck(t *testing.T) {
+	testCheck(t, fs.Check)
+}
+
+func TestCheckDownload(t *testing.T) {
+	testCheck(t, fs.CheckDownload)
+}
+
 func TestCheckSizeOnly(t *testing.T) {
 	fs.Config.SizeOnly = true
 	defer func() { fs.Config.SizeOnly = false }()
@@ -954,3 +964,107 @@ func TestListDirSorted(t *testing.T) {
 	require.Len(t, items, 1)
 	assert.Equal(t, "sub dir/sub sub dir/", str(0))
 }
+
+type byteReader struct {
+	c byte
+}
+
+func (br *byteReader) Read(p []byte) (n int, err error) {
+	if br.c == 0 {
+		err = io.EOF
+	} else if len(p) >= 1 {
+		p[0] = br.c
+		n = 1
+		br.c--
+	}
+	return
+}
+
+func TestReadFill(t *testing.T) {
+	buf := []byte{9, 9, 9, 9, 9}
+
+	n, err := fs.ReadFill(&byteReader{0}, buf)
+	assert.Equal(t, io.EOF, err)
+	assert.Equal(t, 0, n)
+	assert.Equal(t, []byte{9, 9, 9, 9, 9}, buf)
+
+	n, err = fs.ReadFill(&byteReader{3}, buf)
+	assert.Equal(t, io.EOF, err)
+	assert.Equal(t, 3, n)
+	assert.Equal(t, []byte{3, 2, 1, 9, 9}, buf)
+
+	n, err = fs.ReadFill(&byteReader{8}, buf)
+	assert.Equal(t, nil, err)
+	assert.Equal(t, 5, n)
+	assert.Equal(t, []byte{8, 7, 6, 5, 4}, buf)
+}
+
+type errorReader struct {
+	err error
+}
+
+func (er errorReader) Read(p []byte) (n int, err error) {
+	return 0, er.err
+}
+
+func TestCheckEqualReaders(t *testing.T) {
+	b65a := make([]byte, 65*1024)
+	b65b := make([]byte, 65*1024)
+	b65b[len(b65b)-1] = 1
+	b66 := make([]byte, 66*1024)
+
+	differ, err := fs.CheckEqualReaders(bytes.NewBuffer(b65a), bytes.NewBuffer(b65a))
+	assert.NoError(t, err)
+	assert.Equal(t, differ, false)
+
+	differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), bytes.NewBuffer(b65b))
+	assert.NoError(t, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), bytes.NewBuffer(b66))
+	assert.NoError(t, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b66), bytes.NewBuffer(b65a))
+	assert.NoError(t, err)
+	assert.Equal(t, differ, true)
+
+	myErr := errors.New("sentinel")
+	wrap := func(b []byte) io.Reader {
+		r := bytes.NewBuffer(b)
+		e := errorReader{myErr}
+		return io.MultiReader(r, e)
+	}
+
+	differ, err = fs.CheckEqualReaders(wrap(b65a), bytes.NewBuffer(b65a))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(wrap(b65a), bytes.NewBuffer(b65b))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(wrap(b65a), bytes.NewBuffer(b66))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(wrap(b66), bytes.NewBuffer(b65a))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), wrap(b65a))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), wrap(b65b))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), wrap(b66))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+
+	differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b66), wrap(b65a))
+	assert.Equal(t, myErr, err)
+	assert.Equal(t, differ, true)
+}