Skip to content

Commit

Permalink
Revert part of aquasecurity#51 (aquasecurity#64)
Browse files Browse the repository at this point in the history
* Fix getFilteredTarballBuffer function

* Add dotfilepath tar.gz

* Add dot filepath test

* fix test name

* change test filename

* Add empty files

* fix(docker): remove getFilteredTarballBuffer

* test(docker): remove tests for TestDocker_ExtractLayerWorker

* test(docker): remove unused files

Co-authored-by: Teppei Fukuda <[email protected]>
  • Loading branch information
masahiro331 and knqyf263 committed Jan 5, 2020
1 parent 0939236 commit a3284d4
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 197 deletions.
99 changes: 26 additions & 73 deletions extractor/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ func (d Extractor) Extract(ctx context.Context, imageName string, filenames []st
for _, ref := range m.Manifest.Layers {
layerIDs = append(layerIDs, string(ref.Digest))
go func(dig digest.Digest) {
d.extractLayerWorker(dig, r, ctx, image, errCh, layerCh, filenames)
d.extractLayerWorker(dig, r, ctx, image, errCh, layerCh)
}(ref.Digest)
}

Expand Down Expand Up @@ -289,50 +289,40 @@ func (d Extractor) extractLayerFiles(ctx context.Context, layerCh chan layer, er
return nil
}

func (d Extractor) extractLayerWorker(dig digest.Digest, r *registry.Registry, ctx context.Context, image registry.Image, errCh chan error, layerCh chan layer, filenames []string) {
var tarContent bytes.Buffer
func (d Extractor) extractLayerWorker(dig digest.Digest, r *registry.Registry, ctx context.Context, image registry.Image, errCh chan error, layerCh chan layer) {
var cacheContent []byte
var cacheBuf bytes.Buffer

found, _ := d.cache.Get(LayerTarsBucket, string(dig), &cacheContent)

if found {
b, errTar := extractTarFromTarZstd(cacheContent)
n, errWrite := cacheBuf.Write(b)
if errTar != nil || len(b) <= 0 || errWrite != nil || n <= 0 {
found = false
}
}

if !found {
rc, err := r.DownloadLayer(ctx, image.Path, dig)
if err != nil {
errCh <- xerrors.Errorf("failed to download the layer(%s): %w", dig, err)
b, err := extractTarFromTarZstd(cacheContent)
if err == nil && len(b) > 0 {
cacheBuf := bytes.NewBuffer(b)
layerCh <- layer{ID: dig, Content: ioutil.NopCloser(cacheBuf)}
return
}
defer rc.Close()

// read the incoming gzip from the layer
gzipReader, err := gzip.NewReader(rc)
if err != nil {
errCh <- xerrors.Errorf("could not init gzip reader: %w", err)
return
}
defer gzipReader.Close()

tarReader := tar.NewReader(io.TeeReader(gzipReader, &tarContent))
}

if len(filenames) > 0 {
if cacheBuf, err = getFilteredTarballBuffer(tarReader, filenames); err != nil {
errCh <- err
return
}
}
rc, err := r.DownloadLayer(ctx, image.Path, dig)
if err != nil {
errCh <- xerrors.Errorf("failed to download the layer(%s): %w", dig, err)
return
}
defer rc.Close()

d.storeLayerInCache(cacheBuf, dig)
// read the incoming gzip from the layer
gzipReader, err := gzip.NewReader(rc)
if err != nil {
errCh <- xerrors.Errorf("could not init gzip reader: %w", err)
return
}
defer gzipReader.Close()

b := bytes.NewBuffer(nil)
tr := io.TeeReader(gzipReader, b)
d.storeLayerInCache(tr, dig)
layerCh <- layer{ID: dig, Content: ioutil.NopCloser(b)}

layerCh <- layer{ID: dig, Content: ioutil.NopCloser(&cacheBuf)}
return
}

Expand All @@ -352,48 +342,11 @@ func extractTarFromTarZstd(cacheContent []byte) ([]byte, error) {
return tarContent, nil
}

// getFilteredTarballBuffer copies the entries of tr whose header name appears
// in requiredFilenames into a new in-memory tar archive and returns it.
//
// Each kept entry is written with only its name and size carried over from
// the source header, and with the mode fixed at 0600. Entries whose names are
// not listed are skipped.
//
// On failure the returned error message is prefixed with ErrFailedCacheWrite,
// and the buffer holds whatever had been written up to that point.
func getFilteredTarballBuffer(tr *tar.Reader, requiredFilenames []string) (bytes.Buffer, error) {
	var cacheBuf bytes.Buffer
	// Create a new tar to store in the cache.
	twc := tar.NewWriter(&cacheBuf)

	// Walk the source archive and keep only the requested files.
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break // end of archive
		}
		if err != nil {
			return cacheBuf, xerrors.Errorf("%s: invalid tar: %w", ErrFailedCacheWrite, err)
		}
		if !utils.StringInSlice(hdr.Name, requiredFilenames) {
			continue
		}

		hdrtwc := &tar.Header{
			Name: hdr.Name,
			Mode: 0600,
			Size: hdr.Size,
		}
		if err := twc.WriteHeader(hdrtwc); err != nil {
			return cacheBuf, xerrors.Errorf("%s: %s", ErrFailedCacheWrite, err)
		}
		if _, err := io.Copy(twc, tr); err != nil {
			return cacheBuf, xerrors.Errorf("%s: %s", ErrFailedCacheWrite, err)
		}
	}

	// Close before returning rather than in a defer: the buffer is returned by
	// value, so bytes written by a deferred Close (padding of the last entry
	// and the end-of-archive trailer) would land in the local buffer only and
	// never reach the caller.
	if err := twc.Close(); err != nil {
		return cacheBuf, xerrors.Errorf("%s: %s", ErrFailedCacheWrite, err)
	}
	return cacheBuf, nil
}

func (d Extractor) storeLayerInCache(cacheBuf bytes.Buffer, dig digest.Digest) {
func (d Extractor) storeLayerInCache(r io.Reader, dig digest.Digest) {
// compress tar to zstd before storing to cache
var dst bytes.Buffer
w, _ := zstd.NewWriter(&dst, zstd.WithEncoderLevel(zstd.SpeedFastest))
_, _ = io.Copy(w, &cacheBuf)
_, _ = io.Copy(w, r)
_ = w.Close()

if err := d.cache.Set(LayerTarsBucket, string(dig), dst.Bytes()); err != nil {
Expand Down
124 changes: 0 additions & 124 deletions extractor/docker/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ import (
"github.com/aquasecurity/fanal/extractor"
"github.com/aquasecurity/fanal/types"
"github.com/docker/docker/client"
"github.com/genuinetools/reg/registry"
"github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TODO: Use a memory-based FS rather than the actual filesystem
Expand Down Expand Up @@ -405,127 +402,6 @@ func TestDockerExtractor_Extract(t *testing.T) {
}
}

func TestDocker_ExtractLayerWorker(t *testing.T) {
goodtarzstdgolden, _ := ioutil.ReadFile("testdata/testdir.tar.zstd")
goodReturnedTarContent, _ := ioutil.ReadFile("testdata/goodTarContent.golden")

testCases := []struct {
name string
cacheHit bool
garbageCache bool
requiredFiles []string
expectedCacheContents []byte
expectedReturnedTarContent []byte
}{
{
name: "happy path with cache miss and write back",
cacheHit: false,
requiredFiles: []string{"testdir/helloworld.txt", "testdir/badworld.txt"},
expectedCacheContents: goodtarzstdgolden,
expectedReturnedTarContent: goodReturnedTarContent,
},
{
name: "happy path with cache hit with garbage cache and write back",
cacheHit: true,
garbageCache: true,
requiredFiles: []string{"testdir/helloworld.txt", "testdir/badworld.txt"},
expectedCacheContents: goodtarzstdgolden,
expectedReturnedTarContent: goodReturnedTarContent,
},
{
name: "happy path with cache hit",
cacheHit: true,
expectedCacheContents: goodtarzstdgolden,
expectedReturnedTarContent: goodReturnedTarContent,
},
{
name: "happy path with cache miss but no write back",
cacheHit: false,
expectedCacheContents: []byte{0x28, 0xb5, 0x2f, 0xfd, 0x4, 0x60, 0x1, 0x0, 0x0, 0x99, 0xe9, 0xd8, 0x51}, // just the empty tar header
expectedReturnedTarContent: []byte{},
},
}

for _, tc := range testCases {
inputDigest := digest.Digest("sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b")

ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
httpPath := r.URL.String()
switch {
case strings.Contains(httpPath, "/v2/library/fooimage/blobs/sha256:62d8908bee94c202b2d35224a221aaa2058318bfa9879fa541efaecba272331b"):
layerData, _ := ioutil.ReadFile("testdata/testdir.tar.gz")
_, _ = w.Write(layerData)
default:
assert.FailNow(t, "unexpected path accessed: ", fmt.Sprintf("%s %s", r.URL.String(), tc.name))
}
}))
defer ts.Close()

c, err := client.NewClientWithOpts(client.WithHost(ts.URL))
assert.NoError(t, err)

// setup cache
s, tmpDir, err := setupCache()
require.NoError(t, err, tc.name)
defer os.RemoveAll(tmpDir)

if tc.cacheHit {
switch tc.garbageCache {
case true:
garbage, _ := ioutil.ReadFile("testdata/invalidgzvalidtar.tar.gz")
assert.NoError(t, s.Set(LayerTarsBucket, string(inputDigest), garbage))
default:
assert.NoError(t, s.Set(LayerTarsBucket, string(inputDigest), goodtarzstdgolden))
}
}

de := Extractor{
Option: types.DockerOption{
AuthURL: ts.URL,
NonSSL: true,
SkipPing: true,
Timeout: time.Second * 1000,
},
Client: c,
cache: s,
}

tsUrl := strings.TrimPrefix(ts.URL, "http://")
inputImage := registry.Image{
Domain: tsUrl,
Path: "library/fooimage",
Tag: "latest",
}

layerCh := make(chan layer)
errCh := make(chan error)
r, err := de.createRegistryClient(context.TODO(), inputImage.Domain)
go func() {
de.extractLayerWorker(inputDigest, r, context.TODO(), inputImage, errCh, layerCh, tc.requiredFiles)
}()

var errRecieved error
var layerReceived layer

select {
case errRecieved = <-errCh:
assert.FailNow(t, "unexpected error received, err: ", fmt.Sprintf("%s, %s", errRecieved, tc.name))
case layerReceived = <-layerCh:
assert.Equal(t, inputDigest, layerReceived.ID, tc.name)
got, _ := ioutil.ReadAll(layerReceived.Content)
assert.Equal(t, tc.expectedReturnedTarContent, got, tc.name)
}

// check cache contents
var actualCacheContents []byte
found, err := s.Get(LayerTarsBucket, string(inputDigest), &actualCacheContents)

assert.True(t, found, tc.name)
assert.NoError(t, err, tc.name)
assert.Equal(t, tc.expectedCacheContents, actualCacheContents, tc.name)
}
}

func TestDocker_ExtractLayerFiles(t *testing.T) {
de := Extractor{}

Expand Down
Binary file removed extractor/docker/testdata/goodTarContent.golden
Binary file not shown.
Binary file removed extractor/docker/testdata/testdir.tar.gz
Binary file not shown.
Binary file removed extractor/docker/testdata/testdir.tar.zstd
Binary file not shown.

0 comments on commit a3284d4

Please sign in to comment.