Skip to content

Commit

Permalink
New option in filestream: include_files && check after symlink is…
Browse files Browse the repository at this point in the history
… resolved (elastic#25080)

## What does this PR do?

This PR adds support for a new feature in `filestream` input named `include_files`. This option is the counterpart of `exclude_files`. It expects a list of regexes and it only includes files that match the regexes.

The PR also adds one more check: after symlinks are resolved, the original file is tested again against the regexes in `include_files` and `exclude_files`.

## Why is it important?

`exclude_files` was not enough for disallowing files.
  • Loading branch information
kvch authored Apr 22, 2021
1 parent 639998c commit dfb98b2
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 9 deletions.
4 changes: 4 additions & 0 deletions filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ filebeat.inputs:
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Include files. A list of regular expressions to match. Filebeat keeps only the files that
# match any regular expression from the list. By default, all files are kept.
#prospector.scanner.include_files: ['/var/log/.*']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

Expand Down
4 changes: 4 additions & 0 deletions filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,10 @@ filebeat.inputs:
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Include files. A list of regular expressions to match. Filebeat keeps only the files that
# match any regular expression from the list. By default, all files are kept.
#prospector.scanner.include_files: ['/var/log/.*']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

Expand Down
24 changes: 23 additions & 1 deletion filebeat/input/filestream/fswatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type watcherFactory func(paths []string, cfg *common.Config) (loginp.FSWatcher,
type fileScanner struct {
paths []string
excludedFiles []match.Matcher
includedFiles []match.Matcher
symlinks bool

log *logp.Logger
Expand Down Expand Up @@ -234,6 +235,7 @@ func (w *fileWatcher) GetFiles() map[string]os.FileInfo {

// fileScannerConfig carries the settings handed to newFileScanner. Each field's
// `config` tag names the settings key it corresponds to (the reference
// configuration lists them under `prospector.scanner.*`).
type fileScannerConfig struct {
// ExcludedFiles is the list of matchers configured via `exclude_files`;
// files matching any of them are skipped by the scanner.
ExcludedFiles []match.Matcher `config:"exclude_files"`
// IncludedFiles is the list of matchers configured via `include_files`;
// when it is non-empty, only files matching at least one matcher are kept.
IncludedFiles []match.Matcher `config:"include_files"`
// Symlinks is the `symlinks` setting.
// NOTE(review): presumably enables following symbolic links — confirm in GetFiles.
Symlinks bool `config:"symlinks"`
// RecursiveGlob is the `recursive_glob` setting; the reference configuration
// describes it as expanding "**" patterns into regular glob patterns.
RecursiveGlob bool `config:"recursive_glob"`
}
Expand All @@ -249,6 +251,7 @@ func newFileScanner(paths []string, cfg fileScannerConfig) (loginp.FSScanner, er
fs := fileScanner{
paths: paths,
excludedFiles: cfg.ExcludedFiles,
includedFiles: cfg.IncludedFiles,
symlinks: cfg.Symlinks,
log: logp.NewLogger(scannerName),
}
Expand Down Expand Up @@ -337,7 +340,7 @@ func (s *fileScanner) GetFiles() map[string]os.FileInfo {
}

func (s *fileScanner) shouldSkipFile(file string) bool {
if s.isFileExcluded(file) {
if s.isFileExcluded(file) || !s.isFileIncluded(file) {
s.log.Debugf("Exclude file: %s", file)
return true
}
Expand All @@ -359,6 +362,18 @@ func (s *fileScanner) shouldSkipFile(file string) bool {
return true
}

originalFile, err := filepath.EvalSymlinks(file)
if err != nil {
s.log.Debugf("finding path to original file has failed %s: %+v", file, err)
return true
}
// Check if original file is included to make sure we are not reading from
// unwanted files.
if s.isFileExcluded(originalFile) || !s.isFileIncluded(originalFile) {
s.log.Debugf("Exclude original file: %s", file)
return true
}

return false
}

Expand All @@ -384,6 +399,13 @@ func (s *fileScanner) isFileExcluded(file string) bool {
return len(s.excludedFiles) > 0 && s.matchAny(s.excludedFiles, file)
}

// isFileIncluded reports whether file passes the include filter. An empty
// includedFiles list imposes no restriction, so every file counts as included;
// otherwise file must match at least one of the configured matchers.
func (s *fileScanner) isFileIncluded(file string) bool {
	return len(s.includedFiles) == 0 || s.matchAny(s.includedFiles, file)
}

// matchAny checks if the text matches any of the regular expressions
func (s *fileScanner) matchAny(matchers []match.Matcher, text string) bool {
for _, m := range matchers {
Expand Down
9 changes: 9 additions & 0 deletions filebeat/input/filestream/fswatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func TestFileScanner(t *testing.T) {
testCases := map[string]struct {
paths []string
excludedFiles []match.Matcher
includedFiles []match.Matcher
symlinks bool
expectedFiles []string
}{
Expand All @@ -66,6 +67,13 @@ func TestFileScanner(t *testing.T) {
},
expectedFiles: []string{includedFilePath},
},
"only include included_files": {
paths: []string{excludedFilePath, includedFilePath},
includedFiles: []match.Matcher{
match.MustCompile(includedFileName),
},
expectedFiles: []string{includedFilePath},
},
"skip directories": {
paths: []string{filepath.Join(tmpDir, directoryPath)},
expectedFiles: []string{},
Expand All @@ -78,6 +86,7 @@ func TestFileScanner(t *testing.T) {
t.Run(name, func(t *testing.T) {
cfg := fileScannerConfig{
ExcludedFiles: test.excludedFiles,
IncludedFiles: test.includedFiles,
Symlinks: test.symlinks,
RecursiveGlob: false,
}
Expand Down
42 changes: 34 additions & 8 deletions filebeat/input/filestream/fswatch_test_non_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -44,13 +45,14 @@ func TestFileScannerSymlinks(t *testing.T) {
testCases := map[string]struct {
paths []string
excludedFiles []match.Matcher
includedFiles []match.Matcher
symlinks bool
expectedFiles []string
}{
// covers test_input.py/test_skip_symlinks
"skip symlinks": {
paths: []string{
filepath.Join(tmpDir, "symlink_to_included_file"),
filepath.Join(tmpDir, "symlink_to_0"),
filepath.Join(tmpDir, "included_file"),
},
symlinks: false,
Expand All @@ -60,22 +62,37 @@ func TestFileScannerSymlinks(t *testing.T) {
},
"return a file once if symlinks are enabled": {
paths: []string{
filepath.Join(tmpDir, "symlink_to_included_file"),
filepath.Join(tmpDir, "symlink_to_0"),
filepath.Join(tmpDir, "included_file"),
},
symlinks: true,
expectedFiles: []string{
mustAbsPath(filepath.Join(tmpDir, "included_file")),
},
},
"do not return symlink if original file is not allowed": {
paths: []string{
filepath.Join(tmpDir, "symlink_to_1"),
filepath.Join(tmpDir, "included_file"),
},
excludedFiles: []match.Matcher{
match.MustCompile("original_" + excludedFileName),
},
symlinks: true,
expectedFiles: []string{
mustAbsPath(filepath.Join(tmpDir, "included_file")),
},
},
}

err := os.Symlink(
mustAbsPath(filepath.Join(tmpDir, "included_file")),
mustAbsPath(filepath.Join(tmpDir, "symlink_to_included_file")),
)
if err != nil {
t.Fatal(err)
for i, filename := range []string{"included_file", "excluded_file"} {
err := os.Symlink(
mustAbsPath(filepath.Join(tmpDir, "original_"+filename)),
mustAbsPath(filepath.Join(tmpDir, "symlink_to_"+strconv.Itoa(i))),
)
if err != nil {
t.Fatal(err)
}
}

for name, test := range testCases {
Expand All @@ -84,6 +101,7 @@ func TestFileScannerSymlinks(t *testing.T) {
t.Run(name, func(t *testing.T) {
cfg := fileScannerConfig{
ExcludedFiles: test.excludedFiles,
IncludedFiles: test.includedFiles,
Symlinks: true,
RecursiveGlob: false,
}
Expand Down Expand Up @@ -150,3 +168,11 @@ func TestFileWatcherRenamedFile(t *testing.T) {
assert.Equal(t, testPath, evt.OldPath)
assert.Equal(t, renamedPath, evt.NewPath)
}

// mustAbsPath returns the absolute form of filename as computed by
// filepath.Abs. It is a test helper: instead of returning an error it panics,
// so call sites inside table literals stay one-liners.
func mustAbsPath(filename string) string {
	resolved, absErr := filepath.Abs(filename)
	if absErr == nil {
		return resolved
	}
	// filepath.Abs can fail, for example when the current working directory
	// cannot be determined; there is nothing sensible a test can do then.
	panic(absErr)
}
4 changes: 4 additions & 0 deletions x-pack/filebeat/filebeat.reference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2692,6 +2692,10 @@ filebeat.inputs:
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']

# Include files. A list of regular expressions to match. Filebeat keeps only the files that
# match any regular expression from the list. By default, all files are kept.
#prospector.scanner.include_files: ['/var/log/.*']

# Expand "**" patterns into regular glob patterns.
#prospector.scanner.recursive_glob: true

Expand Down

0 comments on commit dfb98b2

Please sign in to comment.