Skip to content

Commit

Permalink
Remove maxLineSize limit
Browse files Browse the repository at this point in the history
Some valid text files contain long lines but are still of interest to search.
This CR just stops IsText from returning false because of a long line. This does
mean search results will include long lines, which may not be of interest. That
will be fixed in another CR.

Change-Id: I11fd84dc3e377a477a77c2218f6623bc6c26dba0
  • Loading branch information
keegancsmith committed Apr 10, 2018
1 parent 41ea8a3 commit 7b8898f
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 12 deletions.
11 changes: 0 additions & 11 deletions indexbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@ func (b *IndexBuilder) AddFile(name string, content []byte) error {
}

const maxTrigramCount = 20000
const maxLineSize = 1000

// IsText returns false if the given contents are probably not source texts.
func IsText(content []byte) bool {
Expand All @@ -272,21 +271,11 @@ func IsText(content []byte) bool {

trigrams := map[ngram]struct{}{}

lineSize := 0

var cur [3]rune
for len(content) > 0 {
if content[0] == 0 {
return false
}
if content[0] == '\n' {
lineSize = 0
} else {
lineSize++
}
if lineSize > maxLineSize {
return false
}

r, sz := utf8.DecodeRune(content)
if r == utf8.RuneError {
Expand Down
3 changes: 2 additions & 1 deletion toc.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ const IndexFormatVersion = 15
// 2: Rank field for shards.
// 3: Rank documents within shards
// 4: Dedup file bugfix
const FeatureVersion = 4
// 5: Remove max line size limit
const FeatureVersion = 5

type indexTOC struct {
fileContents compoundSection
Expand Down

0 comments on commit 7b8898f

Please sign in to comment.