Skip to content

Commit

Permalink
fix(perf): crawler and longevity should share is_binary check code (s…
Browse files Browse the repository at this point in the history
  • Loading branch information
asurkov authored Jan 19, 2018
1 parent 6c895b5 commit 2b94e8d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 29 deletions.
16 changes: 0 additions & 16 deletions src/main/kotlin/app/hashers/CodeLongevity.kt
Original file line number Diff line number Diff line change
Expand Up @@ -456,22 +456,6 @@ class CodeLongevity(
val newId = diff.getNewId().toObjectId()
Logger.trace { "old: '$oldPath', new: '$newPath'" }

// Skip binary files.
val fileId = if (newPath != DiffEntry.DEV_NULL) newId else oldId
try {
if (RawText.isBinary(repo.open(fileId).openStream())) {
continue
}
} catch (e: Exception) {
continue
//TODO(anatoly): better exception handling.
}

// TODO(alex): does this actually happen in the wild?
if (diff.changeType == DiffEntry.ChangeType.COPY) {
continue
}

// File was deleted, initialize the line array in the files map.
if (diff.changeType == DiffEntry.ChangeType.DELETE) {
val fileLoader = repo.open(oldId)
Expand Down
29 changes: 16 additions & 13 deletions src/main/kotlin/app/hashers/CommitCrawler.kt
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,22 @@ object CommitCrawler {
Logger.printCommit(commit.shortMessage, commit.name, perc)

val diffEntries = df.scan(parentCommit, commit)
val diffEdits = diffEntries.map { diff ->
val diffEdits = diffEntries
.filter { diff ->
diff.changeType != DiffEntry.ChangeType.COPY
}
.filter { diff ->
val fileId =
if (diff.getNewPath() != DiffEntry.DEV_NULL) {
diff.getNewId().toObjectId()
} else {
diff.getOldId().toObjectId()
}
val stream = try { repo.open(fileId).openStream() }
catch (e: Exception) { null }
stream != null && !RawText.isBinary(stream)
}
.map { diff ->
JgitDiff(diff, df.toFileHeader(diff).toEditList())
}
subscriber.onNext(JgitPair(commit, diffEdits))
Expand Down Expand Up @@ -180,18 +195,6 @@ object CommitCrawler {
private fun getDiffFiles(jgitRepo: Repository,
jgitDiffs: List<JgitDiff>) : List<DiffFile> {
return jgitDiffs
// Skip binary files.
.filter { (diff, _) ->
val fileId =
if (diff.getNewPath() != DiffEntry.DEV_NULL) {
diff.getNewId().toObjectId()
} else {
diff.getOldId().toObjectId()
}
val stream = try { jgitRepo.open(fileId).openStream() }
catch (e: Exception) { null }
stream != null && !RawText.isBinary(stream)
}
.map { (diff, edits) ->
// TODO(anatoly): Can throw an exception for large objects.
// Investigate the size limit.
Expand Down

0 comments on commit 2b94e8d

Please sign in to comment.