Skip to content

Commit

Permalink
feat: python list comprehension fact (sourcerer-io#117) (sourcerer-io…
Browse files Browse the repository at this point in the history
…#158)

* feat: python list comprehension fact

* feat: syntax stats facts

* wip: fix pr
  • Loading branch information
yaronskaya authored and anatolystansler committed Jan 10, 2018
1 parent 74fd2db commit d4a6d02
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/main/kotlin/app/extractors/Extractor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class Extractor : ExtractorInterface {
val TYPE_LANGUAGE = 1
val TYPE_LIBRARY = 2
val TYPE_KEYWORD = 3
val TYPE_SYNTAX = 4
val SEPARATOR = ">"
}

Expand Down
33 changes: 32 additions & 1 deletion src/main/kotlin/app/extractors/PythonExtractor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,42 @@ class PythonExtractor : ExtractorInterface {
}
val MULTI_IMPORT_TO_LIB =
ExtractorInterface.getMultipleImportsToLibraryMap(LANGUAGE_NAME)
val COMPREHENSION_MAP = "map"
val COMPREHENSION_LIST = "list"
}

/**
 * Extracts commit stats for the given files and appends syntax stats.
 *
 * Every file is first tagged with [LANGUAGE_NAME], then the stats produced
 * by the base implementation are extended with up to two entries of type
 * [Extractor.TYPE_SYNTAX]:
 *  - "map" usage: matches of `map(<first argument>,` in added/deleted lines;
 *  - "list" usage: occurrences of the '[' character in added/deleted lines.
 *
 * An entry is only appended when its added or deleted count is non-zero.
 *
 * @param files diff files of a single commit.
 * @return base stats followed by the syntax stats, if any.
 */
override fun extract(files: List<DiffFile>): List<CommitStats> {
    // The language is assigned purely as a side effect, hence forEach.
    files.forEach { file -> file.language = LANGUAGE_NAME }

    // The previous unconditional `return super.extract(files)` at this point
    // made everything below unreachable, so no syntax stats were ever sent.
    val stats = super.extract(files).toMutableList()

    // List comprehension fun fact.
    val allAdded = files.flatMap { file -> file.getAllAdded() }
    val allDeleted = files.flatMap { file -> file.getAllDeleted() }

    // Matches "map(" followed by its first argument up to the first comma.
    val mapRegex = Regex("""(map\([^,]+?,)""")
    val mapAllAdded = allAdded.map { line ->
        mapRegex.findAll(line).count()
    }.sum()
    val mapAllDeleted = allDeleted.map { line ->
        mapRegex.findAll(line).count()
    }.sum()

    // Rough proxy: every '[' character on a line is counted.
    val listAllAdded = allAdded.map { line ->
        line.count { c -> c == '[' }
    }.sum()
    val listAllDeleted = allDeleted.map { line ->
        line.count { c -> c == '[' }
    }.sum()

    if (mapAllAdded > 0 || mapAllDeleted > 0) {
        stats.add(CommitStats(
            mapAllAdded, mapAllDeleted, Extractor.TYPE_SYNTAX,
            tech = LANGUAGE_NAME + Extractor.SEPARATOR + COMPREHENSION_MAP))
    }

    if (listAllAdded > 0 || listAllDeleted > 0) {
        stats.add(CommitStats(
            listAllAdded, listAllDeleted, Extractor.TYPE_SYNTAX,
            tech = LANGUAGE_NAME + Extractor.SEPARATOR + COMPREHENSION_LIST))
    }

    return stats
}

override fun extractImports(fileContent: List<String>): List<String> {
Expand Down
3 changes: 3 additions & 0 deletions src/main/kotlin/app/hashers/FactHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import app.FactCodes
import app.Logger
import app.api.Api
import app.extractors.Extractor
import app.extractors.PythonExtractor
import app.model.Author
import app.model.Commit
import app.model.Fact
Expand Down Expand Up @@ -109,6 +110,7 @@ class FactHasher(private val serverRepo: Repo = Repo(),

fsLinesPerCommits[email]!![numCommits - 1] += lines.size

// Variable naming.
lines.forEach { line ->
val tokens = Extractor().tokenize(line)
val underscores = tokens.count { it.contains('_') }
Expand All @@ -124,6 +126,7 @@ class FactHasher(private val serverRepo: Repo = Repo(),
others
}

// Indentation.
fsIndentation[email]!![FactCodes.INDENTATION_SPACES] +=
lines.count { it.isNotBlank() && it.startsWith(" ") && !it.contains("\t")}
fsIndentation[email]!![FactCodes.INDENTATION_TABS] +=
Expand Down
61 changes: 61 additions & 0 deletions src/test/kotlin/test/tests/hashers/CommitHasherTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package test.tests.hashers

import app.api.MockApi
import app.extractors.Extractor
import app.hashers.CommitHasher
import app.hashers.CommitCrawler
import app.model.*
Expand All @@ -13,6 +14,7 @@ import org.eclipse.jgit.api.Git
import org.jetbrains.spek.api.Spek
import org.jetbrains.spek.api.dsl.given
import org.jetbrains.spek.api.dsl.it
import test.utils.TestRepo
import java.io.File
import java.util.stream.StreamSupport.stream
import kotlin.streams.toList
Expand Down Expand Up @@ -236,5 +238,64 @@ class CommitHasherTest : Spek({
}
}*/

// Verifies that Python "map" and "list" syntax stats are sent with commits.
given("commits with syntax stats") {

    // Each entry becomes the whole content of its own file and commit.
    val lines = listOf("x = [i**2 for i range(9999)]", "def fn()", "x = 1",
        "x = map(lambda x: x**2, range(9999))",
        "x = map(lambda x: x**2, map(lambda x: x**3, range(10))",
        "x = map(lambda x: x**2, range(10))," +
        "map(lambda x: x**3, range(10)))")

    val authorEmail = "[email protected]"
    val author = Author("Test", authorEmail)

    val testRepoPath = "../testrepo-commit-hasher-"
    val testRepo = TestRepo("${testRepoPath}python-facts")

    val emails = hashSetOf(authorEmail)
    val mockApi = MockApi(mockRepo = repo)
    val observable = CommitCrawler.getObservable(testRepo.git, repo)

    it("sends stats") {
        // One file and one commit per sample line.
        lines.forEachIndexed { index, content ->
            val fileName = "file$index.py"
            testRepo.createFile(fileName, listOf(content))
            testRepo.commit(message = "$content in $fileName", author = author)
        }

        val rehashes = lines.indices.map { "r$it" }
        val errors = mutableListOf<Throwable>()

        CommitHasher(repo, mockApi, rehashes, emails)
            .updateFromObservable(observable, { error -> errors.add(error) })
        if (errors.isNotEmpty()) {
            println(errors[0].message)
        }
        assertEquals(0, errors.size)

        // Collect the stats of every received commit, keeping syntax ones only.
        val syntaxStats = mockApi.receivedAddedCommits
            .flatMap { commit -> commit.stats }
            .filter { stat -> stat.type == Extractor.TYPE_SYNTAX }

        val mapStats = syntaxStats.filter { stat -> stat.tech == "python>map" }
        val listStats = syntaxStats.filter { stat -> stat.tech == "python>list" }

        // Three commits contain map() calls, five calls in total.
        assertEquals(3, mapStats.size)
        assertEquals(1, listStats.size)
        assertEquals(5, mapStats.map { stat -> stat.numLinesAdded }.sum())
        assertEquals(0, mapStats.map { stat -> stat.numLinesDeleted }.sum())

        assertEquals(1, listStats.map { stat -> stat.numLinesAdded }.sum())
        assertEquals(0, listStats.map { stat -> stat.numLinesDeleted }.sum())
    }

    afterGroup {
        testRepo.destroy()
    }
}

cleanRepos()
})
1 change: 1 addition & 0 deletions src/test/kotlin/test/tests/hashers/FactHasherTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,5 @@ class FactHasherTest : Spek({
testRepo.destroy()
}
}

})

0 comments on commit d4a6d02

Please sign in to comment.