Skip to content

Commit

Permalink
fix problem with token count vectorizer array order
Browse files: browse the repository at this point in the history
  • Loading branch information
akondas committed Jul 14, 2016
1 parent 7c0767c commit 9f140d5
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
2 changes: 2 additions & 0 deletions src/Phpml/FeatureExtraction/TokenCountVectorizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ private function transformSample(string &$sample)
}
}

+ ksort($counts);
+
$sample = $counts;
}

Expand Down
14 changes: 7 additions & 7 deletions tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ public function testTransformationWithWhitespaceTokenizer()
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());

$vectorizer->fit($samples);
- $this->assertEquals($vocabulary, $vectorizer->getVocabulary());
+ $this->assertSame($vocabulary, $vectorizer->getVocabulary());

$vectorizer->transform($samples);
- $this->assertEquals($tokensCounts, $samples);
+ $this->assertSame($tokensCounts, $samples);
}

public function testTransformationWithMinimumDocumentTokenCountFrequency()
Expand Down Expand Up @@ -74,10 +74,10 @@ public function testTransformationWithMinimumDocumentTokenCountFrequency()
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5);

$vectorizer->fit($samples);
- $this->assertEquals($vocabulary, $vectorizer->getVocabulary());
+ $this->assertSame($vocabulary, $vectorizer->getVocabulary());

$vectorizer->transform($samples);
- $this->assertEquals($tokensCounts, $samples);
+ $this->assertSame($tokensCounts, $samples);

// word at least once in all samples
$samples = [
Expand All @@ -96,7 +96,7 @@ public function testTransformationWithMinimumDocumentTokenCountFrequency()
$vectorizer->fit($samples);
$vectorizer->transform($samples);

- $this->assertEquals($tokensCounts, $samples);
+ $this->assertSame($tokensCounts, $samples);
}

public function testTransformationWithStopWords()
Expand Down Expand Up @@ -131,9 +131,9 @@ public function testTransformationWithStopWords()
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords);

$vectorizer->fit($samples);
- $this->assertEquals($vocabulary, $vectorizer->getVocabulary());
+ $this->assertSame($vocabulary, $vectorizer->getVocabulary());

$vectorizer->transform($samples);
- $this->assertEquals($tokensCounts, $samples);
+ $this->assertSame($tokensCounts, $samples);
}
}

0 comments on commit 9f140d5

Please sign in to comment.