docs(suites): add suites to docs #56

Merged 2 commits · Oct 4, 2023
Changes from 1 commit
chore(test): add tests for ToxicityExtractor
BigicecreamTaken committed Oct 4, 2023
commit 312b59ff67c447c77907c5a73650c5de648357f9
24 changes: 23 additions & 1 deletion test/nlp/unit/test_metadata_extractors.py
@@ -44,6 +44,7 @@
from elemeta.nlp.extractors.high_level.syllable_count import SyllableCount
from elemeta.nlp.extractors.high_level.text_complexity import TextComplexity
from elemeta.nlp.extractors.high_level.text_length import TextLength
from elemeta.nlp.extractors.high_level.toxicity_extractor import ToxicityExtractor
from elemeta.nlp.extractors.high_level.unique_word_count import UniqueWordCount
from elemeta.nlp.extractors.high_level.unique_word_ratio import UniqueWordRatio
from elemeta.nlp.extractors.high_level.out_of_vocabulary_count import (
@@ -63,7 +64,28 @@
)


# TODO: for all extractors, check tokenizer differences (e.g., the Twitter tokenizer vs. the default) and verify whether the parse actually differs.
@pytest.mark.parametrize(
    "name, text, min_toxicity_threshold, max_toxicity_threshold",
    [
        ("non-toxic sentence", "Elemeta is amazing", 0.0, 0.5),
        ("toxic sentence", "Kill yourself you piece of shit", 0.5, 1.0),
        (
            "many sentences",
            "The presence of toxicity in a sentence is hard to measure. A negative sentence does not imply toxicity. So how do you tell?",
            0.0,
            1.0,
        ),
    ],
)
def test_toxicity_analysis(name, text, min_toxicity_threshold, max_toxicity_threshold):
    toxicity = ToxicityExtractor().extract(text)
    assert (
        toxicity >= min_toxicity_threshold
    ), f"output {toxicity} is smaller than min threshold {min_toxicity_threshold} for test {name}"
    assert (
        toxicity <= max_toxicity_threshold
    ), f"output {toxicity} is larger than max threshold {max_toxicity_threshold} for test {name}"

@pytest.mark.parametrize(
"name, text, required_PII",
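
For context, a minimal usage sketch of the extractor this commit tests. It is inferred only from the diff above (the import path and the extract call appear in the test; treating the returned score as a number between 0 and 1 is an assumption drawn from the thresholds in the assertions), not part of this PR:

# Minimal sketch inferred from test_toxicity_analysis above; not part of this PR.
# Assumes ToxicityExtractor().extract(text) returns a toxicity score that the
# test compares against thresholds between 0 and 1.
from elemeta.nlp.extractors.high_level.toxicity_extractor import ToxicityExtractor

extractor = ToxicityExtractor()
score = extractor.extract("Elemeta is amazing")
print(f"toxicity score: {score}")  # the test expects this to be at most 0.5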