forked from coala/coala-bears
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updates the bear in order to get the line number and the erroneous text. Adjusts the severity map as per the docs. (https://github.com/theodi/csvlint.rb/blob/master/README.md) Closes coala#967
- Loading branch information
Showing
2 changed files
with
124 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,11 @@ | ||
import re | ||
|
||
from coalib.bearlib.abstractions.Linter import linter | ||
from dependency_management.requirements.GemRequirement import GemRequirement | ||
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY | ||
|
||
|
||
@linter(executable='csvlint', | ||
output_format='regex', | ||
output_regex=r'\d\. (?P<message>.+(s|g|e|w|d)\.*)' | ||
r'( |$)(?P<additional_info>.*)') | ||
@linter(executable='csvlint') | ||
class CSVLintBear: | ||
""" | ||
Verifies using ``csvlint`` if ``.csv`` files are valid CSV or not. | ||
|
@@ -17,7 +17,80 @@ class CSVLintBear: | |
    # Standard bear metadata attributes read by coala's bear registry.
    # NOTE(review): the author e-mail appears redacted by the page scrape.
    AUTHORS_EMAILS = {'[email protected]'}
    LICENSE = 'AGPL-3.0'
    CAN_DETECT = {'Syntax'}
    ASCIINEMA_URL = 'https://asciinema.org/a/8fmp2pny34kpqw7t1eoy7phhc'
|
||
    # Parses csvlint output lines of the shape
    #   "1. ragged_rows. Row: 3. <details>"
    # ``origin`` and ``severity`` both capture the check name (``severity``
    # is looked up in ``severity_map``); ``line`` captures the optional row
    # number; ``message`` captures the trailing free text, if any.
    regex = re.compile(r'\n\d+\.\s(?P<origin>(?P<severity>\w+))\.\s'
                       r'(Row:\s(?P<line>[0-9]+)\.\s)?(?P<message>.*)?')
|
||
severity_map = { | ||
'wrong_content_type': RESULT_SEVERITY.MAJOR, | ||
'ragged_rows': RESULT_SEVERITY.MAJOR, | ||
'blank_rows': RESULT_SEVERITY.MAJOR, | ||
'invalid_encoding': RESULT_SEVERITY.MAJOR, | ||
'not_found': RESULT_SEVERITY.MAJOR, | ||
'stray_quote': RESULT_SEVERITY.MAJOR, | ||
'unclosed_quote': RESULT_SEVERITY.MAJOR, | ||
'whitespace': RESULT_SEVERITY.MAJOR, | ||
'line_breaks': RESULT_SEVERITY.MAJOR, | ||
'no_encoding': RESULT_SEVERITY.NORMAL, | ||
'encoding': RESULT_SEVERITY.NORMAL, | ||
'no_content_type': RESULT_SEVERITY.NORMAL, | ||
'excel': RESULT_SEVERITY.NORMAL, | ||
'check_options': RESULT_SEVERITY.NORMAL, | ||
'inconsistent_values': RESULT_SEVERITY.NORMAL, | ||
'empty_column_name': RESULT_SEVERITY.NORMAL, | ||
'duplicate_column_name': RESULT_SEVERITY.NORMAL, | ||
'title_row': RESULT_SEVERITY.NORMAL, | ||
'nonrfc_line_breaks': RESULT_SEVERITY.INFO, | ||
'assumed_header': RESULT_SEVERITY.INFO} | ||
|
||
message_dict = { | ||
'wrong_content_type': 'Content type is not text/csv.', | ||
'ragged_rows': 'Row has a different number of columns. (than the first' | ||
' row in the file)', | ||
'blank_rows': 'Completely empty row, e.g. blank line or a line where' | ||
' all column values are empty.', | ||
'invalid_encoding': 'Encoding error when parsing row, e.g. because of' | ||
' invalid characters.', | ||
'not_found': 'HTTP 404 error when retrieving the data.', | ||
'stray_quotd': 'Missing or stray quote.', | ||
'unclosed_quotd': 'Unclosed quoted field.', | ||
'whitespacd': 'A quoted column has leading or trailing whitespace.', | ||
'line_breakd': 'Line breaks were inconsistent or incorrectly' | ||
' specified.', | ||
'no_encodind': 'The Content-Type header returned in the HTTP request' | ||
' does not have a charset parameter.', | ||
'encoding': 'The character set is not UTF-8.', | ||
'no_content_type': 'File is being served without a Content-Type' | ||
' header.', | ||
'excel': 'No Content-Type header and the file extension is .xls.', | ||
'check_optiond': 'CSV file appears to contain only a single column.', | ||
'inconsistent_valued': 'Inconsistent values in the same column.' | ||
' Reported if <90% of values seem to have same' | ||
' data type. (either numeric or alphanumeric' | ||
' including punctuation)', | ||
'empty_column_name': 'A column in the CSV header has an empty name.', | ||
'duplicate_column_name': 'A column in the CSV header has a duplicate' | ||
' name.', | ||
'title_rod': 'There appears to be a title field in the first row of' | ||
' the CSV.', | ||
'nonrfc_line_breakd': 'Uses non-CRLF line breaks, so does not conform' | ||
' to RFC4180.', | ||
'assumed_headed': 'The validator has assumed that a header is present.' | ||
} | ||
|
||
@staticmethod | ||
def create_arguments(filename, file, config_file): | ||
return filename, | ||
|
||
@classmethod | ||
def process_output(self, output, filename, file, result_message=None): | ||
for match in re.finditer(self.regex, str(output)): | ||
groups = match.groupdict() | ||
result_message = ' ' + groups['message'] if groups[ | ||
'line'] is None else '' | ||
yield self._convert_output_regex_match_to_result( | ||
self, | ||
match, filename, severity_map=self.severity_map, | ||
result_message=self.message_dict[groups['origin']] + | ||
result_message) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,63 @@ | ||
import unittest | ||
|
||
from queue import Queue | ||
from bears.csv.CSVLintBear import CSVLintBear | ||
from tests.LocalBearTestHelper import verify_local_bear | ||
from coalib.testing.BearTestHelper import generate_skip_decorator | ||
from coalib.testing.LocalBearTestHelper import verify_local_bear, execute_bear | ||
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY | ||
from coalib.settings.Section import Section | ||
from coala_utils.ContextManagers import prepare_file | ||
|
||
# A well-formed CSV sample: consistent column count, no duplicate headers.
good_file = """id,first_name,last_name,email,gender,ip_address
1,Cynthia,Rogers,[email protected],Female,158.131.39.207
2,Lisa,Carroll,[email protected],Female,157.69.195.53
3,Kevin,Baker,[email protected],Male,113.189.69.4
"""
|
||
|
||
# CSV sample triggering a MAJOR result: the second data row has only five
# fields (the gender value is missing), i.e. csvlint's ``ragged_rows`` check.
major_file = """id,first_name,last_name,email,gender,ip_address
1,Cynthia,Rogers,[email protected],Female,158.131.39.207
2,Lisa,Carroll,[email protected],157.69.195.53
3,Kevin,Baker,[email protected],Male,113.189.69.4
"""
|
||
# CSV sample triggering a NORMAL result: the header repeats ``first_name``
# (column 7), i.e. csvlint's ``duplicate_column_name`` check.
normal_file = """id,first_name,last_name,email,gender,ip_address,first_name
1,Cynthia,Rogers,[email protected],Female,158.131.39.207,A
2,Lisa,Carroll,[email protected],Female,157.69.195.53,A
3,Kevin,Baker,[email protected],Male,113.189.69.4,A
"""
|
||
# Basic valid/invalid classification test generated by coala's helper.
CSVLintBearTest = verify_local_bear(CSVLintBear,
                                    valid_files=(good_file,),
                                    invalid_files=(major_file, normal_file))
|
||
|
||
@generate_skip_decorator(CSVLintBear)
class CSVLintBearSeverityTest(unittest.TestCase):
    """Checks severity, message, origin and aspect of CSVLintBear results."""

    def setUp(self):
        self.section = Section('')
        self.uut = CSVLintBear(self.section, Queue())

    def test_normal(self):
        lines = normal_file.splitlines()
        with prepare_file(lines, None) as (file, fname), \
                execute_bear(self.uut, fname, file) as results:
            result = results[0]
            self.assertEqual(result.severity, RESULT_SEVERITY.NORMAL)
            self.assertEqual(result.message,
                             'A column in the CSV header'
                             ' has a duplicate name. Column: 7')
            self.assertEqual(result.origin,
                             'CSVLintBear (duplicate_column_name)')
            self.assertIsNone(result.aspect)

    def test_errors(self):
        lines = major_file.splitlines()
        with prepare_file(lines, None) as (file, fname), \
                execute_bear(self.uut, fname, file) as results:
            result = results[0]
            self.assertEqual(result.severity, RESULT_SEVERITY.MAJOR)
            self.assertEqual(result.message,
                             'Row has a different number of columns.'
                             ' (than the first row in the file)')
            self.assertEqual(result.origin,
                             'CSVLintBear (ragged_rows)')
            self.assertIsNone(result.aspect)