CSVLintBear: Get more info

Updates the bear so that results carry the line number and the erroneous text. Adjusts the severity map as per the csvlint docs (https://github.com/theodi/csvlint.rb/blob/master/README.md). Closes coala#967
javidy · Dec 18, 2016 · ee7c9c2 · ee7c9c2
1 parent 326b14b
commit ee7c9c2
Show file tree

Hide file tree

Showing 2 changed files with 124 additions and 8 deletions.
diff --git a/bears/csv/CSVLintBear.py b/bears/csv/CSVLintBear.py
@@ -1,11 +1,11 @@
+import re
+
 from coalib.bearlib.abstractions.Linter import linter
 from dependency_management.requirements.GemRequirement import GemRequirement
+from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
 
 
-@linter(executable='csvlint',
-        output_format='regex',
-        output_regex=r'\d\. (?P<message>.+(s|g|e|w|d)\.*)'
-                     r'( |$)(?P<additional_info>.*)')
+@linter(executable='csvlint')
 class CSVLintBear:
     """
     Verifies using ``csvlint`` if ``.csv`` files are valid CSV or not.
@@ -17,7 +17,80 @@ class CSVLintBear:
     AUTHORS_EMAILS = {'[email protected]'}
     LICENSE = 'AGPL-3.0'
     CAN_DETECT = {'Syntax'}
+    ASCIINEMA_URL = 'https://asciinema.org/a/8fmp2pny34kpqw7t1eoy7phhc'
+
+    # Pattern for one numbered entry of the csvlint report. In order:
+    #   newline, "<number>. "  -- entry index, not captured
+    #   "<word>. "             -- captured twice, as ``origin`` and as
+    #                             ``severity`` (same text under two names)
+    #   "Row: <digits>. "      -- optional; the digits become ``line``
+    #   rest of the line       -- captured as ``message``
+    regex = re.compile(r'\n\d+\.\s(?P<origin>(?P<severity>\w+))\.\s'
+                       r'(Row:\s(?P<line>[0-9]+)\.\s)?(?P<message>.*)?')
+
+    # Severity assigned to each report code. ``process_output`` hands this
+    # dict to ``_convert_output_regex_match_to_result`` as ``severity_map``;
+    # presumably it is indexed there with the regex's ``severity`` group --
+    # confirm against the linter helper.
+    severity_map = {
+        'wrong_content_type': RESULT_SEVERITY.MAJOR,
+        'ragged_rows': RESULT_SEVERITY.MAJOR,
+        'blank_rows': RESULT_SEVERITY.MAJOR,
+        'invalid_encoding': RESULT_SEVERITY.MAJOR,
+        'not_found': RESULT_SEVERITY.MAJOR,
+        'stray_quote': RESULT_SEVERITY.MAJOR,
+        'unclosed_quote': RESULT_SEVERITY.MAJOR,
+        'whitespace': RESULT_SEVERITY.MAJOR,
+        'line_breaks': RESULT_SEVERITY.MAJOR,
+        'no_encoding': RESULT_SEVERITY.NORMAL,
+        'encoding': RESULT_SEVERITY.NORMAL,
+        'no_content_type': RESULT_SEVERITY.NORMAL,
+        'excel': RESULT_SEVERITY.NORMAL,
+        'check_options': RESULT_SEVERITY.NORMAL,
+        'inconsistent_values': RESULT_SEVERITY.NORMAL,
+        'empty_column_name': RESULT_SEVERITY.NORMAL,
+        'duplicate_column_name': RESULT_SEVERITY.NORMAL,
+        'title_row': RESULT_SEVERITY.NORMAL,
+        'nonrfc_line_breaks': RESULT_SEVERITY.INFO,
+        'assumed_header': RESULT_SEVERITY.INFO}
+
+    # Human-readable description for each report code. Keys must be spelled
+    # exactly like the codes in ``severity_map``, because ``process_output``
+    # indexes this dict with the code captured from the csvlint report.
+    message_dict = {
+        'wrong_content_type': 'Content type is not text/csv.',
+        'ragged_rows': 'Row has a different number of columns. (than the first'
+                        ' row in the file)',
+        'blank_rows': 'Completely empty row, e.g. blank line or a line where'
+                       ' all column values are empty.',
+        'invalid_encoding': 'Encoding error when parsing row, e.g. because of'
+                             ' invalid characters.',
+        'not_found': 'HTTP 404 error when retrieving the data.',
+        'stray_quote': 'Missing or stray quote.',
+        'unclosed_quote': 'Unclosed quoted field.',
+        'whitespace': 'A quoted column has leading or trailing whitespace.',
+        'line_breaks': 'Line breaks were inconsistent or incorrectly'
+                         ' specified.',
+        'no_encoding': 'The Content-Type header returned in the HTTP request'
+                         ' does not have a charset parameter.',
+        'encoding': 'The character set is not UTF-8.',
+        'no_content_type': 'File is being served without a Content-Type'
+                           ' header.',
+        'excel': 'No Content-Type header and the file extension is .xls.',
+        'check_options': 'CSV file appears to contain only a single column.',
+        'inconsistent_values': 'Inconsistent values in the same column.'
+                               ' Reported if <90% of values seem to have same'
+                               ' data type. (either numeric or alphanumeric'
+                               ' including punctuation)',
+        'empty_column_name': 'A column in the CSV header has an empty name.',
+        'duplicate_column_name': 'A column in the CSV header has a duplicate'
+                                 ' name.',
+        'title_row': 'There appears to be a title field in the first row of'
+                      ' the CSV.',
+        'nonrfc_line_breaks': 'Uses non-CRLF line breaks, so does not conform'
+                              ' to RFC4180.',
+        'assumed_header': 'The validator has assumed that a header is present.'
+    }
 
     @staticmethod
     def create_arguments(filename, file, config_file):
         return filename,
+
+    @classmethod
+    def process_output(cls, output, filename, file, result_message=None):
+        """
+        Turn the ``csvlint`` report into results, one per numbered entry.
+
+        :param output:         Output of the ``csvlint`` run; converted with
+                               ``str()`` before it is scanned.
+        :param filename:       Forwarded unchanged to the result conversion.
+        :param file:           Not used by this method.
+        :param result_message: Not used; every message is built from
+                               ``message_dict`` and the report entry.
+        :return:               Generator yielding, for each entry, whatever
+                               ``_convert_output_regex_match_to_result``
+                               returns.
+        """
+        for match in cls.regex.finditer(str(output)):
+            groups = match.groupdict()
+            code = groups['origin']
+            # A code without a canned description falls back to the raw
+            # code instead of aborting the whole run with a ``KeyError``.
+            description = cls.message_dict.get(code, code)
+            # Entries that carry a row number drop the trailing text of the
+            # report line; entries without one keep it as extra detail
+            # (e.g. ``Column: 7``).
+            detail = ('' if groups['line'] is not None
+                      else ' ' + groups['message'])
+            # The helper is fetched from the class (a classmethod has no
+            # instance), so ``cls`` is passed explicitly in first position.
+            yield cls._convert_output_regex_match_to_result(
+                cls, match, filename, severity_map=cls.severity_map,
+                result_message=description + detail)
diff --git a/tests/csv/CSVLintBearTest.py b/tests/csv/CSVLintBearTest.py
@@ -1,20 +1,63 @@
+import unittest
+
+from queue import Queue
 from bears.csv.CSVLintBear import CSVLintBear
-from tests.LocalBearTestHelper import verify_local_bear
+from coalib.testing.BearTestHelper import generate_skip_decorator
+from coalib.testing.LocalBearTestHelper import verify_local_bear, execute_bear
+from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
+from coalib.settings.Section import Section
+from coala_utils.ContextManagers import prepare_file
 
 good_file = """id,first_name,last_name,email,gender,ip_address
 1,Cynthia,Rogers,[email protected],Female,158.131.39.207
 2,Lisa,Carroll,[email protected],Female,157.69.195.53
 3,Kevin,Baker,[email protected],Male,113.189.69.4
 """
 
-
-bad_file = """id,first_name,last_name,email,gender,ip_address
+major_file = """id,first_name,last_name,email,gender,ip_address
 1,Cynthia,Rogers,[email protected],Female,158.131.39.207
 2,Lisa,Carroll,[email protected],157.69.195.53
 3,Kevin,Baker,[email protected],Male,113.189.69.4
 """
 
+normal_file = """id,first_name,last_name,email,gender,ip_address,first_name
+1,Cynthia,Rogers,[email protected],Female,158.131.39.207,A
+2,Lisa,Carroll,[email protected],Female,157.69.195.53,A
+3,Kevin,Baker,[email protected],Male,113.189.69.4,A
+"""
 
 CSVLintBearTest = verify_local_bear(CSVLintBear,
                                     valid_files=(good_file,),
-                                    invalid_files=(bad_file,))
+                                    invalid_files=(major_file, normal_file))
+
+
+# NOTE(review): presumably skips the whole case when the bear's
+# prerequisites (the ``csvlint`` executable) are unavailable -- confirm.
+@generate_skip_decorator(CSVLintBear)
+class CSVLintBearSeverityTest(unittest.TestCase):
+    """
+    Checks severity, message, origin and aspect of the first result that
+    ``CSVLintBear`` produces for each of the two invalid sample files.
+    """
+
+    def setUp(self):
+        # A new bear, bound to a section named '', is built for every test.
+        self.section = Section('')
+        self.uut = CSVLintBear(self.section, Queue())
+
+    def test_normal(self):
+        """``normal_file`` repeats the ``first_name`` header column."""
+        content = normal_file.splitlines()
+        with prepare_file(content, None) as (file, fname):
+            with execute_bear(self.uut, fname, file) as results:
+                self.assertEqual(results[0].severity, RESULT_SEVERITY.NORMAL)
+                self.assertEqual(results[0].message,
+                                 'A column in the CSV header'
+                                 ' has a duplicate name. Column: 7')
+                self.assertEqual(results[0].origin,
+                                 'CSVLintBear (duplicate_column_name)')
+                self.assertEqual(results[0].aspect, None)
+
+    def test_errors(self):
+        """``major_file`` has a data row with one field fewer than the header."""
+        content = major_file.splitlines()
+        with prepare_file(content, None) as (file, fname):
+            with execute_bear(self.uut, fname, file) as results:
+                self.assertEqual(results[0].severity, RESULT_SEVERITY.MAJOR)
+                self.assertEqual(results[0].message,
+                                 'Row has a different number of columns.'
+                                 ' (than the first row in the file)')
+                self.assertEqual(results[0].origin,
+                                 'CSVLintBear (ragged_rows)')
+                self.assertEqual(results[0].aspect, None)