Skip to content

Commit

Permalink
Merge pull request airbnb#54 from airbnb/jacknagz-csv-parser-bug-and-unit-test-descriptions
Browse files (browse the repository at this point in the history)

[core][tests] csv parser bug and unit test descriptions
  • Loading branch information
jacknagz authored Mar 3, 2017
2 parents b6594e3 + 4ba0153 commit e850298
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 6 deletions.
2 changes: 1 addition & 1 deletion stream_alert/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def parse(self):
- False if the data is not CSV or the columns do not match.
"""
schema = self.schema
hints = self.options['hints']
hints = self.options.get('hints')

hint_result = []
csv_payloads = []
Expand Down
12 changes: 12 additions & 0 deletions test/unit/test_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def teardown(self):


def test_refresh_record(self):
"""Payload Record Refresh"""
kinesis_data = json.dumps({
'key3': 'key3data',
'key2': 'key2data',
Expand All @@ -107,6 +108,7 @@ def test_refresh_record(self):


def test_map_source_1(self):
"""Payload Source Mapping 1"""
data_encoded = base64.b64encode('test_map_source data')
payload = self.payload_generator(kinesis_stream='test_kinesis_stream',
kinesis_data=data_encoded)
Expand All @@ -132,6 +134,7 @@ def test_map_source_1(self):


def test_map_source_2(self):
"""Payload Source Mapping 2"""
data_encoded = base64.b64encode('test_map_source_data_2')
payload = self.payload_generator(kinesis_stream='test_stream_2',
kinesis_data=data_encoded)
Expand All @@ -153,6 +156,7 @@ def test_map_source_2(self):


def test_classify_record_kinesis_json(self):
"""Payload Classify JSON"""
kinesis_data = json.dumps({
'key1': 'sample data!!!!',
'key2': 'more sample data',
Expand Down Expand Up @@ -185,6 +189,7 @@ def test_classify_record_kinesis_json(self):


def test_classify_record_kinesis_nested_json(self):
"""Payload Classify Nested JSON"""
kinesis_data = json.dumps({
'date': 'Jan 01 2017',
'unixtime': '1485556524',
Expand Down Expand Up @@ -224,6 +229,7 @@ def test_classify_record_kinesis_nested_json(self):


def test_classify_record_kinesis_nested_json_osquery(self):
"""Payload Classify JSON osquery"""
kinesis_data = json.dumps({
'name': 'testquery',
'hostIdentifier': 'host1.test.prod',
Expand Down Expand Up @@ -275,6 +281,7 @@ def test_classify_record_kinesis_nested_json_osquery(self):


def test_classify_record_kinesis_nested_json_missing_subkey_fields(self):
"""Payload Classify Nested JSON Missing Subkeys"""
kinesis_data = json.dumps({
'name': 'testquery',
'hostIdentifier': 'host1.test.prod',
Expand Down Expand Up @@ -307,6 +314,7 @@ def test_classify_record_kinesis_nested_json_missing_subkey_fields(self):


def test_classify_record_kinesis_nested_json_with_data(self):
"""Payload Classify Nested JSON Generic"""
kinesis_data = json.dumps({
'date': 'Jan 01 2017',
'unixtime': '1485556524',
Expand Down Expand Up @@ -352,6 +360,7 @@ def test_classify_record_kinesis_nested_json_with_data(self):


def test_classify_record_kinesis_csv(self):
"""Payload Classify CSV"""
csv_data = 'jan102017,0100,host1,thisis some data with keyword1 in it'
payload = self.payload_generator(kinesis_stream='test_kinesis_stream',
kinesis_data=csv_data)
Expand Down Expand Up @@ -380,6 +389,7 @@ def test_classify_record_kinesis_csv(self):


def test_classify_record_kinesis_csv_nested(self):
"""Payload Classify Nested CSV"""
csv_nested_data = (
'"Jan 10 2017","1485635414","host1.prod.test","Corp",'
'"chef,web-server,1,10,success"'
Expand Down Expand Up @@ -413,6 +423,7 @@ def test_classify_record_kinesis_csv_nested(self):


def test_classify_record_kinesis_kv(self):
"""Payload Classify KV"""
auditd_test_data = (
'type=SYSCALL msg=audit(1364481363.243:24287): '
'arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 '
Expand Down Expand Up @@ -453,6 +464,7 @@ def test_classify_record_kinesis_kv(self):


def test_classify_record_syslog(self):
"""Payload Classify Syslog"""
test_data_1 = (
'Jan 26 19:35:33 vagrant-ubuntu-trusty-64 '
'sudo: pam_unix(sudo:session): '
Expand Down
3 changes: 3 additions & 0 deletions test/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
)

def test_validate_config_valid():
"""Config Validator - Valid Config"""
config = {
'logs': {
'json_log': {
Expand Down Expand Up @@ -70,6 +71,7 @@ def test_validate_config_valid():

@raises(ConfigError)
def test_validate_config_no_parsers():
"""Config Validator - No Parsers"""
config = {
'logs': {
'json_log': {
Expand Down Expand Up @@ -100,6 +102,7 @@ def test_validate_config_no_parsers():

@raises(ConfigError)
def test_validate_config_no_logs():
"""Config Validator - No Logs"""
config = {
'logs': {
'json_log': {
Expand Down
51 changes: 46 additions & 5 deletions test/unit/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def parser_helper(self, **kwargs):
return parsed_result

def test_multi_nested_json(self):
"""Multi-layered JSON"""
"""Parse Multi-layered JSON"""
# setup
schema = {
'name': 'string',
Expand All @@ -62,7 +62,7 @@ def test_multi_nested_json(self):
assert_equal(parsed_data[0]['result'], 'fail')

def test_inspec(self):
"""Inspec JSON"""
"""Parse Inspec JSON"""
schema = self.config['logs']['test_inspec']['schema']
options = { "hints" : self.config['logs']['test_inspec']['hints'] }
# load fixture file
Expand All @@ -77,7 +77,7 @@ def test_inspec(self):
u'results', u'id', u'desc')),sorted(parsed_result[0].keys()))

def test_cloudtrail(self):
"""Cloudtrail JSON"""
"""Parse Cloudtrail JSON"""
schema = self.config['logs']['test_cloudtrail']['schema']
options = { "hints" : self.config['logs']['test_cloudtrail']['hints'] }
# load fixture file
Expand All @@ -104,7 +104,7 @@ def test_cloudtrail(self):
'stream_alert_prod_user')

def test_basic_json(self):
"""Non-nested JSON objects"""
"""Parse Non-nested JSON objects"""
# setup
schema = {
'name': 'string',
Expand Down Expand Up @@ -155,7 +155,7 @@ def parser_helper(self, **kwargs):
return parsed_result

def test_cloudwatch(self):
"""CloudWatch JSON"""
"""Parse CloudWatch JSON"""
schema = self.config['logs']['test_cloudwatch']['schema']
options = { "hints": self.config['logs']['test_cloudwatch']['hints']}
with open('test/unit/fixtures/cloudwatch.json','r') as fixture_file:
Expand All @@ -167,3 +167,44 @@ def test_cloudwatch(self):
for result in parsed_result:
assert_equal(sorted((u'protocol', u'source', u'destination', u'srcport', u'destport', u'eni', u'action', u'packets', u'bytes', u'windowstart', u'windowend', u'version', u'account', u'flowlogstatus',u'envelope')), sorted(result.keys()))
assert_equal(sorted((u"logGroup",u"logStream",u"owner")),sorted(result['envelope'].keys()))

class TestKVParser(object):
    """Unit tests for the parser registered under the 'kv' id."""

    def setup(self):
        """Setup before each method"""
        # load config
        self.config = load_config('test/unit/conf')
        # load KV parser class
        self.parser_class = get_parser('kv')

    def teardown(self):
        """Teardown after each method"""
        pass

    def parser_helper(self, **kwargs):
        """Build a parser from the keyword arguments and return its output.

        Keyword Args:
            data: raw record passed to the parser constructor.
            schema: schema passed to the parser constructor.
            options: options passed to the parser constructor.

        Returns:
            Whatever the parser's ``parse()`` call returns.

        Raises:
            KeyError: if ``data``, ``schema`` or ``options`` is not supplied.
        """
        data = kwargs['data']
        schema = kwargs['schema']
        options = kwargs['options']

        kv_parser = self.parser_class(data, schema, options)
        parsed_result = kv_parser.parse()
        return parsed_result

    def test_kv_parsing(self):
        """Parse KV - 'key:value,key:value'"""
        # setup
        schema = {
            'name': 'string',
            'result': 'string'
        }
        options = {
            'separator': ':',
            'delimiter': ',',
            'service': 'kinesis'
        }
        data = 'name:joe bob,result:success'

        # get parsed data
        parsed_data = self.parser_helper(data=data, schema=schema, options=options)

        assert_equal(len(parsed_data), 1)
        assert_equal(parsed_data[0]['name'], 'joe bob')
        # check the second pair as well, so a parser that drops or mangles
        # anything after the first pair does not slip through
        assert_equal(parsed_data[0]['result'], 'success')
2 changes: 2 additions & 0 deletions test/unit/test_rule_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from rules.helpers.base import in_set, last_hour

def test_in_set():
"""Helpers - In Set"""
# basic example
test_list = ['this', 'is', 'a9', 'test']
data = 'test'
Expand All @@ -37,6 +38,7 @@ def test_in_set():
assert_equal(result, True)

def test_last_hour():
"""Helpers - Last Hour"""
time_now = int(time.time())

thirty_minutes_ago = time_now - 1800
Expand Down
6 changes: 6 additions & 0 deletions test/unit/test_rules_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def make_kinesis_payload(self, **kwargs):
return payload

def test_alert_format(self):
"""Rule Engine - Alert Format"""
@rule(logs=['test_log_type_json_nested_with_data'],
outputs=['s3'])
def alert_format_test(rec):
Expand Down Expand Up @@ -118,6 +119,7 @@ def alert_format_test(rec):


def test_basic_rule_matcher_process(self):
"""Rule Engine - Basic Rule/Matcher"""
@matcher()
def prod(rec):
return rec['environment'] == 'prod'
Expand Down Expand Up @@ -169,6 +171,7 @@ def chef_logs(rec):
assert_equal(alerts[0]['metadata']['outputs'], ['s3'])

def test_process_req_subkeys(self):
"""Rule Engine - Req Subkeys"""
@rule(logs=['test_log_type_json_nested'],
outputs=['s3'],
req_subkeys={'data': ['location']})
Expand Down Expand Up @@ -222,6 +225,7 @@ def web_server(rec):
assert_equal(alerts[1]['rule_name'], 'data_location')

def test_syslog_rule(self):
"""Rule Engine - Syslog Rule"""
@rule(logs=['test_log_type_syslog'],
outputs=['s3'])
def syslog_sudo(rec):
Expand Down Expand Up @@ -249,6 +253,7 @@ def syslog_sudo(rec):
assert_equal(alerts[0]['metadata']['type'], 'syslog')

def test_csv_rule(self):
"""Rule Engine - CSV Rule"""
@rule(logs=['test_log_type_csv_nested'],
outputs=['pagerduty'])
def nested_csv(rec):
Expand All @@ -272,6 +277,7 @@ def nested_csv(rec):
assert_equal(alerts[0]['rule_name'], 'nested_csv')

def test_kv_rule(self):
"""Rule Engine - KV Rule"""
@rule(logs=['test_log_type_kv_auditd'],
outputs=['pagerduty'])
def auditd_bin_cat(rec):
Expand Down

0 comments on commit e850298

Please sign in to comment.