sumologic backend: review with inspiration from arcsight
juju4 committed Feb 3, 2019
1 parent 3ef930b commit 7d159fb
Showing 1 changed file with 163 additions and 6 deletions.
169 changes: 163 additions & 6 deletions tools/sigma/backends/sumologic.py
@@ -16,6 +16,7 @@

import re
import sigma
from sigma.parser.condition import ConditionOR
from .base import SingleTextQueryBackend

# Sumo specifics
@@ -32,7 +33,7 @@ class SumoLogicBackend(SingleTextQueryBackend):
active = True

index_field = "_index"
reEscape = re.compile('("|(?<!\\\\)\\\\(?![*?\\\\]))')
#reEscape = re.compile('("|\\\\(?![*?]))')
reClear = None
andToken = " AND "
orToken = " OR "
@@ -46,19 +47,175 @@ class SumoLogicBackend(SingleTextQueryBackend):
mapExpression = "%s=%s"
mapListsSpecialHandling = True
mapListValueExpression = "%s IN %s"
interval = None
logname = None

def generateAggregation(self, agg):
        if agg is None:
return ""
if agg.aggfunc == sigma.parser.condition.SigmaAggregationParser.AGGFUNC_NEAR:
raise NotImplementedError("The 'near' aggregation operator is not yet implemented for this backend")
# WIP
# ex:
# (QUERY) | timeslice 5m
# | count_distinct(process) _timeslice,hostname
# | where _count_distinct > 5
#return " | timeslice %s | count_distinct(%s) %s | where _count_distinct > 0" % (self.interval, agg.aggfunc_notrans or "", agg.aggfield or "", agg.groupfield or "")
#return " | timeslice %s | count_distinct(%s) %s | where _count_distinct %s %s" % (self.interval, agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)
        if agg.groupfield is None:
            #return " | %s(%s) | when _count %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
            return " | %s(%s) as val | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.cond_op, agg.condition)
        else:
            return " | %s(%s) as val by %s | when val %s %s" % (agg.aggfunc_notrans, agg.aggfield or "", agg.groupfield or "", agg.cond_op, agg.condition)

# TimeFrame condition / within timeframe
# condition | timeslice 5m | count_distinct(f1) as val by f2 | where val > 5
# Near condition => how near... like timeframe?
def generateBefore(self, parsed):
        # Not required, but makes the query faster, especially if there is no FER or _index/_sourceCategory.
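        # Illustrative (hypothetical logsource): product "windows" with service "security" prepends
        # the free-text scoping terms "windows security " to the query.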
if self.logname:
return "%s " % self.logname
if self.service:
return "%s %s " % (self.product, self.service)
return ""

def generate(self, sigmaparser):
try:
self.product = sigmaparser.parsedyaml['logsource']['product'] # OS or Software
self.service = sigmaparser.parsedyaml['logsource']['service'] # Channel
except KeyError:
self.product = None
self.service = None

try:
self.interval = sigmaparser.parsedyaml['detection']['timeframe']
        except KeyError:
pass

for parsed in sigmaparser.condparsed:
query = self.generateQuery(parsed)
before = self.generateBefore(parsed)
after = self.generateAfter(parsed)

result = ""
if before is not None:
result = before
if query is not None:
result += query
if after is not None:
result += after

        # Add parentheses here in case two rules are aggregated together, e.g. win_possible_applocker_bypass.
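        # Illustrative final shape (hypothetical values): "(windows security <search expression>[ <aggregation>])"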
return "(" + result + ")"

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
        # TODO/FIXME! Depending on the deployment configuration, existing FERs must be populated here (or in the backend config?)
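        # Fields kept in allowedFieldsList are emitted as key=value expressions by generateMapItemNode;
        # any other field falls back to value-only (keyword) matching.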
#aFL = ["EventID"]
aFL = ["EventID", "sourcename", "CommandLine", "NewProcessName", "Image", "ParentImage", "ParentCommandLine", "ParentProcessName"]
for item in self.sigmaconfig.fieldmappings.values():
if item.target_type is list:
aFL.extend(item.target)
else:
aFL.append(item.target)
self.allowedFieldsList = list(set(aFL))

# Skip logsource value from sigma document for separate path.
#def generateCleanValueNodeLogsource(self, value):
# return self.valueExpression % (self.cleanValue(str(value)))

    # Clear special characters from values.
    # SumoLogic: only remove '*' (quoted it is literal, unquoted it is a wildcard) and '"'.
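    # Illustrative: a hypothetical value "C:\Windows\*\cmd.exe" becomes ['C:\Windows\', '\cmd.exe'],
    # i.e. the literal fragments around wildcards and special characters.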
def CleanNode(self, node):
search_ptrn = re.compile(r"[\/@?#&_%*\(\)\"]")
replace_ptrn = re.compile(r"[\/@?#&_%*\(\)\"]")
match = search_ptrn.search(str(node))
new_node = list()
if match:
replaced_str = replace_ptrn.sub('*', node)
node = [x for x in replaced_str.split('*') if x]
new_node.extend(node)
else:
new_node.append(node)
node = new_node
return node

    # Build the expression for a single field/value pair, cleaning values of special characters.
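    # Illustrative: a hypothetical field that is not in allowedFieldsList with value "net localgroup /add"
    # is split into literal fragments and emitted as an AND of valueExpression-wrapped terms,
    # e.g. ("net localgroup " AND "add"), instead of a field=value expression.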
def generateMapItemNode(self, node):
key, value = node
if key in self.allowedFieldsList:
if self.mapListsSpecialHandling == False and type(value) in (
str, int, list) or self.mapListsSpecialHandling == True and type(value) in (str, int):
if key in ("LogName","source"):
self.logname = value
return self.mapExpression % (key, value)
elif type(value) is list:
return self.generateMapItemListNode(key, value)
else:
raise TypeError("Backend does not support map values of type " + str(type(value)))
else:
if self.mapListsSpecialHandling == False and type(value) in (
str, int, list) or self.mapListsSpecialHandling == True and type(value) in (str, int):
if type(value) is str:
new_value = list()
value = self.CleanNode(value)
if type(value) == list:
new_value.append(self.andToken.join([self.valueExpression % val for val in value]))
else:
new_value.append(value)
if len(new_value)==1:
return "(" + self.generateANDNode(new_value) + ")"
else:
return "(" + self.generateORNode(new_value) + ")"
else:
return self.generateValueNode(value)
elif type(value) is list:
new_value = list()
for item in value:
item = self.CleanNode(item)
if type(item) is list and len(item) == 1:
new_value.append(self.valueExpression % item[0])
elif type(item) is list:
new_value.append(self.andToken.join([self.valueExpression % val for val in item]))
else:
new_value.append(item)
return self.generateORNode(new_value)
else:
raise TypeError("Backend does not support map values of type " + str(type(value)))

    # For keyword values that contain spaces.
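    # Illustrative: a hypothetical keyword value "install AND elevated" is emitted as
    # "(install AND elevated)" so the embedded AND keeps its grouping.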
def generateValueNode(self, node):
if type(node) is int:
return self.cleanValue(str(node))
if 'AND' in node:
return "(" + self.cleanValue(str(node)) + ")"
else:
return self.cleanValue(str(node))

def generateMapItemListNode(self, key, value):
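        # Illustrative: a mapped field such as EventID with the hypothetical values [4624, 4625] yields
        # "(EventID = 4624 OR EventID = 4625)".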
itemslist = list()
for item in value:
if key in self.allowedFieldsList:
itemslist.append('%s = %s' % (key, self.generateValueNode(item)))
else:
itemslist.append('%s' % (self.generateValueNode(item)))
return "(" + " OR ".join(itemslist) + ")"

    # generateORNode algorithm shared with the ArcSightBackend class.
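    # When the OR condition consists only of plain keyword strings, each keyword is first reduced to its
    # literal fragments via CleanNode and those fragments are AND-ed; otherwise the generic OR join is used.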
def generateORNode(self, node):
if type(node) == ConditionOR and all(isinstance(item, str) for item in node):
new_value = list()
for value in node:
value = self.CleanNode(value)
if type(value) is list:
new_value.append(self.andToken.join([self.valueExpression % val for val in value]))
else:
new_value.append(value)
return "(" + self.orToken.join([self.generateNode(val) for val in new_value]) + ")"
return "(" + self.orToken.join([self.generateNode(val) for val in node]) + ")"

def fieldNameMapping(self, fieldname, value):
"""
Alter field names depending on the value(s). Backends may use this method to perform a final transformation of the field name
in addition to the field mapping defined in the conversion configuration. The field name passed to this method was already
transformed from the original name given in the Sigma rule.
TODO/FIXME!
"""
return fieldname
