cortex: Turn analyzer PoC into job tracker
The current implementation of Cortex access has PoC characteristics in that it
does not use a job tracker and blocks until a single job has finished.

Turn the Cortex module into a job tracker analogous to Cuckoo. Add a
class-based abstraction of the backend analyzers and their reports. Make the
whole thing configurable through analyzers.conf.

Unfortunately, the cortex4py module does not use requests sessions. So there is
no easy way to keep the retry and backoff logic of the Cuckoo module.

While at it, move analyzer-specific exceptions into their modules for clarity.

TODO:
- proper error and retry handling
- test cases
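
For illustration, the class-based abstraction mentioned above might look roughly like the following sketch. Only CortexAnalyzer (referenced in the rules.py docstrings below) and the report class names from the documentation diff appear in this commit; everything else here is hypothetical, since peekaboo/toolbox/cortex.py itself is not shown on this page.

    # Hypothetical sketch of the analyzer/report abstraction; not the actual
    # peekaboo/toolbox/cortex.py implementation.

    class CortexAnalyzerReport:
        """ Wraps the raw JSON report of one Cortex analyzer run. """
        def __init__(self, report=None):
            self.report = report if report is not None else {}

        @property
        def full(self):
            """ The full, unprocessed analyzer report. """
            return self.report


    class FileInfoReport(CortexAnalyzerReport):
        """ Report of the Cortex FileInfo analyzer. """


    class CortexAnalyzer:
        """ Describes one Cortex backend analyzer and how to submit to it. """
        name = None  # analyzer name as registered in Cortex (hypothetical)
        report_class = CortexAnalyzerReport

        def get_submit_parameters(self, sample, submit_original_filename=True):
            """ Build the submission data for this sample. """
            raise NotImplementedError


    class FileInfoAnalyzer(CortexAnalyzer):
        name = 'FileInfo'
        report_class = FileInfoReport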
michaelweiser committed Jun 24, 2021
1 parent b4471fb commit 15a0321
Showing 11 changed files with 661 additions and 212 deletions.
25 changes: 25 additions & 0 deletions analyzers.conf.sample
@@ -24,3 +24,28 @@
# New installations create a bearer token by default and require it but upgraded
# installations don't automatically get one.
#api_token : <empty>

# Cortex analyzer settings
[cortex]
# where to reach the Cortex REST API
#url: http://127.0.0.1:9001

# Token to authenticate to the Cortex REST API with.
#api_token : <empty>

# how long to wait between checks of job status
#poll_interval: 5

# Submit samples with their original filenames if available. Enhances
# authenticity of analysis environment but also leaks original filenames into
# Cortex's database.
#submit_original_filename : yes

# Specify how long to track running Cortex jobs before giving up on them. This
# does not actively cancel jobs; it is meant to handle cases where jobs have
# for some reason been dropped by or got stuck within Cortex. The value is
# unrelated to how long our client is willing to wait for a result: even if
# the client gives up on us, we normally still want to learn and cache the job
# result, since the analysis was expensive and the sample might be presented
# to us again.
#maximum_job_age : 900
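
For illustration, a fully populated [cortex] section could look like this (host name and token are made up):

    [cortex]
    url: http://cortex.example.com:9001
    api_token: 0123456789abcdefghijklmnopqrstuv
    poll_interval: 5
    submit_original_filename: yes
    maximum_job_age: 900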
2 changes: 1 addition & 1 deletion docs/source/ruleset.rst
@@ -141,7 +141,7 @@ Attribues of cortexreport
.. code-block:: shell
File_InfoReport.full
FileInfoReport.full
HybridAnalysisReport.full
VirusTotalQueryReport.n_of_all
VirusTotalQueryReport.level
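
These attributes are meant to be referenced from expression rules in the ruleset configuration. A hypothetical example follows; section name, rule numbering and the threshold are illustrative assumptions, not taken from the shipped ruleset:

    [expressions]
    expression.0 : cortexreport.VirusTotalQueryReport.n_of_all > 0 -> bad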
13 changes: 13 additions & 0 deletions peekaboo/config.py
@@ -456,13 +456,26 @@ def __init__(self, config_file=None):
self.cuckoo_submit_original_filename = True
self.cuckoo_maximum_job_age = 15*60

self.cortex_url = 'http://127.0.0.1:9001'
self.cortex_api_token = ''
self.cortex_poll_interval = 5
self.cortex_submit_original_filename = True
self.cortex_maximum_job_age = 15*60

config_options = {
'cuckoo_url': ['cuckoo', 'url'],
'cuckoo_api_token': ['cuckoo', 'api_token'],
'cuckoo_poll_interval': ['cuckoo', 'poll_interval'],
'cuckoo_submit_original_filename': [
'cuckoo', 'submit_original_filename'],
'cuckoo_maximum_job_age': ['cuckoo', 'maximum_job_age'],

'cortex_url': ['cortex', 'url'],
'cortex_api_token': ['cortex', 'api_token'],
'cortex_poll_interval': ['cortex', 'poll_interval'],
'cortex_submit_original_filename': [
'cortex', 'submit_original_filename'],
'cortex_maximum_job_age': ['cortex', 'maximum_job_age'],
}

# read configuration file. Note that we require a configuration file
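
The config_options mapping above ties each attribute to its [section, option] location in analyzers.conf. A minimal sketch of how such a mapping could be applied on top of the defaults, assuming Python's configparser; the actual parsing code lies outside the hunk shown here:

    from configparser import ConfigParser

    def apply_config_options(obj, config_file, config_options):
        """ Override default attribute values with settings from the file. """
        parser = ConfigParser()
        parser.read(config_file)

        for attribute, (section, option) in config_options.items():
            if not parser.has_option(section, option):
                continue  # keep the hard-coded default

            # read the option with the same type as the default value
            default = getattr(obj, attribute)
            if isinstance(default, bool):
                value = parser.getboolean(section, option)
            elif isinstance(default, int):
                value = parser.getint(section, option)
            else:
                value = parser.get(section, option)

            setattr(obj, attribute, value)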
5 changes: 0 additions & 5 deletions peekaboo/exceptions.py
@@ -63,8 +63,3 @@ class PeekabooAnalysisDeferred(PeekabooRulesetException):
take into account that the ruleset will be rerun from the very beginning.
"""
pass


class CuckooSubmitFailedException(PeekabooException):
""" An exception raised if submitting a job to Cuckoo fails. """
pass
27 changes: 27 additions & 0 deletions peekaboo/ruleset/engine.py
@@ -28,6 +28,7 @@
from peekaboo.ruleset import Result, RuleResult
from peekaboo.ruleset.rules import *
from peekaboo.toolbox.cuckoo import Cuckoo
from peekaboo.toolbox.cortex import Cortex
from peekaboo.toolbox.peekabooyar import ContainsPeekabooYarRule
from peekaboo.exceptions import PeekabooAnalysisDeferred, \
PeekabooConfigException, PeekabooRulesetConfigError
@@ -79,6 +80,7 @@ def __init__(self, config, job_queue, db_con, analyzer_config):
self.db_con = db_con
self.analyzer_config = analyzer_config
self.cuckoo = None
self.cortex = None
self.rules = []

self.shutdown_requested = False
@@ -147,6 +149,25 @@ def start(self):

rule.set_cuckoo_job_tracker(self.cuckoo)

if rule.uses_cortex:
if self.cortex is None:
logger.debug(
"Rule %s uses Cortex. Starting job tracker.", rule_name)

self.cortex = Cortex(
self.job_queue,
self.analyzer_config.cortex_url,
self.analyzer_config.cortex_api_token,
self.analyzer_config.cortex_poll_interval,
self.analyzer_config.cortex_submit_original_filename,
self.analyzer_config.cortex_maximum_job_age)

if not self.cortex.start_tracker():
raise PeekabooRulesetConfigError(
"Failure to initialize Cortex job tracker")

rule.set_cortex_job_tracker(self.cortex)

self.rules.append(rule)

# abort startup if we've been asked to shut down meanwhile
@@ -197,6 +218,9 @@ def shut_down_resources(self):
if self.cuckoo is not None:
self.cuckoo.shut_down()

if self.cortex is not None:
self.cortex.shut_down()

def shut_down(self):
""" Initiate asynchronous shutdown of the ruleset engine and dependent
logic such as job trackers. """
@@ -207,3 +231,6 @@ def close_down(self):
""" Finalize ruleset engine shutdown synchronously. """
if self.cuckoo is not None:
self.cuckoo.close_down()

if self.cortex is not None:
self.cortex.close_down()
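
The engine only depends on the tracker's constructor and its start_tracker(), shut_down() and close_down() methods. A minimal sketch of such a tracker, assuming a background thread that polls Cortex; the real peekaboo/toolbox/cortex.py is not part of the excerpt on this page, so the internals here are guessed:

    import logging
    import threading
    import time

    logger = logging.getLogger(__name__)

    class PollingJobTracker:
        """ Hypothetical polling job tracker, not the actual Cortex class. """
        def __init__(self, job_queue, url, api_token, poll_interval,
                     submit_original_filename, max_job_age):
            self.job_queue = job_queue
            self.url = url
            self.api_token = api_token
            self.poll_interval = poll_interval
            self.submit_original_filename = submit_original_filename
            self.max_job_age = max_job_age
            self.jobs = {}  # job id -> (sample, submit timestamp)
            self.running = False
            self.thread = None

        def start_tracker(self):
            """ Start the polling thread; return False on failure. """
            self.running = True
            self.thread = threading.Thread(target=self.track, name="JobTracker")
            self.thread.start()
            return True

        def track(self):
            while self.running:
                now = time.time()
                for job_id, (sample, submitted) in list(self.jobs.items()):
                    if now - submitted > self.max_job_age:
                        # do not cancel the job, just stop tracking it
                        logger.warning("Giving up on job %s", job_id)
                        del self.jobs[job_id]
                    # otherwise: ask the Cortex REST API for the job status
                    # and hand the sample back to the job queue once done
                time.sleep(self.poll_interval)

        def shut_down(self):
            """ Ask the tracker thread to terminate. """
            self.running = False

        def close_down(self):
            """ Wait for the tracker thread to finish. """
            if self.thread is not None:
                self.thread.join()

In line with the commit message, a loop like this would also be the place where proper retry and backoff handling around the Cortex API calls would eventually live.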
162 changes: 127 additions & 35 deletions peekaboo/ruleset/rules.py
@@ -32,14 +32,14 @@
from peekaboo.ruleset.expressions import ExpressionParser, \
IdentifierMissingException
from peekaboo.exceptions import PeekabooAnalysisDeferred, \
CuckooSubmitFailedException, PeekabooRulesetConfigError
PeekabooRulesetConfigError
from peekaboo.sample import Sample
from peekaboo.toolbox.cuckoo import CuckooReport
from peekaboo.toolbox.cuckoo import CuckooReport, CuckooSubmitFailedException
from peekaboo.toolbox.ole import Oletools, OletoolsReport
from peekaboo.toolbox.file import Filetools, FiletoolsReport
from peekaboo.toolbox.known import Knowntools, KnowntoolsReport
from peekaboo.toolbox.cortex import Cortextools, CortexReport

from peekaboo.toolbox.cortex import CortexReport, \
CortexSubmitFailedException, CortexAnalyzerReportMissingException

logger = logging.getLogger(__name__)

@@ -50,6 +50,7 @@ class Rule:
connection) or helper functions. """
rule_name = 'unimplemented'
uses_cuckoo = False
uses_cortex = False

def __init__(self, config, db_con):
""" Initialize common configuration and resources.
@@ -63,6 +64,7 @@ def __init__(self, config, db_con):
self.db_con = db_con

self.cuckoo = None
self.cortex = None

# initialise and validate configuration
self.config_options = {}
@@ -131,6 +133,15 @@ def set_cuckoo_job_tracker(self, cuckoo):
"""
self.cuckoo = cuckoo

def set_cortex_job_tracker(self, cortex):
""" Set the Cortex job tracker to use for submitting samples to Cortex
as well as tracking status.
@param cortex: the Cortex job tracker to use
@type cortex: Cortex
"""
self.cortex = cortex

def get_cuckoo_report(self, sample):
""" Get the samples cuckoo_report or submit the sample for analysis by
Cuckoo.
@@ -176,12 +187,50 @@ def get_knowntools_report(self, sample):
"""
return Knowntools(sample, self.db_con).get_report()

def get_cortextools_report(self, sample):
""" Get a Cortextools report on the sample.
def get_cortex_report(self, sample):
""" Get the sample's Cortex report.
@returns: CortexReport or None if a previous analysis attempt has
already failed.
"""
if sample.cortex_failed:
return None

@returns: CortextoolsReport
report = sample.cortex_report
if report is None:
# here we synthesize the main CortexReport as a (mostly) empty
# proxy and attach it to the sample. Since the report consists of
# potentially multiple subreports of Cortex analyzers, the report
# may request submission to an actual analyzer through an
# exception when accessing certain properties.
report = CortexReport()
sample.register_cortex_report(report)

return report

def submit_to_cortex(self, sample, analyzer):
""" Submit the sample to an actual Cortex analyzer to augment the
report.
@param sample: The sample to submit to Cortex.
@type sample: Sample
@param analyzer: The Cortex analyzer to submit to.
@type analyzer: subclass of CortexAnalyzer
@returns: None if submit failed
@raises PeekabooAnalysisDeferred: if successfully submitted to abort
ruleset run until result has been
retrieved.
"""
return Cortextools(sample).get_report()
logger.debug("Submitting %s to Cortex", sample.submit_path)
try:
job_id = self.cortex.submit(sample, analyzer)
except CortexSubmitFailedException as failed:
logger.error("Submit to Cortex failed: %s", failed)
return None

logger.info('Sample submitted to Cortex. Job ID: %s. '
'Sample: %s', job_id, sample)
raise PeekabooAnalysisDeferred()


class KnownRule(Rule):
@@ -591,6 +640,10 @@ def get_config(self):
# attempting anything illegal
try:
parsed_expression.eval(context=context)
except CortexAnalyzerReportMissingException:
# This exception tells us that CortexReport knows the analyzer
# and wants a job submitted. So all is well.
pass
except IdentifierMissingException as missing:
# our dummy context provides everything we would provide at
# runtime as well, so any missing identifier is an error at
@@ -623,6 +676,55 @@ def uses_cuckoo(self):
class variable with a dynamic determination. """
return self.uses_identifier("cuckooreport")

@property
def uses_cortex(self):
""" Tells if any expression uses the Cortex report. Overrides base
class variable with a dynamic determination. """
return self.uses_identifier("cortexreport")

def resolve_identifier(self, identifier, context, sample):
""" Resolves a missing identifer into an object.
@param identifer: Name of identifer to resolve.
@type identifier: string
@returns: object or None if identifier is unknown.
"""
if identifier == "cuckooreport":
logger.debug("Expression requests cuckoo report")
value = self.get_cuckoo_report(sample)
if value is None:
return self.result(
Result.failed,
_("Evaluation of expression couldn't get cuckoo "
"report."),
False)
elif identifier == "olereport":
logger.debug("Expression requests oletools report")
value = self.get_oletools_report(sample)
elif identifier == "filereport":
logger.debug("Expression requests filetools report")
value = self.get_filetools_report(sample)
elif identifier == "knownreport":
logger.debug("Expression requests knowntools report")
value = self.get_knowntools_report(sample)
elif identifier == "cortexreport":
logger.debug("Expression requests cortex report")
value = self.get_cortex_report(sample)
if value is None:
return self.result(
Result.failed,
_("Evaluation of expression couldn't get Cortex "
"report."),
False)
# elif here for other identifiers
else:
return self.result(
Result.failed,
_("Evaluation of expression uses undefined identifier."), False)

context['variables'][identifier] = value
return None

def evaluate(self, sample):
""" Match what rules report against our known result status names. """
for ruleno, expression in enumerate(self.expressions):
@@ -632,42 +734,32 @@ def evaluate(self, sample):
# retry until expression evaluation doesn't throw exceptions any
# more
while True:
identifier = None
cortex_analyzer = None
try:
result = expression.eval(context=context)
break
except IdentifierMissingException as missing:
identifier = missing.name

if identifier == "cuckooreport":
logger.debug("Expression requests cuckoo report")
value = self.get_cuckoo_report(sample)
if value is None:
return self.result(
Result.failed,
_("Evaluation of expression couldn't get cuckoo "
"report."),
False)
elif identifier == "olereport":
logger.debug("Expression requests oletools report")
value = self.get_oletools_report(sample)
elif identifier == "filereport":
logger.debug("Expression requests filetools report")
value = self.get_filetools_report(sample)
elif identifier == "knownreport":
logger.debug("Expression requests knowntools report")
value = self.get_knowntools_report(sample)
elif identifier == "cortexreport":
logger.debug("Expression requests cortextools report")
value = self.get_cortextools_report(sample)
# elif here for other identifiers
else:
except CortexAnalyzerReportMissingException as missing:
cortex_analyzer = missing.analyzer

if identifier is not None:
result = self.resolve_identifier(
identifier, context, sample)
if result is not None:
return result

if cortex_analyzer is not None:
self.submit_to_cortex(sample, cortex_analyzer)
# submission either raises an exception or has failed, so
# getting here is an error
return self.result(
Result.failed,
_("Evaluation of expression uses undefined "
"identifier."),
_("Evaluation of expression failed to submit Cortex "
"analysis."),
False)

context['variables'][identifier] = value
# beware: here we intentionally loop on through for retry

# our implication returns None if expression did not match

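To round off the rules.py changes: the retry loop in evaluate() relies on CortexReport acting as a proxy that demands an analyzer run whenever a not-yet-available subreport is accessed. A minimal sketch of that mechanism, with hypothetical internals (only the class and exception names appear in the diff above):

    class FileInfoAnalyzer:
        """ Hypothetical stand-in for a concrete Cortex analyzer class. """
        name = 'FileInfo'


    class CortexAnalyzerReportMissingException(Exception):
        """ Raised when a subreport is accessed before its analyzer has run.
        Carries the analyzer whose job needs to be submitted. """
        def __init__(self, analyzer):
            super().__init__("Analyzer report missing: %s" % analyzer.name)
            self.analyzer = analyzer


    class CortexReport:
        """ Proxy collecting the reports of individual Cortex analyzers. """
        def __init__(self):
            self.analyzer_reports = {}

        def register_report(self, analyzer, report):
            """ Attach a finished analyzer report to this proxy. """
            self.analyzer_reports[analyzer.name] = report

        def get_report(self, analyzer):
            """ Return the analyzer's report or demand its submission. """
            report = self.analyzer_reports.get(analyzer.name)
            if report is None:
                # makes the expression rule submit a job for this analyzer
                # and retry evaluation once the result has arrived
                raise CortexAnalyzerReportMissingException(analyzer)
            return report

        @property
        def FileInfoReport(self):
            # hypothetical: each documented attribute maps to one analyzer
            return self.get_report(FileInfoAnalyzer())

With something along these lines in place, an expression touching cortexreport.FileInfoReport would trigger a job submission for that analyzer, and because the CortexReport proxy is registered on the sample (register_cortex_report() above), the finished result can be reused when the sample is requeued and the ruleset is rerun.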