From 0be35ff7e036a92707a7fbd897f44b37f2759271 Mon Sep 17 00:00:00 2001 From: lonnen Date: Fri, 19 Jan 2018 19:54:32 -0800 Subject: [PATCH] Remove rule_sets construct Restrict the processor to a single array of rules. This is similar to the structure of the signature utilities, but it instantiates the rules with configuration. This moves rules out of configuration, and out of the required_config population change. To keep the rules happy, all necessary configuration for rules has moved to the Processor_2015 class and is passed to rules at initialization in the `config` variable. Required config options are left on the rules to support testing. --- socorro/processor/breakpad_transform_rules.py | 2 +- socorro/processor/processor_2015.py | 356 +++++++++--------- .../unittest/processor/test_processor_2015.py | 166 +++----- 3 files changed, 232 insertions(+), 292 deletions(-) diff --git a/socorro/processor/breakpad_transform_rules.py b/socorro/processor/breakpad_transform_rules.py index d55f6ef7d1..14ae07944e 100644 --- a/socorro/processor/breakpad_transform_rules.py +++ b/socorro/processor/breakpad_transform_rules.py @@ -352,7 +352,7 @@ def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta): dump_file_pathname = raw_dumps[dump_name] - if self.config.chatty: + if self.config.get('chatty'): self.config.logger.debug( "BreakpadStackwalkerRule2015: %s, %s", dump_name, diff --git a/socorro/processor/processor_2015.py b/socorro/processor/processor_2015.py index 514b8611f4..744233def8 100644 --- a/socorro/processor/processor_2015.py +++ b/socorro/processor/processor_2015.py @@ -7,130 +7,195 @@ as sets of loadable rules. The rules are applied one at a time, each doing some small part of the transformation process.""" -import ujson +import os +import tempfile -from configman import Namespace, RequiredConfig, class_converter -from configman.dotdict import DotDict as OrderedDotDict +from configman import ( + Namespace, + RequiredConfig, +) +from configman.converters import str_to_list, str_to_python_object +from socorro.lib.converters import change_default from socorro.lib.datetimeutil import utc_now from socorro.lib.util import DotDict -# Rule sets are defined as lists of lists (or tuples). As they will be loaded -# from json, they will always come in a lists rather than tuples. Arguably, -# tuples may be more appropriate, but really, they can be anything iterable. +from socorro.processor.breakpad_transform_rules import ( + BreakpadStackwalkerRule2015, + CrashingThreadRule, + ExternalProcessRule, # imported to override config, not instantiated + JitCrashCategorizeRule, +) + +from socorro.processor.general_transform_rules import ( + CPUInfoRule, + IdentifierRule, + OSInfoRule, +) + +from socorro.processor.rules.memory_report_extraction import ( + MemoryReportExtraction, +) + +from socorro.processor.mozilla_transform_rules import ( + AddonsRule, + AuroraVersionFixitRule, + BetaVersionRule, + DatesAndTimesRule, + EnvironmentRule, + ESRVersionRewrite, + ExploitablityRule, + FlashVersionRule, + JavaProcessRule, + MissingSymbolsRule, + OSPrettyVersionRule, + OutOfMemoryBinaryRule, + PluginContentURL, + PluginRule, + PluginUserComment, + ProductRewrite, + ProductRule, + SignatureGeneratorRule, + ThemePrettyNameRule, + TopMostFilesRule, + UserDataRule, + Winsock_LSPRule, +) -# The outermost sequence is a list of rule sets. There can be any number of -# them and can be organized at will. The example below shows an organization -# by processing stage: pre-processing the raw_crash, converter raw to -# processed, and post-processing the processed_crash. -# Each rule set is defined by five elements: -# rule name: any useful string -# tag: a categorization system, programmer defined system (for future) -# rule set class: the fully qualified name of the class that implements -# the rule application process. On the introduction of -# Processor2015, the only option is the one in the example. -# rule list: a comma delimited list of fully qualified class names that -# implement the individual transformation rules. The API that -# these classes must conform to is defined by the rule base class -# socorro.lib.transform_rules.Rule default_rule_set = [ - [ # rules to change the internals of the raw crash - "transform_rules", - "socorro.processor.mozilla_transform_rules.ProductRewrite," - "socorro.processor.mozilla_transform_rules.ESRVersionRewrite," - "socorro.processor.mozilla_transform_rules.PluginContentURL," - "socorro.processor.mozilla_transform_rules.PluginUserComment," - # rules to transform a raw crash into a processed crash - "socorro.processor.general_transform_rules.IdentifierRule, " - "socorro.processor.breakpad_transform_rules.BreakpadStackwalkerRule2015, " - "socorro.processor.mozilla_transform_rules.ProductRule, " - "socorro.processor.mozilla_transform_rules.UserDataRule, " - "socorro.processor.mozilla_transform_rules.EnvironmentRule, " - "socorro.processor.mozilla_transform_rules.PluginRule, " - "socorro.processor.mozilla_transform_rules.AddonsRule, " - "socorro.processor.mozilla_transform_rules.DatesAndTimesRule, " - "socorro.processor.mozilla_transform_rules.OutOfMemoryBinaryRule, " - "socorro.processor.mozilla_transform_rules.JavaProcessRule, " - "socorro.processor.mozilla_transform_rules.Winsock_LSPRule, " - # post processing of the processed crash - "socorro.processor.breakpad_transform_rules.CrashingThreadRule, " - "socorro.processor.general_transform_rules.CPUInfoRule, " - "socorro.processor.general_transform_rules.OSInfoRule, " - "socorro.processor.mozilla_transform_rules.BetaVersionRule, " - "socorro.processor.mozilla_transform_rules.ExploitablityRule, " - "socorro.processor.mozilla_transform_rules.AuroraVersionFixitRule, " - "socorro.processor.mozilla_transform_rules.FlashVersionRule, " - "socorro.processor.mozilla_transform_rules.OSPrettyVersionRule, " - "socorro.processor.mozilla_transform_rules.TopMostFilesRule, " - "socorro.processor.mozilla_transform_rules.ThemePrettyNameRule, " - "socorro.processor.rules.memory_report_extraction.MemoryReportExtraction, " - # a set of classifiers to help with jit crashes - "socorro.processor.breakpad_transform_rules.JitCrashCategorizeRule, " - # generate signature now that we've done all the processing it depends on - 'socorro.processor.mozilla_transform_rules.SignatureGeneratorRule, ' - ] + # rules to change the internals of the raw crash + ProductRewrite, + ESRVersionRewrite, + PluginContentURL, + PluginUserComment, + # rules to transform a raw crash into a processed crash + IdentifierRule, + BreakpadStackwalkerRule2015, + ProductRule, + UserDataRule, + EnvironmentRule, + PluginRule, + AddonsRule, + DatesAndTimesRule, + OutOfMemoryBinaryRule, + JavaProcessRule, + Winsock_LSPRule, + # post processing of the processed crash + CrashingThreadRule, + CPUInfoRule, + OSInfoRule, + BetaVersionRule, + ExploitablityRule, + AuroraVersionFixitRule, + FlashVersionRule, + OSPrettyVersionRule, + TopMostFilesRule, + ThemePrettyNameRule, + MemoryReportExtraction, + # a set of classifiers to help with jit crashes + JitCrashCategorizeRule, + # generate signature now that we've done all the processing it depends on + SignatureGeneratorRule, ] -# rules come into Socorro via Configman. Configman defines them as strings -# conveniently, a json module can be used to serialize and deserialize them. -default_rules_set_str = ujson.dumps(default_rule_set) - - -def rule_sets_from_string(rule_sets_as_string): - """this configman converter takes a json file in the form of a string, - and converts it into rules sets for use in the processor. See the - default rule set above for the form.""" - rule_sets = ujson.loads(rule_sets_as_string) - - class ProcessorRuleSets(RequiredConfig): - # why do rules come in sets? Why not just have a big list of rules? - # rule sets are containers for rules with a similar purpose and - # execution mode. For example, there are rule sets for adjusting the - # raw_crash, transforming raw to processed, post processing the - # processed_crash and then all the different forms of classifiers. - # Some rule sets have different execution modes: run all the rules, - # run the rules until one fails, run the rules until one succeeds, - # etc. - required_config = Namespace() - - names = [] - for (name, default_rules_str) in rule_sets: - #name = "raw_to_processed_transform" - names.append(name) - required_config.namespace(name) - - required_config[name].add_option( - name='rules_list', - doc='a list of fully qualified class names for the rules', - default=default_rules_str, - from_string_converter=str_to_classes_in_namespaces_converter(), - likely_to_be_changed=True, - ) - - @classmethod - def to_str(klass): - return "'%s'" % rule_sets_as_string - - return ProcessorRuleSets - class Processor2015(RequiredConfig): """this class is a generalization of the Processor into a rule processing framework. This class is suitable for use in the 'processor_app' introducted in 2012.""" - required_config = Namespace() + required_config = Namespace('transform_rules') + required_config.add_option( + 'database_class', + doc="the class of the database", + default='socorro.external.postgresql.connection_context.' + 'ConnectionContext', + from_string_converter=str_to_python_object, + reference_value_from='resource.postgresql', + ) + required_config.add_option( + 'transaction_executor_class', + default="socorro.database.transaction_executor." + "TransactionExecutorWithInfiniteBackoff", + doc='a class that will manage transactions', + from_string_converter=str_to_python_object, + reference_value_from='resource.postgresql', + ) + required_config.add_option( + 'dump_field', + doc='the default name of a dump', + default='upload_file_minidump', + ) + required_config.command_pathname = change_default( + ExternalProcessRule, + 'command_pathname', + # NOTE(willkg): This is the path for the RPM-based Socorro deploy. When + # we switch to Docker, we should change this. + '/data/socorro/stackwalk/bin/stackwalker', + ) + required_config.add_option( + 'result_key', + doc='the key where the external process result should be stored ' + 'in the processed crash', + default='%s_result' % + required_config.command_pathname.default.split('/')[-1] + .replace('-', ''), + ) + required_config.add_option( + 'return_code_key', + doc='the key where the external process return code should be stored ' + 'in the processed crash', + default='%s_return_code' % + required_config.command_pathname.default.split('/')[-1], + ) + required_config.add_option( + name='symbols_urls', + doc='comma delimited ordered list of urls for symbol lookup', + default='https://localhost', + from_string_converter=str_to_list, + likely_to_be_changed=True + ) + required_config.command_line = change_default( + ExternalProcessRule, + 'command_line', + 'timeout -s KILL {kill_timeout} {command_pathname} ' + '--raw-json {raw_crash_pathname} ' + '{symbols_urls} ' + '--symbols-cache {symbol_cache_path} ' + '--symbols-tmp {symbol_tmp_path} ' + '{dump_file_pathname} ' + '2> /dev/null' + ) + required_config.add_option( + 'kill_timeout', + doc='amount of time to let mdsw run before declaring it hung', + default=600 + ) required_config.add_option( - name='rule_sets', - doc="a hierarchy of rules in json form", - default=default_rules_set_str, - from_string_converter=rule_sets_from_string, - likely_to_be_changed=True, + 'symbol_tmp_path', + doc=( + 'directory to use as temp space for downloading symbols--must be ' + 'on the same filesystem as symbols-cache' + ), + default=os.path.join(tempfile.gettempdir(), 'symbols-tmp'), + ), + required_config.add_option( + 'symbol_cache_path', + doc=( + 'the path where the symbol cache is found, this location must be ' + 'readable and writeable (quote path with embedded spaces)' + ), + default=os.path.join(tempfile.gettempdir(), 'symbols'), + ) + required_config.add_option( + 'temporary_file_system_storage_path', + doc='a path where temporary files may be written', + default=tempfile.gettempdir(), ) - def __init__(self, config, quit_check_callback=None): + def __init__(self, config, rules=None, quit_check_callback=None): super(Processor2015, self).__init__() self.config = config # the quit checks are components of a system of callbacks used @@ -148,29 +213,11 @@ def __init__(self, config, quit_check_callback=None): else: self.quit_check = lambda: False - self.rule_system = OrderedDotDict() - self.rules = [] - - # here we instantiate the rule sets and their rules. - if 'transform_rules' not in config: - return # no rules - - self.config.logger.debug('setting up transform_rules') - - # begin TRS insert - trs_config = config['transform_rules'] + rule_set = rules or list(default_rule_set) - list_of_rules = trs_config.rules_list.class_list - - for a_rule_class_name, a_rule_class, ns_name in list_of_rules: - try: - self.rules.append( - a_rule_class(trs_config[ns_name]) - ) - except KeyError: - self.rules.append( - a_rule_class(trs_config) - ) + self.rules = [] + for a_rule_class in rule_set: + self.rules.append(a_rule_class(config)) def process_crash(self, raw_crash, raw_dumps, processed_crash): """Take a raw_crash and its associated raw_dumps and return a @@ -287,60 +334,3 @@ def close(self): # no close method mean no need to close continue close_method() - - -def str_to_classes_in_namespaces_converter(): - - def class_list_converter(class_list_str): - """This function becomes the actual converter used by configman to - take a string and convert it into the nested sequence of Namespaces, - one for each class in the list. It does this by creating a proxy - class stuffed with its own 'required_config' that's dynamically - generated.""" - if isinstance(class_list_str, basestring): - class_str_list = [] - for class_str_padded in class_list_str.split(','): - class_str = class_str_padded.strip() - if class_str: - class_str_list.append(class_str) - - else: - raise TypeError('must be derivative of a basestring') - - class InnerClassList(RequiredConfig): - """This nested class is a proxy list for the classes. It collects - all the config requirements for the listed classes and places them - each into their own Namespace. - """ - # we're dynamically creating a class here. The following block of - # code is actually adding class level attributes to this new class - - # 1st requirement for configman - required_config = Namespace() - - # to help the programmer know what Namespaces we added - subordinate_namespace_names = [] - - # for each class in the class list - class_list = [] - for class_list_element in class_str_list: - a_class = class_converter(class_list_element) - - # figure out the Namespace name - namespace_name = a_class.__name__ - class_list.append((namespace_name, a_class, namespace_name)) - subordinate_namespace_names.append(namespace_name) - # create the new Namespace - required_config.namespace(namespace_name) - a_class_namespace = required_config[namespace_name] - a_class_namespace.add_option( - 'qualified_class_name', - doc='fully qualified classname', - default=class_list_element, - from_string_converter=class_converter, - likely_to_be_changed=True, - ) - - return InnerClassList # result of class_list_converter - - return class_list_converter # result of classes_in_namespaces_converter diff --git a/socorro/unittest/processor/test_processor_2015.py b/socorro/unittest/processor/test_processor_2015.py index 0d569c0ccf..9c0a73f601 100644 --- a/socorro/unittest/processor/test_processor_2015.py +++ b/socorro/unittest/processor/test_processor_2015.py @@ -4,10 +4,7 @@ from configman.dotdict import DotDict from mock import Mock, patch -from socorro.processor.processor_2015 import ( - Processor2015, - rule_sets_from_string -) +from socorro.processor.processor_2015 import Processor2015 from socorro.processor.general_transform_rules import ( CPUInfoRule, OSInfoRule @@ -15,126 +12,32 @@ from socorro.unittest.testbase import TestCase -thread_over_255_chars = { - "frames": [ - { - "file": "nsTerminator.cpp:604367e1fa5e", - "frame": 0, - "function": ("mozilla::`anonymous namespace::" - "RunWatchdog(void *)" + "a" * 255), - "function_offset": "0x0", - "line": 151, - "module": "xul.dll", - "module_offset": "0x783f2b", - "offset": "0x67903f2b", - "registers": { - "eax": "0x0000003f", - "ebp": "0x163ff96c", - "ebx": "0x0cf44450", - "ecx": "0x691e3698", - "edi": "0x76d3f551", - "edx": "0x01dc1010", - "efl": "0x00000246", - "eip": "0x67903f2b", - "esi": "0x0000003f", - "esp": "0x163ff968" - }, - "trust": "context" - } - ] -} -stackwalker_output_str = ujson.dumps({ - "crash_info": { - "address": "0x0", - "crashing_thread": 0, - "type": "EXC_BAD_ACCESS / KERN_INVALID_ADDRESS" - }, - "status": "OK", - "crashing_thread": thread_over_255_chars, - "threads": [thread_over_255_chars], - "system_info": { - "os": "Windows NT", - "cpu_arch": "x86" - }, - "sensitive": { - "exploitability": "high" - }, -}) - - -rule_set_01 = [ - [ - 'transform_rules', - 'socorro.processor.general_transform_rules.CPUInfoRule, ' - 'socorro.processor.general_transform_rules.OSInfoRule ' - ] -] -rule_set_01_str = ujson.dumps(rule_set_01) - - class TestProcessor2015(TestCase): - def test_rule_sets_from_string_1(self): - rule_set_config = rule_sets_from_string(rule_set_01_str) - rc = rule_set_config.get_required_config() - assert 'transform_rules' in rc - expected = ( - 'socorro.processor.general_transform_rules.CPUInfoRule, ' - 'socorro.processor.general_transform_rules.OSInfoRule ' - ) - assert rc.transform_rules.rules_list.default == expected def test_Processor2015_init(self): cm = ConfigurationManager( definition_source=Processor2015.get_required_config(), - values_source_list=[{'rule_sets': rule_set_01_str}], + values_source_list=[], ) config = cm.get_config() config.logger = Mock() - p = Processor2015(config) - - assert isinstance(p.rule_system, DotDict) - assert len(p.rules) == 2 - assert isinstance(p.rules[0], CPUInfoRule) - assert isinstance(p.rules[1], OSInfoRule) - - def test_process_crash_no_rules(self): - cm = ConfigurationManager( - definition_source=Processor2015.get_required_config(), - values_source_list=[{'rule_sets': '[]'}], - ) - config = cm.get_config() - config.logger = Mock() - config.processor_name = 'dwight' - - p = Processor2015(config) - raw_crash = DotDict() - raw_dumps = {} - with patch('socorro.processor.processor_2015.utc_now') as faked_utcnow: - faked_utcnow.return_value = '2015-01-01T00:00:00' - processed_crash = p.process_crash( - raw_crash, - raw_dumps, - DotDict() - ) - - assert processed_crash.success - assert processed_crash.started_datetime == '2015-01-01T00:00:00' - assert processed_crash.startedDateTime == '2015-01-01T00:00:00' - assert processed_crash.completed_datetime == '2015-01-01T00:00:00' - assert processed_crash.completeddatetime == '2015-01-01T00:00:00' - assert processed_crash.processor_notes == 'dwight; Processor2015' + Processor2015(config) def test_process_crash_existing_processed_crash(self): cm = ConfigurationManager( definition_source=Processor2015.get_required_config(), - values_source_list=[{'rule_sets': '[]'}], + values_source_list=[], ) config = cm.get_config() config.logger = Mock() + config.processor_name = 'dwight' - p = Processor2015(config) + p = Processor2015(config, rules=[ + CPUInfoRule, + OSInfoRule, + ]) raw_crash = DotDict() raw_dumps = {} processed_crash = DotDict() @@ -154,13 +57,59 @@ def test_process_crash_existing_processed_crash(self): assert processed_crash.completed_datetime == '2015-01-01T00:00:00' assert processed_crash.completeddatetime == '2015-01-01T00:00:00' expected = ( - "dwight; Processor2015; earlier processing: 2014-01-01T00:00:00; we've been here " - "before; yep" + "dwight; Processor2015; earlier processing: 2014-01-01T00:00:00;" + " we've been here before; yep" ) assert processed_crash.processor_notes == expected @patch('socorro.processor.breakpad_transform_rules.subprocess') def test_process_over_255_chars(self, mocked_subprocess_module): + thread_over_255_chars = { + "frames": [ + { + "file": "nsTerminator.cpp:604367e1fa5e", + "frame": 0, + "function": ("mozilla::`anonymous namespace::" + "RunWatchdog(void *)" + "a" * 255), + "function_offset": "0x0", + "line": 151, + "module": "xul.dll", + "module_offset": "0x783f2b", + "offset": "0x67903f2b", + "registers": { + "eax": "0x0000003f", + "ebp": "0x163ff96c", + "ebx": "0x0cf44450", + "ecx": "0x691e3698", + "edi": "0x76d3f551", + "edx": "0x01dc1010", + "efl": "0x00000246", + "eip": "0x67903f2b", + "esi": "0x0000003f", + "esp": "0x163ff968" + }, + "trust": "context" + } + ] + } + stackwalker_output_str = ujson.dumps({ + "crash_info": { + "address": "0x0", + "crashing_thread": 0, + "type": "EXC_BAD_ACCESS / KERN_INVALID_ADDRESS" + }, + "status": "OK", + "crashing_thread": thread_over_255_chars, + "threads": [thread_over_255_chars], + "system_info": { + "os": "Windows NT", + "cpu_arch": "x86" + }, + "sensitive": { + "exploitability": "high" + }, + }) + cm = ConfigurationManager( definition_source=( Processor2015.get_required_config(), @@ -179,6 +128,7 @@ def test_process_over_255_chars(self, mocked_subprocess_module): stackwalker_output_str ) p = Processor2015(config) + raw_crash = DotDict({ "uuid": "00000000-0000-0000-0000-000002140504", "CrashTime": "1336519554",