diff --git a/sql/data_masking.py b/sql/data_masking.py index dba9d274..abb76cc8 100644 --- a/sql/data_masking.py +++ b/sql/data_masking.py @@ -145,9 +145,19 @@ def analy_query_tree(self, query_tree, cluster_name): hit_columns = [] # 命中列 table_hit_columns = [] # 涉及表命中的列 - # 获取select信息的规则,仅处理type为FIELD_ITEM的select信息,如[*],[*,column_a],[column_a,*],[column_a,a.*,column_b],[a.*,column_a,b.*], - select_index = [select_item['field'] for select_item in select_list if - select_item['type'] == 'FIELD_ITEM'] + # 判断是否存在不支持脱敏的语法 + for select_item in select_list: + if select_item['type'] not in ('FIELD_ITEM', 'aggregate'): + raise Exception('不支持该查询语句脱敏!') + + # 获取select信息的规则,仅处理type为FIELD_ITEM和aggregate类型的select信息,如[*],[*,column_a],[column_a,*],[column_a,a.*,column_b],[a.*,column_a,b.*], + select_index = [ + select_item['field'] if select_item['type'] == 'FIELD_ITEM' else select_item['aggregate']['field'] for + select_item in select_list if select_item['type'] in ('FIELD_ITEM', 'aggregate')] + + # 处理select_list,为统一的{'type': 'FIELD_ITEM', 'db': 'archer_master', 'table': 'sql_users', 'field': 'email'}格式 + select_list = [select_item if select_item['type'] == 'FIELD_ITEM' else select_item['aggregate'] for + select_item in select_list if select_item['type'] in ('FIELD_ITEM', 'aggregate')] if select_index: # 如果发现存在field='*',则遍历所有表,找出所有的命中字段 @@ -163,86 +173,61 @@ def analy_query_tree(self, query_tree, cluster_name): elif re.match(r"^(\*,)+(\w,?)+$", ','.join(select_index)): # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,index采取后切片 for index, item in enumerate(select_list): - if item['type'] == 'FIELD_ITEM': - item['index'] = index - len(select_list) - if item['field'] != '*': - columns.append(item) - - for column in columns: - hit_info = self.hit_column(DataMaskingColumnsOb, cluster_name, column['db'], - column['table'], column['field']) - if hit_info['is_hit']: - hit_info['index'] = column['index'] - hit_columns.append(hit_info) + item['index'] = index - len(select_list) + if item['field'] != '*': + columns.append(item) + # [column_a, *] elif re.match(r"^(\w,?)+(\*,?)+$", ','.join(select_index)): # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,index采取前切片 for index, item in enumerate(select_list): - if item['type'] == 'FIELD_ITEM': - item['index'] = index - if item['field'] != '*': - columns.append(item) - - for column in columns: - hit_info = self.hit_column(DataMaskingColumnsOb, cluster_name, column['db'], - column['table'], column['field']) - if hit_info['is_hit']: - hit_info['index'] = column['index'] - hit_columns.append(hit_info) + item['index'] = index + if item['field'] != '*': + columns.append(item) + # [column_a,a.*,column_b] elif re.match(r"^(\w,?)+(\*,?)+(\w,?)+$", ','.join(select_index)): # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,*前面的字段index采取前切片,*后面的字段采取后切片 for index, item in enumerate(select_list): - if item['type'] == 'FIELD_ITEM': - item['index'] = index - if item['field'] == '*': - first_idx = index - break + item['index'] = index + if item['field'] == '*': + first_idx = index + break select_list.reverse() for index, item in enumerate(select_list): - if item['type'] == 'FIELD_ITEM': - item['index'] = index - if item['field'] == '*': - last_idx = len(select_list) - index - 1 - break + item['index'] = index + if item['field'] == '*': + last_idx = len(select_list) - index - 1 + break select_list.reverse() for index, item in enumerate(select_list): - if item['type'] == 'FIELD_ITEM': - if item['field'] != '*' and index < first_idx: - item['index'] = index - columns.append(item) - - if item['field'] != '*' and index > last_idx: - item['index'] = index - len(select_list) - columns.append(item) - - for column in columns: - hit_info = self.hit_column(DataMaskingColumnsOb, cluster_name, column['db'], - column['table'], column['field']) - if hit_info['is_hit']: - hit_info['index'] = column['index'] - hit_columns.append(hit_info) + if item['field'] != '*' and index < first_idx: + item['index'] = index + + if item['field'] != '*' and index > last_idx: + item['index'] = index - len(select_list) + columns.append(item) # [a.*, column_a, b.*] else: - hit_columns = [] - return table_hit_columns, hit_columns + raise Exception('不支持select信息为[a.*, column_a, b.*]格式的查询脱敏!') + # 没有*的查询,直接遍历查询命中字段,query_tree的列index就是查询语句列的index else: for index, item in enumerate(select_list): - if item['type'] == 'FIELD_ITEM': - item['index'] = index - if item['field'] != '*': - columns.append(item) + item['index'] = index + if item['field'] != '*': + columns.append(item) - for column in columns: - hit_info = self.hit_column(DataMaskingColumnsOb, cluster_name, column['db'], column['table'], - column['field']) - if hit_info['is_hit']: - hit_info['index'] = column['index'] - hit_columns.append(hit_info) + # 格式化命中的列信息 + for column in columns: + hit_info = self.hit_column(DataMaskingColumnsOb, cluster_name, column['db'], column['table'], + column['field']) + if hit_info['is_hit']: + hit_info['index'] = column['index'] + hit_columns.append(hit_info) return table_hit_columns, hit_columns # 判断字段是否命中脱敏规则,如果命中则返回脱敏的规则id和规则类型 diff --git a/sql/extend_json_encoder.py b/sql/extend_json_encoder.py index 6b38149e..7c45cba6 100644 --- a/sql/extend_json_encoder.py +++ b/sql/extend_json_encoder.py @@ -1,7 +1,7 @@ # -*- coding: UTF-8 -*- import simplejson as json -from datetime import datetime, date +from datetime import datetime, date, timedelta from decimal import Decimal from functools import singledispatch @@ -36,6 +36,10 @@ def _(o): return o.strftime('%Y-%m-%d') +@convert.register(timedelta) +def _(o): + return o.total_seconds() + # @convert.register(Decimal) # def _(o): # return float(o) @@ -59,7 +63,9 @@ def default(self, obj): 'dm': dm, 'dt': dt, 'dat': dat, + 'tl': timedelta(minutes=30), 'bigint': 988983860501598208 } -#print(json.dumps(data, cls=ExtendJSONEncoder, bigint_as_string=True)) +# print(json.dumps(data, cls=ExtendJSONEncoder, bigint_as_string=True)) +# print(json.dumps(data, cls=ExtendJSONEncoder, bigint_as_string=True, default=str)) diff --git a/sql/models.py b/sql/models.py index ee59d8dd..a6aa7d3f 100644 --- a/sql/models.py +++ b/sql/models.py @@ -290,7 +290,7 @@ class Meta: # SlowQuery class SlowQuery(models.Model): - checksum = models.BigIntegerField(primary_key=True) + checksum = models.CharField(max_length=32, primary_key=True) fingerprint = models.TextField() sample = models.TextField() first_seen = models.DateTimeField(blank=True, null=True) @@ -408,6 +408,7 @@ class SlowQueryHistory(models.Model): class Meta: managed = False db_table = 'mysql_slow_query_review_history' - unique_together = ('hostname_max', 'ts_min') + unique_together = ('checksum', 'ts_min', 'ts_max') + index_together = ('hostname_max', 'ts_min') verbose_name = u'慢日志明细' verbose_name_plural = u'慢日志明细' diff --git a/sql/query.py b/sql/query.py index 517f21e5..298e1347 100644 --- a/sql/query.py +++ b/sql/query.py @@ -945,7 +945,7 @@ def slowquery_review_history(request): # 获取慢查明细数据 slowsql_record_obj = SlowQueryHistory.objects.filter( hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), - checksum=int(SQLId), + checksum=SQLId, ts_min__range=(StartTime, EndTime) ).annotate(ExecutionStartTime=F('ts_min'), # 执行开始时间 DBName=F('db_max'), # 数据库名 @@ -962,7 +962,7 @@ def slowquery_review_history(request): slowsql_obj_count = SlowQueryHistory.objects.filter( hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), - checksum=int(SQLId), + checksum=SQLId, ts_min__range=(StartTime, EndTime) ).count() else: diff --git a/src/script/mysql_slow_query_review.sql b/src/script/mysql_slow_query_review.sql index 3813698e..f00d65e6 100644 --- a/src/script/mysql_slow_query_review.sql +++ b/src/script/mysql_slow_query_review.sql @@ -1,5 +1,5 @@ CREATE TABLE `mysql_slow_query_review` ( - `checksum` bigint(20) unsigned NOT NULL, + `checksum` CHAR(32) NOT NULL, `fingerprint` longtext NOT NULL, `sample` longtext NOT NULL, `first_seen` datetime(6) DEFAULT NULL, @@ -18,97 +18,97 @@ CREATE TABLE `mysql_slow_query_review_history` ( `client_max` varchar(64) DEFAULT NULL, `user_max` varchar(64) NOT NULL, `db_max` varchar(64) DEFAULT NULL, - `checksum` bigint(20) unsigned NOT NULL, + `checksum` CHAR(32) NOT NULL, `sample` longtext NOT NULL, `ts_min` datetime(6) NOT NULL, `ts_max` datetime(6) NOT NULL, - `ts_cnt` double DEFAULT NULL, - `Query_time_sum` double DEFAULT NULL, - `Query_time_min` double DEFAULT NULL, - `Query_time_max` double DEFAULT NULL, - `Query_time_pct_95` double DEFAULT NULL, - `Query_time_stddev` double DEFAULT NULL, - `Query_time_median` double DEFAULT NULL, - `Lock_time_sum` double DEFAULT NULL, - `Lock_time_min` double DEFAULT NULL, - `Lock_time_max` double DEFAULT NULL, - `Lock_time_pct_95` double DEFAULT NULL, - `Lock_time_stddev` double DEFAULT NULL, - `Lock_time_median` double DEFAULT NULL, - `Rows_sent_sum` double DEFAULT NULL, - `Rows_sent_min` double DEFAULT NULL, - `Rows_sent_max` double DEFAULT NULL, - `Rows_sent_pct_95` double DEFAULT NULL, - `Rows_sent_stddev` double DEFAULT NULL, - `Rows_sent_median` double DEFAULT NULL, - `Rows_examined_sum` double DEFAULT NULL, - `Rows_examined_min` double DEFAULT NULL, - `Rows_examined_max` double DEFAULT NULL, - `Rows_examined_pct_95` double DEFAULT NULL, - `Rows_examined_stddev` double DEFAULT NULL, - `Rows_examined_median` double DEFAULT NULL, - `Rows_affected_sum` double DEFAULT NULL, - `Rows_affected_min` double DEFAULT NULL, - `Rows_affected_max` double DEFAULT NULL, - `Rows_affected_pct_95` double DEFAULT NULL, - `Rows_affected_stddev` double DEFAULT NULL, - `Rows_affected_median` double DEFAULT NULL, - `Rows_read_sum` double DEFAULT NULL, - `Rows_read_min` double DEFAULT NULL, - `Rows_read_max` double DEFAULT NULL, - `Rows_read_pct_95` double DEFAULT NULL, - `Rows_read_stddev` double DEFAULT NULL, - `Rows_read_median` double DEFAULT NULL, - `Merge_passes_sum` double DEFAULT NULL, - `Merge_passes_min` double DEFAULT NULL, - `Merge_passes_max` double DEFAULT NULL, - `Merge_passes_pct_95` double DEFAULT NULL, - `Merge_passes_stddev` double DEFAULT NULL, - `Merge_passes_median` double DEFAULT NULL, - `InnoDB_IO_r_ops_min` double DEFAULT NULL, - `InnoDB_IO_r_ops_max` double DEFAULT NULL, - `InnoDB_IO_r_ops_pct_95` double DEFAULT NULL, - `InnoDB_IO_r_ops_stddev` double DEFAULT NULL, - `InnoDB_IO_r_ops_median` double DEFAULT NULL, - `InnoDB_IO_r_bytes_min` double DEFAULT NULL, - `InnoDB_IO_r_bytes_max` double DEFAULT NULL, - `InnoDB_IO_r_bytes_pct_95` double DEFAULT NULL, - `InnoDB_IO_r_bytes_stddev` double DEFAULT NULL, - `InnoDB_IO_r_bytes_median` double DEFAULT NULL, - `InnoDB_IO_r_wait_min` double DEFAULT NULL, - `InnoDB_IO_r_wait_max` double DEFAULT NULL, - `InnoDB_IO_r_wait_pct_95` double DEFAULT NULL, - `InnoDB_IO_r_wait_stddev` double DEFAULT NULL, - `InnoDB_IO_r_wait_median` double DEFAULT NULL, - `InnoDB_rec_lock_wait_min` double DEFAULT NULL, - `InnoDB_rec_lock_wait_max` double DEFAULT NULL, - `InnoDB_rec_lock_wait_pct_95` double DEFAULT NULL, - `InnoDB_rec_lock_wait_stddev` double DEFAULT NULL, - `InnoDB_rec_lock_wait_median` double DEFAULT NULL, - `InnoDB_queue_wait_min` double DEFAULT NULL, - `InnoDB_queue_wait_max` double DEFAULT NULL, - `InnoDB_queue_wait_pct_95` double DEFAULT NULL, - `InnoDB_queue_wait_stddev` double DEFAULT NULL, - `InnoDB_queue_wait_median` double DEFAULT NULL, - `InnoDB_pages_distinct_min` double DEFAULT NULL, - `InnoDB_pages_distinct_max` double DEFAULT NULL, - `InnoDB_pages_distinct_pct_95` double DEFAULT NULL, - `InnoDB_pages_distinct_stddev` double DEFAULT NULL, - `InnoDB_pages_distinct_median` double DEFAULT NULL, - `QC_Hit_cnt` double DEFAULT NULL, - `QC_Hit_sum` double DEFAULT NULL, - `Full_scan_cnt` double DEFAULT NULL, - `Full_scan_sum` double DEFAULT NULL, - `Full_join_cnt` double DEFAULT NULL, - `Full_join_sum` double DEFAULT NULL, - `Tmp_table_cnt` double DEFAULT NULL, - `Tmp_table_sum` double DEFAULT NULL, - `Tmp_table_on_disk_cnt` double DEFAULT NULL, - `Tmp_table_on_disk_sum` double DEFAULT NULL, - `Filesort_cnt` double DEFAULT NULL, - `Filesort_sum` double DEFAULT NULL, - `Filesort_on_disk_cnt` double DEFAULT NULL, - `Filesort_on_disk_sum` double DEFAULT NULL, + `ts_cnt` float DEFAULT NULL, + `Query_time_sum` float DEFAULT NULL, + `Query_time_min` float DEFAULT NULL, + `Query_time_max` float DEFAULT NULL, + `Query_time_pct_95` float DEFAULT NULL, + `Query_time_stddev` float DEFAULT NULL, + `Query_time_median` float DEFAULT NULL, + `Lock_time_sum` float DEFAULT NULL, + `Lock_time_min` float DEFAULT NULL, + `Lock_time_max` float DEFAULT NULL, + `Lock_time_pct_95` float DEFAULT NULL, + `Lock_time_stddev` float DEFAULT NULL, + `Lock_time_median` float DEFAULT NULL, + `Rows_sent_sum` float DEFAULT NULL, + `Rows_sent_min` float DEFAULT NULL, + `Rows_sent_max` float DEFAULT NULL, + `Rows_sent_pct_95` float DEFAULT NULL, + `Rows_sent_stddev` float DEFAULT NULL, + `Rows_sent_median` float DEFAULT NULL, + `Rows_examined_sum` float DEFAULT NULL, + `Rows_examined_min` float DEFAULT NULL, + `Rows_examined_max` float DEFAULT NULL, + `Rows_examined_pct_95` float DEFAULT NULL, + `Rows_examined_stddev` float DEFAULT NULL, + `Rows_examined_median` float DEFAULT NULL, + `Rows_affected_sum` float DEFAULT NULL, + `Rows_affected_min` float DEFAULT NULL, + `Rows_affected_max` float DEFAULT NULL, + `Rows_affected_pct_95` float DEFAULT NULL, + `Rows_affected_stddev` float DEFAULT NULL, + `Rows_affected_median` float DEFAULT NULL, + `Rows_read_sum` float DEFAULT NULL, + `Rows_read_min` float DEFAULT NULL, + `Rows_read_max` float DEFAULT NULL, + `Rows_read_pct_95` float DEFAULT NULL, + `Rows_read_stddev` float DEFAULT NULL, + `Rows_read_median` float DEFAULT NULL, + `Merge_passes_sum` float DEFAULT NULL, + `Merge_passes_min` float DEFAULT NULL, + `Merge_passes_max` float DEFAULT NULL, + `Merge_passes_pct_95` float DEFAULT NULL, + `Merge_passes_stddev` float DEFAULT NULL, + `Merge_passes_median` float DEFAULT NULL, + `InnoDB_IO_r_ops_min` float DEFAULT NULL, + `InnoDB_IO_r_ops_max` float DEFAULT NULL, + `InnoDB_IO_r_ops_pct_95` float DEFAULT NULL, + `InnoDB_IO_r_ops_stddev` float DEFAULT NULL, + `InnoDB_IO_r_ops_median` float DEFAULT NULL, + `InnoDB_IO_r_bytes_min` float DEFAULT NULL, + `InnoDB_IO_r_bytes_max` float DEFAULT NULL, + `InnoDB_IO_r_bytes_pct_95` float DEFAULT NULL, + `InnoDB_IO_r_bytes_stddev` float DEFAULT NULL, + `InnoDB_IO_r_bytes_median` float DEFAULT NULL, + `InnoDB_IO_r_wait_min` float DEFAULT NULL, + `InnoDB_IO_r_wait_max` float DEFAULT NULL, + `InnoDB_IO_r_wait_pct_95` float DEFAULT NULL, + `InnoDB_IO_r_wait_stddev` float DEFAULT NULL, + `InnoDB_IO_r_wait_median` float DEFAULT NULL, + `InnoDB_rec_lock_wait_min` float DEFAULT NULL, + `InnoDB_rec_lock_wait_max` float DEFAULT NULL, + `InnoDB_rec_lock_wait_pct_95` float DEFAULT NULL, + `InnoDB_rec_lock_wait_stddev` float DEFAULT NULL, + `InnoDB_rec_lock_wait_median` float DEFAULT NULL, + `InnoDB_queue_wait_min` float DEFAULT NULL, + `InnoDB_queue_wait_max` float DEFAULT NULL, + `InnoDB_queue_wait_pct_95` float DEFAULT NULL, + `InnoDB_queue_wait_stddev` float DEFAULT NULL, + `InnoDB_queue_wait_median` float DEFAULT NULL, + `InnoDB_pages_distinct_min` float DEFAULT NULL, + `InnoDB_pages_distinct_max` float DEFAULT NULL, + `InnoDB_pages_distinct_pct_95` float DEFAULT NULL, + `InnoDB_pages_distinct_stddev` float DEFAULT NULL, + `InnoDB_pages_distinct_median` float DEFAULT NULL, + `QC_Hit_cnt` float DEFAULT NULL, + `QC_Hit_sum` float DEFAULT NULL, + `Full_scan_cnt` float DEFAULT NULL, + `Full_scan_sum` float DEFAULT NULL, + `Full_join_cnt` float DEFAULT NULL, + `Full_join_sum` float DEFAULT NULL, + `Tmp_table_cnt` float DEFAULT NULL, + `Tmp_table_sum` float DEFAULT NULL, + `Tmp_table_on_disk_cnt` float DEFAULT NULL, + `Tmp_table_on_disk_sum` float DEFAULT NULL, + `Filesort_cnt` float DEFAULT NULL, + `Filesort_sum` float DEFAULT NULL, + `Filesort_on_disk_cnt` float DEFAULT NULL, + `Filesort_on_disk_sum` float DEFAULT NULL, `Bytes_sum` float DEFAULT NULL, `Bytes_min` float DEFAULT NULL, `Bytes_max` float DEFAULT NULL,