From d3c23b035dc2193777fe39aa0e15912253db84a7 Mon Sep 17 00:00:00 2001 From: hhyo Date: Tue, 27 Nov 2018 21:14:27 +0800 Subject: [PATCH 1/5] fix #94 --- sql/query.py | 118 ++++++++++++++++++-------------------- sql/static/slowquery.html | 28 ++++++--- 2 files changed, 76 insertions(+), 70 deletions(-) diff --git a/sql/query.py b/sql/query.py index 298e1347..56799f5b 100644 --- a/sql/query.py +++ b/sql/query.py @@ -3,7 +3,7 @@ import simplejson as json from django.core.urlresolvers import reverse -from django.db.models import Q, Min, F, Sum +from django.db.models import Q, Min, F, Sum, Max from django.db import connection from django.conf import settings from django.db.models.functions import Concat @@ -721,8 +721,12 @@ def query(request): query_log.save() # 返回查询结果 - return HttpResponse(json.dumps(finalResult, cls=ExtendJSONEncoder, bigint_as_string=True), - content_type='application/json') + try: + return HttpResponse(json.dumps(finalResult, cls=ExtendJSONEncoder, bigint_as_string=True), + content_type='application/json') + except Exception: + return HttpResponse(json.dumps(finalResult, default=str, bigint_as_string=True, encoding='latin1'), + content_type='application/json') # 获取sql查询记录 @@ -840,15 +844,11 @@ def slowquery_review(request): slowsql_obj = SlowQuery.objects.filter( slowqueryhistory__hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), slowqueryhistory__db_max=DBName, - slowqueryhistory__ts_min__range=(StartTime, EndTime), - last_seen__range=(StartTime, EndTime) - ).annotate(CreateTime=F('last_seen'), - SQLId=F('checksum'), - DBName=F('slowqueryhistory__db_max'), # 数据库 - SQLText=F('fingerprint'), # SQL语句 - ).values( - 'CreateTime', 'SQLId', 'DBName', 'SQLText' - ).annotate( + slowqueryhistory__ts_min__range=(StartTime, EndTime) + ).annotate(SQLText=F('fingerprint'), SQLId=F('checksum')).values('SQLText', 'SQLId').annotate( + CreateTime=Max('slowqueryhistory__ts_max'), + DBName=Max('slowqueryhistory__db_max'), # 数据库 + QueryTimeAvg=Sum('slowqueryhistory__query_time_sum') / Sum('slowqueryhistory__ts_cnt'), # 平均执行时长 MySQLTotalExecutionCounts=Sum('slowqueryhistory__ts_cnt'), # 执行总次数 MySQLTotalExecutionTimes=Sum('slowqueryhistory__query_time_sum'), # 执行总时长 ParseTotalRowCounts=Sum('slowqueryhistory__rows_examined_sum'), # 扫描总行数 @@ -858,15 +858,11 @@ def slowquery_review(request): slowsql_obj_count = SlowQuery.objects.filter( slowqueryhistory__hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), slowqueryhistory__db_max=DBName, - slowqueryhistory__ts_min__range=(StartTime, EndTime), - last_seen__range=(StartTime, EndTime) - ).annotate(CreateTime=F('last_seen'), - SQLId=F('checksum'), - DBName=F('slowqueryhistory__db_max'), # 数据库 - SQLText=F('fingerprint'), # SQL语句 - ).values( - 'CreateTime', 'SQLId', 'DBName', 'SQLText' - ).annotate( + slowqueryhistory__ts_min__range=(StartTime, EndTime) + ).annotate(SQLText=F('fingerprint'), SQLId=F('checksum')).values('SQLText', 'SQLId').annotate( + CreateTime=Max('slowqueryhistory__ts_max'), + DBName=Max('slowqueryhistory__db_max'), # 数据库 + QueryTimeAvg=Sum('slowqueryhistory__query_time_sum') / Sum('slowqueryhistory__ts_cnt'), # 平均执行时长 MySQLTotalExecutionCounts=Sum('slowqueryhistory__ts_cnt'), # 执行总次数 MySQLTotalExecutionTimes=Sum('slowqueryhistory__query_time_sum'), # 执行总时长 ParseTotalRowCounts=Sum('slowqueryhistory__rows_examined_sum'), # 扫描总行数 @@ -877,14 +873,10 @@ def slowquery_review(request): slowsql_obj = SlowQuery.objects.filter( slowqueryhistory__hostname_max=(cluster_info.master_host + ':' 
+ str(cluster_info.master_port)), slowqueryhistory__ts_min__range=(StartTime, EndTime), - last_seen__range=(StartTime, EndTime) - ).annotate(CreateTime=F('last_seen'), - SQLId=F('checksum'), - DBName=F('slowqueryhistory__db_max'), # 数据库 - SQLText=F('fingerprint'), # SQL语句 - ).values( - 'CreateTime', 'SQLId', 'DBName', 'SQLText' - ).annotate( + ).annotate(SQLText=F('fingerprint'), SQLId=F('checksum')).values('SQLText', 'SQLId').annotate( + CreateTime=Max('slowqueryhistory__ts_max'), + DBName=Max('slowqueryhistory__db_max'), # 数据库 + QueryTimeAvg=Sum('slowqueryhistory__query_time_sum') / Sum('slowqueryhistory__ts_cnt'), # 平均执行时长 MySQLTotalExecutionCounts=Sum('slowqueryhistory__ts_cnt'), # 执行总次数 MySQLTotalExecutionTimes=Sum('slowqueryhistory__query_time_sum'), # 执行总时长 ParseTotalRowCounts=Sum('slowqueryhistory__rows_examined_sum'), # 扫描总行数 @@ -894,14 +886,10 @@ def slowquery_review(request): slowsql_obj_count = SlowQuery.objects.filter( slowqueryhistory__hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), slowqueryhistory__ts_min__range=(StartTime, EndTime), - last_seen__range=(StartTime, EndTime) - ).annotate(CreateTime=F('last_seen'), - SQLId=F('checksum'), - DBName=F('slowqueryhistory__db_max'), # 数据库 - SQLText=F('fingerprint'), # SQL语句 - ).values( - 'CreateTime', 'SQLId', 'DBName', 'SQLText' - ).annotate( + ).annotate(SQLText=F('fingerprint'), SQLId=F('checksum')).values('SQLText', 'SQLId').annotate( + CreateTime=Max('slowqueryhistory__ts_max'), + DBName=Max('slowqueryhistory__db_max'), # 数据库 + QueryTimeAvg=Sum('slowqueryhistory__query_time_sum') / Sum('slowqueryhistory__ts_cnt'), # 平均执行时长 MySQLTotalExecutionCounts=Sum('slowqueryhistory__ts_cnt'), # 执行总次数 MySQLTotalExecutionTimes=Sum('slowqueryhistory__query_time_sum'), # 执行总时长 ParseTotalRowCounts=Sum('slowqueryhistory__rows_examined_sum'), # 扫描总行数 @@ -947,17 +935,19 @@ def slowquery_review_history(request): hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), checksum=SQLId, ts_min__range=(StartTime, EndTime) - ).annotate(ExecutionStartTime=F('ts_min'), # 执行开始时间 + ).annotate(ExecutionStartTime=F('ts_min'), # 本次统计(每5分钟一次)该类型sql语句出现的最小时间 DBName=F('db_max'), # 数据库名 HostAddress=Concat(V('\''), 'user_max', V('\''), V('@'), V('\''), 'client_max', V('\'')), # 用户名 SQLText=F('sample'), # SQL语句 - QueryTimes=F('query_time_sum'), # 执行时长(秒) - LockTimes=F('lock_time_sum'), # 锁定时长(秒) - ParseRowCounts=F('rows_examined_sum'), # 解析行数 - ReturnRowCounts=F('rows_sent_sum') # 返回行数 + TotalExecutionCounts=F('ts_cnt'), # 本次统计该sql语句出现的次数 + QueryTimePct95=F('query_time_pct_95'), # 本次统计该sql语句95%耗时 + QueryTimes=F('query_time_sum'), # 本次统计该sql语句花费的总时间(秒) + LockTimes=F('lock_time_sum'), # 本次统计该sql语句锁定总时长(秒) + ParseRowCounts=F('rows_examined_sum'), # 本次统计该sql语句解析总行数 + ReturnRowCounts=F('rows_sent_sum') # 本次统计该sql语句返回总行数 ).values( - 'ExecutionStartTime', 'DBName', 'HostAddress', 'SQLText', 'QueryTimes', 'LockTimes', 'ParseRowCounts', - 'ReturnRowCounts' + 'ExecutionStartTime', 'DBName', 'HostAddress', 'SQLText', 'TotalExecutionCounts', 'QueryTimePct95', + 'QueryTimes', 'LockTimes', 'ParseRowCounts', 'ReturnRowCounts' )[offset:limit] slowsql_obj_count = SlowQueryHistory.objects.filter( @@ -972,18 +962,20 @@ def slowquery_review_history(request): hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), db_max=DBName, ts_min__range=(StartTime, EndTime) - ).annotate(ExecutionStartTime=F('ts_min'), # 执行开始时间 + ).annotate(ExecutionStartTime=F('ts_min'), # 本次统计(每5分钟一次)该类型sql语句出现的最小时间 DBName=F('db_max'), # 
数据库名 - HostAddress=Concat(V('\''), 'user_max', V('\''), V('@'), V('\''), 'client_max', V('\'')), # 用户名 + HostAddress=Concat(V('\''), 'user_max', V('\''), V('@'), V('\''), 'client_max', V('\'')), + # 用户名 SQLText=F('sample'), # SQL语句 - QueryTimes=F('query_time_sum'), # 执行时长(秒) - LockTimes=F('lock_time_sum'), # 锁定时长(秒) - ParseRowCounts=F('rows_examined_sum'), # 解析行数 - ReturnRowCounts=F('rows_sent_sum') # 返回行数 + TotalExecutionCounts=F('ts_cnt'), # 本次统计该sql语句出现的次数 + QueryTimePct95=F('query_time_pct_95'), # 本次统计该sql语句95%耗时 + QueryTimes=F('query_time_sum'), # 本次统计该sql语句花费的总时间(秒) + LockTimes=F('lock_time_sum'), # 本次统计该sql语句锁定总时长(秒) + ParseRowCounts=F('rows_examined_sum'), # 本次统计该sql语句解析总行数 + ReturnRowCounts=F('rows_sent_sum') # 本次统计该sql语句返回总行数 ).values( - 'ExecutionStartTime', 'DBName', 'HostAddress', 'SQLText', 'QueryTimes', 'LockTimes', - 'ParseRowCounts', - 'ReturnRowCounts' + 'ExecutionStartTime', 'DBName', 'HostAddress', 'SQLText', 'TotalExecutionCounts', 'QueryTimePct95', + 'QueryTimes', 'LockTimes', 'ParseRowCounts', 'ReturnRowCounts' )[offset:limit] # 执行总次数倒序排列 slowsql_obj_count = SlowQueryHistory.objects.filter( @@ -996,18 +988,20 @@ def slowquery_review_history(request): slowsql_record_obj = SlowQueryHistory.objects.filter( hostname_max=(cluster_info.master_host + ':' + str(cluster_info.master_port)), ts_min__range=(StartTime, EndTime) - ).annotate(ExecutionStartTime=F('ts_min'), # 执行开始时间 + ).annotate(ExecutionStartTime=F('ts_min'), # 本次统计(每5分钟一次)该类型sql语句出现的最小时间 DBName=F('db_max'), # 数据库名 - HostAddress=F('user_max'), # 用户名 + HostAddress=Concat(V('\''), 'user_max', V('\''), V('@'), V('\''), 'client_max', V('\'')), + # 用户名 SQLText=F('sample'), # SQL语句 - QueryTimes=F('query_time_sum'), # 执行时长(秒) - LockTimes=F('lock_time_sum'), # 锁定时长(秒) - ParseRowCounts=F('rows_examined_sum'), # 解析行数 - ReturnRowCounts=F('rows_sent_sum') # 返回行数 + TotalExecutionCounts=F('ts_cnt'), # 本次统计该sql语句出现的次数 + QueryTimePct95=F('query_time_pct_95'), # 本次统计该sql语句95%耗时 + QueryTimes=F('query_time_sum'), # 本次统计该sql语句花费的总时间(秒) + LockTimes=F('lock_time_sum'), # 本次统计该sql语句锁定总时长(秒) + ParseRowCounts=F('rows_examined_sum'), # 本次统计该sql语句解析总行数 + ReturnRowCounts=F('rows_sent_sum') # 本次统计该sql语句返回总行数 ).values( - 'ExecutionStartTime', 'DBName', 'HostAddress', 'SQLText', 'QueryTimes', 'LockTimes', - 'ParseRowCounts', - 'ReturnRowCounts' + 'ExecutionStartTime', 'DBName', 'HostAddress', 'SQLText', 'TotalExecutionCounts', 'QueryTimePct95', + 'QueryTimes', 'LockTimes', 'ParseRowCounts', 'ReturnRowCounts' )[offset:limit] # 执行总次数倒序排列 slowsql_obj_count = SlowQueryHistory.objects.filter( diff --git a/sql/static/slowquery.html b/sql/static/slowquery.html index 79ac6388..665e6f73 100644 --- a/sql/static/slowquery.html +++ b/sql/static/slowquery.html @@ -323,8 +323,8 @@ } }, formatter: function (value, row, index) { - if (value.length > 80) { - var sql = value.substr(0, 808) + '...'; + if (value.length > 100) { + var sql = value.substr(0, 100) + '...'; return sql; } else { @@ -345,6 +345,9 @@ field: 'ReturnTotalRowCounts' }], locale: 'zh-CN', + onLoadError: function () { + alert("数据加载失败!请检查接口返回信息和错误日志!"); + }, responseHandler: function (res) { //在ajax获取到数据,渲染表格之前,修改数据源 return res; @@ -435,8 +438,8 @@ title: 'SQL语句', field: 'SQLText', formatter: function (value, row, index) { - if (value.length > 80) { - var sql = value.substr(0, 80) + '...'; + if (value.length > 100) { + var sql = value.substr(0, 100) + '...'; return sql; } else { @@ -444,21 +447,30 @@ } } }, { - title: '执行时长(秒)', + title: '执行总次数', + field: 'TotalExecutionCounts' + }, { + title:
'执行时长(95%)', + field: 'QueryTimePct95' + }, { + title: '执行总时长(秒)', field: 'QueryTimes' }, { - title: '锁定时长(秒)', + title: '锁定总时长(秒)', field: 'LockTimes' }, { - title: '解析行数', + title: '解析总行数', field: 'ParseRowCounts' }, { - title: '返回行数', + title: '返回总行数', field: 'ReturnRowCounts' }], singleSelect: true, toolbar: "#sqladvisor-toolbar", //指明自定义的toolbar locale: 'zh-CN', + onLoadError: function () { + alert("数据加载失败!请检查接口返回信息和错误日志!"); + }, responseHandler: function (res) { //在ajax获取到数据,渲染表格之前,修改数据源 return res; From 161b12f65e845c9286e631f63971e27857b7622e Mon Sep 17 00:00:00 2001 From: hhyo Date: Tue, 27 Nov 2018 21:19:08 +0800 Subject: [PATCH 2/5] add --timeout 600 --- src/docker/startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docker/startup.sh b/src/docker/startup.sh index abe5b7da..2d70dc74 100755 --- a/src/docker/startup.sh +++ b/src/docker/startup.sh @@ -14,4 +14,4 @@ settings=${1:-"archer.settings"} ip=${2:-"127.0.0.1"} port=${3:-8888} -gunicorn -w 2 --env DJANGO_SETTINGS_MODULE=${settings} --error-logfile=/tmp/archer.err -b ${ip}:${port} --daemon archer.wsgi:application +gunicorn -w 2 --env DJANGO_SETTINGS_MODULE=${settings} --error-logfile=/tmp/archer.err -b ${ip}:${port} --timeout 600 archer.wsgi:application From cea4ecc09a5edcd71054beb899615dcbb94b46cc Mon Sep 17 00:00:00 2001 From: hhyo Date: Tue, 27 Nov 2018 21:42:48 +0800 Subject: [PATCH 3/5] fix #95 --- sql/data_masking.py | 198 ++++++++++++++++++++++++-------------------- 1 file changed, 108 insertions(+), 90 deletions(-) diff --git a/sql/data_masking.py b/sql/data_masking.py index 7dec2149..2bc17650 100644 --- a/sql/data_masking.py +++ b/sql/data_masking.py @@ -21,18 +21,19 @@ def data_masking(self, cluster_name, db_name, sql, sql_result): if print_info is None: result['status'] = 1 - result['msg'] = 'inception返回的结果集为空!可能是SQL语句有语法错误' + result['msg'] = 'inception返回的结果集为空!可能是SQL语句有语法错误,无法完成脱敏校验,如果需要继续查询请关闭校验' elif print_info['errlevel'] != 0: result['status'] = 2 - result['msg'] = 'inception返回异常:\n' + print_info['errmsg'] + result['msg'] = 'inception返回异常,无法完成脱敏校验,如果需要继续查询请关闭校验:\n' + print_info['errmsg'] else: query_tree = print_info['query_tree'] - # 获取集群所属环境,获取命中脱敏规则的列数据 + # 获取命中脱敏规则的列数据 try: table_hit_columns, hit_columns = self.analy_query_tree(query_tree, cluster_name) except Exception as msg: result['status'] = 2 - result['msg'] = 'inception语法树解析表信息出错:{}\nquery_tree:{}'.format(str(msg), print_info) + result['msg'] = '解析inception语法树获取表信息出错,无法完成脱敏校验,如果需要继续查询请关闭校验:{}\nquery_tree:{}'.format(str(msg), + print_info) return result # 存在select * 的查询,遍历column_list,获取命中列的index,添加到hit_columns @@ -81,7 +82,8 @@ def query_tree(self, sqlContent, cluster_name, dbName): errlevel = 2 errmsg = 'Global environment: ' + query_tree if errlevel == 0: - print(json.dumps(json.loads(query_tree), indent=4, sort_keys=False, ensure_ascii=False)) + pass + # print(json.dumps(json.loads(query_tree), indent=4, sort_keys=False, ensure_ascii=False)) return {'id': id, 'statement': statement, 'errlevel': errlevel, 'query_tree': query_tree, 'errmsg': errmsg} else: @@ -99,10 +101,10 @@ def query_table_ref(self, sqlContent, cluster_name, dbName): if print_info is None: result['status'] = 1 - result['msg'] = 'inception返回的结果集为空!可能是SQL语句有语法错误' + result['msg'] = 'inception返回的结果集为空!可能是SQL语句有语法错误,无法校验表权限,如果需要继续查询请关闭校验' elif print_info['errlevel'] != 0: result['status'] = 2 - result['msg'] = 'inception返回异常:\n' + print_info['errmsg'] + result['msg'] = 'inception返回异常,无法校验表权限,如果需要继续查询请关闭校验:\n' + print_info['errmsg'] else: try: table_ref = 
json.loads(print_info['query_tree'])['table_ref'] @@ -118,7 +120,8 @@ def query_table_ref(self, sqlContent, cluster_name, dbName): table_ref = json.loads(query_tree_str)['table_ref'] except Exception as msg: result['status'] = 2 - result['msg'] = 'inception语法树解析表信息出错:{}\nquery_tree:{}'.format(str(msg), print_info) + result['msg'] = '通过inception语法树解析表信息出错,无法校验表权限,如果需要继续查询请关闭校验:{}\nquery_tree:{}'.format(str(msg), + print_info) table_ref = '' result['data'] = table_ref return result @@ -140,97 +143,112 @@ def analy_query_tree(self, query_tree, cluster_name): # 获取全部脱敏字段信息,减少循环查询,提升效率 DataMaskingColumnsOb = DataMaskingColumns.objects.all() - # 遍历select_list - columns = [] - hit_columns = [] # 命中列 - table_hit_columns = [] # 涉及表命中的列 - - # 判断是否存在不支持脱敏的语法 - for select_item in select_list: - if select_item['type'] not in ('FIELD_ITEM', 'aggregate'): - raise Exception('不支持该查询语句脱敏!') - if select_item['type'] == 'aggregate': - if select_item['aggregate'].get('type') != 'FIELD_ITEM': + # 判断语句涉及的表是否存在脱敏字段配置 + is_exist = False + for table in table_ref: + if DataMaskingColumnsOb.filter(cluster_name=cluster_name, + table_schema=table['db'], + table_name=table['table'], + active=1).exists(): + is_exist = True + # 不存在脱敏字段则直接跳过规则解析 + if is_exist: + # 遍历select_list + columns = [] + hit_columns = [] # 命中列 + table_hit_columns = [] # 涉及表命中的列,仅select *需要 + + # 判断是否存在不支持脱敏的语法 + for select_item in select_list: + if select_item['type'] not in ('FIELD_ITEM', 'aggregate'): raise Exception('不支持该查询语句脱敏!') + if select_item['type'] == 'aggregate': + if select_item['aggregate'].get('type') not in ('FIELD_ITEM', 'INT_ITEM'): + raise Exception('不支持该查询语句脱敏!') + + # 获取select信息的规则,仅处理type为FIELD_ITEM和aggregate类型的select信息,如[*],[*,column_a],[column_a,*],[column_a,a.*,column_b],[a.*,column_a,b.*], + select_index = [ + select_item['field'] if select_item['type'] == 'FIELD_ITEM' else select_item['aggregate'].get('field') for + select_item in select_list if select_item['type'] in ('FIELD_ITEM', 'aggregate')] + + # 处理select_list,为统一的{'type': 'FIELD_ITEM', 'db': 'archer_master', 'table': 'sql_users', 'field': 'email'}格式 + select_list = [select_item if select_item['type'] == 'FIELD_ITEM' else select_item['aggregate'] for + select_item in select_list if select_item['type'] in ('FIELD_ITEM', 'aggregate')] + + if select_index: + # 如果发现存在field='*',则遍历所有表,找出所有的命中字段 + if '*' in select_index: + # 涉及表命中的列 + for table in table_ref: + hit_columns_info = self.hit_table(DataMaskingColumnsOb, cluster_name, table['db'], + table['table']) + table_hit_columns.extend(hit_columns_info) + # 几种不同查询格式 + # [*] + if re.match(r"^(\*,?)+$", ','.join(select_index)): + hit_columns = [] + # [*,column_a] + elif re.match(r"^(\*,)+(\w,?)+$", ','.join(select_index)): + # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,index采取后切片 + for index, item in enumerate(select_list): + item['index'] = index - len(select_list) + if item.get('field') != '*': + columns.append(item) - # 获取select信息的规则,仅处理type为FIELD_ITEM和aggregate类型的select信息,如[*],[*,column_a],[column_a,*],[column_a,a.*,column_b],[a.*,column_a,b.*], - select_index = [ - select_item['field'] if select_item['type'] == 'FIELD_ITEM' else select_item['aggregate'].get('field') for - select_item in select_list if select_item['type'] in ('FIELD_ITEM', 'aggregate')] - - # 处理select_list,为统一的{'type': 'FIELD_ITEM', 'db': 'archer_master', 'table': 'sql_users', 'field': 'email'}格式 - select_list = [select_item if select_item['type'] == 'FIELD_ITEM' else select_item['aggregate'] for - select_item in select_list if select_item['type'] in 
('FIELD_ITEM', 'aggregate')] - - if select_index: - # 如果发现存在field='*',则遍历所有表,找出所有的命中字段 - if '*' in select_index: - for table in table_ref: - hit_columns_info = self.hit_table(DataMaskingColumnsOb, cluster_name, table['db'], - table['table']) - table_hit_columns.extend(hit_columns_info) - # [*] - if re.match(r"^(\*,?)+$", ','.join(select_index)): - hit_columns = [] - # [*,column_a] - elif re.match(r"^(\*,)+(\w,?)+$", ','.join(select_index)): - # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,index采取后切片 - for index, item in enumerate(select_list): - item['index'] = index - len(select_list) - if item.get('field') != '*': - columns.append(item) + # [column_a, *] + elif re.match(r"^(\w,?)+(\*,?)+$", ','.join(select_index)): + # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,index采取前切片 + for index, item in enumerate(select_list): + item['index'] = index + if item.get('field') != '*': + columns.append(item) - # [column_a, *] - elif re.match(r"^(\w,?)+(\*,?)+$", ','.join(select_index)): - # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,index采取前切片 - for index, item in enumerate(select_list): - item['index'] = index - if item.get('field') != '*': - columns.append(item) + # [column_a,a.*,column_b] + elif re.match(r"^(\w,?)+(\*,?)+(\w,?)+$", ','.join(select_index)): + # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,*前面的字段index采取前切片,*后面的字段采取后切片 + for index, item in enumerate(select_list): + item['index'] = index + if item.get('field') == '*': + first_idx = index + break - # [column_a,a.*,column_b] - elif re.match(r"^(\w,?)+(\*,?)+(\w,?)+$", ','.join(select_index)): - # 找出field不为* 的列信息, 循环判断列是否命中脱敏规则,并增加规则类型和index,*前面的字段index采取前切片,*后面的字段采取后切片 - for index, item in enumerate(select_list): - item['index'] = index - if item.get('field') == '*': - first_idx = index - break + select_list.reverse() + for index, item in enumerate(select_list): + item['index'] = index + if item.get('field') == '*': + last_idx = len(select_list) - index - 1 + break - select_list.reverse() - for index, item in enumerate(select_list): - item['index'] = index - if item.get('field') == '*': - last_idx = len(select_list) - index - 1 - break + select_list.reverse() + for index, item in enumerate(select_list): + if item.get('field') != '*' and index < first_idx: + item['index'] = index - select_list.reverse() - for index, item in enumerate(select_list): - if item.get('field') != '*' and index < first_idx: - item['index'] = index + if item.get('field') != '*' and index > last_idx: + item['index'] = index - len(select_list) + columns.append(item) - if item.get('field') != '*' and index > last_idx: - item['index'] = index - len(select_list) - columns.append(item) + # [a.*, column_a, b.*] + else: + raise Exception('不支持select信息为[a.*, column_a, b.*]格式的查询脱敏!') - # [a.*, column_a, b.*] + # 没有*的查询,直接遍历查询命中字段,query_tree的列index就是查询语句列的index else: - raise Exception('不支持select信息为[a.*, column_a, b.*]格式的查询脱敏!') - - # 没有*的查询,直接遍历查询命中字段,query_tree的列index就是查询语句列的index - else: - for index, item in enumerate(select_list): - item['index'] = index - if item.get('field') != '*': - columns.append(item) - - # 格式化命中的列信息 - for column in columns: - hit_info = self.hit_column(DataMaskingColumnsOb, cluster_name, column.get('db'), column.get('table'), - column.get('field')) - if hit_info['is_hit']: - hit_info['index'] = column['index'] - hit_columns.append(hit_info) + for index, item in enumerate(select_list): + item['index'] = index + if item.get('field') != '*': + columns.append(item) + + # 格式化命中的列信息 + for column in columns: + hit_info = 
self.hit_column(DataMaskingColumnsOb, cluster_name, column.get('db'), column.get('table'), + column.get('field')) + if hit_info['is_hit']: + hit_info['index'] = column['index'] + hit_columns.append(hit_info) + else: + table_hit_columns = None + hit_columns = None return table_hit_columns, hit_columns # 判断字段是否命中脱敏规则,如果命中则返回脱敏的规则id和规则类型 From b4d24a4be43b3c2126e47256b517955f088ab4bb Mon Sep 17 00:00:00 2001 From: hhyo Date: Tue, 27 Nov 2018 21:43:43 +0800 Subject: [PATCH 4/5] Update README.md --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 93729b3a..7c225c0a 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ * [功能集成](#其他功能集成) * [在线查询&脱敏查询](#在线查询) * [慢日志管理](#慢日志管理) - * [SQL优化工具](#SQLAdvisor优化工具) + * [SQL优化工具](#sqladvisor优化工具) * [阿里云rds管理](#阿里云rds管理) * [Q&A](#部分问题解决办法 ) @@ -184,7 +184,7 @@ ## 系统体验 -[点击体验](http://52.221.195.102:9123/) +[点击体验](http://139.199.0.191:9123/) | 角色 | 账号 | 密码 | | --- | --- | --- | @@ -250,6 +250,9 @@ inception无法连接备份库 - 检查binlog格式,需要为ROW,binlog_row_image为FULL - 检查DML的表是否存在主键 - 检查语句是否有影响数据 +- 检查备份库是否开启autocommit +- 检查是否为连表更新语句 +- 检查执行实例是否为mysql #### 脱敏规则未生效 - 检查脱敏字段是否命中(是否区分大小写) @@ -262,7 +265,7 @@ inception无法连接备份库 | 手机号 | (.{3})(.*)(.{4}) | 2 | 保留前三后四| | 证件号码 | (.*)(.{4})$ | 2 | 隐藏后四位| | 银行卡 | (.*)(.{4})$ | 2 | 隐藏后四位| -| 邮箱 | (.*)@(.*) | 2 | 去除后缀| +| 邮箱 | (.\*)@(.\*) | 2 | 去除后缀| #### 审核人看不到查询权限申请待审核工单 查询权限申请待办列表被隐藏至右上角的消息图标中,当有待审核信息时会显示图标,可以进入查看待办数据 From 196e4fa5c8fa8f48dd08c4e33c6b5bdabe2ea371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E5=9C=88=E5=9C=88?= Date: Tue, 27 Nov 2018 22:28:47 +0800 Subject: [PATCH 5/5] Update Dockerfile --- src/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docker/Dockerfile b/src/docker/Dockerfile index 57c9230a..59aef697 100644 --- a/src/docker/Dockerfile +++ b/src/docker/Dockerfile @@ -8,7 +8,7 @@ RUN yum -y install unzip git gcc gcc-c++ make cmake bison openssl-devel mysql-de && pip3 install virtualenv -i https://mirrors.ustc.edu.cn/pypi/web/simple/ \ && virtualenv venv4archer --python=python3.4 \ && source venv4archer/bin/activate \ - && git clone -b github https://github.com/hhyo/archer.git \ + && git clone https://github.com/hhyo/archer.git \ && pip3 install -r /opt/archer/src/docker/requirements.txt -i https://mirrors.ustc.edu.cn/pypi/web/simple/ \ && cp /opt/archer/src/docker/pymysql/connections.py /opt/venv4archer/lib/python3.4/site-packages/pymysql/ \ && cp /opt/archer/src/docker/pymysql/cursors.py /opt/venv4archer/lib/python3.4/site-packages/pymysql/ \