-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlog.py
130 lines (108 loc) · 3.53 KB
/
log.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# encoding: utf-8
import mxml
import os
import random
# Root directory that holds one sub-directory of per-model log files per dataset.
individual_logs_dir = r'./individual_logs'
# NOTE(review): presumably the output directory for mixed logs; it is not
# referenced by the code visible in this file - confirm against callers.
mixed_logs_dir = r'./mixed_logs'
# Directory that receives the per-model logs extracted from the loan dataset.
loan_log_dir = os.path.join(individual_logs_dir, 'loan')
def extract_loan_logs(name, dataset_path):
    """
    Extract the individual log of one model from the dataset published with
    the paper "Fast and Accurate Business Process Drift Detection" (QUT 2016).

    The distinct traces found in a fixed window of the parsed MXML log are
    written, one comma-joined trace per line, to ``<loan_log_dir>/<name>.txt``.
    The output directory is created when it does not exist yet.

    name: model name. ``'original'`` reads traces 0..499 of ``cb/cb5k.mxml``;
        any other name reads traces 500..999 of ``<name>/<name>5k.mxml``.
    dataset_path: root directory of the downloaded dataset.
    """
    # Join path components individually so the separator is correct on every
    # platform (a literal backslash only works on Windows).
    if name == 'original':
        filepath = os.path.join(dataset_path, 'cb', 'cb5k.mxml')
        begin, end = 0, 500
    else:
        filepath = os.path.join(dataset_path, name, '%s5k.mxml' % name)
        begin, end = 500, 1000

    log = mxml.parse(filepath)
    traces = log['traces']
    # A set keeps each distinct trace once; indexing (rather than slicing)
    # still fails loudly when the log holds fewer traces than expected.
    all_traces = {','.join(traces[i]) for i in range(begin, end)}

    if not os.path.isdir(loan_log_dir):
        os.makedirs(loan_log_dir)
    dest_file = os.path.join(loan_log_dir, '%s.txt' % name)
    with open(dest_file, 'w') as f:
        for trace in all_traces:
            f.write(trace)
            f.write('\n')
    print('write model: %s length: %d' % (name, len(all_traces)))
def _copy_log(log):
new_log = [None] * len(log)
for i in xrange(len(log)):
new_log[i] = log[i][:]
return new_log
def mix(logs, config):
    """
    Mix the logs produced by different models into one list of traces.

    logs: mapping from model name to that model's log, a list of traces::

        logs = {
            'model-name': [[a, b, c], ...]
        }

    config: ordered list of segments, each naming a model and the number of
        traces to draw from it::

        config = [{
            'name': name of the model (a key of ``logs``),
            'length': number of traces in this segment
        }]

    Returns the concatenated segments. Every returned trace is a copy, so the
    input logs are never aliased.

    Raises ``KeyError`` when a segment names an unknown model and
    ``IndexError`` (from ``random.choice``) when traces must be drawn from an
    empty log.
    """
    mixed_traces = []
    for item in config:
        log = logs[item['name']]
        needed_log_length = item['length']
        # When at least as many traces are requested as the log holds, emit
        # the whole log once (shuffled) so that every trace is represented.
        if needed_log_length >= len(log):
            shuffled = [trace[:] for trace in log]
            random.shuffle(shuffled)
            mixed_traces.extend(shuffled)
            needed_log_length -= len(log)
        # Fill the remainder of the segment by sampling with replacement.
        mixed_traces.extend(
            random.choice(log)[:] for _ in range(needed_log_length)
        )
    return mixed_traces
def read_individual_log(dataset):
    """
    Load every per-model log file of one dataset.

    Each entry of ``<individual_logs_dir>/<dataset>`` is read as a text file
    with one comma-separated trace per line; blank lines are skipped. The
    model name is the file name without its extension.

    dataset: name of the sub-directory below ``individual_logs_dir``.

    Returns a dict mapping model name to a list of traces, each trace being a
    list of strings.
    """
    dataset_dir = os.path.join(individual_logs_dir, dataset)
    logs = {}
    for filename in os.listdir(dataset_dir):
        model, _ = os.path.splitext(filename)
        # Use a context manager so the handle is closed even if parsing fails.
        with open(os.path.join(dataset_dir, filename), 'r') as f:
            stripped_lines = (line.strip() for line in f)
            logs[model] = [line.split(',') for line in stripped_lines if line]
    return logs
def random_mix(logs, num, min_n, max_n=None):
    """
    Build a random mixing configuration of ``num`` segments and apply it.

    Consecutive segments always come from different models. The length of a
    segment is drawn with ``random.randrange``:

    * ``max_n`` given: from ``[min_n, max_n)``;
    * ``max_n`` omitted: from ``[low, int(low * 1.5))`` where ``low`` is the
      larger of ``min_n`` and the size of the chosen model's log. When that
      range is empty (``low <= 1``) the length is simply ``low``.

    logs: mapping from model name to its list of traces.
    num: number of segments to generate.
    min_n: minimum segment length.
    max_n: exclusive upper bound of the segment length, or ``None``.

    Returns ``(mixed_traces, config)`` where ``config`` is the generated list
    of ``{'name': ..., 'length': ...}`` dicts and ``mixed_traces`` is the
    result of ``mix(logs, config)``.

    Raises ``ValueError`` when more than one segment is requested but only one
    model is available (adjacent segments could not differ), or when
    ``min_n >= max_n``.
    """
    # Materialise the keys: random.choice needs an indexable sequence.
    models = list(logs)
    if num > 1 and len(models) == 1:
        raise ValueError(
            'at least two models are required to build %d segments with '
            'different adjacent models' % num
        )

    config = []
    last_model = None
    for _ in range(num):
        # Choose uniformly among the models that differ from the previous
        # segment's model, so adjacent segments never share a model.
        candidates = [m for m in models if m != last_model]
        model = random.choice(candidates)
        last_model = model

        if max_n is None:
            low = max(min_n, len(logs[model]))
            high = int(low * 1.5)
            # randrange rejects an empty range, which happens for low <= 1.
            length = random.randrange(low, high) if high > low else low
        else:
            length = random.randrange(min_n, max_n)
        config.append({
            'name': model,
            'length': length
        })

    mixed_traces = mix(logs, config)
    return mixed_traces, config
def label_of_config(config):
    """
    Return the cumulative segment boundaries of a mixing configuration.

    The result starts with 0 and gains one entry per config item: the running
    sum of the ``'length'`` values up to and including that item.
    """
    boundaries = [0]
    running_total = 0
    for segment in config:
        running_total += segment['length']
        boundaries.append(running_total)
    return boundaries
def name_of_config(config):
    """
    Build a human-readable name describing a mixing configuration.

    Every segment contributes ``"[<start> <model>]-"`` where ``<start>`` is
    the index of the segment's first trace and ``<model>`` is the model name
    without its ``model_`` prefix (``origin`` when nothing remains). The name
    ends with ``"[<total> end]"``.

    Example: ``[{'name': 'model_', 'length': 10}, {'name': 'model_loop',
    'length': 5}]`` gives ``"[0 origin]-[10 loop]-[15 end]"``.
    """
    prefix = 'model_'
    parts = []
    trace_sum = 0
    for item in config:
        label = item['name']
        # Remove the literal prefix only. str.lstrip() would treat 'model_'
        # as a set of characters and also eat leading letters of the suffix.
        if label.startswith(prefix):
            label = label[len(prefix):]
        elif label == 'model':
            # A bare 'model' (no suffix at all) is treated like an empty suffix.
            label = ''
        if not label:
            label = 'origin'
        parts.append("[%d %s]-" % (trace_sum, label))
        trace_sum += item['length']
    parts.append('[%d end]' % trace_sum)
    return ''.join(parts)
if __name__ == '__main__':
    # Nothing runs when the module is executed as a script.
    pass