forked from sth1997/GraphSet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrequent_subgraph_mining.py
91 lines (77 loc) · 2.88 KB
/
frequent_subgraph_mining.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
from settings import *
from utils import *
# Graphs to benchmark; this order is also the column order of the result table.
fsm_graphs = ["mico", "patents", "youtube"]

# Benchmark settings, one entry per graph and in the same order as fsm_graphs.
# Each entry is a list of (size, [support, ...]) pairs; every (size, support)
# combination is one run of the FSM executable on that graph.
fsm_pairs = [
    # mico
    [(2, [100, 300, 500]), (3, [10000, 13000, 15000])],
    # patents
    [(2, [5000, 10000, 15000]), (3, [10000, 15000, 20000])],
    # youtube
    [(2, [1000, 3000, 5000]), (3, [1000, 3000, 5000])],
]
def frequent_subgraph_mining(
    is_gpu: bool = False,
    bin_path: str = "../build/bin",
    log_path: str = "../reproduce_log/frequent_subgraph_mining",
    result_path: str = "../reproduce_result",
) -> int:
    """Run every configured FSM benchmark and record the measured times.

    For each graph in ``fsm_graphs`` and each (size, support) combination
    listed for it in ``fsm_pairs``, the matching executable from *bin_path*
    is launched through the shell with its stdout redirected to a per-run
    log file. The value returned by ``read_time_cost`` for that log is
    stored in a table with one row per "size-support" key and one column
    per graph, which is finally handed to ``write_table``.

    Args:
        is_gpu: Select the GPU executables (and the ``_gpu`` suffix for the
            log directory and result file) instead of the CPU ones.
        bin_path: Directory containing the benchmark executables.
        log_path: Base name of the log directory; ``_gpu``/``_cpu`` is
            appended to it.
        result_path: Directory in which the result ``.csv`` file is placed.

    Returns:
        0 when every run exited with status 0 and the table was written;
        1 as soon as any run exits with a non-zero status (no table is
        written in that case).
    """
    suffix = "_gpu" if is_gpu else "_cpu"

    # exist_ok avoids the check-then-create race of a separate exists() test.
    log_path = log_path + suffix
    os.makedirs(log_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)
    csv_path = f"{result_path}/frequent_subgraph_mining{suffix}.csv"

    default_executable = "gpu_fsm" if is_gpu else "fsm_test"
    header = ["size-support"] + fsm_graphs

    # Flatten the configuration once; both the table layout and the run loop
    # are derived from this list.
    jobs = []
    for graph_index, graph in enumerate(fsm_graphs):
        for size, supports in fsm_pairs[graph_index]:
            for support in supports:
                jobs.append((graph, size, support))

    # One row per distinct "size-support" key. The keys are strings, so the
    # row order is lexicographic (e.g. "2-1000" sorts before "2-300").
    row_keys = sorted({f"{size}-{support}" for _, size, support in jobs})
    row_of = {key: row for row, key in enumerate(row_keys)}
    # Cells for combinations that are not configured for a graph stay 0.
    table_rows = [[key] + [0] * (len(header) - 1) for key in row_keys]

    for graph, size, support in jobs:
        executable = default_executable
        # mico with size 3 is run with a dedicated executable.
        if graph == "mico" and size == 3:
            executable = "gpu_new_fsm" if is_gpu else "fsm_vertex_test"

        log_name = f"{log_path}/{graph}_fsm{size}+{support}.log"
        command = (
            f"{bin_path}/{executable} {DATA_PATH}/{graph}.adj "
            f"{size} {support} 1>{log_name}"
        )
        print(command, flush=True)
        result = os.system(COMMAND_PREFIX + command)
        # Check the exit status before touching the log: the log of a failed
        # run may not contain a time to read.
        if result != 0:
            return 1

        row = row_of[f"{size}-{support}"]
        table_rows[row][header.index(graph)] = read_time_cost(log_name)

    write_table([header] + table_rows, csv_path)
    return 0
if __name__ == "__main__":
    print("Reproducing frequent subgraph mining results. (Time: 7 hours)")

    # The runs are checked with explicit conditions rather than `assert`:
    # under `python -O` assert statements are stripped, which would skip the
    # benchmark calls entirely while still printing "done".
    print("Reproducing frequent subgraph mining results on CPU...")
    if frequent_subgraph_mining(is_gpu=False) != 0:
        raise SystemExit("Frequent subgraph mining failed on CPU.")

    print("Reproducing frequent subgraph mining results on GPU...")
    if frequent_subgraph_mining(is_gpu=True) != 0:
        raise SystemExit("Frequent subgraph mining failed on GPU.")

    print("Reproducing frequent subgraph mining results done.")