-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 15788b4
Showing
20 changed files
with
1,004 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
MIT License Copyright (c) 2024 HuangDenan | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is furnished | ||
to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice (including the next | ||
paragraph) shall be included in all copies or substantial portions of the | ||
Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS | ||
OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF | ||
OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Multi-Concept Attribute Reduction | ||
|
||
This project is an open-source implementation of the experimental code for the **Multi-Concept Attribute Reduction** paper. | ||
|
||
First, the environment needs to be set up. Use the following command: | ||
|
||
```shell | ||
conda env create -f environment.yml | ||
``` | ||
|
||
Next, make the **.sh** scripts executable: | ||
|
||
```shell | ||
chmod +x ./attribute_reduction/*.sh | ||
``` | ||
|
||
Finally, run the **.sh** scripts as needed. An example is shown below: | ||
|
||
```shell | ||
nohup ./attribute_reduction/run_DAAR.sh > ./DAAR.log & | ||
nohup ./attribute_reduction/run_heuri.sh > ./heuri.log & | ||
nohup ./attribute_reduction/run_time.sh > ./time.log & | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import os | ||
import time | ||
import json | ||
from Pfunc import * | ||
import pandas as pd | ||
|
||
|
||
def DAAR(concept_idx, return_run_time=False,
         data_dir='/home/huangdn/Attribute_Reduction/data'):
    """Exhaustive level-wise attribute reduction for one concept table.

    Enumerates attribute subsets Apriori-style (level k holds all size-k
    subsets), scores each subset with ``Functional.PC``, then keeps the
    subsets achieving the maximum PC value, minus any that are supersets
    of another kept subset.

    Args:
        concept_idx: Index used to locate ``concept_{idx}_binary.xlsx``.
        return_run_time: If True, return the elapsed search time in
            seconds instead of the reduct dictionary.
        data_dir: Directory containing the concept tables (new optional
            parameter; defaults to the original hard-coded path).

    Returns:
        dict mapping attribute tuples to their PC value, or a float of
        elapsed seconds when ``return_run_time`` is True.
    """
    table_name = f'concept_{concept_idx}_binary.xlsx'
    table_path = os.path.join(data_dir, table_name)
    table = pd.read_excel(table_path, index_col=0)

    start_time = time.time()

    funcs = Functional(filter_data(table))
    C = list(table.columns[2:])  # condition attributes
    map_index = {value: index for index, value in enumerate(C)}
    D = ['subconcept']           # decision attribute

    k = 1
    # delta[k] maps attribute tuples of size k to their PC value; tuples
    # are used because lists are unhashable as dict keys.
    delta = [dict(), dict()]
    for ai in C:
        delta[k][tuple([ai])] = funcs.PC(D, [ai])  # seed level 1 with singletons
    while len(delta[k]):
        delta.append(dict())  # init next level delta_{k + 1}
        for Pi_tuple in delta[k].keys():
            Pi_list = list(Pi_tuple)
            # Extend only with attributes after the subset's last one so
            # each subset is generated exactly once (lexicographic growth).
            max_index = map_index[Pi_list[-1]]
            for j in range(max_index + 1, len(C)):
                P_list = Pi_list.copy()
                P_list.append(C[j])
                delta[k + 1][tuple(P_list)] = funcs.PC(D, P_list)
        k += 1

    # Collect every scored subset (plus the full attribute set) into one
    # dict, then keep only those achieving the maximum PC value.
    _delta = dict()
    _delta[tuple(C)] = funcs.PC(D, C)
    for dct in delta:
        for key in dct.keys():
            _delta[key] = dct[key]
    max_value = max(_delta.values())
    # NOTE: use distinct names here so the level counter `k` is not shadowed.
    best = {key: val for key, val in _delta.items() if val == max_value}
    filtered_delta = filter_superset_keys(best)  # drop non-minimal supersets
    end_time = time.time()
    if return_run_time:
        return end_time - start_time
    return filtered_delta
|
||
|
||
def execute(save_path, concept_indices=(8, 34, 68, 84)):
    """Run DAAR for each concept index and save each reduct as JSON.

    Args:
        save_path: Directory where ``DAAR_of_concept{idx}.json`` files
            are written.
        concept_indices: Iterable of concept indices to process (new
            optional parameter; defaults to the original hard-coded list).
    """
    for idx in concept_indices:
        delta = DAAR(idx)
        # JSON object keys must be strings, so stringify the attribute tuples.
        delta = {str(k): v for k, v in delta.items()}
        file_path = os.path.join(save_path, f'DAAR_of_concept{idx}.json')
        with open(file_path, 'w') as json_file:
            json.dump(delta, json_file, indent=4)  # indent pretty-prints the output
        print(f'DAAR_of_concept{idx}.json is Done.')
|
||
|
||
# path_to_result = '/home/huangdn/Attribute_Reduction/result' | ||
# execute(path_to_result) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import os | ||
import json | ||
from Pfunc import * | ||
import pandas as pd | ||
from concurrent.futures import ProcessPoolExecutor | ||
|
||
|
||
def DAAR_for_func(concept_idx, func_type: str,
                  data_dir='/home/huangdn/Attribute_Reduction/data'):
    """Level-wise attribute-reduct search with a selectable functional.

    Same Apriori-style enumeration as ``DAAR``, but scores subsets with
    one of the ``Functional`` measures (PC/Phi/H/E/K). A subset is kept
    when its score equals that of the full attribute set AND every
    proper subset obtained by removing one attribute scores strictly
    greater (a minimality check).

    Args:
        concept_idx: Index used to locate ``concept_{idx}_binary.xlsx``.
        func_type: One of "PC", "Phi", "H", "E", "K" selecting the
            scoring functional.
        data_dir: Directory containing the concept tables (new optional
            parameter; defaults to the original hard-coded path).

    Returns:
        dict mapping each minimal attribute tuple to the full-set score.

    Raises:
        KeyError: If ``func_type`` is not a supported functional name.
    """
    table_name = f'concept_{concept_idx}_binary.xlsx'
    table_path = os.path.join(data_dir, table_name)
    table = pd.read_excel(table_path, index_col=0)
    funcs = Functional(filter_data(table))
    func_map = {
        "PC": funcs.PC,
        "Phi": funcs.Phi,
        "H": funcs.H,
        "E": funcs.E,
        "K": funcs.K
    }
    func = func_map[func_type]
    C = list(table.columns[2:])  # condition attributes
    map_index = {value: index for index, value in enumerate(C)}
    D = ['subconcept']           # decision attribute
    func_C = func(D, C)          # score of the full attribute set

    k = 1
    # delta[k] maps attribute tuples of size k to their score; tuples are
    # used because lists are unhashable as dict keys.
    delta = [dict(), dict()]
    _delta = dict()              # candidates whose score matches func_C
    for ai in C:
        delta[k][tuple([ai])] = func(D, [ai])  # seed level 1 with singletons
    while len(delta[k]):
        delta.append(dict())  # init next level delta_{k + 1}
        for Pi_tuple in delta[k].keys():
            Pi_list = list(Pi_tuple)
            # Extend only with attributes after the subset's last one so
            # each subset is generated exactly once (lexicographic growth).
            max_index = map_index[Pi_list[-1]]
            for j in range(max_index + 1, len(C)):
                P_list = Pi_list.copy()
                P_list.append(C[j])
                P_tuple = tuple(P_list)
                func_P = func(D, P_list)
                if func_P == func_C:
                    _delta[P_tuple] = func_P
                # Matching candidates are still expanded at the next level;
                # the original deliberately left the `else` commented out.
                delta[k + 1][P_tuple] = func_P
        k += 1

    # Minimality check: keep a candidate only if removing any single
    # attribute makes its score strictly greater than func_C.
    res_delta = dict()
    for Pi_tuple in _delta.keys():
        lower_subsets = [Pi_tuple[:i] + Pi_tuple[i+1:] for i in range(len(Pi_tuple))]
        func_lower_subsets = {subset: func(D, list(subset)) for subset in lower_subsets}
        all_greater = all(value > func_C for value in func_lower_subsets.values())
        if all_greater:
            res_delta[Pi_tuple] = func_C
    return res_delta
|
||
|
||
def process_task(concept_idx, func_type, save_path):
    """Run DAAR_for_func for one (concept, functional) pair and save the
    resulting reducts to an Excel file under *save_path*."""
    reduction = DAAR_for_func(concept_idx, func_type)
    attribute_sets = [", ".join(tup) for tup in reduction.keys()]
    frame = pd.DataFrame({
        "Attribute_Set": attribute_sets,
        func_type: list(reduction.values())
    })
    output_file = os.path.join(save_path, f'daar_funcs_concept_{concept_idx}_{func_type}.xlsx')
    frame.to_excel(output_file, index=False)
    print(f"Saved: {output_file}")
|
||
|
||
def execute_daar_for_others_func(save_path='/home/huangdn/Attribute_Reduction/result', max_workers=16):
    """Fan out DAAR_for_func over every (concept, functional) pair using
    a pool of worker processes, saving one Excel file per pair."""
    concept_list = [8, 34, 68, 84]
    funcs_list = ["Phi", "H", "E", "K"]

    # Prepare the full task list: one entry per concept/functional pair.
    tasks = [(cpt, fc_tp, save_path) for cpt in concept_list for fc_tp in funcs_list]

    # Run the tasks in parallel with a process pool.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_task, *task) for task in tasks]

        # Wait for every task; result() re-raises any worker exception here.
        for future in futures:
            future.result()

    print("All tasks completed.")
|
||
|
||
# Guard the multiprocess sweep so it runs only when this file is executed
# as a script. Without the guard, importing this module triggers a full
# run, and ProcessPoolExecutor worker processes (which re-import the main
# module under the 'spawn' start method) would recursively re-launch it.
if __name__ == '__main__':
    execute_daar_for_others_func()
    # DAAR_for_func(68, 'K')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import os | ||
import time | ||
import random | ||
from Pfunc import * | ||
import pandas as pd | ||
|
||
|
||
def heuri(concept_idx, search_times=500, optimization_unit=100,
          return_run_time=False, data_dir='./data'):
    """Stochastic local search for attribute reduction on one concept.

    Seeds a pool of random attribute subsets, then repeatedly picks a
    candidate (a random one most iterations; every ``optimization_unit``-th
    iteration, all current best ones) and scores every one-attribute
    expansion and shrink of it. Finally keeps the subsets with maximum PC
    value, minus supersets of other kept subsets.

    Args:
        concept_idx: Index used to locate ``concept_{idx}_binary.xlsx``.
        search_times: Number of local-search iterations.
        optimization_unit: Period of the intensification step that
            expands/shrinks every current best candidate.
        return_run_time: If True, return elapsed search seconds instead.
        data_dir: Directory containing the concept tables (new optional
            parameter; defaults to the original hard-coded './data').

    Returns:
        ``(init_pool, reducts)`` where ``init_pool`` is a snapshot of the
        initial candidate dict and ``reducts`` maps attribute tuples to
        their PC value; or a float of elapsed seconds when
        ``return_run_time`` is True.
    """
    table_name = f'concept_{concept_idx}_binary.xlsx'
    table_path = os.path.join(data_dir, table_name)
    table = pd.read_excel(table_path, index_col=0)

    start_time = time.time()

    funcs = Functional(filter_data(table))
    C = set(table.columns[2:])  # condition attributes
    D = ['subconcept']          # decision attribute
    # Seed with up to 10 random subsets of golden-ratio size; duplicates
    # collapse via the set, and tuples serve as hashable dict keys.
    delta = {
        k: funcs.PC(D, list(k))
        for k in set([
            tuple(sorted(random.sample(list(C), golden_ratio(len(C)))))
            for rp in range(10)
        ])
    }
    _ = delta.copy()  # snapshot of the initial pool, returned for inspection
    for t in range(1, search_times + 1):
        if t % optimization_unit == 0:
            # Intensification: work on every current best candidate.
            max_pc = max(delta.values())
            att_tus = [key for key, value in delta.items() if value == max_pc]
        else:
            # Exploration: work on one random candidate.
            att_tus = [random.choice(list(delta.keys()))]
        for att_tu in att_tus:
            att_set = set(att_tu)
            s = C - att_set
            # Expand: score each subset obtained by adding one absent attribute.
            for col in s:
                expand = tuple(sorted(att_set | {col}))
                if expand in delta.keys():
                    continue
                delta[expand] = funcs.PC(D, list(expand))
            # Shrink: score each subset obtained by removing one attribute.
            for col in att_set:
                shrink = tuple(sorted(att_set - {col}))
                if len(shrink) == 0 or shrink in delta.keys():
                    continue
                delta[shrink] = funcs.PC(D, list(shrink))
    # Keep only candidates achieving the maximum PC value, then drop
    # non-minimal supersets.
    max_pc = max(delta.values())
    delta = {k: v for k, v in delta.items() if v == max_pc}
    filtered_delta = filter_superset_keys(delta)
    end_time = time.time()
    if return_run_time:
        return end_time - start_time
    return _, filtered_delta
|
||
|
||
# def heuri(concept_idx, search_times=500): | ||
# table_name = f'concept_{concept_idx}_binary.xlsx' | ||
# table_path = os.path.join('./data', table_name) | ||
# table = pd.read_excel(table_path, index_col=0) | ||
# funcs = Functional(filter_data(table)) | ||
# C = set(table.columns[2:]) | ||
# D = ['subconcept'] | ||
# delta = { | ||
# k: funcs.PC(D, list(k)) | ||
# for k in set([ | ||
# tuple(sorted(random.sample(list(C), golden_ratio(len(C))))) | ||
# for rp in range(10) | ||
# ]) | ||
# } # init 10 attribute set. | ||
# _ = delta.copy() # for check init state. | ||
# for t in range(search_times): | ||
# att_tu = random.choice(list(delta.keys())) | ||
# att_set = set(att_tu) | ||
# s = C - att_set | ||
# # expand element. | ||
# for col in s: | ||
# expand = tuple(sorted(att_set | {col})) | ||
# if expand in delta.keys(): | ||
# continue | ||
# delta[expand] = funcs.PC(D, list(expand)) | ||
# # shrink element | ||
# for col in att_set: | ||
# shrink = tuple(sorted(att_set - {col})) | ||
# if len(shrink) == 0 or shrink in delta.keys(): | ||
# continue | ||
# delta[shrink] = funcs.PC(D, list(shrink)) | ||
# # filter PC value == max{PC}. | ||
# max_pc = max(delta.values()) | ||
# delta = {k: v for k, v in delta.items() if v == max_pc} | ||
# return _, filter_superset_keys(delta) | ||
|
||
# delta = {} | ||
# for col in C: | ||
# delta[tuple([col])] = funcs.PC(D, [col]) | ||
# for t in range(search_times): | ||
# length = len(delta) | ||
# key_list = get_top_n_keys(delta, length, False) | ||
# div = init_div_list(length, n) | ||
# choice_list = init_choice_list(div, key_list, n) | ||
# random_tuple = random.choice(choice_list) | ||
# s = C - set(random_tuple) | ||
# for col in s: | ||
# li = list(random_tuple) | ||
# li.append(col) | ||
# li = sorted(li) | ||
# tl = tuple(li) | ||
# if tl in delta.keys(): | ||
# continue | ||
# delta[tl] = funcs.PC(D, li) | ||
# pass | ||
|
||
# max_value = max(delta.values()) | ||
# # 筛选出所有与最大值相等的 key, value 对,并保存为字典 | ||
# delta = {k: v for k, v in delta.items() if v == max_value} | ||
# return filter_superset_keys(delta) | ||
|
||
|
||
# print(heuri(84, 10)[-1]) | ||
|
||
|
||
# def execute(save_path='/home/huangdn/Attribute_Reduction/result'): | ||
# _, delta = heuri(-1, search_times=3000, optimization_unit=10) | ||
# delta = {str(k): v for k, v in delta.items()} | ||
# file_path = os.path.join(save_path, 'merge_reduction.json') | ||
# with open(file_path, 'w') as json_file: | ||
# json.dump(delta, json_file, indent=4) # indent参数可以美化输出 | ||
# print('merge_reduction.json is Done.') | ||
# pass | ||
|
||
|
||
# execute() | ||
|
Oops, something went wrong.