-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 15788b4
Showing
20 changed files
with
1,004 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
MIT License Copyright (c) 2024 HuangDenan | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is furnished | ||
to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice (including the next | ||
paragraph) shall be included in all copies or substantial portions of the | ||
Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS | ||
OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF | ||
OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Multi-Concept Attribute Reduction | ||
|
||
This project is an open-source implementation of the experimental code for the **Multi-Concept Attribute Reduction** paper. | ||
|
||
First, the environment needs to be set up. Use the following command: | ||
|
||
```shell | ||
conda env create -f environment.yml | ||
``` | ||
|
||
Next, make the **.sh** scripts executable: | ||
|
||
```shell | ||
chmod +x ./attribute_reduction/*.sh | ||
``` | ||
|
||
Finally, run the **.sh** scripts as needed. An example is shown below: | ||
|
||
```shell | ||
nohup ./attribute_reduction/run_DAAR.sh > ./DAAR.log & | ||
nohup ./attribute_reduction/run_heuri.sh > ./heuri.log & | ||
nohup ./attribute_reduction/run_time.sh > ./time.log & | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import os | ||
import time | ||
import json | ||
from Pfunc import * | ||
import pandas as pd | ||
|
||
|
||
def DAAR(concept_idx, return_run_time=False,
         data_dir='/home/huangdn/Attribute_Reduction/data'):
    """Exhaustive level-wise attribute reduction for one concept table.

    Enumerates attribute subsets Apriori-style (level k holds all size-k
    subsets), scores each subset with ``Functional.PC``, then keeps the
    subsets achieving the maximum PC value, minus any that are supersets
    of another kept subset.

    Args:
        concept_idx: Index used to locate ``concept_{idx}_binary.xlsx``.
        return_run_time: If True, return the elapsed search time in
            seconds instead of the reduct dictionary.
        data_dir: Directory containing the concept tables (new optional
            parameter; defaults to the original hard-coded path).

    Returns:
        dict mapping attribute tuples to their PC value, or a float of
        elapsed seconds when ``return_run_time`` is True.
    """
    table_name = f'concept_{concept_idx}_binary.xlsx'
    table_path = os.path.join(data_dir, table_name)
    table = pd.read_excel(table_path, index_col=0)

    start_time = time.time()

    funcs = Functional(filter_data(table))
    C = list(table.columns[2:])  # condition attributes
    map_index = {value: index for index, value in enumerate(C)}
    D = ['subconcept']           # decision attribute

    k = 1
    # delta[k] maps attribute tuples of size k to their PC value; tuples
    # are used because lists are unhashable as dict keys.
    delta = [dict(), dict()]
    for ai in C:
        delta[k][tuple([ai])] = funcs.PC(D, [ai])  # seed level 1 with singletons
    while len(delta[k]):
        delta.append(dict())  # init next level delta_{k + 1}
        for Pi_tuple in delta[k].keys():
            Pi_list = list(Pi_tuple)
            # Extend only with attributes after the subset's last one so
            # each subset is generated exactly once (lexicographic growth).
            max_index = map_index[Pi_list[-1]]
            for j in range(max_index + 1, len(C)):
                P_list = Pi_list.copy()
                P_list.append(C[j])
                delta[k + 1][tuple(P_list)] = funcs.PC(D, P_list)
        k += 1

    # Collect every scored subset (plus the full attribute set) into one
    # dict, then keep only those achieving the maximum PC value.
    _delta = dict()
    _delta[tuple(C)] = funcs.PC(D, C)
    for dct in delta:
        for key in dct.keys():
            _delta[key] = dct[key]
    max_value = max(_delta.values())
    # NOTE: use distinct names here so the level counter `k` is not shadowed.
    best = {key: val for key, val in _delta.items() if val == max_value}
    filtered_delta = filter_superset_keys(best)  # drop non-minimal supersets
    end_time = time.time()
    if return_run_time:
        return end_time - start_time
    return filtered_delta
|
||
|
||
def execute(save_path, concept_indices=(8, 34, 68, 84)):
    """Run DAAR for each concept index and save each reduct as JSON.

    Args:
        save_path: Directory where ``DAAR_of_concept{idx}.json`` files
            are written.
        concept_indices: Iterable of concept indices to process (new
            optional parameter; defaults to the original hard-coded list).
    """
    for idx in concept_indices:
        delta = DAAR(idx)
        # JSON object keys must be strings, so stringify the attribute tuples.
        delta = {str(k): v for k, v in delta.items()}
        file_path = os.path.join(save_path, f'DAAR_of_concept{idx}.json')
        with open(file_path, 'w') as json_file:
            json.dump(delta, json_file, indent=4)  # indent pretty-prints the output
        print(f'DAAR_of_concept{idx}.json is Done.')
|
||
|
||
# path_to_result = '/home/huangdn/Attribute_Reduction/result' | ||
# execute(path_to_result) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import os | ||
import json | ||
from Pfunc import * | ||
import pandas as pd | ||
from concurrent.futures import ProcessPoolExecutor | ||
|
||
|
||
def DAAR_for_func(concept_idx, func_type: str,
                  data_dir='/home/huangdn/Attribute_Reduction/data'):
    """Level-wise attribute-reduct search with a selectable functional.

    Same Apriori-style enumeration as ``DAAR``, but scores subsets with
    one of the ``Functional`` measures (PC/Phi/H/E/K). A subset is kept
    when its score equals that of the full attribute set AND every
    proper subset obtained by removing one attribute scores strictly
    greater (a minimality check).

    Args:
        concept_idx: Index used to locate ``concept_{idx}_binary.xlsx``.
        func_type: One of "PC", "Phi", "H", "E", "K" selecting the
            scoring functional.
        data_dir: Directory containing the concept tables (new optional
            parameter; defaults to the original hard-coded path).

    Returns:
        dict mapping each minimal attribute tuple to the full-set score.

    Raises:
        KeyError: If ``func_type`` is not a supported functional name.
    """
    table_name = f'concept_{concept_idx}_binary.xlsx'
    table_path = os.path.join(data_dir, table_name)
    table = pd.read_excel(table_path, index_col=0)
    funcs = Functional(filter_data(table))
    func_map = {
        "PC": funcs.PC,
        "Phi": funcs.Phi,
        "H": funcs.H,
        "E": funcs.E,
        "K": funcs.K
    }
    func = func_map[func_type]
    C = list(table.columns[2:])  # condition attributes
    map_index = {value: index for index, value in enumerate(C)}
    D = ['subconcept']           # decision attribute
    func_C = func(D, C)          # score of the full attribute set

    k = 1
    # delta[k] maps attribute tuples of size k to their score; tuples are
    # used because lists are unhashable as dict keys.
    delta = [dict(), dict()]
    _delta = dict()              # candidates whose score matches func_C
    for ai in C:
        delta[k][tuple([ai])] = func(D, [ai])  # seed level 1 with singletons
    while len(delta[k]):
        delta.append(dict())  # init next level delta_{k + 1}
        for Pi_tuple in delta[k].keys():
            Pi_list = list(Pi_tuple)
            # Extend only with attributes after the subset's last one so
            # each subset is generated exactly once (lexicographic growth).
            max_index = map_index[Pi_list[-1]]
            for j in range(max_index + 1, len(C)):
                P_list = Pi_list.copy()
                P_list.append(C[j])
                P_tuple = tuple(P_list)
                func_P = func(D, P_list)
                if func_P == func_C:
                    _delta[P_tuple] = func_P
                # Matching candidates are still expanded at the next level;
                # the original deliberately left the `else` commented out.
                delta[k + 1][P_tuple] = func_P
        k += 1

    # Minimality check: keep a candidate only if removing any single
    # attribute makes its score strictly greater than func_C.
    res_delta = dict()
    for Pi_tuple in _delta.keys():
        lower_subsets = [Pi_tuple[:i] + Pi_tuple[i+1:] for i in range(len(Pi_tuple))]
        func_lower_subsets = {subset: func(D, list(subset)) for subset in lower_subsets}
        all_greater = all(value > func_C for value in func_lower_subsets.values())
        if all_greater:
            res_delta[Pi_tuple] = func_C
    return res_delta
|
||
|
||
def process_task(concept_idx, func_type, save_path):
    """Run DAAR_for_func for one (concept, functional) pair and save the
    resulting reducts to an Excel file under *save_path*."""
    reduction = DAAR_for_func(concept_idx, func_type)
    attribute_sets = [", ".join(tup) for tup in reduction.keys()]
    frame = pd.DataFrame({
        "Attribute_Set": attribute_sets,
        func_type: list(reduction.values())
    })
    output_file = os.path.join(save_path, f'daar_funcs_concept_{concept_idx}_{func_type}.xlsx')
    frame.to_excel(output_file, index=False)
    print(f"Saved: {output_file}")
|
||
|
||
def execute_daar_for_others_func(save_path='/home/huangdn/Attribute_Reduction/result', max_workers=16):
    """Fan out DAAR_for_func over every (concept, functional) pair using
    a pool of worker processes, saving one Excel file per pair."""
    concept_list = [8, 34, 68, 84]
    funcs_list = ["Phi", "H", "E", "K"]

    # Prepare the full task list: one entry per concept/functional pair.
    tasks = [(cpt, fc_tp, save_path) for cpt in concept_list for fc_tp in funcs_list]

    # Run the tasks in parallel with a process pool.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_task, *task) for task in tasks]

        # Wait for every task; result() re-raises any worker exception here.
        for future in futures:
            future.result()

    print("All tasks completed.")
|
||
|
||
# Guard the multiprocess sweep so it runs only when this file is executed
# as a script. Without the guard, importing this module triggers a full
# run, and ProcessPoolExecutor worker processes (which re-import the main
# module under the 'spawn' start method) would recursively re-launch it.
if __name__ == '__main__':
    execute_daar_for_others_func()
    # DAAR_for_func(68, 'K')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import os | ||
import time | ||
import random | ||
from Pfunc import * | ||
import pandas as pd | ||
|
||
|
||
def heuri(concept_idx, search_times=500, optimization_unit=100,
          return_run_time=False, data_dir='./data'):
    """Stochastic local search for attribute reduction on one concept.

    Seeds a pool of random attribute subsets, then repeatedly picks a
    candidate (a random one most iterations; every ``optimization_unit``-th
    iteration, all current best ones) and scores every one-attribute
    expansion and shrink of it. Finally keeps the subsets with maximum PC
    value, minus supersets of other kept subsets.

    Args:
        concept_idx: Index used to locate ``concept_{idx}_binary.xlsx``.
        search_times: Number of local-search iterations.
        optimization_unit: Period of the intensification step that
            expands/shrinks every current best candidate.
        return_run_time: If True, return elapsed search seconds instead.
        data_dir: Directory containing the concept tables (new optional
            parameter; defaults to the original hard-coded './data').

    Returns:
        ``(init_pool, reducts)`` where ``init_pool`` is a snapshot of the
        initial candidate dict and ``reducts`` maps attribute tuples to
        their PC value; or a float of elapsed seconds when
        ``return_run_time`` is True.
    """
    table_name = f'concept_{concept_idx}_binary.xlsx'
    table_path = os.path.join(data_dir, table_name)
    table = pd.read_excel(table_path, index_col=0)

    start_time = time.time()

    funcs = Functional(filter_data(table))
    C = set(table.columns[2:])  # condition attributes
    D = ['subconcept']          # decision attribute
    # Seed with up to 10 random subsets of golden-ratio size; duplicates
    # collapse via the set, and tuples serve as hashable dict keys.
    delta = {
        k: funcs.PC(D, list(k))
        for k in set([
            tuple(sorted(random.sample(list(C), golden_ratio(len(C)))))
            for rp in range(10)
        ])
    }
    _ = delta.copy()  # snapshot of the initial pool, returned for inspection
    for t in range(1, search_times + 1):
        if t % optimization_unit == 0:
            # Intensification: work on every current best candidate.
            max_pc = max(delta.values())
            att_tus = [key for key, value in delta.items() if value == max_pc]
        else:
            # Exploration: work on one random candidate.
            att_tus = [random.choice(list(delta.keys()))]
        for att_tu in att_tus:
            att_set = set(att_tu)
            s = C - att_set
            # Expand: score each subset obtained by adding one absent attribute.
            for col in s:
                expand = tuple(sorted(att_set | {col}))
                if expand in delta.keys():
                    continue
                delta[expand] = funcs.PC(D, list(expand))
            # Shrink: score each subset obtained by removing one attribute.
            for col in att_set:
                shrink = tuple(sorted(att_set - {col}))
                if len(shrink) == 0 or shrink in delta.keys():
                    continue
                delta[shrink] = funcs.PC(D, list(shrink))
    # Keep only candidates achieving the maximum PC value, then drop
    # non-minimal supersets.
    max_pc = max(delta.values())
    delta = {k: v for k, v in delta.items() if v == max_pc}
    filtered_delta = filter_superset_keys(delta)
    end_time = time.time()
    if return_run_time:
        return end_time - start_time
    return _, filtered_delta
|
||
|
||
# def heuri(concept_idx, search_times=500): | ||
# table_name = f'concept_{concept_idx}_binary.xlsx' | ||
# table_path = os.path.join('./data', table_name) | ||
# table = pd.read_excel(table_path, index_col=0) | ||
# funcs = Functional(filter_data(table)) | ||
# C = set(table.columns[2:]) | ||
# D = ['subconcept'] | ||
# delta = { | ||
# k: funcs.PC(D, list(k)) | ||
# for k in set([ | ||
# tuple(sorted(random.sample(list(C), golden_ratio(len(C))))) | ||
# for rp in range(10) | ||
# ]) | ||
# } # init 10 attribute set. | ||
# _ = delta.copy() # for check init state. | ||
# for t in range(search_times): | ||
# att_tu = random.choice(list(delta.keys())) | ||
# att_set = set(att_tu) | ||
# s = C - att_set | ||
# # expand element. | ||
# for col in s: | ||
# expand = tuple(sorted(att_set | {col})) | ||
# if expand in delta.keys(): | ||
# continue | ||
# delta[expand] = funcs.PC(D, list(expand)) | ||
# # shrink element | ||
# for col in att_set: | ||
# shrink = tuple(sorted(att_set - {col})) | ||
# if len(shrink) == 0 or shrink in delta.keys(): | ||
# continue | ||
# delta[shrink] = funcs.PC(D, list(shrink)) | ||
# # filter PC value == max{PC}. | ||
# max_pc = max(delta.values()) | ||
# delta = {k: v for k, v in delta.items() if v == max_pc} | ||
# return _, filter_superset_keys(delta) | ||
|
||
# delta = {} | ||
# for col in C: | ||
# delta[tuple([col])] = funcs.PC(D, [col]) | ||
# for t in range(search_times): | ||
# length = len(delta) | ||
# key_list = get_top_n_keys(delta, length, False) | ||
# div = init_div_list(length, n) | ||
# choice_list = init_choice_list(div, key_list, n) | ||
# random_tuple = random.choice(choice_list) | ||
# s = C - set(random_tuple) | ||
# for col in s: | ||
# li = list(random_tuple) | ||
# li.append(col) | ||
# li = sorted(li) | ||
# tl = tuple(li) | ||
# if tl in delta.keys(): | ||
# continue | ||
# delta[tl] = funcs.PC(D, li) | ||
# pass | ||
|
||
# max_value = max(delta.values()) | ||
# # 筛选出所有与最大值相等的 key, value 对,并保存为字典 | ||
# delta = {k: v for k, v in delta.items() if v == max_value} | ||
# return filter_superset_keys(delta) | ||
|
||
|
||
# print(heuri(84, 10)[-1]) | ||
|
||
|
||
# def execute(save_path='/home/huangdn/Attribute_Reduction/result'): | ||
# _, delta = heuri(-1, search_times=3000, optimization_unit=10) | ||
# delta = {str(k): v for k, v in delta.items()} | ||
# file_path = os.path.join(save_path, 'merge_reduction.json') | ||
# with open(file_path, 'w') as json_file: | ||
# json.dump(delta, json_file, indent=4) # indent参数可以美化输出 | ||
# print('merge_reduction.json is Done.') | ||
# pass | ||
|
||
|
||
# execute() | ||
|
Oops, something went wrong.