Skip to content

Commit

Permalink
v2.2 record data
Browse files Browse the repository at this point in the history
  • Loading branch information
tangminji committed Aug 2, 2023
1 parent 22f9218 commit 439e961
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ nrun/

# data
data/
data_meta/
model/

# cache file
Expand Down
14 changes: 11 additions & 3 deletions data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@ def __init__(self, path, device, seed, train=True, transform=None, target_transf
dataset1, labels1 = np.load('{}/data/cifar10/train_images.npy'.format(path)), np.load('{}/data/cifar10/train_labels.npy'.format(path))
dataset2, labels2 = np.load('{}/data/svhn/train_images.npy'.format(path)), np.load('{}/data/svhn/train_labels.npy'.format(path))

noisy_dataset1, noisy_labels1 = data_process.open_closed_noisy_labels(dataset1, labels1, dataset2, device,
noisy_dataset1, noisy_labels1, real_labels = data_process.open_closed_noisy_labels(dataset1, labels1, dataset2, device,
closed_noise_type=noise_type,
openset_noise_rate=noise_rate1,
closed_set_noise_rate=noise_rate2,
num_classes=10, random_seed=seed)

self.train_data, self.val_data, self.train_labels, self.val_labels = tools.dataset_split_without_noise(
noisy_dataset1, noisy_labels1, split_per, seed)
self.train_data, self.val_data, self.train_labels, self.val_labels, self.train_real_labels, self.val_real_labels = tools.dataset_split_without_noise(
noisy_dataset1, noisy_labels1, real_labels, split_per, seed)

save_path = f'{path}/data_meta/cifar10s/{noise_rate1}_{noise_rate2}/{seed}'
os.makedirs(save_path, exist_ok=True)
np.save(f"{save_path}/train_labels.npy",self.train_labels)
np.save(f"{save_path}/val_labels.npy",self.val_labels)
np.save(f"{save_path}/train_real_labels.npy",self.train_real_labels)
np.save(f"{save_path}/val_real_labels.npy",self.val_real_labels)


if self.train:
self.train_data = self.train_data.reshape((45000,3,32,32))
Expand Down
4 changes: 3 additions & 1 deletion data_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def open_closed_noisy_labels(dataset1, dataset1_label, dataset2, device, closed_
# not -> dataset1 and dataset2 do not have same classes, e.g., CIFAR-10 and SVHN (MNIST, *CIFAR-100)

num_total_1, num_total_2 = int(dataset1.shape[0]), int(dataset2.shape[0])
real_label = np.copy(dataset1_label)

noise_rate = float(openset_noise_rate + closed_set_noise_rate)
num_noisy_labels_1 = int(noise_rate * num_total_1)
Expand All @@ -21,6 +22,7 @@ def open_closed_noisy_labels(dataset1, dataset1_label, dataset2, device, closed_

# open_set_corruption (images corruption)
dataset1[corrupted_open_noisy_labels_index_1] = dataset2[corrupted_labels_index_2]
real_label[corrupted_open_noisy_labels_index_1] = num_classes # OOD

# closed_set_corruption (labels corruption)
labels = dataset1_label[corrupted_closed_noisy_labels_index_1]
Expand All @@ -33,7 +35,7 @@ def open_closed_noisy_labels(dataset1, dataset1_label, dataset2, device, closed_
data_labels = zip(dataset, labels)
noisy_labels = tools.get_instance_noisy_label(1.0, data_labels, labels, num_classes, feature_size, norm_std, device, random_seed)
dataset1_label[corrupted_closed_noisy_labels_index_1] = noisy_labels.squeeze()
return dataset1, dataset1_label
return dataset1, dataset1_label, real_label



Expand Down
9 changes: 6 additions & 3 deletions enum_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def get_trials(fixed, space, MAX_EVALS):
tpe_algorithm = tpe.suggest
bayes_trials = Trials()

assert args.dataset == "cifar10s"
assert args.dataset == "cnwl"

MAX_EVALS = 3 # TODO 设置轮次
MAX_EVALS = 5 # TODO 设置轮次
# TODO: times, sigma (key hyperparameters)
# space中所有参数都需要是hp对象,否则best会缺失相应超参数值
# python ../main_ce1.py \
Expand All @@ -90,8 +90,11 @@ def get_trials(fixed, space, MAX_EVALS):
}

space = {
'warm_up': [25, 30, 35],
'warm_up': [10, 15, 20, 25, 30],
}
# space = {
# 'warm_up': [25, 30, 35],
# }

trials = get_trials(fixed, space, MAX_EVALS)
all_trials = []
Expand Down
148 changes: 148 additions & 0 deletions five_run_score.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
# Author: jlgao HIT-SCIR
import os
import re
import numpy


def cal_avg(path, reverse=True, top3=False, choose=None):
    """Aggregate per-seed scores found under ``path`` and write a summary file.

    Every entry ``<path>/<seed>/best_results.txt`` is treated as one run.
    Only the first line of that file is read and split on tabs: the first
    field is reported as ``test_val`` and the last field as ``Test``.
    NOTE(review): the original inline remark lists the columns as
    "Val, Test, test_val" -- confirm which column is which against the code
    that writes ``best_results.txt``.

    The runs are ordered by their ``Test`` value, the first three are kept,
    and mean / standard deviation (``numpy.std``) of both values are printed
    and written to ``five_run_score.txt`` (or ``five_run_top3_score.txt``
    when ``top3`` is true) inside ``path``.

    Args:
        path: Directory that contains one sub-directory per seed.
        reverse: Passed to ``list.sort``; ``True`` puts the highest ``Test``
            value first, so the three highest runs are kept.
        top3: When false, only ``seed0``, ``seed1`` and ``seed2`` are
            considered and ``choose`` is ignored. When true, every seed
            directory is a candidate (optionally narrowed by ``choose``).
        choose: Optional collection of seed directory names; only consulted
            when ``top3`` is true. ``None`` or an empty collection means
            "no restriction".

    Returns:
        None. If no run matches, a notice is printed and no file is written.
    """
    default_seeds = ('seed0', 'seed1', 'seed2')
    histo = []
    # os.listdir gives no ordering guarantee; sorting keeps tie-breaking in
    # the stable sort below reproducible between machines.
    for seed_n in sorted(os.listdir(path)):
        res_path = os.path.join(path, seed_n, 'best_results.txt')
        if not os.path.exists(res_path):
            continue
        if not top3:
            # Only keep seed0, seed1 and seed2.
            if seed_n not in default_seeds:
                continue
        elif choose and seed_n not in choose:
            continue
        with open(res_path, 'r', encoding='utf-8') as f:
            # Only the first line of the result file is used.
            fields = f.readline().strip().split("\t")
        test_val_acc = float(fields[0])
        test_acc = float(fields[-1])
        histo.append((seed_n, test_acc, test_val_acc))

    if not histo:
        # Nothing to average: report it instead of dividing by zero below.
        print("%s\tno best_results.txt found for the selected seeds" % path)
        return

    histo.sort(key=lambda run: run[1], reverse=reverse)
    histo = histo[:3]

    test_values = [run[1] for run in histo]
    test_val_values = [run[2] for run in histo]
    avg_score = sum(test_values) / len(test_values)
    avg_test_val = sum(test_val_values) / len(test_val_values)
    # Build the summary once so the console and the file always agree.
    summary = "%s\tTest: %.2f±%.2f\ttest_val: %.2f±%.2f" % (
        path,
        avg_score,
        numpy.std(test_values),
        avg_test_val,
        numpy.std(test_val_values),
    )
    print(summary)
    for seed_n, sc, test_val in histo:
        print(seed_n, sc, test_val)

    fname = 'five_run_top3_score.txt' if top3 else 'five_run_score.txt'
    with open(os.path.join(path, fname), 'w', encoding='utf-8') as f:
        f.write(summary + "\n")
        for seed_n, sc, test_val in histo:
            f.write(f'{seed_n} Test: {sc} Test_val: {test_val}\n')


if __name__ == '__main__':
    # Banner for the fixed seed0/seed1/seed2 averages. No run directory is
    # evaluated in this mode at the moment; only the heading is printed.
    print("===> Seed 0,1,2 avg")

    print("===> Five run top3")

    # cifar10s run directories to summarise, one per noise-setting folder.
    # Earlier SST_* and ablation/* evaluations (base, SLN, GCE, STGN, GNMO,
    # GNMP and their hyper-parameter variants) used the same call pattern
    # with 'nrun/<method>/nr<rate>' style paths.
    top3_run_dirs = (
        '/home/mjtang/wtt/Extend_T/nrun/cifar10s/0.05_0.15/ours_instance_dwt_J=9_enh_red_lam=0.5_wm=25_del=0.2_eps=0.4_eta=0.2_inc=0.01',
        '/home/mjtang/wtt/Extend_T/nrun/cifar10s/0.15_0.05/ours_instance_dwt_J=9_enh_red_lam=0.3_wm=30_del=0.2_eps=0.4_eta=0.1_inc=0.02',
        '/home/mjtang/wtt/Extend_T/nrun/cifar10s/0.1_0.3/ours_instance_dwt_J=9_enh_red_lam=0.3_wm=20_del=0.05_eps=0.4_eta=0.3_inc=0.02',
        '/home/mjtang/wtt/Extend_T/nrun/cifar10s/0.2_0.2/ours_instance_dwt_J=9_enh_red_lam=0.3_wm=25_del=0.1_eps=0.30000000000000004_eta=0.2_inc=0.02',
        '/home/mjtang/wtt/Extend_T/nrun/cifar10s/0.3_0.1/ours_instance_dwt_J=9_enh_red_lam=0.3_wm=25_del=0.05_eps=0.30000000000000004_eta=0.1_inc=0.01',
        '/home/mjtang/wtt/Extend_T/nrun/cifar10s/0.45_0.15/ours_instance_dwt_J=9_enh_red_lam=0.3_wm=25_del=0.05_eps=0.4_eta=0.3_inc=0.02',
    )
    # Same order as before: each directory gets its own top-3 report.
    for run_dir in top3_run_dirs:
        cal_avg(run_dir, reverse=True, top3=True)



2 changes: 1 addition & 1 deletion hy_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def main(params):
tpe_algorithm = tpe.suggest
bayes_trials = Trials()

assert args.dataset == "cifar10s"
assert args.dataset == "cnwl"

MAX_EVALS = 15 # TODO 设置轮次
# TODO: times, sigma (key hyperparameters)
Expand Down
6 changes: 3 additions & 3 deletions tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch.nn.functional as F
import torch

def dataset_split_without_noise(train_images, train_labels, split_per=0.9, seed=1):
def dataset_split_without_noise(train_images, train_labels, real_labels, split_per=0.9, seed=1):
total_labels = train_labels[:, np.newaxis]

num_samples = int(total_labels.shape[0])
Expand All @@ -15,8 +15,8 @@ def dataset_split_without_noise(train_images, train_labels, split_per=0.9, seed=
print(train_images.shape)
train_set, val_set = train_images[train_set_index], train_images[val_set_index]
train_labels, val_labels = total_labels[train_set_index], total_labels[val_set_index]

return train_set, val_set, train_labels.squeeze(), val_labels.squeeze()
train_real_labels, val_real_labels = real_labels[train_set_index], real_labels[val_set_index]
return train_set, val_set, train_labels.squeeze(), val_labels.squeeze(), train_real_labels, val_real_labels

def get_mean_and_std(dataset):
'''Compute the mean and std value of dataset.'''
Expand Down

0 comments on commit 439e961

Please sign in to comment.