[pull] main from thuml:main #11

Merged
11 commits merged on Apr 26, 2024
9 changes: 7 additions & 2 deletions .gitignore
@@ -158,5 +158,10 @@ data_loader_all.py
 /utils/self_tools.py
 /scripts/exp_scripts/
 
-/checkpoints/
-/results/
+checkpoints/
+results/
+result_long_term_forecast.txt
+result_anomaly_detection.txt
+scripts/augmentation/
+run_anylearn.py
+environment.txt
3 changes: 3 additions & 0 deletions data_provider/data_factory.py
@@ -37,6 +37,7 @@ def data_provider(args, flag):
     if args.task_name == 'anomaly_detection':
         drop_last = False
         data_set = Data(
+            args = args,
             root_path=args.root_path,
             win_size=args.seq_len,
             flag=flag,
@@ -52,6 +53,7 @@ def data_provider(args, flag):
     elif args.task_name == 'classification':
         drop_last = False
         data_set = Data(
+            args = args,
             root_path=args.root_path,
             flag=flag,
         )
@@ -69,6 +71,7 @@ def data_provider(args, flag):
         if args.data == 'm4':
             drop_last = False
         data_set = Data(
+            args = args,
             root_path=args.root_path,
             data_path=args.data_path,
             flag=flag,
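Note: rather than adding a new keyword argument for every flag, the factory now threads the whole argparse namespace into each dataset constructor. A minimal sketch of the contract these call sites assume (ToyDataset and its fields are illustrative, not part of the repository):

```python
from argparse import Namespace
from torch.utils.data import Dataset

class ToyDataset(Dataset):
    """Illustrative stand-in for the Dataset_* classes touched by this diff."""

    def __init__(self, args, root_path, flag='train'):
        self.args = args            # keep the namespace so later code can read
        self.root_path = root_path  # e.g. args.augmentation_ratio
        self.flag = flag

    def __len__(self):
        return 0

# the factory forwards one object instead of a growing keyword list
ds = ToyDataset(args=Namespace(augmentation_ratio=0), root_path='./dataset/')
```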
53 changes: 41 additions & 12 deletions data_provider/data_loader.py
@@ -11,15 +11,17 @@
 from data_provider.uea import subsample, interpolate_missing, Normalizer
 from sktime.datasets import load_from_tsfile_to_dataframe
 import warnings
+from utils.augmentation import run_augmentation_single
 
 warnings.filterwarnings('ignore')
 
 
 class Dataset_ETT_hour(Dataset):
-    def __init__(self, root_path, flag='train', size=None,
+    def __init__(self, args, root_path, flag='train', size=None,
                  features='S', data_path='ETTh1.csv',
                  target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
         # size [seq_len, label_len, pred_len]
+        self.args = args
         # info
         if size == None:
             self.seq_len = 24 * 4 * 4
@@ -81,6 +83,10 @@ def __read_data__(self):
 
         self.data_x = data[border1:border2]
         self.data_y = data[border1:border2]
+
+        if self.set_type == 0 and self.args.augmentation_ratio > 0:
+            self.data_x, self.data_y, augmentation_tags = run_augmentation_single(self.data_x, self.data_y, self.args)
+
         self.data_stamp = data_stamp
 
     def __getitem__(self, index):
@@ -104,10 +110,11 @@ def inverse_transform(self, data):
 
 
 class Dataset_ETT_minute(Dataset):
-    def __init__(self, root_path, flag='train', size=None,
+    def __init__(self, args, root_path, flag='train', size=None,
                  features='S', data_path='ETTm1.csv',
                  target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None):
         # size [seq_len, label_len, pred_len]
+        self.args = args
         # info
         if size == None:
             self.seq_len = 24 * 4 * 4
@@ -171,6 +178,10 @@ def __read_data__(self):
 
         self.data_x = data[border1:border2]
         self.data_y = data[border1:border2]
+
+        if self.set_type == 0 and self.args.augmentation_ratio > 0:
+            self.data_x, self.data_y, augmentation_tags = run_augmentation_single(self.data_x, self.data_y, self.args)
+
         self.data_stamp = data_stamp
 
     def __getitem__(self, index):
@@ -194,10 +205,11 @@ def inverse_transform(self, data):
 
 
 class Dataset_Custom(Dataset):
-    def __init__(self, root_path, flag='train', size=None,
+    def __init__(self, args, root_path, flag='train', size=None,
                  features='S', data_path='ETTh1.csv',
                  target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
         # size [seq_len, label_len, pred_len]
+        self.args = args
         # info
         if size == None:
             self.seq_len = 24 * 4 * 4
@@ -269,6 +281,10 @@ def __read_data__(self):
 
         self.data_x = data[border1:border2]
         self.data_y = data[border1:border2]
+
+        if self.set_type == 0 and self.args.augmentation_ratio > 0:
+            self.data_x, self.data_y, augmentation_tags = run_augmentation_single(self.data_x, self.data_y, self.args)
+
         self.data_stamp = data_stamp
 
     def __getitem__(self, index):
@@ -292,7 +308,7 @@ def inverse_transform(self, data):
 
 
 class Dataset_M4(Dataset):
-    def __init__(self, root_path, flag='pred', size=None,
+    def __init__(self, args, root_path, flag='pred', size=None,
                  features='S', data_path='ETTh1.csv',
                  target='OT', scale=False, inverse=False, timeenc=0, freq='15min',
                  seasonal_patterns='Yearly'):
@@ -371,7 +387,7 @@ def last_insample_window(self):
 
 
 class PSMSegLoader(Dataset):
-    def __init__(self, root_path, win_size, step=1, flag="train"):
+    def __init__(self, args, root_path, win_size, step=1, flag="train"):
         self.flag = flag
         self.step = step
         self.win_size = win_size
@@ -418,7 +434,7 @@ def __getitem__(self, index):
 
 
 class MSLSegLoader(Dataset):
-    def __init__(self, root_path, win_size, step=1, flag="train"):
+    def __init__(self, args, root_path, win_size, step=1, flag="train"):
         self.flag = flag
         self.step = step
         self.win_size = win_size
@@ -461,7 +477,7 @@ def __getitem__(self, index):
 
 
 class SMAPSegLoader(Dataset):
-    def __init__(self, root_path, win_size, step=1, flag="train"):
+    def __init__(self, args, root_path, win_size, step=1, flag="train"):
         self.flag = flag
         self.step = step
         self.win_size = win_size
@@ -505,7 +521,7 @@ def __getitem__(self, index):
 
 
 class SMDSegLoader(Dataset):
-    def __init__(self, root_path, win_size, step=100, flag="train"):
+    def __init__(self, args, root_path, win_size, step=100, flag="train"):
         self.flag = flag
         self.step = step
         self.win_size = win_size
@@ -546,7 +562,7 @@ def __getitem__(self, index):
 
 
 class SWATSegLoader(Dataset):
-    def __init__(self, root_path, win_size, step=1, flag="train"):
+    def __init__(self, args, root_path, win_size, step=1, flag="train"):
         self.flag = flag
         self.step = step
         self.win_size = win_size
@@ -614,8 +630,10 @@ class UEAloader(Dataset):
         (Moreover, script argument overrides this attribute)
     """
 
-    def __init__(self, root_path, file_list=None, limit_size=None, flag=None):
+    def __init__(self, args, root_path, file_list=None, limit_size=None, flag=None):
+        self.args = args
         self.root_path = root_path
+        self.flag = flag
         self.all_df, self.labels_df = self.load_all(root_path, file_list=file_list, flag=flag)
         self.all_IDs = self.all_df.index.unique()  # all sample IDs (integer indices 0 ... num_samples-1)
 
@@ -712,8 +730,19 @@ def instance_norm(self, case):
         return case
 
     def __getitem__(self, ind):
-        return self.instance_norm(torch.from_numpy(self.feature_df.loc[self.all_IDs[ind]].values)), \
-               torch.from_numpy(self.labels_df.loc[self.all_IDs[ind]].values)
+        batch_x = self.feature_df.loc[self.all_IDs[ind]].values
+        labels = self.labels_df.loc[self.all_IDs[ind]].values
+        if self.flag == "TRAIN" and self.args.augmentation_ratio > 0:
+            num_samples = len(self.all_IDs)
+            num_columns = self.feature_df.shape[1]
+            seq_len = int(self.feature_df.shape[0] / num_samples)
+            batch_x = batch_x.reshape((1, seq_len, num_columns))
+            batch_x, labels, augmentation_tags = run_augmentation_single(batch_x, labels, self.args)
+
+            batch_x = batch_x.reshape((1 * seq_len, num_columns))
+
+        return self.instance_norm(torch.from_numpy(batch_x)), \
+               torch.from_numpy(labels)
 
     def __len__(self):
         return len(self.all_IDs)
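Note: utils/augmentation.py is not part of this diff, so run_augmentation_single is only visible through its call sites. The contract they assume is: take the sample array, the targets, and the args namespace, and return (augmented_x, augmented_y, tag_string); in UEAloader the sample is first reshaped to (1, seq_len, num_columns) and flattened back afterwards. A hedged stand-in that satisfies that contract, assuming a simple jitter augmentation:

```python
import numpy as np

def run_augmentation_single(x, y, args):
    """Toy stand-in, not the repository implementation: jitter x with Gaussian
    noise args.augmentation_ratio times and report what was applied."""
    augmented = np.asarray(x, dtype=float)
    for _ in range(args.augmentation_ratio):
        augmented = augmented + np.random.normal(0.0, 0.01, size=augmented.shape)
    return augmented, y, f"jitter_x{args.augmentation_ratio}"
```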
3 changes: 2 additions & 1 deletion exp/exp_classification.py
@@ -36,7 +36,8 @@ def _get_data(self, flag):
         return data_set, data_loader
 
     def _select_optimizer(self):
-        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
+        # model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
+        model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate)
         return model_optim
 
     def _select_criterion(self):
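Note: the only functional change here swaps Adam for RAdam (rectified Adam), which rectifies the variance of the adaptive learning rate during the first steps instead of relying on manual warmup. torch.optim.RAdam only exists since PyTorch 1.10, so the torch==1.7.1 pin in requirements.txt below would not provide it. A minimal self-contained usage sketch:

```python
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(8, 1)  # stand-in for the classification model
# drop-in replacement for optim.Adam; available since PyTorch 1.10
model_optim = optim.RAdam(model.parameters(), lr=1e-4)
```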
23 changes: 20 additions & 3 deletions exp/exp_long_term_forecasting.py
@@ -9,6 +9,8 @@
 import time
 import warnings
 import numpy as np
+from utils.dtw_metric import dtw,accelerated_dtw
+from utils.augmentation import run_augmentation,run_augmentation_single
 
 warnings.filterwarnings('ignore')
 
@@ -106,7 +108,6 @@ def train(self, setting):
                 iter_count += 1
                 model_optim.zero_grad()
                 batch_x = batch_x.float().to(self.device)
-
                 batch_y = batch_y.float().to(self.device)
                 batch_x_mark = batch_x_mark.float().to(self.device)
                 batch_y_mark = batch_y_mark.float().to(self.device)
@@ -251,12 +252,28 @@ def test(self, setting, test=0):
         folder_path = './results/' + setting + '/'
         if not os.path.exists(folder_path):
             os.makedirs(folder_path)
 
+        # dtw calculation
+        if self.args.use_dtw:
+            dtw_list = []
+            manhattan_distance = lambda x, y: np.abs(x - y)
+            for i in range(preds.shape[0]):
+                x = preds[i].reshape(-1,1)
+                y = trues[i].reshape(-1,1)
+                if i % 100 == 0:
+                    print("calculating dtw iter:", i)
+                d, _, _, _ = accelerated_dtw(x, y, dist=manhattan_distance)
+                dtw_list.append(d)
+            dtw = np.array(dtw_list).mean()
+        else:
+            dtw = -999
+
+
         mae, mse, rmse, mape, mspe = metric(preds, trues)
-        print('mse:{}, mae:{}'.format(mse, mae))
+        print('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw))
         f = open("result_long_term_forecast.txt", 'a')
         f.write(setting + " \n")
-        f.write('mse:{}, mae:{}'.format(mse, mae))
+        f.write('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw))
         f.write('\n')
         f.write('\n')
         f.close()
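Note: utils/dtw_metric is also not included in this diff; from the call above, accelerated_dtw is assumed to return the accumulated alignment cost first, with the remaining three values (presumably the cost matrix, the accumulated-cost matrix, and the warp path) discarded. For reference, a minimal dynamic-programming DTW with the same Manhattan local cost:

```python
import numpy as np

def dtw_distance(x, y):
    """Plain O(n*m) DTW between two 1-D series with an L1 local cost."""
    n, m = len(x), len(y)
    acc = np.full((n + 1, m + 1), np.inf)
    acc[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = abs(x[i - 1] - y[j - 1])            # Manhattan distance
            acc[i, j] = cost + min(acc[i - 1, j],      # insertion
                                   acc[i, j - 1],      # deletion
                                   acc[i - 1, j - 1])  # match
    return acc[n, m]

# identical series align along the diagonal with zero cost
assert dtw_distance(np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 3.0])) == 0.0
```

Since the test loop computes one DTW per sample, the dtw = -999 sentinel when --use_dtw is off keeps the downstream logging format unchanged.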
2 changes: 1 addition & 1 deletion requirements.txt
@@ -9,4 +9,4 @@ scipy==1.10.1
 sktime==0.16.1
 sympy==1.11.1
 torch==1.7.1
-tqdm==4.64.1
\ No newline at end of file
+tqdm==4.64.1
24 changes: 24 additions & 0 deletions run.py
@@ -108,6 +108,30 @@
                     help='hidden layer dimensions of projector (List)')
 parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector')
 
+# metrics (dtw)
+parser.add_argument('--use_dtw', type=bool, default=False,
+                    help='the controller of using dtw metric (dtw is time consuming, not suggested unless necessary)')
+
+# Augmentation
+parser.add_argument('--augmentation_ratio', type=int, default=0, help="How many times to augment")
+parser.add_argument('--seed', type=int, default=2, help="Randomization seed")
+parser.add_argument('--jitter', default=False, action="store_true", help="Jitter preset augmentation")
+parser.add_argument('--scaling', default=False, action="store_true", help="Scaling preset augmentation")
+parser.add_argument('--permutation', default=False, action="store_true", help="Equal Length Permutation preset augmentation")
+parser.add_argument('--randompermutation', default=False, action="store_true", help="Random Length Permutation preset augmentation")
+parser.add_argument('--magwarp', default=False, action="store_true", help="Magnitude warp preset augmentation")
+parser.add_argument('--timewarp', default=False, action="store_true", help="Time warp preset augmentation")
+parser.add_argument('--windowslice', default=False, action="store_true", help="Window slice preset augmentation")
+parser.add_argument('--windowwarp', default=False, action="store_true", help="Window warp preset augmentation")
+parser.add_argument('--rotation', default=False, action="store_true", help="Rotation preset augmentation")
+parser.add_argument('--spawner', default=False, action="store_true", help="SPAWNER preset augmentation")
+parser.add_argument('--dtwwarp', default=False, action="store_true", help="DTW warp preset augmentation")
+parser.add_argument('--shapedtwwarp', default=False, action="store_true", help="Shape DTW warp preset augmentation")
+parser.add_argument('--wdba', default=False, action="store_true", help="Weighted DBA preset augmentation")
+parser.add_argument('--discdtw', default=False, action="store_true", help="Discriminative DTW warp preset augmentation")
+parser.add_argument('--discsdtw', default=False, action="store_true", help="Discriminative shapeDTW warp preset augmentation")
+parser.add_argument('--extra_tag', type=str, default="", help="Anything extra")
+
 args = parser.parse_args()
 # args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False
 args.use_gpu = True if torch.cuda.is_available() else False
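Note: one caveat with the new --use_dtw option: argparse's type=bool simply calls bool() on the incoming string, and every non-empty string (including 'False') is truthy, so --use_dtw False still enables the metric. The store_true form already used by the augmentation flags avoids this, as the sketch below demonstrates:

```python
import argparse

p = argparse.ArgumentParser()
p.add_argument('--use_dtw', type=bool, default=False)
print(p.parse_args(['--use_dtw', 'False']).use_dtw)  # True: bool('False') is truthy

p2 = argparse.ArgumentParser()
p2.add_argument('--use_dtw', default=False, action='store_true')
print(p2.parse_args([]).use_dtw)  # False unless the bare flag is passed
```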
30 changes: 30 additions & 0 deletions run_anylearn.py
@@ -0,0 +1,30 @@
+from anylearn import init_sdk, quick_train
+
+
+# init_sdk('http://anylearn.nelbds.cn', 'DigitalLifeYZQiu', 'Qyz20020318!')
+init_sdk('http://111.200.37.154:81/', 'DigitalLifeYZQiu', 'Qyz20020318!',disable_git=True)
+
+
+for dataset in ['ETT-small-h1']:
+
+
+    cmd = "sh ./scripts/long_term_forecast/ETT_script/PatchTST_ETTh1.sh"
+
+    print(cmd)
+    task, _, _, _ = quick_train(
+        project_name='Parallel',
+        algorithm_cloud_name=f"PatchTST_{dataset}",
+        algorithm_local_dir="./",
+        algorithm_entrypoint=cmd,
+        algorithm_force_update=True,
+        algorithm_output="./outputs",
+        dataset_id=["DSET924f39a246e2bcba76feef284556"],
+        image_name="QUICKSTART_PYTORCH2.1.0_CUDA11.8_PYTHON3.11",
+        quota_group_request={
+            'name': "QGRPa1b75dd54023ab63d23d65261012",
+            'RTX-3090-unique': 1,
+            'CPU': 10,
+            'Memory': 50},
+
+    )
+
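Note: run_anylearn.py commits a live username and password with the new file. A variant that reads credentials from the environment keeps them out of version control (the ANYLEARN_* variable names are assumptions, not part of the anylearn SDK):

```python
import os
from anylearn import init_sdk

# same positional call shape as the committed script, secrets injected at runtime
init_sdk(
    os.environ.get('ANYLEARN_HOST', 'http://111.200.37.154:81/'),
    os.environ['ANYLEARN_USER'],      # hypothetical environment variable
    os.environ['ANYLEARN_PASSWORD'],  # hypothetical environment variable
    disable_git=True,
)
```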