Fix issues with GLM integration
edubezerra committed Jun 27, 2023
1 parent 4c34526 commit 2d3ae99
Showing 5 changed files with 28 additions and 26 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -24,7 +24,7 @@ All datasets retrieved and/or generated by the scripts will be stored in the `./
- **_retrieve_ERA5.py_**: this script retrieves numerical simulation data from the ERA5 portal.

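The diff does not show how **_retrieve_ERA5.py_** builds its request, but as an illustration, here is a minimal sketch of an ERA5 retrieval through the Copernicus Climate Data Store client (`cdsapi`); the dataset name, variables, levels, and area below are assumptions, not the script's actual parameters:

```python
# Hypothetical ERA5 request via the CDS API; all request values are
# illustrative assumptions, not taken from retrieve_ERA5.py.
import cdsapi

client = cdsapi.Client()  # reads credentials from ~/.cdsapirc
client.retrieve(
    "reanalysis-era5-pressure-levels",
    {
        "product_type": "reanalysis",
        "variable": ["temperature", "relative_humidity"],
        "pressure_level": ["500", "700", "850"],
        "year": "2023",
        "month": "06",
        "day": "27",
        "time": ["00:00", "12:00"],
        "area": [-22, -44, -24, -42],  # N, W, S, E (roughly Rio de Janeiro)
        "format": "netcdf",
    },
    "ERA5_sample.nc",
)
```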

-#### Script **_gen_sounding_indices.py_**
+##### Script **_gen_sounding_indices.py_**

This script generates atmospheric instability indices for the data retrieved by the script **_retrieve_as.py_**. Data from the SBGL sounding station (located at Galeão Airport, Rio de Janeiro - Brazil) is used to calculate atmospheric instability indices, producing a new dataset with one entry per sounding probe. The SBGL station launches two probes per day (at 00:00h and 12:00h UTC), and each entry in the produced dataset contains the values of the computed instability indices for one probe. The following instability indices are computed:

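The list of computed indices is collapsed in this diff. As an illustration only, here is a minimal sketch of one classical instability index (the K index) computed from sounding temperatures; the helper below is hypothetical, not code from **_gen_sounding_indices.py_**:

```python
def k_index(t850, t700, t500, td850, td700):
    """K index (in degrees C): (T850 - T500) + Td850 - (T700 - Td700).

    Hypothetical helper, not taken from gen_sounding_indices.py.
    Inputs are temperatures (t*) and dew points (td*) in degrees
    Celsius at the indicated pressure levels in hPa.
    """
    return (t850 - t500) + td850 - (t700 - td700)

# One probe's worth of values; K above roughly 30 suggests
# increasing thunderstorm potential.
print(k_index(t850=20.0, t700=8.0, t500=-8.0, td850=16.0, td700=4.0))  # 40.0
```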
@@ -43,6 +43,6 @@ The preprocessing scripts are responsible for performing several operations on t

These scripts build the train, validation, and test datasets from the time series produced in the previous steps. These datasets are given as input to the model training step.

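As a sketch of what such a split can look like for time-series data, assuming a time-indexed pandas DataFrame (the function name and fractions are illustrative, not the repository's actual logic):

```python
import pandas as pd

def chronological_split(df: pd.DataFrame, train_frac=0.7, val_frac=0.15):
    """Split a time-ordered DataFrame into train/val/test without shuffling,
    so that no future observation leaks into the training data.
    Hypothetical helper; the repository's actual split is not shown here."""
    df = df.sort_index()
    n = len(df)
    train_end = int(n * train_frac)
    val_end = int(n * (train_frac + val_frac))
    return df.iloc[:train_end], df.iloc[train_end:val_end], df.iloc[val_end:]
```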
-#### Model training
+#### Model training and evaluation

The model generation script is responsible for training the model and exporting the results obtained on the test set.
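For context, a minimal sketch of such a train-then-evaluate flow in PyTorch (the repository uses `torch` elsewhere); the model, loaders, loss, and hyperparameters below are placeholders, not the project's actual pipeline:

```python
import torch
import torch.nn as nn

def train_and_evaluate(model, train_loader, test_loader, epochs=10, lr=1e-3):
    """Hypothetical train/test loop; the real script's architecture,
    loss function, and export format are not shown in this diff."""
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for _ in range(epochs):
        model.train()
        for X, y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(X), y)
            loss.backward()
            optimizer.step()
    # Evaluate on the held-out test set and report the mean loss.
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for X, y in test_loader:
            test_loss += criterion(model(X), y).item() * len(X)
    return test_loss / len(test_loader.dataset)
```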
Binary file modified src/__pycache__/globals.cpython-310.pyc
48 changes: 25 additions & 23 deletions src/build_datasets.py
@@ -245,7 +245,7 @@ def build_datasets(station_id: str, join_AS_data_source: bool, join_NWP_data_sou
        pipeline_id = pipeline_id + '_L'

    logging.info(f"Loading observations for weather station {station_id}...")
-    df_ws = pd.read_parquet("/mnt/e/atmoseer/data/ws/inmetinmetA652_preprocessed.parquet.gzip")
+    df_ws = pd.read_parquet(WS_INMET_DATA_DIR + station_id + "_preprocessed.parquet.gzip")
    logging.info(f"Done! Shape = {df_ws.shape}.")

    ####
@@ -493,36 +493,34 @@ def build_datasets(station_id: str, join_AS_data_source: bool, join_NWP_data_sou
    logging.info('Done!')

def main(argv):
-    # parser = argparse.ArgumentParser(
-    #     description="""This script builds the train/val/test datasets for a given weather station, by using the user-specified data sources.""")
-    # parser.add_argument('-s', '--station_id', type=str, required=True, help='station id')
-    # parser.add_argument('-d', '--datasources', type=str, help='data source spec')
-    # parser.add_argument('-n', '--num_neighbors', type=int, default=0, help='number of neighbors')
-    # parser.add_argument('-sp', '--subsampling_procedure', type=str, default='NONE', help='Subsampling procedure to be applied.')
-    # args = parser.parse_args(argv[1:])
-
-    station_id = 'A652'
-    datasources = ['L']
-    # num_neighbors = 0
-    # subsampling_procedure = args.subsampling_procedure
+    parser = argparse.ArgumentParser(
+        description="""This script builds the train/val/test datasets for a given weather station, by using the user-specified data sources.""")
+    parser.add_argument('-s', '--station_id', type=str, required=True, help='station id')
+    parser.add_argument('-d', '--datasources', type=str, help='data source spec')
+    parser.add_argument('-sp', '--subsampling_procedure', type=str, default='NONE', help='Subsampling procedure to be applied.')
+    args = parser.parse_args(argv[1:])
+
+    station_id = args.station_id
+    datasources = args.datasources
+    subsampling_procedure = args.subsampling_procedure

    lst_subsampling_procedures = ["NONE", "NAIVE", "NEGATIVE"]
-    # if not (subsampling_procedure in lst_subsampling_procedures):
-    #     print(f"Invalid subsampling procedure: {subsampling_procedure}. Valid values: {lst_subsampling_procedures}")
-    #     parser.print_help()
-    #     sys.exit(2)
+    if not (subsampling_procedure in lst_subsampling_procedures):
+        print(f"Invalid subsampling procedure: {subsampling_procedure}. Valid values: {lst_subsampling_procedures}")
+        parser.print_help()
+        sys.exit(2)

-    # if not ((station_id in INMET_STATION_CODES_RJ) or (station_id in COR_STATION_NAMES_RJ)):
-    #     print(f"Invalid station identifier: {station_id}")
-    #     parser.print_help()
-    #     sys.exit(2)
+    if not ((station_id in INMET_STATION_CODES_RJ) or (station_id in COR_STATION_NAMES_RJ)):
+        print(f"Invalid station identifier: {station_id}")
+        parser.print_help()
+        sys.exit(2)

    fmt = "[%(levelname)s] %(funcName)s():%(lineno)i: %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=fmt)

    join_as_data_source = False
    join_nwp_data_source = False
-    subsampling_procedure = "NONE"
    join_lightning_data_source = False

    if datasources:
        if 'R' in datasources:
@@ -533,7 +531,11 @@ def main(argv):
            join_lightning_data_source = True

    assert (station_id is not None) and (station_id != "")
-    build_datasets(station_id, join_as_data_source, join_nwp_data_source, join_lightning_data_source, subsampling_procedure)
+    build_datasets(station_id,
+                   join_as_data_source,
+                   join_nwp_data_source,
+                   join_lightning_data_source,
+                   subsampling_procedure)

if __name__ == "__main__":
    main(sys.argv)
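With the argument parsing restored, the script would presumably be invoked along the lines of `python src/build_datasets.py -s A652 -d L -sp NONE`, where `-d L` appears to select the lightning data source; the corresponding `if 'L' in datasources` branch is collapsed in this diff, so this reading is an inference from the visible `'R'` branch.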
Binary file modified src/train/__pycache__/early_stopping.cpython-310.pyc
2 changes: 1 addition & 1 deletion src/train/early_stopping.py
@@ -46,5 +46,5 @@ def save_checkpoint(self, val_loss, model, pipeline_id):
        if self.verbose:
            print(
                f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
-        torch.save(model.state_dict(), '/mnt/e/atmoseer/data/as/best_' + pipeline_id + '.pt')
+        torch.save(model.state_dict(), globals.MODELS_DIR + '/best_' + pipeline_id + '.pt')
        self.val_loss_min = val_loss
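For context, here is a typical way an early-stopping helper like this is driven from a validation loop; only `save_checkpoint` is visible in this diff, so the constructor and call signature below are assumptions:

```python
# Hypothetical usage sketch; EarlyStopping's constructor and __call__
# signature are assumed, since only save_checkpoint appears in this diff.
early_stopping = EarlyStopping(patience=10, verbose=True)

for epoch in range(num_epochs):
    train_one_epoch(model, train_loader)          # placeholder helper
    val_loss = evaluate(model, val_loader)        # placeholder helper
    early_stopping(val_loss, model, pipeline_id)  # checkpoints on improvement
    if early_stopping.early_stop:
        print(f"Stopping early at epoch {epoch}.")
        break
```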
