Fix issues with GLM integration
edubezerra committed Jun 27, 2023
1 parent 4c34526 commit 2d3ae99
Showing 5 changed files with 28 additions and 26 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -24,7 +24,7 @@ All datasets retrieved and/or generated by the scripts will be stored in the `./
- **_retrieve_ERA5.py_**: this script retrieves numerical simulation data from the ERA5 portal.

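The diff does not show how **_retrieve_ERA5.py_** builds its request, but as an illustration, here is a minimal sketch of an ERA5 retrieval through the Copernicus Climate Data Store client (`cdsapi`); the dataset name, variables, levels, and area below are assumptions, not the script's actual parameters:

```python
# Hypothetical ERA5 request via the CDS API; all request values are
# illustrative assumptions, not taken from retrieve_ERA5.py.
import cdsapi

client = cdsapi.Client()  # reads credentials from ~/.cdsapirc
client.retrieve(
    "reanalysis-era5-pressure-levels",
    {
        "product_type": "reanalysis",
        "variable": ["temperature", "relative_humidity"],
        "pressure_level": ["500", "700", "850"],
        "year": "2023",
        "month": "06",
        "day": "27",
        "time": ["00:00", "12:00"],
        "area": [-22, -44, -24, -42],  # N, W, S, E (roughly Rio de Janeiro)
        "format": "netcdf",
    },
    "ERA5_sample.nc",
)
```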

-#### Script **_gen_sounding_indices.py_**
+##### Script **_gen_sounding_indices.py_**

This script generates atmospheric instability indices for the data retrieved by the script **_retrieve_as.py_**. Data from the SBGL sounding station (located at Galeão Airport, Rio de Janeiro - Brazil) is used to calculate atmospheric instability indices, producing a new dataset with one entry per sounding probe. The SBGL station launches two probes per day (at 00:00h and 12:00h UTC), and each entry in the produced dataset contains the values of the computed instability indices for one probe. The following instability indices are computed:

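The list of computed indices is collapsed in this diff. As an illustration only, here is a minimal sketch of one classical instability index (the K index) computed from sounding temperatures; the helper below is hypothetical, not code from **_gen_sounding_indices.py_**:

```python
def k_index(t850, t700, t500, td850, td700):
    """K index (in degrees C): (T850 - T500) + Td850 - (T700 - Td700).

    Hypothetical helper, not taken from gen_sounding_indices.py.
    Inputs are temperatures (t*) and dew points (td*) in degrees
    Celsius at the indicated pressure levels in hPa.
    """
    return (t850 - t500) + td850 - (t700 - td700)

# One probe's worth of values; K above roughly 30 suggests
# increasing thunderstorm potential.
print(k_index(t850=20.0, t700=8.0, t500=-8.0, td850=16.0, td700=4.0))  # 40.0
```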
@@ -43,6 +43,6 @@ The preprocessing scripts are responsible for performing several operations on t

These scripts build the train, validation, and test datasets from the time series produced in the previous steps. These datasets are given as input to the model training step.

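As a sketch of what such a split can look like for time-series data, assuming a time-indexed pandas DataFrame (the function name and fractions are illustrative, not the repository's actual logic):

```python
import pandas as pd

def chronological_split(df: pd.DataFrame, train_frac=0.7, val_frac=0.15):
    """Split a time-ordered DataFrame into train/val/test without shuffling,
    so that no future observation leaks into the training data.
    Hypothetical helper; the repository's actual split is not shown here."""
    df = df.sort_index()
    n = len(df)
    train_end = int(n * train_frac)
    val_end = int(n * (train_frac + val_frac))
    return df.iloc[:train_end], df.iloc[train_end:val_end], df.iloc[val_end:]
```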
-#### Model training
+#### Model training and evaluation

The model generation script is responsible for training the model and exporting the results obtained on the test set.
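For context, a minimal sketch of such a train-then-evaluate flow in PyTorch (the repository uses `torch` elsewhere); the model, loaders, loss, and hyperparameters below are placeholders, not the project's actual pipeline:

```python
import torch
import torch.nn as nn

def train_and_evaluate(model, train_loader, test_loader, epochs=10, lr=1e-3):
    """Hypothetical train/test loop; the real script's architecture,
    loss function, and export format are not shown in this diff."""
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for _ in range(epochs):
        model.train()
        for X, y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(X), y)
            loss.backward()
            optimizer.step()
    # Evaluate on the held-out test set and report the mean loss.
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for X, y in test_loader:
            test_loss += criterion(model(X), y).item() * len(X)
    return test_loss / len(test_loader.dataset)
```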
Binary file modified src/__pycache__/globals.cpython-310.pyc
48 changes: 25 additions & 23 deletions src/build_datasets.py
@@ -245,7 +245,7 @@ def build_datasets(station_id: str, join_AS_data_source: bool, join_NWP_data_sou
        pipeline_id = pipeline_id + '_L'

    logging.info(f"Loading observations for weather station {station_id}...")
-    df_ws = pd.read_parquet("/mnt/e/atmoseer/data/ws/inmetinmetA652_preprocessed.parquet.gzip")
+    df_ws = pd.read_parquet(WS_INMET_DATA_DIR + station_id + "_preprocessed.parquet.gzip")
    logging.info(f"Done! Shape = {df_ws.shape}.")

    ####
@@ -493,36 +493,34 @@ def build_datasets(station_id: str, join_AS_data_source: bool, join_NWP_data_sou
    logging.info('Done!')

def main(argv):
-    # parser = argparse.ArgumentParser(
-    #     description="""This script builds the train/val/test datasets for a given weather station, by using the user-specified data sources.""")
-    # parser.add_argument('-s', '--station_id', type=str, required=True, help='station id')
-    # parser.add_argument('-d', '--datasources', type=str, help='data source spec')
-    # parser.add_argument('-n', '--num_neighbors', type=int, default=0, help='number of neighbors')
-    # parser.add_argument('-sp', '--subsampling_procedure', type=str, default='NONE', help='Subsampling procedure to be applied.')
-    # args = parser.parse_args(argv[1:])
-
-    station_id = 'A652'
-    datasources = ['L']
-    # num_neighbors = 0
-    # subsampling_procedure = args.subsampling_procedure
+    parser = argparse.ArgumentParser(
+        description="""This script builds the train/val/test datasets for a given weather station, by using the user-specified data sources.""")
+    parser.add_argument('-s', '--station_id', type=str, required=True, help='station id')
+    parser.add_argument('-d', '--datasources', type=str, help='data source spec')
+    parser.add_argument('-sp', '--subsampling_procedure', type=str, default='NONE', help='Subsampling procedure to be applied.')
+    args = parser.parse_args(argv[1:])
+
+    station_id = args.station_id
+    datasources = args.datasources
+    subsampling_procedure = args.subsampling_procedure

    lst_subsampling_procedures = ["NONE", "NAIVE", "NEGATIVE"]
-    # if not (subsampling_procedure in lst_subsampling_procedures):
-    #     print(f"Invalid subsampling procedure: {subsampling_procedure}. Valid values: {lst_subsampling_procedures}")
-    #     parser.print_help()
-    #     sys.exit(2)
+    if not (subsampling_procedure in lst_subsampling_procedures):
+        print(f"Invalid subsampling procedure: {subsampling_procedure}. Valid values: {lst_subsampling_procedures}")
+        parser.print_help()
+        sys.exit(2)

-    # if not ((station_id in INMET_STATION_CODES_RJ) or (station_id in COR_STATION_NAMES_RJ)):
-    #     print(f"Invalid station identifier: {station_id}")
-    #     parser.print_help()
-    #     sys.exit(2)
+    if not ((station_id in INMET_STATION_CODES_RJ) or (station_id in COR_STATION_NAMES_RJ)):
+        print(f"Invalid station identifier: {station_id}")
+        parser.print_help()
+        sys.exit(2)

    fmt = "[%(levelname)s] %(funcName)s():%(lineno)i: %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=fmt)

    join_as_data_source = False
    join_nwp_data_source = False
-    subsampling_procedure = "NONE"
    join_lightning_data_source = False

    if datasources:
        if 'R' in datasources:
@@ -533,7 +531,11 @@ def main(argv):
            join_lightning_data_source = True

    assert (station_id is not None) and (station_id != "")
-    build_datasets(station_id, join_as_data_source, join_nwp_data_source, join_lightning_data_source, subsampling_procedure)
+    build_datasets(station_id,
+                   join_as_data_source,
+                   join_nwp_data_source,
+                   join_lightning_data_source,
+                   subsampling_procedure)

if __name__ == "__main__":
    main(sys.argv)
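With the argument parsing restored, the script would presumably be invoked along the lines of `python src/build_datasets.py -s A652 -d L -sp NONE`, where `-d L` appears to select the lightning data source; the corresponding `if 'L' in datasources` branch is collapsed in this diff, so this reading is an inference from the visible `'R'` branch.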
Binary file modified src/train/__pycache__/early_stopping.cpython-310.pyc
2 changes: 1 addition & 1 deletion src/train/early_stopping.py
@@ -46,5 +46,5 @@ def save_checkpoint(self, val_loss, model, pipeline_id):
        if self.verbose:
            print(
                f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
-        torch.save(model.state_dict(), '/mnt/e/atmoseer/data/as/best_' + pipeline_id + '.pt')
+        torch.save(model.state_dict(), globals.MODELS_DIR + '/best_' + pipeline_id + '.pt')
        self.val_loss_min = val_loss
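For context, here is a typical way an early-stopping helper like this is driven from a validation loop; only `save_checkpoint` is visible in this diff, so the constructor and call signature below are assumptions:

```python
# Hypothetical usage sketch; EarlyStopping's constructor and __call__
# signature are assumed, since only save_checkpoint appears in this diff.
early_stopping = EarlyStopping(patience=10, verbose=True)

for epoch in range(num_epochs):
    train_one_epoch(model, train_loader)          # placeholder helper
    val_loss = evaluate(model, val_loader)        # placeholder helper
    early_stopping(val_loss, model, pipeline_id)  # checkpoints on improvement
    if early_stopping.early_stop:
        print(f"Stopping early at epoch {epoch}.")
        break
```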
