* feat(core.py): comment out unused model configurations
* feat(get_exp_status.py): add filter for non-running experiments
* feat(process_exp_results.py): create script to load and filter CSV data
AntreasAntoniou committed Jan 11, 2024
1 parent 986fa20 commit b86856b
Showing 3 changed files with 183 additions and 80 deletions.
169 changes: 90 additions & 79 deletions gate/menu/core.py
@@ -329,81 +329,93 @@ def get_model_selection(
mixed_precision_mode: str = MixedPrecisionMode.BF16,
):
return {
-        EncoderNames.Wave2VecV2Base.value.pretty_name: ModelConfig(
-            adapter_config=adapter_config,
-            encoder_config=EncoderNames.Wave2VecV2Base.value.update_config(
-                image_size=image_size
-            ),
-            learning_rate_config=LearningRateConfig(
-                default=[vit_lr], dataset_specific={}
-            ),
-            weight_decay=wd,
-            train_batch_size=batch_size,
-            eval_batch_size=batch_size,
-        ),
-        EncoderNames.WhisperBase.value.pretty_name: ModelConfig(
-            adapter_config=adapter_config,
-            encoder_config=EncoderNames.WhisperBase.value.update_config(
-                image_size=image_size
-            ),
-            learning_rate_config=LearningRateConfig(
-                default=[vit_lr], dataset_specific={}
-            ),
-            weight_decay=wd,
-            train_batch_size=batch_size,
-            eval_batch_size=batch_size,
-        ),
-        EncoderNames.MPNet.value.pretty_name: ModelConfig(
-            adapter_config=adapter_config,
-            encoder_config=EncoderNames.MPNet.value.update_config(
-                image_size=image_size
-            ),
-            learning_rate_config=LearningRateConfig(
-                default=[vit_lr], dataset_specific={}
-            ),
-            weight_decay=wd,
-            train_batch_size=batch_size,
-            eval_batch_size=batch_size,
-        ),
-        EncoderNames.BERT_TEXT.value.pretty_name: ModelConfig(
-            adapter_config=adapter_config,
-            encoder_config=EncoderNames.BERT_TEXT.value.update_config(
-                image_size=image_size
-            ),
-            learning_rate_config=LearningRateConfig(
-                default=[vit_lr], dataset_specific={}
-            ),
-            weight_decay=wd,
-            train_batch_size=batch_size,
-            eval_batch_size=batch_size,
-        ),
-        EncoderNames.BART_TEXT.value.pretty_name: ModelConfig(
-            adapter_config=adapter_config,
-            encoder_config=EncoderNames.BART_TEXT.value.update_config(
-                image_size=image_size
-            ),
-            learning_rate_config=LearningRateConfig(
-                default=[vit_lr], dataset_specific={}
-            ),
-            weight_decay=wd,
-            train_batch_size=batch_size,
-            eval_batch_size=batch_size,
-        ),
-        EncoderNames.CLIPViTBase16_224HF_IMAGE.value.pretty_name: ModelConfig(
-            adapter_config=adapter_config,
-            encoder_config=EncoderNames.CLIPViTBase16_224HF_IMAGE.value.update_config(
-                image_size=image_size
-            ),
-            learning_rate_config=LearningRateConfig(
-                default=[vit_lr], dataset_specific={}
-            ),
-            weight_decay=wd,
-            train_batch_size=batch_size,
-            eval_batch_size=batch_size,
-        ),
-        EncoderNames.CLIPViTBase16_224HF_TEXT.value.pretty_name: ModelConfig(
+        # EncoderNames.Wave2VecV2Base.value.pretty_name: ModelConfig(
+        #     adapter_config=adapter_config,
+        #     encoder_config=EncoderNames.Wave2VecV2Base.value.update_config(
+        #         image_size=image_size
+        #     ),
+        #     learning_rate_config=LearningRateConfig(
+        #         default=[vit_lr], dataset_specific={}
+        #     ),
+        #     weight_decay=wd,
+        #     train_batch_size=batch_size,
+        #     eval_batch_size=batch_size,
+        # ),
+        # EncoderNames.WhisperBase.value.pretty_name: ModelConfig(
+        #     adapter_config=adapter_config,
+        #     encoder_config=EncoderNames.WhisperBase.value.update_config(
+        #         image_size=image_size
+        #     ),
+        #     learning_rate_config=LearningRateConfig(
+        #         default=[vit_lr], dataset_specific={}
+        #     ),
+        #     weight_decay=wd,
+        #     train_batch_size=batch_size,
+        #     eval_batch_size=batch_size,
+        # ),
+        # EncoderNames.MPNet.value.pretty_name: ModelConfig(
+        #     adapter_config=adapter_config,
+        #     encoder_config=EncoderNames.MPNet.value.update_config(
+        #         image_size=image_size
+        #     ),
+        #     learning_rate_config=LearningRateConfig(
+        #         default=[vit_lr], dataset_specific={}
+        #     ),
+        #     weight_decay=wd,
+        #     train_batch_size=batch_size,
+        #     eval_batch_size=batch_size,
+        # ),
+        # EncoderNames.BERT_TEXT.value.pretty_name: ModelConfig(
+        #     adapter_config=adapter_config,
+        #     encoder_config=EncoderNames.BERT_TEXT.value.update_config(
+        #         image_size=image_size
+        #     ),
+        #     learning_rate_config=LearningRateConfig(
+        #         default=[vit_lr], dataset_specific={}
+        #     ),
+        #     weight_decay=wd,
+        #     train_batch_size=batch_size,
+        #     eval_batch_size=batch_size,
+        # ),
+        # EncoderNames.BART_TEXT.value.pretty_name: ModelConfig(
+        #     adapter_config=adapter_config,
+        #     encoder_config=EncoderNames.BART_TEXT.value.update_config(
+        #         image_size=image_size
+        #     ),
+        #     learning_rate_config=LearningRateConfig(
+        #         default=[vit_lr], dataset_specific={}
+        #     ),
+        #     weight_decay=wd,
+        #     train_batch_size=batch_size,
+        #     eval_batch_size=batch_size,
+        # ),
+        # EncoderNames.CLIPViTBase16_224HF_IMAGE.value.pretty_name: ModelConfig(
+        #     adapter_config=adapter_config,
+        #     encoder_config=EncoderNames.CLIPViTBase16_224HF_IMAGE.value.update_config(
+        #         image_size=image_size
+        #     ),
+        #     learning_rate_config=LearningRateConfig(
+        #         default=[vit_lr], dataset_specific={}
+        #     ),
+        #     weight_decay=wd,
+        #     train_batch_size=batch_size,
+        #     eval_batch_size=batch_size,
+        # ),
+        # EncoderNames.CLIPViTBase16_224HF_TEXT.value.pretty_name: ModelConfig(
+        #     adapter_config=adapter_config,
+        #     encoder_config=EncoderNames.CLIPViTBase16_224HF_TEXT.value.update_config(
+        #         image_size=image_size
+        #     ),
+        #     learning_rate_config=LearningRateConfig(
+        #         default=[vit_lr], dataset_specific={}
+        #     ),
+        #     weight_decay=wd,
+        #     train_batch_size=batch_size,
+        #     eval_batch_size=batch_size,
+        # ),
+        EncoderNames.AugRegViTBase16_224.value.pretty_name: ModelConfig(
            adapter_config=adapter_config,
-            encoder_config=EncoderNames.CLIPViTBase16_224HF_TEXT.value.update_config(
+            encoder_config=EncoderNames.AugRegViTBase16_224.value.update_config(
image_size=image_size
),
learning_rate_config=LearningRateConfig(
@@ -413,21 +425,20 @@ def get_model_selection(
train_batch_size=batch_size,
eval_batch_size=batch_size,
),
-        EncoderNames.AugRegViTBase16_224.value.pretty_name: ModelConfig(
+        EncoderNames.LaionViTBase16_224.value.pretty_name: ModelConfig(
adapter_config=adapter_config,
-            encoder_config=EncoderNames.AugRegViTBase16_224.value.update_config(
+            encoder_config=EncoderNames.LaionViTBase16_224.value.update_config(
image_size=image_size
),
learning_rate_config=LearningRateConfig(
default=[vit_lr], dataset_specific={}
),
weight_decay=wd,
train_batch_size=batch_size,
eval_batch_size=batch_size,
),
-        EncoderNames.LaionViTBase16_224.value.pretty_name: ModelConfig(
+        EncoderNames.CLIPViTBase16_224.value.pretty_name: ModelConfig(
adapter_config=adapter_config,
-            encoder_config=EncoderNames.LaionViTBase16_224.value.update_config(
+            encoder_config=EncoderNames.CLIPViTBase16_224.value.update_config(
image_size=image_size
),
learning_rate_config=LearningRateConfig(
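For orientation, here is a minimal sketch (not part of the commit) of how a caller might consume the selection returned by get_model_selection; the field names follow the diff above, and the helper itself is hypothetical:

```python
# Hypothetical helper: `selection` is the dict returned by get_model_selection,
# mapping each encoder's pretty_name to its ModelConfig. After this commit the
# commented-out audio/text encoders no longer appear among the keys.
def summarize_selection(selection: dict) -> None:
    for pretty_name, cfg in selection.items():
        print(
            f"{pretty_name}: lr={cfg.learning_rate_config.default}, "
            f"wd={cfg.weight_decay}, batch={cfg.train_batch_size}"
        )
```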
20 changes: 19 additions & 1 deletion tools/get_exp_status.py
@@ -19,6 +19,7 @@ def check_wandb_experiments(
token: str = None,
print_table: bool = False,
filter_for_non_completed: bool = False,
+    filter_for_non_running: bool = False,
):
"""
Check if the given experiments exist in a Weights & Biases project and if they have completed the testing stage.
@@ -48,14 +49,17 @@ def check_wandb_experiments(
runs = [run for run_list in runs for run in run_list]

exp_name_to_summary_dict = {}
+    exp_name_to_run_dict = {}

for run in tqdm(runs):
if "exp_name" in run.config:
exp_name = run.config["exp_name"].lower()
if exp_name not in exp_name_to_summary_dict:
exp_name_to_summary_dict[exp_name] = [run.summaryMetrics]
+                exp_name_to_run_dict[exp_name] = [run]
else:
exp_name_to_summary_dict[exp_name].append(run.summaryMetrics)
+                exp_name_to_run_dict[exp_name].append(run)

# Initialize an empty list to store experiment data
exp_data = {}
@@ -74,6 +78,10 @@
]

testing_completed = any("testing/ensemble" in k for k in keys)
+        currently_running = any(
+            run.state.lower() == "running"
+            for run in exp_name_to_run_dict[exp_name]
+        )
if "global_step" in keys:
current_iter = max(
[
@@ -94,6 +102,7 @@
# Append the data to the list
exp_data[exp_name] = {
"testing_completed": testing_completed,
"currently_running": currently_running,
"current_iter": current_iter,
}
pbar.update(1)
@@ -105,6 +114,13 @@
if not value["testing_completed"]
}

+    if filter_for_non_running:
+        exp_data = {
+            key: value
+            for key, value in exp_data.items()
+            if not value["currently_running"]
+        }

if print_table:
# Create a pandas DataFrame
df = pd.DataFrame(
@@ -120,6 +136,7 @@
)
table.add_column("idx", justify="right")
table.add_column("Experiment Name", width=50)
table.add_column("Currently Running", justify="right")
table.add_column("Testing Completed", justify="right")
table.add_column("Current Iteration", justify="right")

@@ -128,6 +145,7 @@
table.add_row(
str(idx),
exp_name,
str(row["currently_running"]),
str(row["testing_completed"]),
str(row["current_iter"]),
)
@@ -156,7 +174,7 @@ def main(
# Call the function
exp_dict = check_wandb_experiments(
experiments=experiments,
project=["eidf-monitor", "gate-0-9-1"],
project=["gate-0-9-1"],
print_table=print_table,
filter_for_non_completed=filter_for_non_completed,
)
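In the lines shown, main() does not yet forward the new filter_for_non_running flag, so opting in means passing it explicitly. A minimal sketch under that assumption:

```python
# Sketch: enable both filters; experiments that are still running are dropped
# alongside those whose testing stage already completed.
exp_dict = check_wandb_experiments(
    experiments=experiments,
    project=["gate-0-9-1"],
    print_table=print_table,
    filter_for_non_completed=filter_for_non_completed,
    filter_for_non_running=True,  # new flag added by this commit
)
```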
74 changes: 74 additions & 0 deletions tools/process_exp_results.py
@@ -0,0 +1,74 @@
import sys

import fire
import pandas as pd
import yaml
from tabulate import tabulate


def load_and_filter_csv(csv_input, metrics_yaml):
# Load the CSV file
df = pd.read_csv(csv_input)

# Load the metrics dictionary from the YAML file
with open(metrics_yaml, "r") as file:
metrics_dict = yaml.safe_load(file)

# Create an empty DataFrame to store the filtered data
filtered_df = pd.DataFrame()

# Iterate over the items in the metrics dictionary
for dataset, metrics in metrics_dict.items():
# Filter the DataFrame for the current dataset
dataset_df = df[df["Dataset-name"] == dataset]

# Keep only the columns present in the metrics list along with 'Experiment-series', 'Dataset-name' and 'Experiment-name'
columns_to_keep = [
"Experiment-series",
"Dataset-name",
"Experiment-name",
] + metrics
dataset_df = dataset_df[columns_to_keep]

        # Concatenate onto the final DataFrame
        # (DataFrame.append was removed in pandas 2.0)
        filtered_df = pd.concat([filtered_df, dataset_df], ignore_index=True)

# Replace missing values with 'TBA'
filtered_df.fillna("TBA", inplace=True)

return filtered_df


def generate_table(df, key_terms_yaml):
# Load the key terms from the YAML file
with open(key_terms_yaml, "r") as file:
key_terms = yaml.safe_load(file)

# Filter the DataFrame based on the key terms
df = df[df["Experiment-name"].str.contains("|".join(key_terms))]

# Convert the DataFrame to a LaTeX table
latex_table = tabulate(
df, tablefmt="latex", headers="keys", showindex=False
)

return latex_table


def main(csv_input, metrics_yaml, key_terms_yaml_1, key_terms_yaml_2):
# Load and filter the CSV file or standard input
if csv_input == "-":
df = load_and_filter_csv(sys.stdin, metrics_yaml)
else:
df = load_and_filter_csv(csv_input, metrics_yaml)

# Generate the LaTeX tables
table_1 = generate_table(df, key_terms_yaml_1)
table_2 = generate_table(df, key_terms_yaml_2)

print(table_1)
print(table_2)


if __name__ == "__main__":
fire.Fire(main)
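To illustrate the expected inputs, here is a minimal smoke test; every file name, dataset, column value, and metric below is an invented placeholder (chosen only to satisfy the columns the script reads), and it assumes tools/process_exp_results.py is importable:

```python
import pandas as pd
import yaml

from process_exp_results import generate_table, load_and_filter_csv

# Invented one-row results CSV with the columns the script expects.
pd.DataFrame(
    {
        "Experiment-series": ["series-a"],
        "Dataset-name": ["imagenet1k"],
        "Experiment-name": ["vit-base-run1"],
        "testing/accuracy": [0.81],
    }
).to_csv("results.csv", index=False)

# The metrics YAML maps each Dataset-name to the metric columns to keep.
with open("metrics.yaml", "w") as f:
    yaml.safe_dump({"imagenet1k": ["testing/accuracy"]}, f)

# The key-terms YAML is a list of substrings matched against Experiment-name.
with open("key_terms.yaml", "w") as f:
    yaml.safe_dump(["vit"], f)

df = load_and_filter_csv("results.csv", "metrics.yaml")
print(generate_table(df, "key_terms.yaml"))
```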
