From bfc3bdb9be66eba17be3bb946e7afdd2143fc7f6 Mon Sep 17 00:00:00 2001
From: Ivica Dimitrovski
Date: Mon, 3 Jul 2023 10:43:07 +0200
Subject: [PATCH] docs for configs and schemas

---
 aitlas/base/datasets.py    |  17 +++--
 aitlas/base/schemas.py     | 100 ++++++++++++++++++++++++++++-
 aitlas/datasets/schemas.py |  33 ++++++++++
 aitlas/models/schemas.py   |  21 ++++++
 aitlas/tasks/schemas.py    | 127 +++++++++++++++++++++++++++++++++++++
 5 files changed, 290 insertions(+), 8 deletions(-)

diff --git a/aitlas/base/datasets.py b/aitlas/base/datasets.py
index 894a08c..241fc0f 100644
--- a/aitlas/base/datasets.py
+++ b/aitlas/base/datasets.py
@@ -13,6 +13,12 @@ class BaseDataset(Dataset, Configurable):
     name = None

     def __init__(self, config):
+        """BaseDataset constructor
+        :param config: Configuration object which specifies the details of the dataset.
+        :type config: Config, contains information for the batch size, number of workers,
+            list of labels, and list of transformations
+        """
+
         Dataset.__init__(self)
         Configurable.__init__(self, config)

@@ -53,6 +59,7 @@ def prepare(self):
         return True

     def dataloader(self):
+        """Create and return a dataloader for the dataset"""
         return torch.utils.data.DataLoader(
             self,
             batch_size=self.batch_size,
@@ -69,31 +76,31 @@ def get_labels(self):
         )

     def show_batch(self, size):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return a random batch of images from the dataset"""
         raise NotImplementedError(
             "Please implement the `show_batch` method for your dataset"
         )

     def show_samples(self):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return random samples from the dataset"""
         raise NotImplementedError(
             "Please implement the `show_samples` method for your dataset"
         )

     def show_image(self, index):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return an image with a given index from the dataset"""
         raise NotImplementedError(
             "Please implement the `show_image` method for your dataset"
         )

     def data_distribution_table(self):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return the label distribution of the dataset"""
         raise NotImplementedError(
             "Please implement the `data_distribution_table` method for your dataset"
         )

     def data_distribution_barchart(self):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return the label distribution of the dataset as a barchart"""
         raise NotImplementedError(
             "Please implement the `data_distribution_barchart` method for your dataset"
         )
diff --git a/aitlas/base/schemas.py b/aitlas/base/schemas.py
index 4676b10..457c53c 100644
--- a/aitlas/base/schemas.py
+++ b/aitlas/base/schemas.py
@@ -2,6 +2,35 @@

 class BaseDatasetSchema(Schema):
+
+    """
+    Schema for configuring a base dataset.
+
+    :param batch_size: Batch size for the dataset. Default is 64.
+    :type batch_size: int, optional
+
+    :param shuffle: Flag indicating whether to shuffle the dataset. Default is True.
+    :type shuffle: bool, optional
+
+    :param num_workers: Number of workers to use for data loading. Default is 4.
+    :type num_workers: int, optional
+
+    :param pin_memory: Flag indicating whether to use page-locked memory. Default is False.
+    :type pin_memory: bool, optional
+
+    :param transforms: Classes to run transformations over the input data.
+    :type transforms: List[str], optional
+
+    :param target_transforms: Classes to run transformations over the target data.
+    :type target_transforms: List[str], optional
+
+    :param joint_transforms: Classes to run transformations over the input and target data.
+    :type joint_transforms: List[str], optional
+
+    :param labels: Labels for the dataset.
+    :type labels: List[str], optional
+    """
+
     batch_size = fields.Int(missing=64, description="Batch size", example=64)
     shuffle = fields.Bool(
         missing=True, description="Should shuffle dataset", example=False
     )
@@ -11,13 +40,13 @@ class BaseDatasetSchema(Schema):
         missing=False, description="Whether to use page-locked memory"
     )
     transforms = fields.List(
-        fields.String, missing=None, description="Classes to run transformations.",
+        fields.String, missing=None, description="Classes to run transformations over the input data.",
     )
     target_transforms = fields.List(
-        fields.String, missing=None, description="Classes to run transformations.",
+        fields.String, missing=None, description="Classes to run transformations over the target data.",
     )
     joint_transforms = fields.List(
-        fields.String, missing=None, description="Classes to run transformations.",
+        fields.String, missing=None, description="Classes to run transformations over the input and target data.",
     )
     labels = fields.List(
         fields.String, missing=None, description="Labels for the dataset",
@@ -25,6 +54,27 @@

 class BaseModelSchema(Schema):
+    """
+    Schema for configuring a base model.
+
+    :param num_classes: Number of classes for the model. Default is 2.
+    :type num_classes: int, optional
+
+    :param use_cuda: Flag indicating whether to use CUDA if available. Default is True.
+    :type use_cuda: bool, optional
+
+    :param metrics: Metrics to calculate during training and evaluation. Default is ['f1_score'].
+    :type metrics: List[str], optional
+
+    :param weights: Class weights to apply for the loss function. Default is None.
+    :type weights: List[float], optional
+
+    :param rank: Rank of the process when using distributed data parallel (DDP) training. Default is 0.
+    :type rank: int, optional
+
+    :param use_ddp: Flag indicating whether to turn on distributed data parallel (DDP) training. Default is False.
+    :type use_ddp: bool, optional
+    """
     num_classes = fields.Int(missing=2, description="Number of classes", example=2)
     use_cuda = fields.Bool(missing=True, description="Whether to use CUDA if possible")
     metrics = fields.List(
@@ -49,6 +99,28 @@ class BaseModelSchema(Schema):
 class BaseClassifierSchema(BaseModelSchema):
+    """
+    Schema for configuring a base classifier.
+
+    :param learning_rate: Learning rate used in training. Default is 0.01.
+    :type learning_rate: float, optional
+
+    :param weight_decay: Weight decay used in training. Default is 0.0.
+    :type weight_decay: float, optional
+
+    :param pretrained: Flag indicating whether to use a pretrained model. Default is True.
+    :type pretrained: bool, optional
+
+    :param local_model_path: Local path of the pretrained model. Default is None.
+    :type local_model_path: str, optional
+
+    :param threshold: Prediction threshold if needed. Default is 0.5.
+    :type threshold: float, optional
+
+    :param freeze: Flag indicating whether to freeze all layers except for the classifier layer(s). Default is False.
+    :type freeze: bool, optional
+    """
+
     learning_rate = fields.Float(
         missing=0.01, description="Learning rate used in training.", example=0.01
     )
@@ -71,6 +143,14 @@ class BaseClassifierSchema(BaseModelSchema):
 class BaseSegmentationClassifierSchema(BaseClassifierSchema):
+    """
+    Schema for configuring a base segmentation classifier.
+
+    :param metrics: Classes of metrics you want to calculate during training and evaluation.
+        Default is ['iou', 'f1_score', 'accuracy'].
+    :type metrics: List[str], optional
+    """
+
     metrics = fields.List(
         fields.String,
         missing=["iou", "f1_score", "accuracy"],
@@ -80,6 +160,20 @@
 class BaseObjectDetectionSchema(BaseClassifierSchema):
+    """
+    Schema for configuring a base object detection model.
+
+    :param metrics: Classes of metrics you want to calculate during training and evaluation.
+        Default is ['map'].
+    :type metrics: List[str], optional
+
+    :param step_size: Step size for the learning rate scheduler. Default is 15.
+    :type step_size: int, optional
+
+    :param gamma: Gamma (multiplier) for the learning rate scheduler. Default is 0.1.
+    :type gamma: float, optional
+    """
+
     metrics = fields.List(
         fields.String,
         missing=["map"],
diff --git a/aitlas/datasets/schemas.py b/aitlas/datasets/schemas.py
index cdf4e68..4ca6b24 100644
--- a/aitlas/datasets/schemas.py
+++ b/aitlas/datasets/schemas.py
@@ -4,6 +4,9 @@
 class MatDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a classification dataset given as a .mat file.
+    """
     mat_file = fields.String(
         missing=None, description="mat file on disk", example="./data/dataset.mat",
     )
@@ -21,6 +24,9 @@ class MatDatasetSchema(BaseDatasetSchema):
 class NPZDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a classification dataset given as an .npz file.
+    """
     npz_file = fields.String(
         missing=None, description="npz file on disk", example="./data/dataset.npz",
     )
@@ -38,6 +44,9 @@ class NPZDatasetSchema(BaseDatasetSchema):
 class ClassificationDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a classification dataset.
+    """
     data_dir = fields.String(
         missing="/", description="Dataset path on disk", example="./data/BigEarthNet/"
     )
@@ -47,6 +56,9 @@ class ClassificationDatasetSchema(BaseDatasetSchema):
 class SegmentationDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a segmentation dataset.
+    """
     data_dir = fields.String(
         missing="/", description="Dataset path on disk", example="./data/BigEarthNet/"
     )
@@ -56,6 +68,9 @@ class SegmentationDatasetSchema(BaseDatasetSchema):
 class ObjectDetectionPascalDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring an object detection dataset given in PASCAL VOC format.
+    """
     imageset_file = fields.String(
         missing="/",
         description="File with the image ids in the set",
@@ -72,6 +87,9 @@ class ObjectDetectionPascalDatasetSchema(BaseDatasetSchema):
 class ObjectDetectionCocoDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring an object detection dataset given in COCO format.
+    """
     data_dir = fields.String(
         missing="/", description="Dataset path on disk", example="./data/DIOR/"
     )
@@ -86,6 +104,9 @@ class ObjectDetectionCocoDatasetSchema(BaseDatasetSchema):
 class BigEarthNetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring the BigEarthNet dataset.
+ """ csv_file = fields.String( missing=None, description="CSV file on disk", example="./data/train.csv" ) @@ -119,6 +140,9 @@ class BigEarthNetSchema(BaseDatasetSchema): class SpaceNet6DatasetSchema(BaseDatasetSchema): + """ + Schema for configuring the SpaceNet6 dataset. + """ orients = fields.String( required=False, example="path/to/data/train/AOI_11_Roterdam/SummaryData/SAR_orientations.csv", @@ -211,6 +235,9 @@ class SpaceNet6DatasetSchema(BaseDatasetSchema): class BreizhCropsSchema(BaseDatasetSchema): + """ + Schema for configuring the BreizhCrops dataset for crop type prediction. + """ regions = fields.List( fields.String, required=True, @@ -242,6 +269,9 @@ class BreizhCropsSchema(BaseDatasetSchema): class CropsDatasetSchema(BaseDatasetSchema): + """ + Schema for configuring dataset for crop type prediction. + """ csv_file_path = fields.String( missing=None, description="CSV file on disk", example="./data/train.csv" ) @@ -264,6 +294,9 @@ class CropsDatasetSchema(BaseDatasetSchema): class So2SatDatasetSchema(BaseDatasetSchema): + """ + Schema for configuring the So2Sat dataset. + """ h5_file = fields.String( required=True, description="H5 file on disk", example="./data/train.h5" ) diff --git a/aitlas/models/schemas.py b/aitlas/models/schemas.py index 53d632e..f97a190 100644 --- a/aitlas/models/schemas.py +++ b/aitlas/models/schemas.py @@ -6,6 +6,9 @@ class TransformerModelSchema(BaseClassifierSchema): + """ + Schema for configuring a transformer model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -34,6 +37,9 @@ class TransformerModelSchema(BaseClassifierSchema): class InceptionTimeSchema(BaseClassifierSchema): + """ + Schema for configuring a InceptionTime model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -54,6 +60,9 @@ class InceptionTimeSchema(BaseClassifierSchema): class LSTMSchema(BaseClassifierSchema): + """ + Schema for configuring a LSTM model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -80,6 +89,9 @@ class LSTMSchema(BaseClassifierSchema): class MSResNetSchema(BaseClassifierSchema): + """ + Schema for configuring a MSResNet model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -100,6 +112,9 @@ class MSResNetSchema(BaseClassifierSchema): class TempCNNSchema(BaseClassifierSchema): + """ + Schema for configuring a TempCNN model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -124,6 +139,9 @@ class TempCNNSchema(BaseClassifierSchema): class StarRNNSchema(BaseClassifierSchema): + """ + Schema for configuring a StarRNN model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -150,6 +168,9 @@ class StarRNNSchema(BaseClassifierSchema): class OmniScaleCNNSchema(BaseClassifierSchema): + """ + Schema for configuring a OmniScaleCNN model. 
+ """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", diff --git a/aitlas/tasks/schemas.py b/aitlas/tasks/schemas.py index 39157d0..ff66d9b 100644 --- a/aitlas/tasks/schemas.py +++ b/aitlas/tasks/schemas.py @@ -4,6 +4,15 @@ class BaseTaskShema(Schema): + """ + Schema for configuring a base task. + + :param log: Flag indicating whether to turn on logging. Default is True. + :type log: bool, optional + + :param id: Run name/ID for the task. Default is None. + :type id: str, optional + """ log = fields.Boolean(required=False, missing=True, description="Turn on logging") id = fields.String( required=False, @@ -14,6 +23,15 @@ class BaseTaskShema(Schema): class SplitSetObjectSchema(Schema): + """ + Schema for configuring a split dataset object. + + :param ratio: Ratio of the dataset to include in the split. This is required. + :type ratio: int + + :param file: File containing the indices for the split. This is required. + :type file: str + """ ratio = fields.Int(required=True, description="Ratio of dataset", example=60) file = fields.String( required=True, description="File indices", example="./data/indices.csv" @@ -27,6 +45,18 @@ class SplitObjectSchema(Schema): class SplitTaskSchema(BaseTaskShema): + """ + Schema for configuring a split task. + + :param data_dir: Path to the dataset on disk. This is required. + :type data_dir: str + + :param csv_file: CSV file on disk containing dataset information. Default is None. + :type csv_file: str, optional + + :param split: Configuration on how to split the dataset. Default is None. + :type split: SplitObjectSchema, optional + """ data_dir = fields.String( required=True, description="Dataset path on disk", @@ -43,6 +73,27 @@ class SplitTaskSchema(BaseTaskShema): class TrainTaskSchema(BaseTaskShema): + """ + Schema for configuring a training task. + + :param dataset_config: Train dataset type and configuration. This is required. + :type dataset_config: ObjectConfig + + :param epochs: Number of epochs used in training. This is required. + :type epochs: int + + :param model_directory: Directory of the model output. This is required. + :type model_directory: str + + :param save_epochs: Number of training steps between model checkpoints. Default is 100. + :type save_epochs: int, optional + + :param iterations_log: After how many mini-batches do we want to show something in the log. Default is 200. + :type iterations_log: int, optional + + :param resume_model: File path to the model to be resumed. Default is None. + :type resume_model: str, optional + """ dataset_config = fields.Nested( nested=ObjectConfig, required=True, @@ -71,6 +122,30 @@ class TrainTaskSchema(BaseTaskShema): class TrainAndEvaluateTaskSchema(BaseTaskShema): + """ + Schema for configuring a task that involves training and evaluation. + + :param epochs: Number of epochs used in training. This is required. + :type epochs: int + + :param model_directory: Directory of the model output. This is required. + :type model_directory: str + + :param save_epochs: Number of training steps between model checkpoints. Default is 100. + :type save_epochs: int, optional + + :param iterations_log: After how many mini-batches do we want to show something in the log. Default is 200. + :type iterations_log: int, optional + + :param resume_model: File path to the model to be resumed. Default is None. + :type resume_model: str, optional + + :param train_dataset_config: Train dataset type and configuration. This is required. 
+    :type train_dataset_config: ObjectConfig
+
+    :param val_dataset_config: Validation dataset type and configuration. This is required.
+    :type val_dataset_config: ObjectConfig
+    """
     epochs = fields.Int(
         required=True, description="Number of epochs used in training", example=50
     )
@@ -113,6 +188,9 @@ class ParameterSchema(Schema):
 class OptimizeTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring an optimization task.
+    """
     epochs = fields.Int(
         required=True, description="Number of epochs used in training", example=50
     )
@@ -146,6 +224,21 @@ class OptimizeTaskSchema(BaseTaskShema):
 class EvaluateTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring an evaluation task.
+
+    :param dataset_config: Dataset type and configuration. This is required.
+    :type dataset_config: ObjectConfig
+
+    :param model_path: Path to the model. This is required.
+    :type model_path: str
+
+    :param metrics: Metric classes you want to calculate. Default is an empty list.
+    :type metrics: List[str], optional
+
+    :param visualizations: Visualization classes you want to show. Default is an empty list.
+    :type visualizations: List[str], optional
+    """
     dataset_config = fields.Nested(
         nested=ObjectConfig,
         required=True,
@@ -171,6 +264,37 @@ class EvaluateTaskSchema(BaseTaskShema):
 class PredictTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring a prediction task.
+
+    :param data_dir: Directory with the image to perform prediction on. This is required.
+    :type data_dir: str
+
+    :param model_path: Path to the model. This is required.
+    :type model_path: str
+
+    :param output_dir: Folder path where the plot images with predictions will be stored. Default is '/predictions'.
+    :type output_dir: str, optional
+
+    :param output_file: CSV file path where the predictions will be stored. Default is 'predictions.csv'.
+    :type output_file: str, optional
+
+    :param dataset_config: Dataset type and configuration. Default is None.
+    :type dataset_config: ObjectConfig, optional
+
+    :param batch_size: Batch size. Default is 64.
+    :type batch_size: int, optional
+
+    :param labels: Labels needed to tag the predictions. Default is None.
+    :type labels: List[str], optional
+
+    :param transforms: Classes to run transformations. Default is a list of common torchvision transformations.
+    :type transforms: List[str], optional
+
+    :param output_format: Whether to output the predictions to CSV or plots. Default is 'plot'.
+        Must be one of ['plot', 'csv', 'image'].
+    :type output_format: str, optional
+    """
     data_dir = fields.String(
         required=True,
         description="Directory with the image to perform prediction on",
@@ -226,6 +350,9 @@ class PrepareTaskSchema(BaseTaskShema):
 class ExtractFeaturesTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring a task to extract features from images.
+    """
     data_dir = fields.String(
         required=True,
         description="Directory with images to extract features from",
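
---

Note: the classes documented in this patch are plain marshmallow schemas, so the "Default is ..." values in the new docstrings come from the `missing=` arguments in the field definitions and are filled in when a raw config dict is loaded through the schema. A minimal sketch of that behavior, assuming marshmallow 3.x `load` semantics (aitlas' own `Config`/`Configurable` machinery may wrap this step differently):

```python
from aitlas.base.schemas import BaseDatasetSchema

# Raw user config: only some fields are supplied.
raw = {
    "batch_size": 32,               # overrides the schema default of 64
    "labels": ["forest", "water"],  # hypothetical label list for illustration
}

# Validation fills unspecified fields from the `missing=` defaults
# declared in the schema (num_workers=4, shuffle=True, ...).
config = BaseDatasetSchema().load(raw)

print(config["batch_size"])   # 32
print(config["num_workers"])  # 4  (schema default)
print(config["shuffle"])      # True (schema default)
```

The same pattern applies to the model and task schemas: required fields (e.g. `epochs` in `TrainTaskSchema`) raise a validation error when absent, while optional fields fall back to the documented defaults.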