From bfc3bdb9be66eba17be3bb946e7afdd2143fc7f6 Mon Sep 17 00:00:00 2001
From: Ivica Dimitrovski
Date: Mon, 3 Jul 2023 10:43:07 +0200
Subject: [PATCH] docs for configs and schemas

---
 aitlas/base/datasets.py    |  17 +++--
 aitlas/base/schemas.py     | 100 ++++++++++++++++++++++++++++-
 aitlas/datasets/schemas.py |  33 ++++++++++
 aitlas/models/schemas.py   |  21 ++++++
 aitlas/tasks/schemas.py    | 127 +++++++++++++++++++++++++++++++++++++
 5 files changed, 290 insertions(+), 8 deletions(-)

diff --git a/aitlas/base/datasets.py b/aitlas/base/datasets.py
index 894a08c..241fc0f 100644
--- a/aitlas/base/datasets.py
+++ b/aitlas/base/datasets.py
@@ -13,6 +13,12 @@ class BaseDataset(Dataset, Configurable):
     name = None

     def __init__(self, config):
+        """BaseDataset constructor
+        :param config: Configuration object which specifies the details of the dataset.
+        :type config: Config, contains information for the batch size, number of workers,
+            list of labels, and list of transformations
+        """
+
         Dataset.__init__(self)
         Configurable.__init__(self, config)

@@ -53,6 +59,7 @@ def prepare(self):
         return True

     def dataloader(self):
+        """Create and return a dataloader for the dataset"""
         return torch.utils.data.DataLoader(
             self,
             batch_size=self.batch_size,
@@ -69,31 +76,31 @@ def get_labels(self):
         )

     def show_batch(self, size):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return a random batch of images from the dataset"""
         raise NotImplementedError(
             "Please implement the `show_batch` method for your dataset"
         )

     def show_samples(self):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return random samples from the dataset"""
         raise NotImplementedError(
             "Please implement the `show_samples` method for your dataset"
         )

     def show_image(self, index):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return an image with a given index from the dataset"""
         raise NotImplementedError(
             "Please implement the `show_image` method for your dataset"
         )

     def data_distribution_table(self):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return the label distribution of the dataset"""
         raise NotImplementedError(
             "Please implement the `data_distribution_table` method for your dataset"
         )

     def data_distribution_barchart(self):
-        """Implement this if you want to return the complete set of labels of the dataset"""
+        """Implement this if you want to return the label distribution of the dataset as a barchart"""
         raise NotImplementedError(
             "Please implement the `data_distribution_barchart` method for your dataset"
         )
diff --git a/aitlas/base/schemas.py b/aitlas/base/schemas.py
index 4676b10..457c53c 100644
--- a/aitlas/base/schemas.py
+++ b/aitlas/base/schemas.py
@@ -2,6 +2,35 @@

 class BaseDatasetSchema(Schema):
+
+    """
+    Schema for configuring a base dataset.
+
+    :param batch_size: Batch size for the dataset. Default is 64.
+    :type batch_size: int, optional
+
+    :param shuffle: Flag indicating whether to shuffle the dataset. Default is True.
+    :type shuffle: bool, optional
+
+    :param num_workers: Number of workers to use for data loading. Default is 4.
+    :type num_workers: int, optional
+
+    :param pin_memory: Flag indicating whether to use page-locked memory. Default is False.
+    :type pin_memory: bool, optional
+
+    :param transforms: Classes to run transformations over the input data.
+    :type transforms: List[str], optional
+
+    :param target_transforms: Classes to run transformations over the target data.
+    :type target_transforms: List[str], optional
+
+    :param joint_transforms: Classes to run transformations over the input and target data.
+    :type joint_transforms: List[str], optional
+
+    :param labels: Labels for the dataset.
+    :type labels: List[str], optional
+    """
+
     batch_size = fields.Int(missing=64, description="Batch size", example=64)
     shuffle = fields.Bool(
         missing=True, description="Should shuffle dataset", example=False
     )
@@ -11,13 +40,13 @@ class BaseDatasetSchema(Schema):
         missing=False, description="Whether to use page-locked memory"
     )
     transforms = fields.List(
-        fields.String, missing=None, description="Classes to run transformations.",
+        fields.String, missing=None, description="Classes to run transformations over the input data.",
     )
     target_transforms = fields.List(
-        fields.String, missing=None, description="Classes to run transformations.",
+        fields.String, missing=None, description="Classes to run transformations over the target data.",
     )
     joint_transforms = fields.List(
-        fields.String, missing=None, description="Classes to run transformations.",
+        fields.String, missing=None, description="Classes to run transformations over the input and target data.",
     )
     labels = fields.List(
         fields.String, missing=None, description="Labels for the dataset",
@@ -25,6 +54,27 @@

 class BaseModelSchema(Schema):
+    """
+    Schema for configuring a base model.
+
+    :param num_classes: Number of classes for the model. Default is 2.
+    :type num_classes: int, optional
+
+    :param use_cuda: Flag indicating whether to use CUDA if available. Default is True.
+    :type use_cuda: bool, optional
+
+    :param metrics: Metrics to calculate during training and evaluation. Default is ['f1_score'].
+    :type metrics: List[str], optional
+
+    :param weights: Class weights to apply for the loss function. Default is None.
+    :type weights: List[float], optional
+
+    :param rank: Rank of the process when using distributed data parallel (DDP) training. Default is 0.
+    :type rank: int, optional
+
+    :param use_ddp: Flag indicating whether to turn on distributed data parallel (DDP) training. Default is False.
+    :type use_ddp: bool, optional
+    """
     num_classes = fields.Int(missing=2, description="Number of classes", example=2)
     use_cuda = fields.Bool(missing=True, description="Whether to use CUDA if possible")
     metrics = fields.List(
@@ -49,6 +99,28 @@ class BaseModelSchema(Schema):
 class BaseClassifierSchema(BaseModelSchema):
+    """
+    Schema for configuring a base classifier.
+
+    :param learning_rate: Learning rate used in training. Default is 0.01.
+    :type learning_rate: float, optional
+
+    :param weight_decay: Weight decay used in training. Default is 0.0.
+    :type weight_decay: float, optional
+
+    :param pretrained: Flag indicating whether to use a pretrained model. Default is True.
+    :type pretrained: bool, optional
+
+    :param local_model_path: Local path of the pretrained model. Default is None.
+    :type local_model_path: str, optional
+
+    :param threshold: Prediction threshold if needed. Default is 0.5.
+    :type threshold: float, optional
+
+    :param freeze: Flag indicating whether to freeze all layers except for the classifier layer(s). Default is False.
+    :type freeze: bool, optional
+    """
+
     learning_rate = fields.Float(
         missing=0.01, description="Learning rate used in training.", example=0.01
     )
@@ -71,6 +143,14 @@ class BaseClassifierSchema(BaseModelSchema):
 class BaseSegmentationClassifierSchema(BaseClassifierSchema):
+    """
+    Schema for configuring a base segmentation classifier.
+
+    :param metrics: Classes of metrics you want to calculate during training and evaluation.
+        Default is ['iou', 'f1_score', 'accuracy'].
+    :type metrics: List[str], optional
+    """
+
     metrics = fields.List(
         fields.String,
         missing=["iou", "f1_score", "accuracy"],
@@ -80,6 +160,20 @@
 class BaseObjectDetectionSchema(BaseClassifierSchema):
+    """
+    Schema for configuring a base object detection model.
+
+    :param metrics: Classes of metrics you want to calculate during training and evaluation.
+        Default is ['map'].
+    :type metrics: List[str], optional
+
+    :param step_size: Step size for the learning rate scheduler. Default is 15.
+    :type step_size: int, optional
+
+    :param gamma: Gamma (multiplier) for the learning rate scheduler. Default is 0.1.
+    :type gamma: float, optional
+    """
+
     metrics = fields.List(
         fields.String,
         missing=["map"],
diff --git a/aitlas/datasets/schemas.py b/aitlas/datasets/schemas.py
index cdf4e68..4ca6b24 100644
--- a/aitlas/datasets/schemas.py
+++ b/aitlas/datasets/schemas.py
@@ -4,6 +4,9 @@
 class MatDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a classification dataset given as a .mat file.
+    """
     mat_file = fields.String(
         missing=None, description="mat file on disk", example="./data/dataset.mat",
     )
@@ -21,6 +24,9 @@ class MatDatasetSchema(BaseDatasetSchema):
 class NPZDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a classification dataset given as an .npz file.
+    """
     npz_file = fields.String(
         missing=None, description="npz file on disk", example="./data/dataset.npz",
     )
@@ -38,6 +44,9 @@ class NPZDatasetSchema(BaseDatasetSchema):
 class ClassificationDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a classification dataset.
+    """
     data_dir = fields.String(
         missing="/", description="Dataset path on disk", example="./data/BigEarthNet/"
     )
@@ -47,6 +56,9 @@ class ClassificationDatasetSchema(BaseDatasetSchema):
 class SegmentationDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring a segmentation dataset.
+    """
     data_dir = fields.String(
         missing="/", description="Dataset path on disk", example="./data/BigEarthNet/"
     )
@@ -56,6 +68,9 @@ class SegmentationDatasetSchema(BaseDatasetSchema):
 class ObjectDetectionPascalDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring an object detection dataset given in PASCAL VOC format.
+    """
     imageset_file = fields.String(
         missing="/",
         description="File with the image ids in the set",
@@ -72,6 +87,9 @@ class ObjectDetectionPascalDatasetSchema(BaseDatasetSchema):
 class ObjectDetectionCocoDatasetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring an object detection dataset given in COCO format.
+    """
     data_dir = fields.String(
         missing="/", description="Dataset path on disk", example="./data/DIOR/"
     )
@@ -86,6 +104,9 @@ class ObjectDetectionCocoDatasetSchema(BaseDatasetSchema):
 class BigEarthNetSchema(BaseDatasetSchema):
+    """
+    Schema for configuring the BigEarthNet dataset.
+ """ csv_file = fields.String( missing=None, description="CSV file on disk", example="./data/train.csv" ) @@ -119,6 +140,9 @@ class BigEarthNetSchema(BaseDatasetSchema): class SpaceNet6DatasetSchema(BaseDatasetSchema): + """ + Schema for configuring the SpaceNet6 dataset. + """ orients = fields.String( required=False, example="path/to/data/train/AOI_11_Roterdam/SummaryData/SAR_orientations.csv", @@ -211,6 +235,9 @@ class SpaceNet6DatasetSchema(BaseDatasetSchema): class BreizhCropsSchema(BaseDatasetSchema): + """ + Schema for configuring the BreizhCrops dataset for crop type prediction. + """ regions = fields.List( fields.String, required=True, @@ -242,6 +269,9 @@ class BreizhCropsSchema(BaseDatasetSchema): class CropsDatasetSchema(BaseDatasetSchema): + """ + Schema for configuring dataset for crop type prediction. + """ csv_file_path = fields.String( missing=None, description="CSV file on disk", example="./data/train.csv" ) @@ -264,6 +294,9 @@ class CropsDatasetSchema(BaseDatasetSchema): class So2SatDatasetSchema(BaseDatasetSchema): + """ + Schema for configuring the So2Sat dataset. + """ h5_file = fields.String( required=True, description="H5 file on disk", example="./data/train.h5" ) diff --git a/aitlas/models/schemas.py b/aitlas/models/schemas.py index 53d632e..f97a190 100644 --- a/aitlas/models/schemas.py +++ b/aitlas/models/schemas.py @@ -6,6 +6,9 @@ class TransformerModelSchema(BaseClassifierSchema): + """ + Schema for configuring a transformer model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -34,6 +37,9 @@ class TransformerModelSchema(BaseClassifierSchema): class InceptionTimeSchema(BaseClassifierSchema): + """ + Schema for configuring a InceptionTime model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -54,6 +60,9 @@ class InceptionTimeSchema(BaseClassifierSchema): class LSTMSchema(BaseClassifierSchema): + """ + Schema for configuring a LSTM model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -80,6 +89,9 @@ class LSTMSchema(BaseClassifierSchema): class MSResNetSchema(BaseClassifierSchema): + """ + Schema for configuring a MSResNet model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -100,6 +112,9 @@ class MSResNetSchema(BaseClassifierSchema): class TempCNNSchema(BaseClassifierSchema): + """ + Schema for configuring a TempCNN model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -124,6 +139,9 @@ class TempCNNSchema(BaseClassifierSchema): class StarRNNSchema(BaseClassifierSchema): + """ + Schema for configuring a StarRNN model. + """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", @@ -150,6 +168,9 @@ class StarRNNSchema(BaseClassifierSchema): class OmniScaleCNNSchema(BaseClassifierSchema): + """ + Schema for configuring a OmniScaleCNN model. 
+ """ input_dim = fields.Int( required=True, description="Number of bands (13 for L1C, 10 for L2A), 11 for eopatch slovenia", diff --git a/aitlas/tasks/schemas.py b/aitlas/tasks/schemas.py index 39157d0..ff66d9b 100644 --- a/aitlas/tasks/schemas.py +++ b/aitlas/tasks/schemas.py @@ -4,6 +4,15 @@ class BaseTaskShema(Schema): + """ + Schema for configuring a base task. + + :param log: Flag indicating whether to turn on logging. Default is True. + :type log: bool, optional + + :param id: Run name/ID for the task. Default is None. + :type id: str, optional + """ log = fields.Boolean(required=False, missing=True, description="Turn on logging") id = fields.String( required=False, @@ -14,6 +23,15 @@ class BaseTaskShema(Schema): class SplitSetObjectSchema(Schema): + """ + Schema for configuring a split dataset object. + + :param ratio: Ratio of the dataset to include in the split. This is required. + :type ratio: int + + :param file: File containing the indices for the split. This is required. + :type file: str + """ ratio = fields.Int(required=True, description="Ratio of dataset", example=60) file = fields.String( required=True, description="File indices", example="./data/indices.csv" @@ -27,6 +45,18 @@ class SplitObjectSchema(Schema): class SplitTaskSchema(BaseTaskShema): + """ + Schema for configuring a split task. + + :param data_dir: Path to the dataset on disk. This is required. + :type data_dir: str + + :param csv_file: CSV file on disk containing dataset information. Default is None. + :type csv_file: str, optional + + :param split: Configuration on how to split the dataset. Default is None. + :type split: SplitObjectSchema, optional + """ data_dir = fields.String( required=True, description="Dataset path on disk", @@ -43,6 +73,27 @@ class SplitTaskSchema(BaseTaskShema): class TrainTaskSchema(BaseTaskShema): + """ + Schema for configuring a training task. + + :param dataset_config: Train dataset type and configuration. This is required. + :type dataset_config: ObjectConfig + + :param epochs: Number of epochs used in training. This is required. + :type epochs: int + + :param model_directory: Directory of the model output. This is required. + :type model_directory: str + + :param save_epochs: Number of training steps between model checkpoints. Default is 100. + :type save_epochs: int, optional + + :param iterations_log: After how many mini-batches do we want to show something in the log. Default is 200. + :type iterations_log: int, optional + + :param resume_model: File path to the model to be resumed. Default is None. + :type resume_model: str, optional + """ dataset_config = fields.Nested( nested=ObjectConfig, required=True, @@ -71,6 +122,30 @@ class TrainTaskSchema(BaseTaskShema): class TrainAndEvaluateTaskSchema(BaseTaskShema): + """ + Schema for configuring a task that involves training and evaluation. + + :param epochs: Number of epochs used in training. This is required. + :type epochs: int + + :param model_directory: Directory of the model output. This is required. + :type model_directory: str + + :param save_epochs: Number of training steps between model checkpoints. Default is 100. + :type save_epochs: int, optional + + :param iterations_log: After how many mini-batches do we want to show something in the log. Default is 200. + :type iterations_log: int, optional + + :param resume_model: File path to the model to be resumed. Default is None. + :type resume_model: str, optional + + :param train_dataset_config: Train dataset type and configuration. This is required. 
+    :type train_dataset_config: ObjectConfig
+
+    :param val_dataset_config: Validation dataset type and configuration. This is required.
+    :type val_dataset_config: ObjectConfig
+    """
     epochs = fields.Int(
         required=True, description="Number of epochs used in training", example=50
     )
@@ -113,6 +188,9 @@ class ParameterSchema(Schema):
 class OptimizeTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring an optimization task.
+    """
     epochs = fields.Int(
         required=True, description="Number of epochs used in training", example=50
     )
@@ -146,6 +224,21 @@ class OptimizeTaskSchema(BaseTaskShema):
 class EvaluateTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring an evaluation task.
+
+    :param dataset_config: Dataset type and configuration. This is required.
+    :type dataset_config: ObjectConfig
+
+    :param model_path: Path to the model. This is required.
+    :type model_path: str
+
+    :param metrics: Metric classes you want to calculate. Default is an empty list.
+    :type metrics: List[str], optional
+
+    :param visualizations: Visualization classes you want to show. Default is an empty list.
+    :type visualizations: List[str], optional
+    """
     dataset_config = fields.Nested(
         nested=ObjectConfig,
         required=True,
@@ -171,6 +264,37 @@ class EvaluateTaskSchema(BaseTaskShema):
 class PredictTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring a prediction task.
+
+    :param data_dir: Directory with the image to perform prediction on. This is required.
+    :type data_dir: str
+
+    :param model_path: Path to the model. This is required.
+    :type model_path: str
+
+    :param output_dir: Folder path where the plot images with predictions will be stored. Default is '/predictions'.
+    :type output_dir: str, optional
+
+    :param output_file: CSV file path where the predictions will be stored. Default is 'predictions.csv'.
+    :type output_file: str, optional
+
+    :param dataset_config: Dataset type and configuration. Default is None.
+    :type dataset_config: ObjectConfig, optional
+
+    :param batch_size: Batch size. Default is 64.
+    :type batch_size: int, optional
+
+    :param labels: Labels needed to tag the predictions. Default is None.
+    :type labels: List[str], optional
+
+    :param transforms: Classes to run transformations. Default is a list of common torchvision transformations.
+    :type transforms: List[str], optional
+
+    :param output_format: Whether to output the predictions to CSV or plots. Default is 'plot'.
+        Must be one of ['plot', 'csv', 'image'].
+    :type output_format: str, optional
+    """
     data_dir = fields.String(
         required=True,
         description="Directory with the image to perform prediction on",
@@ -226,6 +350,9 @@ class PrepareTaskSchema(BaseTaskShema):
 class ExtractFeaturesTaskSchema(BaseTaskShema):
+    """
+    Schema for configuring a task to extract features from images.
+    """
     data_dir = fields.String(
         required=True,
         description="Directory with images to extract features from",
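
---

Note: the classes documented in this patch are plain marshmallow schemas, so the "Default is ..." values in the new docstrings come from the `missing=` arguments in the field definitions and are filled in when a raw config dict is loaded through the schema. A minimal sketch of that behavior, assuming marshmallow 3.x `load` semantics (aitlas' own `Config`/`Configurable` machinery may wrap this step differently):

```python
from aitlas.base.schemas import BaseDatasetSchema

# Raw user config: only some fields are supplied.
raw = {
    "batch_size": 32,               # overrides the schema default of 64
    "labels": ["forest", "water"],  # hypothetical label list for illustration
}

# Validation fills unspecified fields from the `missing=` defaults
# declared in the schema (num_workers=4, shuffle=True, ...).
config = BaseDatasetSchema().load(raw)

print(config["batch_size"])   # 32
print(config["num_workers"])  # 4  (schema default)
print(config["shuffle"])      # True (schema default)
```

The same pattern applies to the model and task schemas: required fields (e.g. `epochs` in `TrainTaskSchema`) raise a validation error when absent, while optional fields fall back to the documented defaults.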