diff --git a/BiSTNet-NTIRE2023/.gitignore b/BiSTNet-NTIRE2023/.gitignore new file mode 100644 index 0000000..a29546e --- /dev/null +++ b/BiSTNet-NTIRE2023/.gitignore @@ -0,0 +1,14 @@ +data +sample_videos +runs +checkpoints +*.mp4 +*.pyc +tmp +tmp_train +tmp_test +tmp_seg +log +result +pretrain +mmediting \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/LICENSE b/BiSTNet-NTIRE2023/LICENSE new file mode 100644 index 0000000..af5d721 --- /dev/null +++ b/BiSTNet-NTIRE2023/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 zhangmozhe + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/BiSTNet-NTIRE2023/environment.yaml b/BiSTNet-NTIRE2023/environment.yaml new file mode 100644 index 0000000..4c63aec --- /dev/null +++ b/BiSTNet-NTIRE2023/environment.yaml @@ -0,0 +1,114 @@ +name: bistnet +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2023.01.10=h06a4308_0 + - certifi=2021.5.30=py36h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1t=h7f8727e_0 + - python=3.6.13=h12debd9_1 + - readline=8.2=h5eee18b_0 + - sqlite=3.40.1=h5082296_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.37.1=pyhd3eb1b0_0 + - xz=5.2.10=h5eee18b_1 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==1.0.0 + - addict==2.4.0 + - av==8.0.3 + - cachetools==4.2.4 + - charset-normalizer==2.0.12 + - click==8.0.4 + - colorama==0.4.5 + - commonmark==0.9.1 + - cycler==0.11.0 + - dataclasses==0.8 + - decorator==4.4.2 + - easydict==1.9 + - einops==0.4.1 + - facexlib==0.2.5 + - filterpy==1.4.5 + - future==0.18.2 + - google-auth==2.6.0 + - google-auth-oauthlib==0.4.6 + - grpcio==1.44.0 + - idna==3.3 + - imageio==2.15.0 + - imageio-ffmpeg==0.4.7 + - importlib-metadata==4.8.3 + - importlib-resources==5.4.0 + - joblib==1.1.1 + - kiwisolver==1.3.1 + - llvmlite==0.36.0 + - lmdb==1.3.0 + - logger==1.4 + - lpips==0.1.4 + - markdown==3.3.6 + - matplotlib==3.3.4 + - mmcv-full==1.7.1 + - model-index==0.1.11 + - moviepy==1.0.3 + - msgpack==1.0.4 + - networkx==2.5.1 + - numba==0.53.1 + - numpy==1.19.5 + - oauthlib==3.2.0 + - opencv-contrib-python==4.5.3.56 + - openmim==0.3.6 + - ordered-set==4.0.2 + - packaging==21.3 + - pandas==1.1.5 + - pillow==8.4.0 + - pip==21.3.1 + - prefetch-generator==1.0.1 + - proglog==0.1.10 + - protobuf==3.19.4 + - pyarrow==6.0.1 + - pyasn1==0.4.8 + - pyasn1-modules==0.2.8 + - pygments==2.14.0 + - 
pyparsing==3.0.7 + - pypng==0.0.21 + - python-dateutil==2.8.2 + - python-graphviz==0.19.1 + - pytz==2022.1 + - pywavelets==1.1.1 + - pyyaml==6.0 + - requests==2.27.1 + - requests-oauthlib==1.3.1 + - rich==12.6.0 + - rsa==4.8 + - scikit-image==0.17.2 + - scikit-learn==0.24.2 + - scipy==1.5.4 + - setuptools==59.6.0 + - six==1.16.0 + - sklearn==0.0.post1 + - tabulate==0.8.10 + - tensorboard==2.8.0 + - tensorboard-data-server==0.6.1 + - tensorboard-plugin-wit==1.8.1 + - threadpoolctl==3.1.0 + - tifffile==2020.9.3 + - timm==0.6.7 + - torch==1.10.0+cu113 + - torchaudio==0.10.0+cu113 + - torchcontrib==0.0.2 + - torchvision==0.11.1+cu113 + - tqdm==4.63.0 + - typing-extensions==4.1.1 + - urllib3==1.26.8 + - werkzeug==2.0.3 + - wget==3.2 + - yacs==0.1.8 + - yapf==0.32.0 + - zipp==3.6.0 +prefix: /data2/yangyixin/anaconda3/envs/bistnet diff --git a/BiSTNet-NTIRE2023/lib/.vscode/settings.json b/BiSTNet-NTIRE2023/lib/.vscode/settings.json new file mode 100644 index 0000000..2b7e46d --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.formatting.provider": "yapf" +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/lib/FeatVGG.py b/BiSTNet-NTIRE2023/lib/FeatVGG.py new file mode 100644 index 0000000..32e33fe --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/FeatVGG.py @@ -0,0 +1,65 @@ +import torch.nn as nn +import utils.vgg_util as vgg_util +from torchvision import models + + +def conv_to_relu(layer_names): + out_layernames = [] + for name in layer_names: + if name.startswith("conv"): + out_layernames.append("relu" + name[4:]) + else: + out_layernames.append(name) + return out_layernames + + +class FeatVGG(nn.Module): + def __init__(self, content_layers=["relu3_1"]): + super(FeatVGG, self).__init__() + self.content_layers = conv_to_relu(content_layers) + self.vgg19 = vgg_util.get_renamed_vgg() + self.last_c_layer = self.content_layers[-1] + is_last_content = False + replace_layers, del_layers = [], [] + for name, mod in self.vgg19.named_children(): + if is_last_content: + del_layers.append(name) + else: + if name == self.last_c_layer: + is_last_content = True + + for name in del_layers: + delattr(self.vgg19, name) + + # no need for gradweight vgg19 + for param in self.vgg19.parameters(): + param.requires_grad = False + + def forward(self, input_img): + # input image is BGR image + # each channel ranges in [0,255] + # should be normalized with mean = [0.406*255, 0.456*255, 0.485*255] = [103,116,123] + # out = {} + return self.vgg19(input_img) + + +class VGGNet_multilayer(nn.Module): + def __init__(self): + """Select conv1_1 ~ conv5_1 activation maps.""" + super(VGGNet_multilayer, self).__init__() + self.select = ["0", "5", "10", "19", "28"] + self.vgg = models.vgg19(pretrained=True).features + + def forward(self, x): + """Extract multiple convolutional feature maps. 
+ x: rgb image + ranges in [0,1] + should be normalzied with mean = [0.485, 0.456, 0.406] + and variance = [0.229, 0.224, 0.225] + """ + features = [] + for name, layer in self.vgg._modules.items(): + x = layer(x) + if name in self.select: + features.append(x) + return features diff --git a/BiSTNet-NTIRE2023/lib/TestTransforms.py b/BiSTNet-NTIRE2023/lib/TestTransforms.py new file mode 100644 index 0000000..f9ac6f0 --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/TestTransforms.py @@ -0,0 +1,353 @@ +from __future__ import division + +import collections +import numbers +import random + +import torch +from PIL import Image +from skimage import color + +import lib.functional as F + +__all__ = [ + "Compose", + "Concatenate", + "ToTensor", + "Normalize", + "Resize", + "Scale", + "CenterCrop", + "Pad", + "RandomCrop", + "RandomHorizontalFlip", + "RandomVerticalFlip", + "RandomResizedCrop", + "RandomSizedCrop", + "FiveCrop", + "TenCrop", + "RGB2Lab", +] + + +def CustomFunc(inputs, func, *args, **kwargs): + im_l = func(inputs[0], *args, **kwargs) + im_ab = func(inputs[1], *args, **kwargs) + warp_ba = func(inputs[2], *args, **kwargs) + warp_aba = func(inputs[3], *args, **kwargs) + # im_gbl_ab = func(inputs[4], *args, **kwargs) + # bgr_mc_im = func(inputs[5], *args, **kwargs) + layer_data = [im_l, im_ab, warp_ba, warp_aba] + # layer_data = [im_l, im_ab, warp_ba, warp_aba, im_gbl_ab, bgr_mc_im] + for l in range(5): + layer = inputs[4 + l] + err_ba = func(layer[0], *args, **kwargs) + err_ab = func(layer[1], *args, **kwargs) + + layer_data.append([err_ba, err_ab]) + + return layer_data + + +class Compose(object): + """Composes several transforms together. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, inputs): + for t in self.transforms: + inputs = t(inputs) + return inputs + + +class Concatenate(object): + """ + Input: [im_l, im_ab, inputs] + inputs = [warp_ba_l, warp_ba_ab, warp_aba, err_pm, err_aba] + + Output:[im_l, err_pm, warp_ba, warp_aba, im_ab, err_aba] + """ + + def __call__(self, inputs): + im_l = inputs[0] + im_ab = inputs[1] + warp_ba = inputs[2] + warp_aba = inputs[3] + # im_glb_ab = inputs[4] + # bgr_mc_im = inputs[5] + # bgr_mc_im = bgr_mc_im[[2, 1, 0], ...] + + err_ba = [] + err_ab = [] + + for l in range(5): + layer = inputs[4 + l] + err_ba.append(layer[0]) + err_ab.append(layer[1]) + + cerr_ba = torch.cat(err_ba, 0) + cerr_ab = torch.cat(err_ab, 0) + + return (im_l, cerr_ba, warp_ba, warp_aba, im_ab, cerr_ab) + # return (im_l, cerr_ba, warp_ba, warp_aba, im_glb_ab, bgr_mc_im, im_ab, cerr_ab) + + +class ToTensor(object): + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + + Converts a PIL Image or numpy.ndarray (H x W x C) in the range + [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. + """ + + def __call__(self, inputs): + """ + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + inputs = CustomFunc(inputs, F.to_mytensor) + return inputs + + +class Normalize(object): + """Normalize an tensor image with mean and standard deviation. + Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform + will normalize each channel of the input ``torch.*Tensor`` i.e. 
+ ``input[channel] = (input[channel] - mean[channel]) / std[channel]`` + + Args: + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + """ + + def __call__(self, inputs): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + + Returns: + Tensor: Normalized Tensor image. + """ + + im_l = F.normalize(inputs[0], 50, 1) # [0, 100] + im_ab = F.normalize(inputs[1], (0, 0), (1, 1)) # [-100, 100] + + inputs[2][0:1, :, :] = F.normalize(inputs[2][0:1, :, :], 50, 1) + inputs[2][1:3, :, :] = F.normalize(inputs[2][1:3, :, :], (0, 0), (1, 1)) + warp_ba = inputs[2] + + inputs[3][0:1, :, :] = F.normalize(inputs[3][0:1, :, :], 50, 1) + inputs[3][1:3, :, :] = F.normalize(inputs[3][1:3, :, :], (0, 0), (1, 1)) + warp_aba = inputs[3] + + # im_gbl_ab = F.normalize(inputs[4], (0, 0), (1, 1)) # [-100, 100] + # + # bgr_mc_im = F.normalize(inputs[5], (123.68, 116.78, 103.938), (1, 1, 1)) + + # layer_data = [im_l, im_ab, warp_ba, warp_aba, im_gbl_ab, bgr_mc_im] + layer_data = [im_l, im_ab, warp_ba, warp_aba] + + for l in range(5): + layer = inputs[4 + l] + err_ba = F.normalize(layer[0], 127, 2) # [0, 255] + err_ab = F.normalize(layer[1], 127, 2) # [0, 255] + layer_data.append([err_ba, err_ab]) + + return layer_data + + +class Resize(object): + """Resize the input PIL Image to the given size. + + Args: + size (sequence or int): Desired output size. If size is a sequence like + (h, w), output size will be matched to this. If size is an int, + smaller edge of the image will be matched to this number. + i.e, if height > width, then image will be rescaled to + (size * height / width, size) + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + """ + + def __init__(self, size, interpolation=Image.BILINEAR): + assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2) + self.size = size + self.interpolation = interpolation + + def __call__(self, inputs): + """ + Args: + img (PIL Image): Image to be scaled. + + Returns: + PIL Image: Rescaled image. + """ + return CustomFunc(inputs, F.resize, self.size, self.interpolation) + + +class RandomCrop(object): + """Crop the given PIL Image at a random location. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + padding (int or sequence, optional): Optional padding on each border + of the image. Default is 0, i.e no padding. If a sequence of length + 4 is provided, it is used to pad left, top, right, bottom borders + respectively. + """ + + def __init__(self, size, padding=0): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + @staticmethod + def get_params(img, output_size): + """Get parameters for ``crop`` for a random crop. + + Args: + img (PIL Image): Image to be cropped. + output_size (tuple): Expected output size of the crop. + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. + """ + w, h = img.size + th, tw = output_size + if w == tw and h == th: + return 0, 0, h, w + + i = random.randint(0, h - th) + j = random.randint(0, w - tw) + return i, j, th, tw + + def __call__(self, inputs): + """ + Args: + img (PIL Image): Image to be cropped. + + Returns: + PIL Image: Cropped image. 
+ """ + if self.padding > 0: + inputs = CustomFunc(inputs, F.pad, self.padding) + + i, j, h, w = self.get_params(inputs[0], self.size) + return CustomFunc(inputs, F.crop, i, j, h, w) + + +class CenterCrop(object): + """Crop the given PIL Image at a random location. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + padding (int or sequence, optional): Optional padding on each border + of the image. Default is 0, i.e no padding. If a sequence of length + 4 is provided, it is used to pad left, top, right, bottom borders + respectively. + """ + + def __init__(self, size, padding=0): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + @staticmethod + def get_params(img, output_size): + """Get parameters for ``crop`` for a random crop. + + Args: + img (PIL Image): Image to be cropped. + output_size (tuple): Expected output size of the crop. + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. + """ + w, h = img.size + th, tw = output_size + if w == tw and h == th: + return 0, 0, h, w + + i = (h - th) // 2 + j = (w - tw) // 2 + return i, j, th, tw + + def __call__(self, inputs): + """ + Args: + img (PIL Image): Image to be cropped. + + Returns: + PIL Image: Cropped image. + """ + if self.padding > 0: + inputs = CustomFunc(inputs, F.pad, self.padding) + + if type(inputs) is list: + i, j, h, w = self.get_params(inputs[0], self.size) + else: + i, j, h, w = self.get_params(inputs, self.size) + return CustomFunc(inputs, F.crop, i, j, h, w) + + +class RandomHorizontalFlip(object): + """Horizontally flip the given PIL Image randomly with a probability of 0.5.""" + + def __call__(self, inputs): + """ + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Randomly flipped image. + """ + + if random.random() < 0.5: + return CustomFunc(inputs, F.hflip) + return inputs + + +class RGB2Lab(object): + def __call__(self, inputs): + """ + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Randomly flipped image. 
+ """ + + def __call__(self, inputs): + image_lab = color.rgb2lab(inputs[0]) + warp_ba_lab = color.rgb2lab(inputs[2]) + warp_aba_lab = color.rgb2lab(inputs[3]) + # im_gbl_lab = color.rgb2lab(inputs[4]) + + inputs[0] = image_lab[:, :, :1] # l channel + inputs[1] = image_lab[:, :, 1:] # ab channel + inputs[2] = warp_ba_lab # lab channel + inputs[3] = warp_aba_lab # lab channel + # inputs[4] = im_gbl_lab[:, :, 1:] # ab channel + + return inputs diff --git a/BiSTNet-NTIRE2023/lib/VGGFeatureLoss.py b/BiSTNet-NTIRE2023/lib/VGGFeatureLoss.py new file mode 100644 index 0000000..d9c782e --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/VGGFeatureLoss.py @@ -0,0 +1,90 @@ +import torch.nn as nn +import utils.vgg_util as vgg_util + + +def conv_to_relu(layer_names): + out_layernames = [] + for name in layer_names: + if name.startswith("conv"): + out_layernames.append("relu" + name[4:]) + else: + out_layernames.append(name) + return out_layernames + + +class VGGFeatureLoss(object): + def __init__(self, content_layers=["relu3_1"], content_weights=[1]): + self.content_layers = conv_to_relu(content_layers) + self.content_weights = content_weights + # set up vgg19 model + self.vgg19 = vgg_util.get_renamed_vgg() + self.last_c_layer = self.content_layers[-1] + is_last_content = False + replace_layers, del_layers = [], [] + for name, mod in self.vgg19.named_children(): + if is_last_content: + del_layers.append(name) + else: + if name == self.last_c_layer: + is_last_content = True + print("del_layers", del_layers) + for name in del_layers: + delattr(self.vgg19, name) + + # set up loss + self.loss_layers = self.content_layers + self.contentloss_fns = [nn.MSELoss()] * len(self.content_layers) + self.content_loss = None + + # target + self.content_targets = None + + # no need for gradweight vgg19 + for param in self.vgg19.parameters(): + param.requires_grad = False + + def vgg_forward(self, input_img, layers, last_layer): + # out = {} + x = input_img + out = None + # print(last_layer) + assert len(layers) == 1 and layers[-1] == last_layer + for name, mod in self.vgg19.named_children(): + # print('name is : ', name) + x = mod(x) + if name == last_layer: + out = x + break + # out[name] = mod(prev_input) + # prev_input = out[name] + # if name == last_layer: + # break + return [out] + # return [out[key] for key in layers] + + def cuda(self, gpu_ids): + if len(gpu_ids) > 1: + self.vgg19 = nn.DataParallel(self.vgg19.cuda(), gpu_ids) + else: + self.vgg19.cuda() + self.contentloss_fns = [loss_fn.cuda() for loss_fn in self.contentloss_fns] + + def set_content_targets(self, content_img): + self.content_targets = [ + A.detach() for A in self.vgg_forward(content_img, self.content_layers, self.last_c_layer) + ] + + def forward(self, img): + assert self.content_targets is not None, "set up content targets first" + out = self.vgg_forward(img, self.content_layers, self.last_c_layer) + self.content_loss = sum( + [ + self.content_weights[idx] * self.contentloss_fns[idx](out[idx], self.content_targets[idx]) + for idx in range(len(self.content_layers)) + ] + ) + return self.content_loss + + # def backward(self): + # self.loss.backward() + diff --git a/BiSTNet-NTIRE2023/lib/__init__.py b/BiSTNet-NTIRE2023/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/lib/functional.py b/BiSTNet-NTIRE2023/lib/functional.py new file mode 100644 index 0000000..0b276ee --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/functional.py @@ -0,0 +1,605 @@ +from __future__ import division + +import math +import random + +import torch 
+from PIL import Image, ImageEnhance, ImageOps + +try: + import accimage +except ImportError: + accimage = None +import collections +import numbers +import types +import warnings + +import numpy as np + + +def _is_pil_image(img): + if accimage is not None: + return isinstance(img, (Image.Image, accimage.Image)) + else: + return isinstance(img, Image.Image) + + +def _is_tensor_image(img): + return torch.is_tensor(img) and img.ndimension() == 3 + + +def _is_numpy_image(img): + return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) + + +def to_tensor(pic): + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + + See ``ToTensor`` for more details. + + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + if not (_is_pil_image(pic) or _is_numpy_image(pic)): + raise TypeError("pic should be PIL Image or ndarray. Got {}".format(type(pic))) + + if isinstance(pic, np.ndarray): + # handle numpy array + img = torch.from_numpy(pic.transpose((2, 0, 1))) + # backward compatibility + return img.float().div(255) + + if accimage is not None and isinstance(pic, accimage.Image): + nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32) + pic.copyto(nppic) + return torch.from_numpy(nppic) + + # handle PIL Image + if pic.mode == "I": + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == "I;16": + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == "YCbCr": + nchannel = 3 + elif pic.mode == "I;16": + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + # put it from HWC to CHW format + # yikes, this transpose takes 80% of the loading time/CPU + img = img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float().div(255) + else: + return img + + +def to_mytensor(pic): + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + + See ``ToTensor`` for more details. + + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + pic_arr = np.array(pic) + if pic_arr.ndim == 2: + pic_arr = pic_arr[..., np.newaxis] + img = torch.from_numpy(pic_arr.transpose((2, 0, 1))) + if not isinstance(img, torch.FloatTensor): + return img.float() # no normalize .div(255) + else: + return img + + +def to_pil_image(pic, mode=None): + """Convert a tensor or an ndarray to PIL Image. + + See :class:`~torchvision.transforms.ToPIlImage` for more details. + + Args: + pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. + mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). + + .. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes + + Returns: + PIL Image: Image converted to PIL Image. + """ + if not (_is_numpy_image(pic) or _is_tensor_image(pic)): + raise TypeError("pic should be Tensor or ndarray. 
Got {}.".format(type(pic))) + + npimg = pic + if isinstance(pic, torch.FloatTensor): + pic = pic.mul(255).byte() + if torch.is_tensor(pic): + npimg = np.transpose(pic.numpy(), (1, 2, 0)) + + if not isinstance(npimg, np.ndarray): + raise TypeError("Input pic must be a torch.Tensor or NumPy ndarray, " + "not {}".format(type(npimg))) + + if npimg.shape[2] == 1: + expected_mode = None + npimg = npimg[:, :, 0] + if npimg.dtype == np.uint8: + expected_mode = "L" + if npimg.dtype == np.int16: + expected_mode = "I;16" + if npimg.dtype == np.int32: + expected_mode = "I" + elif npimg.dtype == np.float32: + expected_mode = "F" + if mode is not None and mode != expected_mode: + raise ValueError( + "Incorrect mode ({}) supplied for input type {}. Should be {}".format(mode, np.dtype, expected_mode) + ) + mode = expected_mode + + elif npimg.shape[2] == 4: + permitted_4_channel_modes = ["RGBA", "CMYK"] + if mode is not None and mode not in permitted_4_channel_modes: + raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes)) + + if mode is None and npimg.dtype == np.uint8: + mode = "RGBA" + else: + permitted_3_channel_modes = ["RGB", "YCbCr", "HSV"] + if mode is not None and mode not in permitted_3_channel_modes: + raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes)) + if mode is None and npimg.dtype == np.uint8: + mode = "RGB" + + if mode is None: + raise TypeError("Input type {} is not supported".format(npimg.dtype)) + + return Image.fromarray(npimg, mode=mode) + + +def normalize(tensor, mean, std): + """Normalize a tensor image with mean and standard deviation. + + See ``Normalize`` for more details. + + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channely. + + Returns: + Tensor: Normalized Tensor image. + """ + if not _is_tensor_image(tensor): + raise TypeError("tensor is not a torch image.") + # TODO: make efficient + if tensor.size(0) == 1: + tensor.sub_(mean).div_(std) + else: + for t, m, s in zip(tensor, mean, std): + t.sub_(m).div_(s) + return tensor + + +def resize(img, size, interpolation=Image.BILINEAR): + """Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaing + the aspect ratio. i.e, if height > width, then image will be rescaled to + (size * height / width, size) + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. 
Got {}".format(type(img))) + if not isinstance(size, int) and (not isinstance(size, collections.Iterable) or len(size) != 2): + raise TypeError("Got inappropriate size arg: {}".format(size)) + + if not isinstance(size, int): + return img.resize(size[::-1], interpolation) + + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(round(size * h / w)) + else: + oh = size + ow = int(round(size * w / h)) + return img.resize((ow, oh), interpolation) + + +def scale(*args, **kwargs): + warnings.warn("The use of the transforms.Scale transform is deprecated, " + "please use transforms.Resize instead.") + return resize(*args, **kwargs) + + +def pad(img, padding, fill=0): + """Pad the given PIL Image on all sides with the given "pad" value. + + Args: + img (PIL Image): Image to be padded. + padding (int or tuple): Padding on each border. If a single int is provided this + is used to pad all borders. If tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a tuple of length 4 is provided + this is the padding for the left, top, right and bottom borders + respectively. + fill: Pixel fill value. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + + Returns: + PIL Image: Padded image. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + if not isinstance(padding, (numbers.Number, tuple)): + raise TypeError("Got inappropriate padding arg") + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError("Got inappropriate fill arg") + + if isinstance(padding, collections.Sequence) and len(padding) not in [2, 4]: + raise ValueError( + "Padding must be an int or a 2, or 4 element tuple, not a " + "{} element tuple".format(len(padding)) + ) + + return ImageOps.expand(img, border=padding, fill=fill) + + +def crop(img, i, j, h, w): + """Crop the given PIL Image. + + Args: + img (PIL Image): Image to be cropped. + i: Upper pixel coordinate. + j: Left pixel coordinate. + h: Height of the cropped image. + w: Width of the cropped image. + + Returns: + PIL Image: Cropped image. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + return img.crop((j, i, j + w, i + h)) + + +def center_crop(img, output_size): + if isinstance(output_size, numbers.Number): + output_size = (int(output_size), int(output_size)) + w, h = img.size + th, tw = output_size + i = int(round((h - th) / 2.0)) + j = int(round((w - tw) / 2.0)) + return crop(img, i, j, th, tw) + + +def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR): + """Crop the given PIL Image and resize it to desired size. + + Notably used in RandomResizedCrop. + + Args: + img (PIL Image): Image to be cropped. + i: Upper pixel coordinate. + j: Left pixel coordinate. + h: Height of the cropped image. + w: Width of the cropped image. + size (sequence or int): Desired output size. Same semantics as ``scale``. + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR``. + Returns: + PIL Image: Cropped image. + """ + assert _is_pil_image(img), "img should be PIL Image" + img = crop(img, i, j, h, w) + img = resize(img, size, interpolation) + return img + + +def hflip(img): + """Horizontally flip the given PIL Image. + + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Horizontall flipped image. 
+ """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + return img.transpose(Image.FLIP_LEFT_RIGHT) + + +def vflip(img): + """Vertically flip the given PIL Image. + + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Vertically flipped image. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + return img.transpose(Image.FLIP_TOP_BOTTOM) + + +def five_crop(img, size): + """Crop the given PIL Image into four corners and the central crop. + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + Returns: + tuple: tuple (tl, tr, bl, br, center) corresponding top left, + top right, bottom left, bottom right and center crop. + """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + else: + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + + w, h = img.size + crop_h, crop_w = size + if crop_w > w or crop_h > h: + raise ValueError("Requested crop size {} is bigger than input size {}".format(size, (h, w))) + tl = img.crop((0, 0, crop_w, crop_h)) + tr = img.crop((w - crop_w, 0, w, crop_h)) + bl = img.crop((0, h - crop_h, crop_w, h)) + br = img.crop((w - crop_w, h - crop_h, w, h)) + center = center_crop(img, (crop_h, crop_w)) + return (tl, tr, bl, br, center) + + +def ten_crop(img, size, vertical_flip=False): + """Crop the given PIL Image into four corners and the central crop plus the + flipped version of these (horizontal flipping is used by default). + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + vertical_flip (bool): Use vertical flipping instead of horizontal + + Returns: + tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, + br_flip, center_flip) corresponding top left, top right, + bottom left, bottom right and center crop and same for the + flipped image. + """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + else: + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + + first_five = five_crop(img, size) + + if vertical_flip: + img = vflip(img) + else: + img = hflip(img) + + second_five = five_crop(img, size) + return first_five + second_five + + +def adjust_brightness(img, brightness_factor): + """Adjust brightness of an Image. + + Args: + img (PIL Image): PIL Image to be adjusted. + brightness_factor (float): How much to adjust the brightness. Can be + any non negative number. 0 gives a black image, 1 gives the + original image while 2 increases the brightness by a factor of 2. + + Returns: + PIL Image: Brightness adjusted image. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + enhancer = ImageEnhance.Brightness(img) + img = enhancer.enhance(brightness_factor) + return img + + +def adjust_contrast(img, contrast_factor): + """Adjust contrast of an Image. + + Args: + img (PIL Image): PIL Image to be adjusted. + contrast_factor (float): How much to adjust the contrast. 
Can be any + non negative number. 0 gives a solid gray image, 1 gives the + original image while 2 increases the contrast by a factor of 2. + + Returns: + PIL Image: Contrast adjusted image. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + enhancer = ImageEnhance.Contrast(img) + img = enhancer.enhance(contrast_factor) + return img + + +def adjust_saturation(img, saturation_factor): + """Adjust color saturation of an image. + + Args: + img (PIL Image): PIL Image to be adjusted. + saturation_factor (float): How much to adjust the saturation. 0 will + give a black and white image, 1 will give the original image while + 2 will enhance the saturation by a factor of 2. + + Returns: + PIL Image: Saturation adjusted image. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + enhancer = ImageEnhance.Color(img) + img = enhancer.enhance(saturation_factor) + return img + + +def adjust_hue(img, hue_factor): + """Adjust hue of an image. + + The image hue is adjusted by converting the image to HSV and + cyclically shifting the intensities in the hue channel (H). + The image is then converted back to original image mode. + + `hue_factor` is the amount of shift in H channel and must be in the + interval `[-0.5, 0.5]`. + + See https://en.wikipedia.org/wiki/Hue for more details on Hue. + + Args: + img (PIL Image): PIL Image to be adjusted. + hue_factor (float): How much to shift the hue channel. Should be in + [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in + HSV space in positive and negative direction respectively. + 0 means no shift. Therefore, both -0.5 and 0.5 will give an image + with complementary colors while 0 gives the original image. + + Returns: + PIL Image: Hue adjusted image. + """ + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError("hue_factor is not in [-0.5, 0.5].".format(hue_factor)) + + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + input_mode = img.mode + if input_mode in {"L", "1", "I", "F"}: + return img + + h, s, v = img.convert("HSV").split() + + np_h = np.array(h, dtype=np.uint8) + # uint8 addition take cares of rotation across boundaries + with np.errstate(over="ignore"): + np_h += np.uint8(hue_factor * 255) + h = Image.fromarray(np_h, "L") + + img = Image.merge("HSV", (h, s, v)).convert(input_mode) + return img + + +def adjust_gamma(img, gamma, gain=1): + """Perform gamma correction on an image. + + Also known as Power Law Transform. Intensities in RGB mode are adjusted + based on the following equation: + + I_out = 255 * gain * ((I_in / 255) ** gamma) + + See https://en.wikipedia.org/wiki/Gamma_correction for more details. + + Args: + img (PIL Image): PIL Image to be adjusted. + gamma (float): Non negative real number. gamma larger than 1 make the + shadows darker, while gamma smaller than 1 make dark regions + lighter. + gain (float): The constant multiplier. + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. 
Got {}".format(type(img))) + + if gamma < 0: + raise ValueError("Gamma should be a non-negative real number") + + input_mode = img.mode + img = img.convert("RGB") + + np_img = np.array(img, dtype=np.float32) + np_img = 255 * gain * ((np_img / 255) ** gamma) + np_img = np.uint8(np.clip(np_img, 0, 255)) + + img = Image.fromarray(np_img, "RGB").convert(input_mode) + return img + + +def rotate(img, angle, resample=False, expand=False, center=None): + """Rotate the image by angle and then (optionally) translate it by (n_columns, n_rows) + + + Args: + img (PIL Image): PIL Image to be rotated. + angle ({float, int}): In degrees degrees counter clockwise order. + resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): + An optional resampling filter. + See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters + If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. + expand (bool, optional): Optional expansion flag. + If true, expands the output image to make it large enough to hold the entire rotated image. + If false or omitted, make the output image the same size as the input image. + Note that the expand flag assumes rotation around the center and no translation. + center (2-tuple, optional): Optional center of rotation. + Origin is the upper left corner. + Default is the center of the image. + """ + + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. Got {}".format(type(img))) + + return img.rotate(angle, resample, expand, center) + + +def to_grayscale(img, num_output_channels=1): + """Convert image to grayscale version of image. + + Args: + img (PIL Image): Image to be converted to grayscale. + + Returns: + PIL Image: Grayscale version of the image. + if num_output_channels == 1 : returned image is single channel + if num_output_channels == 3 : returned image is 3 channel with r == g == b + """ + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. 
Got {}".format(type(img))) + + if num_output_channels == 1: + img = img.convert("L") + elif num_output_channels == 3: + img = img.convert("L") + np_img = np.array(img, dtype=np.uint8) + np_img = np.dstack([np_img, np_img, np_img]) + img = Image.fromarray(np_img, "RGB") + else: + raise ValueError("num_output_channels should be either 1 or 3") + + return img diff --git a/BiSTNet-NTIRE2023/lib/videoloader.py b/BiSTNet-NTIRE2023/lib/videoloader.py new file mode 100644 index 0000000..00c1d5d --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/videoloader.py @@ -0,0 +1,301 @@ +import sys + +sys.path.insert(0, "..") +import os +import random + +import cv2 +import numpy as np +import torch +import torchvision.utils as vutils +from PIL import Image +from skimage import color +from torch.autograd import Variable +from utils.flowlib import read_flow +from utils.util_distortion import CenterPad + +import lib.functional as F + +cv2.setNumThreads(0) + + +class RGB2Lab(object): + def __init__(self): + pass + + def __call__(self, inputs): + return color.rgb2lab(inputs) + + +class Normalize(object): + def __init__(self): + pass + + def __call__(self, inputs): + inputs[0:1, :, :] = F.normalize(inputs[0:1, :, :], 50, 1) + inputs[1:3, :, :] = F.normalize(inputs[1:3, :, :], (0, 0), (1, 1)) + return inputs + + +class ToTensor(object): + def __init__(self): + pass + + def __call__(self, inputs): + outputs = F.to_mytensor(inputs) # permute channel and transform to tensor + return outputs + + +class RandomErasing(object): + def __init__(self, probability=0.6, sl=0.05, sh=0.6): + self.probability = probability + self.sl = sl + self.sh = sh + + def __call__(self, img): + img = np.array(img) + if random.uniform(0, 1) > self.probability: + return Image.fromarray(img) + + area = img.shape[0] * img.shape[1] + h0 = img.shape[0] + w0 = img.shape[1] + channel = img.shape[2] + + h = int(round(random.uniform(self.sl, self.sh) * h0)) + w = int(round(random.uniform(self.sl, self.sh) * w0)) + + if w < img.shape[1] and h < img.shape[0]: + x1 = random.randint(0, img.shape[0] - h) + y1 = random.randint(0, img.shape[1] - w) + img[x1 : x1 + h, y1 : y1 + w, :] = np.random.rand(h, w, channel) * 255 + return Image.fromarray(img) + + return Image.fromarray(img) + + +class CenterCrop(object): + """ + center crop the numpy array + """ + + def __init__(self, image_size): + self.h0, self.w0 = image_size + + def __call__(self, input_numpy): + if input_numpy.ndim == 3: + h, w, channel = input_numpy.shape + output_numpy = np.zeros((self.h0, self.w0, channel)) + output_numpy = input_numpy[ + (h - self.h0) // 2 : (h - self.h0) // 2 + self.h0, (w - self.w0) // 2 : (w - self.w0) // 2 + self.w0, : + ] + else: + h, w = input_numpy.shape + output_numpy = np.zeros((self.h0, self.w0)) + output_numpy = input_numpy[ + (h - self.h0) // 2 : (h - self.h0) // 2 + self.h0, (w - self.w0) // 2 : (w - self.w0) // 2 + self.w0 + ] + return output_numpy + + +def parse_images(data_root): + image_pairs = [] + subdirs = sorted(os.listdir(data_root)) + for subdir in subdirs: + path = os.path.join(data_root, subdir) + if not os.path.isdir(path): + continue + + parse_file = os.path.join(path, "pairs_output_new.txt") + if os.path.exists(parse_file): + with open(parse_file, "r") as f: + lines = f.readlines() + for line in lines: + line = line.replace("\n", "") + ( + image1_name, + image2_name, + reference_video_name, + reference_video_name1, + reference_name1, + reference_name2, + reference_name3, + reference_name4, + reference_name5, + reference_gt1, + reference_gt2, + 
reference_gt3, + ) = line.split() + image1_name = image1_name.split(".")[0] + image2_name = image2_name.split(".")[0] + reference_video_name = reference_video_name.split(".")[0] + reference_video_name1 = reference_video_name1.split(".")[0] + reference_name1 = reference_name1.split(".")[0] + reference_name2 = reference_name2.split(".")[0] + reference_name3 = reference_name3.split(".")[0] + reference_name4 = reference_name4.split(".")[0] + reference_name5 = reference_name5.split(".")[0] + + reference_gt1 = reference_gt1.split(".")[0] + reference_gt2 = reference_gt2.split(".")[0] + reference_gt3 = reference_gt3.split(".")[0] + + flow_forward_name = image1_name + "_forward" + flow_backward_name = image1_name + "_backward" + mask_name = image1_name + "_mask" + + item = ( + image1_name + ".jpg", + image2_name + ".jpg", + reference_video_name + ".jpg", + reference_name1 + ".JPEG", + reference_name2 + ".JPEG", + reference_name3 + ".JPEG", + reference_name4 + ".JPEG", + reference_name5 + ".JPEG", + flow_forward_name + ".flo", + flow_backward_name + ".flo", + mask_name + ".pgm", + reference_gt1 + ".jpg", + reference_gt2 + ".jpg", + reference_gt3 + ".jpg", + path, + ) + image_pairs.append(item) + + else: + raise (RuntimeError("Error when parsing pair_output_count.txt in subfolders of: " + path + "\n")) + + return image_pairs + + +class VideosDataset(torch.utils.data.Dataset): + def __init__( + self, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs = parse_images(self.data_root) + self.real_len = len(self.image_pairs) + print("##### parsing image pairs in %s: %d pairs #####" % (data_root, self.real_len)) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + image1_name, + image2_name, + reference_video_name, + reference_name1, + reference_name2, + reference_name3, + reference_name4, + reference_name5, + flow_forward_name, + flow_backward_name, + mask_name, + reference_gt1, + reference_gt2, + reference_gt3, + path, + ) = self.image_pairs[index] + try: + I1 = Image.open(os.path.join(path, "input_pad", image1_name)) + I2 = Image.open(os.path.join(path, "input_pad", image2_name)) + + I_reference_video = Image.open( + os.path.join(path, "reference_gt", random.choice([reference_gt1, reference_gt2, reference_gt3])) + ) + I_reference_video_real = Image.open( + os.path.join( + path, + "reference", + random.choice( + [reference_name1, reference_name2, reference_name3, reference_name4, reference_name5] + ), + ) + ) + + flow_forward = read_flow(os.path.join(path, "flow", flow_forward_name)) # numpy + flow_backward = read_flow(os.path.join(path, "flow", flow_backward_name)) # numpy + mask = Image.open(os.path.join(path, "mask", mask_name)) + + # binary mask + mask = np.array(mask) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + # transform + I1 = self.image_transform(I1) + I2 = self.image_transform(I2) + I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + I_reference_video_real = 
self.image_transform(self.CenterPad(I_reference_video_real)) + flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + mask = self.ToTensor(self.CenterCrop(mask)) + + if np.random.random() < self.real_reference_probability: + I_reference_output = I_reference_video_real + placeholder = torch.zeros_like(I1) + self_ref_flag = torch.zeros_like(I1) + else: + I_reference_output = I_reference_video + placeholder = I2 if np.random.random() < self.nonzero_placeholder_probability else torch.zeros_like(I1) + self_ref_flag = torch.ones_like(I1) + + outputs = [ + I1, + I2, + I_reference_output, + flow_forward, + flow_backward, + mask, + placeholder, + self_ref_flag, + ] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + + +def batch_lab2rgb_transpose_mc(img_l_mc, img_ab_mc, nrow=8): + if isinstance(img_l_mc, Variable): + img_l_mc = img_l_mc.data.cpu() + if isinstance(img_ab_mc, Variable): + img_ab_mc = img_ab_mc.data.cpu() + + if img_l_mc.is_cuda: + img_l_mc = img_l_mc.cpu() + if img_ab_mc.is_cuda: + img_ab_mc = img_ab_mc.cpu() + + assert img_l_mc.dim() == 4 and img_ab_mc.dim() == 4, "only for batch input" + + l_norm, ab_norm = 1.0, 1.0 + l_mean, ab_mean = 50.0, 0 + img_l = img_l_mc * l_norm + l_mean + img_ab = img_ab_mc * ab_norm + ab_mean + pred_lab = torch.cat((img_l, img_ab), dim=1) + grid_lab = vutils.make_grid(pred_lab, nrow=nrow).numpy().astype("float64") + return (np.clip(color.lab2rgb(grid_lab.transpose((1, 2, 0))), 0, 1) * 255).astype("uint8") diff --git a/BiSTNet-NTIRE2023/lib/videoloader_imagenet.py b/BiSTNet-NTIRE2023/lib/videoloader_imagenet.py new file mode 100644 index 0000000..e7eb3c2 --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/videoloader_imagenet.py @@ -0,0 +1,290 @@ +import os +import os.path as osp +import struct + +import cv2 +import numpy as np +import torch +import torch.utils.data as data +import torchvision.transforms as transforms +from PIL import Image +from scipy.ndimage import map_coordinates +from scipy.ndimage.filters import gaussian_filter +from utils.util_distortion import CenterPadCrop_numpy, Distortion_with_flow, Normalize, RGB2Lab, ToTensor + +cv2.setNumThreads(0) + + +def parse_images(dir, with_bad, with_mid): + print("dir is: ", dir) + dir = osp.expanduser(dir) + + image_pairs = [] + + no_mid_cnt, no_bad_cnt = 0, 0 + for target in sorted(os.listdir(dir)): + d = osp.join(dir, target) + if not osp.isdir(d): + continue + + pair_file = osp.join(d, "pairs.txt") + + if osp.exists(pair_file): + with open(pair_file, "r") as f: + lines = f.readlines() + for line in lines: + pair = line.strip().split(" ") + name0, postfix = pair[0].split(".") + name1, postfix = pair[1].split(".") + if float(pair[2]) > 0: + item0 = (dir, target, name0, name1, 2) + item1 = (dir, target, name1, name0, 2) + image_pairs.append(item0) + image_pairs.append(item1) + + else: + raise (RuntimeError("Found no pair.txt in subfolders of: " + d + "\n")) + + if with_mid: + pair_file = osp.join(d, "pairs_mid.txt") + + if osp.exists(pair_file): + with open(pair_file, "r") as f: + for line in f: + pair = line.strip().split(" ") + name0, postfix = pair[0].split(".") + name1, postfix = pair[1].split(".") + item0 = (dir, target, name0, name1, 0) + item1 = (dir, target, name1, name0, 0) + image_pairs.append(item0) + image_pairs.append(item1) + + else: + no_mid_cnt += 1 + + if with_bad: + 
pair_file = osp.join(d, "pairs_bad.txt") + + if osp.exists(pair_file): + with open(pair_file, "r") as f: + for line in f: + pair = line.strip().split(" ") + name0, postfix = pair[0].split(".") + name1, postfix = pair[1].split(".") + item0 = (dir, target, name0, name1, -1) + item1 = (dir, target, name1, name0, -1) + image_pairs.append(item0) + image_pairs.append(item1) + + else: + no_bad_cnt += 1 + + if no_bad_cnt > 0: + print("find no pairs_bad.txt in %d folders for dir: %s" % (no_bad_cnt, dir)) + + if no_mid_cnt > 0: + print("find no pairs_mid.txt in %d folders for dir: %s" % (no_mid_cnt, dir)) + + return image_pairs + + +def pil_loader(path): + with open(path, "rb") as f: + with Image.open(f) as img: + return img.convert("RGB") + + +def image_loader(path): + return pil_loader(path) + + +def combo5_loader(path, real_w, real_h): + with open(path, "rb") as f: + d = f.read(4) + im_sz = struct.unpack("i", d) + h = im_sz[0] + + d = f.read(4) + im_sz = struct.unpack("i", d) + w = im_sz[0] + + d = f.read(4) + im_sz = struct.unpack("i", d) + d = f.read(im_sz[0]) + file_bytes = np.asarray(bytearray(d), dtype=np.uint8) + img_data_ndarray = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) + img_data_ndarray = cv2.cvtColor(img_data_ndarray, cv2.COLOR_BGR2RGB) + warp_ba = Image.fromarray(img_data_ndarray) + img_data_ndarray = img_data_ndarray * 0 + warp_aba = Image.fromarray(img_data_ndarray) + + errs = [] + empty_array = np.zeros([np.array(warp_ba).shape[0], np.array(warp_ba).shape[1]]) + for _ in range(5): + err_ba = Image.fromarray(empty_array) + err_ab = Image.fromarray(empty_array) + errs.append([err_ba, err_ab]) + return errs, warp_ba, warp_aba + + +class VideosDataset_ImageNet(data.Dataset): + def __init__( + self, + data_root, + epoch, + image_size, + with_bad=False, + with_mid=False, + transforms_imagenet=None, + distortion_level=3, + brightnessjitter=0, + nonzero_placeholder_probability=0.5, + extra_reference_transform=None, + real_reference_probability=1, + ): + image_pairs = [] + curr_image_pairs = parse_images(data_root, with_bad, with_mid) + image_pairs += curr_image_pairs + print("##### parsing image_a pairs in %s: %d pairs #####" % (data_root, len(curr_image_pairs))) + if not image_pairs: + raise RuntimeError("Found 0 image_a pairs in all the data_roots") + + self.image_pairs = image_pairs + self.transforms_imagenet_raw = transforms_imagenet + self.extra_reference_transform = transforms.Compose(extra_reference_transform) + self.real_reference_probability = real_reference_probability + self.transforms_imagenet = transforms.Compose(transforms_imagenet) + self.epoch = epoch + self.image_size = image_size + self.real_len = len(self.image_pairs) + self.image_pairs *= epoch + self.distortion_level = distortion_level + self.distortion_transform = Distortion_with_flow() + self.brightnessjitter = brightnessjitter + self.flow_transform = transforms.Compose([CenterPadCrop_numpy(self.image_size), ToTensor()]) + self.nonzero_placeholder_probability = nonzero_placeholder_probability + self.ToTensor = ToTensor() + self.Normalize = Normalize() + + def __getitem__(self, index): + try: + image_id = 0 + pair_id = index + + combo_path = None + image_a_path = None + image_b_path = None + + image_names = ["", ""] + dir_root, cls_dir, image_names[0], image_names[1], is_good = self.image_pairs[pair_id] + sub_dir = osp.join(dir_root, cls_dir) + if is_good >= 1: + image_a_path = osp.join(sub_dir, "input", "%s.JPEG" % image_names[image_id]) + image_b_path = osp.join(sub_dir, "input", "%s.JPEG" % image_names[1 - 
image_id]) + elif is_good == 0: + image_a_path = osp.join(sub_dir, "input_mid", "%s.JPEG" % image_names[image_id]) + image_b_path = osp.join(sub_dir, "input_mid", "%s.JPEG" % image_names[1 - image_id]) + else: + image_a_path = osp.join(sub_dir, "input_bad", "%s.JPEG" % image_names[image_id]) + image_b_path = osp.join(sub_dir, "input_bad", "%s.JPEG" % image_names[1 - image_id]) + + if np.random.random() > 0.5: + image_a_path, image_b_path = image_b_path, image_a_path + + I1 = image_loader(image_a_path) + I2 = I1 + I_reference_video = I1 + I_reference_video_real = image_loader(image_b_path) + + ## generate the flow + height, width = np.array(I2).shape[0], np.array(I2).shape[1] + alpha = np.random.rand() * self.distortion_level + distortion_range = 50 + random_state = np.random.RandomState(None) + shape = self.image_size[0], self.image_size[1] + # dx: flow on the vertical direction; dy: flow on the horizontal direction + forward_dx = ( + gaussian_filter((random_state.rand(*shape) * 2 - 1), distortion_range, mode="constant", cval=0) + * alpha + * 1000 + ) + forward_dy = ( + gaussian_filter((random_state.rand(*shape) * 2 - 1), distortion_range, mode="constant", cval=0) + * alpha + * 1000 + ) + + for transform in self.transforms_imagenet_raw: + if type(transform) is RGB2Lab: + I1_raw = I1 + I1 = transform(I1) + for transform in self.transforms_imagenet_raw: + if type(transform) is RGB2Lab: + I2 = self.distortion_transform(I2, forward_dx, forward_dy) + I2_raw = I2 + I2 = transform(I2) + I2[0:1, :, :] = I2[0:1, :, :] + torch.randn(1) * self.brightnessjitter + + I_reference_video = self.extra_reference_transform(I_reference_video) + for transform in self.transforms_imagenet_raw: + I_reference_video = transform(I_reference_video) + + I_reference_video_real = self.transforms_imagenet(I_reference_video_real) + + flow_forward_raw = np.stack((forward_dy, forward_dx), axis=-1) + flow_backward_raw = np.zeros_like(flow_forward_raw) + flow_forward = self.flow_transform(flow_forward_raw) + flow_backward = self.flow_transform(flow_backward_raw) + + # update the mask for the pixels on the border + grid_x, grid_y = np.meshgrid(np.arange(self.image_size[0]), np.arange(self.image_size[1]), indexing="ij") + grid = np.stack((grid_y, grid_x), axis=-1) + grid_warp = grid + flow_forward_raw + location_y = grid_warp[:, :, 0].flatten() + location_x = grid_warp[:, :, 1].flatten() + I2_raw = np.array(I2_raw).astype(float) + I21_r = map_coordinates(I2_raw[:, :, 0], np.stack((location_x, location_y)), cval=-1).reshape( + (self.image_size[0], self.image_size[1]) + ) + I21_g = map_coordinates(I2_raw[:, :, 1], np.stack((location_x, location_y)), cval=-1).reshape( + (self.image_size[0], self.image_size[1]) + ) + I21_b = map_coordinates(I2_raw[:, :, 2], np.stack((location_x, location_y)), cval=-1).reshape( + (self.image_size[0], self.image_size[1]) + ) + I21_raw = np.stack((I21_r, I21_g, I21_b), axis=2) + mask = np.ones((self.image_size[0], self.image_size[1])) + mask[(I21_raw[:, :, 0] == -1) & (I21_raw[:, :, 1] == -1) & (I21_raw[:, :, 2] == -1)] = 0 + mask[abs(I21_raw - I1_raw).sum(axis=-1) > 50] = 0 + mask = self.ToTensor(mask) + + if np.random.random() < self.real_reference_probability: + I_reference_output = I_reference_video_real + placeholder = torch.zeros_like(I1) + self_ref_flag = torch.zeros_like(I1) + else: + I_reference_output = I_reference_video + placeholder = I2 if np.random.random() < self.nonzero_placeholder_probability else torch.zeros_like(I1) + self_ref_flag = torch.ones_like(I1) + + outputs = [ + I1, + I2, + 
I_reference_output, + flow_forward, + flow_backward, + mask, + placeholder, + self_ref_flag, + ] + + except Exception as e: + if combo_path is not None: + print("problem in ", combo_path) + print("problem in, ", image_a_path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) diff --git a/BiSTNet-NTIRE2023/lib/videoloader_woAugImg.py b/BiSTNet-NTIRE2023/lib/videoloader_woAugImg.py new file mode 100644 index 0000000..444db82 --- /dev/null +++ b/BiSTNet-NTIRE2023/lib/videoloader_woAugImg.py @@ -0,0 +1,1715 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +sys.path.insert(0, "..") +import os +import random + +import cv2 +import numpy as np +import torch +import torchvision.utils as vutils +from PIL import Image +from skimage import color +from torch.autograd import Variable +from utils.flowlib import read_flow +from utils.util_distortion import CenterPad +from utils.util_distortion import (CenterPad_threshold, Normalize, RGB2Lab, + ToTensor) +import lib.TrainTransforms as train_transforms +from torchvision.transforms import RandomCrop + +import utils as utils +import os,time,cv2,scipy.io +import scipy.misc as sic +import subprocess +import argparse +import torch +import imageio +import codecs +import torchvision.transforms as transforms + + +import lib.functional as F + +import math + +# SuperSlomo +from torchvision import transforms as superslomo_transforms +import pickle + +cv2.setNumThreads(0) + + +class RGB2Lab(object): + def __init__(self): + pass + + def __call__(self, inputs): + return color.rgb2lab(inputs) + + +class Normalize(object): + def __init__(self): + pass + + def __call__(self, inputs): + inputs[0:1, :, :] = F.normalize(inputs[0:1, :, :], 50, 1) + inputs[1:3, :, :] = F.normalize(inputs[1:3, :, :], (0, 0), (1, 1)) + return inputs + + +class ToTensor(object): + def __init__(self): + pass + + def __call__(self, inputs): + outputs = F.to_mytensor(inputs) # permute channel and transform to tensor + return outputs + + +class RandomErasing(object): + def __init__(self, probability=0.6, sl=0.05, sh=0.6): + self.probability = probability + self.sl = sl + self.sh = sh + + def __call__(self, img): + img = np.array(img) + if random.uniform(0, 1) > self.probability: + return Image.fromarray(img) + + area = img.shape[0] * img.shape[1] + h0 = img.shape[0] + w0 = img.shape[1] + channel = img.shape[2] + + h = int(round(random.uniform(self.sl, self.sh) * h0)) + w = int(round(random.uniform(self.sl, self.sh) * w0)) + + if w < img.shape[1] and h < img.shape[0]: + x1 = random.randint(0, img.shape[0] - h) + y1 = random.randint(0, img.shape[1] - w) + img[x1 : x1 + h, y1 : y1 + w, :] = np.random.rand(h, w, channel) * 255 + return Image.fromarray(img) + + return Image.fromarray(img) + + +class CenterCrop(object): + """ + center crop the numpy array + """ + + def __init__(self, image_size): + self.h0, self.w0 = image_size + + def __call__(self, input_numpy): + if input_numpy.ndim == 3: + h, w, channel = input_numpy.shape + output_numpy = np.zeros((self.h0, self.w0, channel)) + output_numpy = input_numpy[ + (h - self.h0) // 2 : (h - self.h0) // 2 + self.h0, (w - self.w0) // 2 : (w - self.w0) // 2 + self.w0, : + ] + else: + h, w = input_numpy.shape + output_numpy = np.zeros((self.h0, self.w0)) + output_numpy = input_numpy[ + (h - self.h0) // 2 : (h - self.h0) // 2 + self.h0, (w - self.w0) // 2 : (w - self.w0) // 2 + 
self.w0 + ] + return output_numpy + +# import from favc +def read_image_sequence(filename, num_frames): + filename = os.path.join(filename, 'pics') + print(filename) + assert 1==0 + file1 = os.path.splitext(os.path.basename(filename))[0] + ext = os.path.splitext(os.path.basename(filename))[1] + try: + img1 = imageio.imread(filename).astype(np.float32) + # img1 = sic.imread(filename).astype(np.float32) + imgh1 = img1 + except: + print("Cannot read the first frame.") + return None, None + if len(img1.shape) == 2: # discard grayscale images + return None, None + + img1 = cv2.cvtColor(img1, cv2.COLOR_RGB2GRAY) + img1 = np.expand_dims(img1,2) + + img_l_seq=img1/255.0 + img_h_seq=imgh1/255.0 + for i in range(num_frames-1): + filei = int(file1) + i + 1 + filenamei = os.path.split(filename)[0] + "/" + "{:>05}".format(filei).format() + ext + try: + imgi = imageio.imread(filenamei).astype(np.float32) + # imgi = sic.imread(filenamei).astype(np.float32) + imghi = imgi + except: + print("Cannot read the following %d frames\n"%(num_frames)) + return None, None + imgi = cv2.cvtColor(imgi, cv2.COLOR_RGB2GRAY) + imgi = np.expand_dims(imgi,2) + + img_l_seq = np.concatenate((img_l_seq,imgi/255.0),axis=2) + img_h_seq = np.concatenate((img_h_seq,imghi/255.0),axis=2) + + return img_l_seq, img_h_seq + +# import from favc +def read_flow_sequence(filename, num_frames): + file1 = os.path.splitext(os.path.basename(filename))[0] + folder = os.path.split(filename)[0] + ext = os.path.splitext(os.path.basename(filename))[1] + + filej = file1 + for i in range(num_frames-1): + filei = int(file1) + i + 1 + if "SPMC" in filename: + flow_forward = flowlib.read_flow(folder+"/Forward/{:>04}".format(filej).format()+"_"+"{:>04}".format(filei).format()+".flo") + flow_backward = flowlib.read_flow(folder+"/Backward/{:>04}".format(filei).format()+"_"+"{:>04}".format(filej).format()+".flo") + else: + # flow_forward = flowlib.read_flow(folder.replace("480p","Forward")+"/"+"{:>05}".format(filej).format()+"_"+"{:>05}".format(filei).format()+".flo") + # flow_backward = flowlib.read_flow(folder.replace("480p","Backward")+"/"+"{:>05}".format(filei).format()+"_"+"{:>05}".format(filej).format()+".flo") + flow_forward = flowlib.read_flow(folder+"/Forward"+"/"+"{:>05}".format(filej).format()+"_"+"{:>05}".format(filei).format()+".flo") + # flow_backward = flowlib.read_flow(folder+"/Backward"+"/"+"{:>05}".format(filei).format()+"_"+"{:>05}".format(filej).format()+".flo") + flow_backward = flowlib.read_flow(folder+"/Backward"+"/"+"{:>05}".format(filej).format()+"_"+"{:>05}".format(filei).format()+".flo") + + # print(folder+"/Forward"+"/"+"{:>05}".format(filej).format()+"_"+"{:>05}".format(filei).format()+".flo") + # print(folder+"/Backward"+"/"+"{:>05}".format(filei).format()+"_"+"{:>05}".format(filej).format()+".flo") + # assert 1==0 + filej = filei + if i == 0: + flow_forward_seq = flow_forward + flow_backward_seq = flow_backward + else: + flow_forward_seq = np.concatenate((flow_forward_seq, flow_forward), axis=2) + flow_backward_seq = np.concatenate((flow_backward_seq, flow_backward), axis=2) + + return flow_forward_seq, flow_backward_seq + +# occlusion_mask +def load_flow_to_numpy(path): + with open(path, 'rb') as f: + magic = np.fromfile(f, np.float32, count=1) + assert (202021.25 == magic), 'Magic number incorrect. 
Invalid .flo file' + h = np.fromfile(f, np.int32, count=1)[0] + w = np.fromfile(f, np.int32, count=1)[0] + data = np.fromfile(f, np.float32, count=2 * w * h) + data2D = np.resize(data, (w, h, 2)) + return data2D + +def occlusion_mask(im0, im1, flow10): + im1 = transforms.ToTensor()(im1).unsqueeze(0) + # warp_im0 = flow_warp_op.flow_warp(im0, flow10) + warp_im0 = warp(im0, flow10) + # print(type(warp_im0)) + warp_im0=warp_im0.cpu() + # diff = tf.abs(im1 - warp_im0) + diff = torch.abs(im1 - warp_im0) + # mask = tf.reduce_max(diff, axis=3, keep_dims=True) #计算一个张量的各个维度上元素的最大值。 + # mask = tf.less(mask, 0.05) + # diff_augmax = torch.max(diff, 3, keepdim=True) + mask = torch.le(diff, 0.05, out=None).int() #以元素方式返回(x B,H,W,2,为什么要这么变呢?是因为要配合grid_sample这个函数的使用 + # output = torch.nn.functional.grid_sample(image2, vgrid, align_corners=True) #由于pytorch版本较低没有align_corners参数 只能暂时删掉 + output = torch.nn.functional.grid_sample(image2, vgrid) + mask = torch.autograd.Variable(torch.ones(image2.size())).cuda() + # mask = torch.nn.functional.grid_sample(mask, vgrid, align_corners=True)#由于pytorch版本较低没有align_corners参数 只能暂时删掉 + mask = torch.nn.functional.grid_sample(mask, vgrid) + + ##2019 author + mask[mask<0.9999] = 0 + mask[mask>0] = 1 + ##2019 code + # mask = torch.floor(torch.clamp(mask, 0 ,1)) + + # # copy tensors to cpu to avoid oom + # image2 = image2.cpu() + # flo = flo.cpu() + # grid = grid.cpu() + # output = output.cpu() + # mask = mask.cpu() + + return output*mask + + +def parse_images(data_root): + image_pairs = [] + + clips = os.listdir(data_root) + for c_idx, clip in enumerate(clips): + # img + img_names = sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + + for i_idx in range(len(img_names)-1): + img1_name = os.path.join(data_root, clip, 'pics',img_names[i_idx]) + img2_name = os.path.join(data_root, clip, 'pics',img_names[i_idx+1]) + I_reference_output = os.path.join(data_root, clip, 'pics',img_names[0]) + # I_reference_output = os.path.join(data_root, clip, 'ref', os.listdir(os.path.join(data_root, clip, 'ref'))[0]) + # mask_name = os.path.join(data_root, clip, 'mask', os.listdir(os.path.join(data_root, clip, 'ref'))[0]) + flow_forward_name = os.path.join(data_root, clip, 'flo', 'Forward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + # flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx+1].split(".")[0] + '_' + img_names[i_idx].split(".")[0] + '.flo' ) + + # print(c_idx, i_idx, clip, img1_name, img2_name, I_reference_output, flow_forward_name, flow_backward_name) + + item = ( + img1_name, + img2_name, + I_reference_output, + flow_forward_name, + flow_backward_name, + # mask_name, + ) + image_pairs.append(item) + + return image_pairs + +def parse_images_wBasicVSR(data_root, max_seq_len=None): + + clips = os.listdir(data_root) + + if max_seq_len is None: + result = [] + for c_idx, clip in enumerate(clips): + # img + img_names = sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + + image_pairs = [] + for i_idx in range(len(img_names)): + img1_name = os.path.join(data_root, clip, 'pics',img_names[i_idx]) + I_reference_output = os.path.join(data_root, clip, 'pics',img_names[0]) + item = img1_name + image_pairs.append(item) + + result.append(image_pairs) + + else: + ''' + list: + result[0] = FANGHUA_V1-001 + result[1] = FANGHUA_V1-002 + result[0][0] = 
'/home/ysls/Desktop/data/yangyixin/Fanghua_traindata_frame25/FANGHUA_V1-002/pics/0001.png' + result[0][1] = '/home/ysls/Desktop/data/yangyixin/Fanghua_traindata_frame25/FANGHUA_V1-002/pics/0002.png' + + ''' + result = [] + n_clips = [] + for c_idx, clip in enumerate(clips): + # img + img_names = sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + + image_pairs = [] + for i in range(0, len(img_names), max_seq_len): + if i == max_seq_len * (len(img_names) // max_seq_len): + # corner case: make sure every frame in same length + n_image_frame_name = img_names[-max_seq_len:] + else: + n_image_frame_name = img_names[i:i + max_seq_len] + img1_name = [os.path.join(data_root, clip, 'pics',img_name) for img_name in n_image_frame_name] + item = img1_name + image_pairs.append(item) + + # print(i, item) + # print(len(image_pairs)) + # assert 1==0 + + assert len(image_pairs) == math.ceil(len(img_names) / max_seq_len) + + result.append(image_pairs) + n_clips.append(math.ceil(len(img_names) / max_seq_len)) + + # print(len(result)) + # print(len(result[1])) + # print(n_clips) + assert len(result)==len(n_clips), 'n_clip中记录的是每个Video切分出来的等长clips的数目' + return result, n_clips + +def parse_images_0618_tcvc_v6(data_root, max_num_sequence, flag_use_precompute_flo=False): + image_pairs = [] + clips = os.listdir(data_root) + for c_idx, clip in enumerate(clips): + # img + img_list = [] + img_seg_list = [] + flow_forward_list = [] + flow_backward_list = [] + img_names = sorted(os.listdir(os.path.join(data_root, clip, 'pics')))[:max_num_sequence] if max_num_sequence >= 2 else sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + + Ireference1_name = os.path.join(data_root, clip, 'pics', img_names[0]) + Ireference2_name = os.path.join(data_root, clip, 'pics', img_names[-1]) + + for i_idx in range(len(img_names)): + img1_name = os.path.join(data_root, clip, 'pics', img_names[i_idx]) + img1_seg_name = os.path.join(data_root, clip, 'seg_prop', img_names[i_idx]).split('.')[0]+'.pkl' + + img_list.append(img1_name) + img_seg_list.append(img1_seg_name) + + if i_idx < len(img_names) - 1 and flag_use_precompute_flo: + flow_forward_name = os.path.join(data_root, clip, 'flo', 'Forward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + # flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx+1].split(".")[0] + '_' + img_names[i_idx].split(".")[0] + '.flo' ) + + flow_forward_list.append(flow_forward_name) + flow_backward_list.append(flow_backward_name) + + # print(c_idx, i_idx, clip, img1_name, img2_name, I_reference_output, flow_forward_name, flow_backward_name) + + item = ( + img_list, + img_seg_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # mask_name, + ) + image_pairs.append(item) + + return image_pairs, len(img_names) + +def parse_images_0618_tcvc(data_root, max_num_sequence, flag_use_precompute_flo=False): + image_pairs = [] + clips = os.listdir(data_root) + for c_idx, clip in enumerate(clips): + # img + img_list = [] + flow_forward_list = [] + flow_backward_list = [] + img_names = sorted(os.listdir(os.path.join(data_root, clip, 'pics')))[:max_num_sequence] if max_num_sequence>=2 else sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + + Ireference1_name = os.path.join(data_root, clip, 'pics',img_names[0]) + Ireference2_name = 
os.path.join(data_root, clip, 'pics',img_names[-1]) + + for i_idx in range(len(img_names)): + img1_name = os.path.join(data_root, clip, 'pics',img_names[i_idx]) + img_list.append(img1_name) + + # print(i_idx, img1_name, img2_name, I_reference_output) + # assert 1==0 + + if i_idx < len(img_names) - 1 and flag_use_precompute_flo: + flow_forward_name = os.path.join(data_root, clip, 'flo', 'Forward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + # flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx+1].split(".")[0] + '_' + img_names[i_idx].split(".")[0] + '.flo' ) + + flow_forward_list.append(flow_forward_name) + flow_backward_list.append(flow_backward_name) + + # print(c_idx, i_idx, clip, img1_name, img2_name, I_reference_output, flow_forward_name, flow_backward_name) + + item = ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # mask_name, + ) + image_pairs.append(item) + + return image_pairs, len(img_names) + +def parse_images_20230227_ntire23(data_root, max_num_sequence, flag_use_precompute_flo=False): + image_pairs = [] + clips = os.listdir(data_root) + for c_idx, clip in enumerate(clips): + # img + img_list = [] + flow_forward_list = [] + flow_backward_list = [] + img_names = sorted(os.listdir(os.path.join(data_root, clip)))[:max_num_sequence] if max_num_sequence>=2 else sorted(os.listdir(os.path.join(data_root, clip))) + + Ireference1_name = os.path.join(data_root, clip, img_names[0]) + Ireference2_name = os.path.join(data_root, clip, img_names[-1]) + + for i_idx in range(len(img_names)): + img1_name = os.path.join(data_root, clip, img_names[i_idx]) + img_list.append(img1_name) + + # print(i_idx, img1_name, img2_name, I_reference_output) + # assert 1==0 + + if i_idx < len(img_names) - 1 and flag_use_precompute_flo: + flow_forward_name = os.path.join(data_root, clip, 'flo', 'Forward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + # flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx+1].split(".")[0] + '_' + img_names[i_idx].split(".")[0] + '.flo' ) + + flow_forward_list.append(flow_forward_name) + flow_backward_list.append(flow_backward_name) + + # print(c_idx, i_idx, clip, img1_name, img2_name, I_reference_output, flow_forward_name, flow_backward_name) + + item = ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # mask_name, + ) + image_pairs.append(item) + + return image_pairs, len(img_names) + +class VideosDataset_wBasicVSR(torch.utils.data.Dataset): + def __init__( + self, + data_root, + epoch, + image_size, + max_seq_len = None, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.max_seq_len = max_seq_len + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs, self.n_clips = parse_images_wBasicVSR(self.data_root, self.max_seq_len) + 
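# The lookup in __getitem__ below maps a flat dataset index back to a
# (video, clip) pair using the per-video clip counts in self.n_clips.
# Illustrative sketch of that cumsum/digitize lookup (numbers are made up,
# not from this repo):
#
#   import numpy as np
#   n_clips = [3, 5, 2]                      # clips carved out of each video
#   bins = np.cumsum([0] + n_clips)          # array([ 0,  3,  8, 10])
#   index = 6
#   n_video = np.digitize(index, bins) - 1   # -> 1  (second video)
#   n_video_clip = index - bins[n_video]     # -> 3  (its fourth clip)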
self.real_len = len(self.image_pairs) + print("##### parsing image pairs in %s: %d pairs #####" % (data_root, self.real_len)) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + # try: + + tmp = self.n_clips.copy() + tmp.insert(0, 0) + bin = np.cumsum(tmp) + + n_video = np.digitize(index, bin) - 1 + n_video_clip = index - bin[n_video] + + video_clip_names = self.image_pairs[n_video][n_video_clip] + + + # print(video_clip_names) + # assert 1==0 + + I_reference_name = self.image_pairs[n_video][0][0] + I_reference_video = Image.open(I_reference_name) + # transform + I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + + # print(I_reference_name) + + I_all_lab = [] + + for idx, image1_name in enumerate(video_clip_names): + # print(image1_name) + I1 = Image.open(image1_name) + # transform + I1 = self.image_transform(self.CenterPad(I1)) + I_all_lab.append(I1.unsqueeze_(dim=0)) + + # assert 1==0 + I_all_lab = torch.cat(I_all_lab, dim=0) + + # print(I_all_lab.shape) + # assert 1==0 + + I_reference_output = I_reference_video + placeholder = torch.zeros_like(I_all_lab) + self_ref_flag = torch.ones_like(I_all_lab) + # self_ref_flag = self_ref_flag[1:,:,:,:] + + outputs = [ + I_all_lab, + I_reference_output, + placeholder, + self_ref_flag, + ] + + # except Exception as e: + # print("problem in, ", path) + # print(e) + # return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return sum(self.n_clips) + + + +class VideosDataset(torch.utils.data.Dataset): + def __init__( + self, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs = parse_images(self.data_root) + self.real_len = len(self.image_pairs) + print("##### parsing image pairs in %s: %d pairs #####" % (data_root, self.real_len)) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + image1_name, + image2_name, + reference_video_name, + flow_forward_name, + flow_backward_name, + # mask_name, + + # image1_name, + # image2_name, + # reference_video_name, + # reference_name1, + # reference_name2, + # reference_name3, + # reference_name4, + # reference_name5, + # flow_forward_name, + # flow_backward_name, + # mask_name, + # reference_gt1, + # reference_gt2, + # reference_gt3, + # path, + ) = self.image_pairs[index] + try: + I1 = Image.open(image1_name) + I2 = Image.open(image2_name) + + # print(image1_name, np.shape(I2)) + + I_reference_video = Image.open(reference_video_name) + # I_reference_video = Image.open(image2_name) + + flow_forward = read_flow(flow_forward_name) # numpy + flow_backward = read_flow(flow_backward_name) # numpy + + # calculate occlusion_mask + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'),flow_backward) + cmap_X = cmap_X.squeeze(0) + cmap_X = 
cmap_X.numpy() + mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + # transform + I1 = self.image_transform(self.CenterPad(I1)) + I2 = self.image_transform(self.CenterPad(I2)) + I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + mask = self.ToTensor(self.CenterCrop(mask)) + + I_reference_output = I_reference_video + placeholder = torch.zeros_like(I1) + self_ref_flag = torch.ones_like(I1) + + outputs = [ + I1, + I2, + I_reference_output, + flow_forward, + flow_backward, + mask, + placeholder, + self_ref_flag, + ] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + +# 0720修改,tcvc_v2版本的基础上,加了segmask +class VideosDataset_0618_tcvc_v6(torch.utils.data.Dataset): + def __init__( + self, + flag_use_precompute_flo, + max_num_sequence, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.flag_use_precompute_flo = flag_use_precompute_flo + self.max_num_sequence = max_num_sequence + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs, self.n_imgs_pclip = parse_images_0618_tcvc_v6(self.data_root, self.max_num_sequence, self.flag_use_precompute_flo) + self.real_len = len(self.image_pairs) + print("##### parsing image clips in %s: %d clips, n_imgs_pclip %s total_iters_perEpoch %s ##### :" % (data_root, len(self.image_pairs), self.n_imgs_pclip, len(self.image_pairs)*self.n_imgs_pclip)) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + img_list, + img_seg_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # image1_name, + # image2_name, + # reference_video_name, + # flow_forward_name, + # flow_backward_name, + # # mask_name, + ) = self.image_pairs[index] + try: + # Input Images + I_list = [Image.open(image_name).convert('RGB') for image_name in img_list] + I_seg_list = [open(image_seg_name, 'rb') for image_seg_name in img_seg_list] + + # Reference Image + I1reference_video = Image.open(Ireference1_name).convert('RGB') + I2reference_video = Image.open(Ireference2_name).convert('RGB') + + # Optical flow + flow_forward = [read_flow(flow_forward_name) for flow_forward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + flow_backward = [read_flow(flow_backward_name) for flow_backward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + + trans_backward = superslomo_transforms.Compose([superslomo_transforms.Resize([216, 384])]) + # calculate occlusion_mask + if self.flag_use_precompute_flo: + mask_list = [] + for i_idx in range(len(I_list)-1): + I1 = I_list[i_idx] + I2 = I_list[i_idx+1] + + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'), flow_backward[i_idx]) + 
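# occlusion_mask() is expected to warp the next frame back to the current one
# with the backward flow and flag pixels where the warp disagrees; those
# pixels are zeroed out of the training mask further below. A simplified,
# self-contained backward-warping sketch with grid_sample (illustrative only,
# not the helper defined above; assumes flow is given in pixels as (dx, dy)):
#
#   import torch
#   import torch.nn.functional as F
#
#   def backward_warp(img, flow):
#       # img: (N, C, H, W), flow: (N, 2, H, W)
#       n, _, h, w = img.shape
#       ys, xs = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij")
#       grid = torch.stack((xs, ys), dim=0).float().unsqueeze(0) + flow
#       gx = 2.0 * grid[:, 0] / max(w - 1, 1) - 1.0   # normalise to [-1, 1]
#       gy = 2.0 * grid[:, 1] / max(h - 1, 1) - 1.0
#       vgrid = torch.stack((gx, gy), dim=-1)         # (N, H, W, 2)
#       return F.grid_sample(img, vgrid, align_corners=True)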
cmap_X = cmap_X.squeeze(0) + cmap_X = cmap_X.numpy() + mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + mask_list.append(mask) + + # transform + I_list = [self.image_transform(self.CenterPad(I1)) for I1 in I_list] + I_seg_list = [trans_backward(torch.from_numpy(np.array(pickle.load(I1_seg)))[0:21, :, :].type(torch.FloatTensor)) for I1_seg in I_seg_list] + + I1reference_video = self.image_transform(self.CenterPad(I1reference_video)) + I2reference_video = self.image_transform(self.CenterPad(I2reference_video)) + flow_forward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + flow_backward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + mask_list = [self.ToTensor(self.CenterCrop(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + placeholder = torch.zeros_like(I_list[0]) + self_ref_flag = torch.ones_like(I_list[0]) + + # I1 = self.image_transform(self.CenterPad(I1)) + # I2 = self.image_transform(self.CenterPad(I2)) + # I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + # flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + # flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + # mask = self.ToTensor(self.CenterCrop(mask)) + + # I_reference_output = I_reference_video + # placeholder = torch.zeros_like(I1) + # self_ref_flag = torch.ones_like(I1) + + outputs = [ + I_list, + I_seg_list, + I1reference_video, + I2reference_video, + flow_forward, + flow_backward, + mask_list, + placeholder, + self_ref_flag, + ] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + # return self.real_len + + +# 0720修改,tcvc_v2版本的基础上,加了segmask +class VideosDataset_0618_tcvc_v7(torch.utils.data.Dataset): + def __init__( + self, + flag_use_precompute_flo, + max_num_sequence, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.flag_use_precompute_flo = flag_use_precompute_flo + self.max_num_sequence = max_num_sequence + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + self.image_size = image_size + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs, self.n_imgs_pclip = parse_images_0618_tcvc_v6(self.data_root, self.max_num_sequence, self.flag_use_precompute_flo) + self.real_len = len(self.image_pairs) + print("##### parsing image clips in %s: %d clips, n_imgs_pclip %s total_iters_perEpoch %s ##### :" % (data_root, len(self.image_pairs), self.n_imgs_pclip, len(self.image_pairs)*self.n_imgs_pclip)) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + img_list, + img_seg_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + ) = self.image_pairs[index] + try: + ### define transforms based on RandomCrop + self_RandomCrop = 
RandomCrop(self.image_size) + transforms_video_ref = [ + self_RandomCrop, + RGB2Lab(), + ToTensor(), + Normalize(), + ] + image_transform_ref = train_transforms.Compose(transforms_video_ref) + + + # Input Images + I_list = [Image.open(image_name).convert('RGB') for image_name in img_list] + I_seg_list = [open(image_seg_name, 'rb') for image_seg_name in img_seg_list] + + # Reference Image + I1reference_video = Image.open(Ireference1_name).convert('RGB') + I2reference_video = Image.open(Ireference2_name).convert('RGB') + + # Optical flow + flow_forward = [read_flow(flow_forward_name) for flow_forward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + flow_backward = [read_flow(flow_backward_name) for flow_backward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + + # calculate occlusion_mask + if self.flag_use_precompute_flo: + mask_list = [] + for i_idx in range(len(I_list)-1): + I1 = I_list[i_idx] + I2 = I_list[i_idx+1] + + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'), flow_backward[i_idx]) + cmap_X = cmap_X.squeeze(0) + cmap_X = cmap_X.numpy() + mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + mask_list.append(mask) + + # transform + I_list = [image_transform_ref(I1) for I1 in I_list] + I_seg_list = [self_RandomCrop(torch.from_numpy(np.array(pickle.load(I1_seg)))[0:21, :, :].type(torch.FloatTensor)) for I1_seg in I_seg_list] + I1reference_video = image_transform_ref(I1reference_video) + I2reference_video = image_transform_ref(I2reference_video) + + # print(I_list[0].shape, I_seg_list[0].shape, np.shape(flow_forward[0]));assert 1==0 + + flow_forward = [self_RandomCrop(self.ToTensor(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + flow_backward = [self_RandomCrop(self.ToTensor(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + mask_list = [self_RandomCrop(self.ToTensor(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + # flow_forward = [self.ToTensor(self_RandomCrop(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + # flow_backward = [self.ToTensor(self_RandomCrop(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + # mask_list = [self.ToTensor(self_RandomCrop(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + + placeholder = torch.zeros_like(I_list[0]) + self_ref_flag = torch.ones_like(I_list[0]) + + + # I_list = [self.image_transform(self.CenterPad(I1)) for I1 in I_list] + # I_seg_list = [trans_backward(torch.from_numpy(np.array(pickle.load(I1_seg)))[0:21, :, :].type(torch.FloatTensor)) for I1_seg in I_seg_list] + # I1reference_video = self.image_transform(self.CenterPad(I1reference_video)) + # I2reference_video = self.image_transform(self.CenterPad(I2reference_video)) + # flow_forward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + # flow_backward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + # mask_list = [self.ToTensor(self.CenterCrop(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + # placeholder = torch.zeros_like(I_list[0]) + # self_ref_flag = torch.ones_like(I_list[0]) + + + outputs = [ + I_list, + I_seg_list, + I1reference_video, + I2reference_video, + flow_forward, + flow_backward, + mask_list, + placeholder, + self_ref_flag, + 
] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + # return self.real_len + +# 1005修改版本,dataloader处理修改,使用重复epoch v2 +def parse_images_0930_DavidVideovo_repeatEpoch(data_root, max_num_sequence, flag_use_precompute_flo=False, epoch=4000): + + image_pairs = [] + clips = os.listdir(data_root) + + for c_idx, clip in enumerate(clips): + # generate random number + RandomNum = random.randint(0,len(os.listdir(os.path.join(data_root, clip)))-max_num_sequence) + + # img + img_list = [] + flow_forward_list = [] + flow_backward_list = [] + # img_names = sorted(os.listdir(os.path.join(data_root, clip)))[:max_num_sequence] if max_num_sequence>=2 else sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + img_names = sorted(os.listdir(os.path.join(data_root, clip)))[RandomNum : RandomNum + max_num_sequence] if max_num_sequence>=2 else sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + + # debug code + # print(idx_epoch, c_idx, clip, RandomNum, img_names) + + Ireference1_name = os.path.join(data_root, clip, img_names[0]) + Ireference2_name = os.path.join(data_root, clip, img_names[-1]) + + for i_idx in range(len(img_names)): + img1_name = os.path.join(data_root, clip, img_names[i_idx]) + img_list.append(img1_name) + + # print(i_idx, img1_name, img2_name, I_reference_output) + # assert 1==0 + + if i_idx < len(img_names) - 1 and flag_use_precompute_flo: + flow_forward_name = os.path.join(data_root, clip, 'flo', 'Forward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + # flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx+1].split(".")[0] + '_' + img_names[i_idx].split(".")[0] + '.flo' ) + + flow_forward_list.append(flow_forward_name) + flow_backward_list.append(flow_backward_name) + + # print(c_idx, i_idx, clip, img1_name, img2_name, I_reference_output, flow_forward_name, flow_backward_name) + + item = ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # mask_name, + ) + image_pairs.append(item) + + return image_pairs, len(img_names) + + + +def parse_images_0930_DavidVideovo(data_root, max_num_sequence, flag_use_precompute_flo=False, epoch=4000): + + image_pairs = [] + clips = os.listdir(data_root) + for idx_epoch in range(epoch): + for c_idx, clip in enumerate(clips): + # generate random number + RandomNum = random.randint(0,len(os.listdir(os.path.join(data_root, clip)))-max_num_sequence) + + # img + img_list = [] + flow_forward_list = [] + flow_backward_list = [] + # img_names = sorted(os.listdir(os.path.join(data_root, clip)))[:max_num_sequence] if max_num_sequence>=2 else sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + img_names = sorted(os.listdir(os.path.join(data_root, clip)))[RandomNum : RandomNum + max_num_sequence] if max_num_sequence>=2 else sorted(os.listdir(os.path.join(data_root, clip, 'pics'))) + + # debug code + # print(idx_epoch, c_idx, clip, RandomNum, img_names) + + Ireference1_name = os.path.join(data_root, clip, img_names[0]) + Ireference2_name = os.path.join(data_root, clip, img_names[-1]) + + for i_idx in range(len(img_names)): + img1_name = os.path.join(data_root, clip, img_names[i_idx]) + img_list.append(img1_name) + + # 
print(i_idx, img1_name, img2_name, I_reference_output) + # assert 1==0 + + if i_idx < len(img_names) - 1 and flag_use_precompute_flo: + flow_forward_name = os.path.join(data_root, clip, 'flo', 'Forward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + # flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx].split(".")[0] + '_' + img_names[i_idx+1].split(".")[0] + '.flo' ) + flow_backward_name = os.path.join(data_root, clip, 'flo', 'Backward', img_names[i_idx+1].split(".")[0] + '_' + img_names[i_idx].split(".")[0] + '.flo' ) + + flow_forward_list.append(flow_forward_name) + flow_backward_list.append(flow_backward_name) + + # print(c_idx, i_idx, clip, img1_name, img2_name, I_reference_output, flow_forward_name, flow_backward_name) + + item = ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # mask_name, + ) + image_pairs.append(item) + + return image_pairs, len(img_names) + + + +# 1005修改,原型VideosDataset_0618_tcvc, 2参考帧, DAVIS+VIDEOVO 修改了dataloader,每个epoch内部重复 +class VideosDataset_0930_DavisVideovo_repeatEpoch(torch.utils.data.Dataset): + def __init__( + self, + flag_use_precompute_flo, + max_num_sequence, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.flag_use_precompute_flo = flag_use_precompute_flo + self.max_num_sequence = max_num_sequence + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs, self.n_imgs_pclip = parse_images_0930_DavidVideovo_repeatEpoch(self.data_root, self.max_num_sequence, self.flag_use_precompute_flo, self.epoch) + self.real_len = len(self.image_pairs) + print("##### parsing image clips in %s: %d clips, n_imgs_pclip %s total_iters_perEpoch %s ##### :" % (data_root, len(self.image_pairs), self.n_imgs_pclip, len(self.image_pairs))) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # image1_name, + # image2_name, + # reference_video_name, + # flow_forward_name, + # flow_backward_name, + # # mask_name, + ) = self.image_pairs[index] + try: + # Input Images + I_list = [Image.open(image_name).convert('RGB') for image_name in img_list] + + # Reference Image + I1reference_video = Image.open(Ireference1_name).convert('RGB') + I2reference_video = Image.open(Ireference2_name).convert('RGB') + + + # Optical flow + flow_forward = [read_flow(flow_forward_name) for flow_forward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + flow_backward = [read_flow(flow_backward_name) for flow_backward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + + # calculate occlusion_mask + if self.flag_use_precompute_flo: + mask_list = [] + for i_idx in range(len(I_list)-1): + I1 = I_list[i_idx] + I2 = I_list[i_idx+1] + + # print(len(I_list), type(I1));assert 1==0 + + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'), flow_backward[i_idx]) + cmap_X = cmap_X.squeeze(0) + cmap_X = cmap_X.numpy() + 
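# The occlusion map behaves like an 8-bit grayscale image: values near 255
# mean the backward-warped next frame matches the current frame. The
# thresholding below keeps only near-perfect matches as valid pixels.
# Tiny illustrative example (values made up):
#
#   import numpy as np
#   m = np.array([10., 200., 245., 255.])
#   m[m < 240] = 0
#   m[m >= 240] = 1
#   # m -> array([0., 0., 1., 1.])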
mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + mask_list.append(mask) + + # transform + I_list = [self.image_transform(self.CenterPad(I1)) for I1 in I_list] + I1reference_video = self.image_transform(self.CenterPad(I1reference_video)) + I2reference_video = self.image_transform(self.CenterPad(I2reference_video)) + flow_forward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + flow_backward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + mask_list = [self.ToTensor(self.CenterCrop(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + placeholder = torch.zeros_like(I_list[0]) + self_ref_flag = torch.ones_like(I_list[0]) + + # I1 = self.image_transform(self.CenterPad(I1)) + # I2 = self.image_transform(self.CenterPad(I2)) + # I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + # flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + # flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + # mask = self.ToTensor(self.CenterCrop(mask)) + + # I_reference_output = I_reference_video + # placeholder = torch.zeros_like(I1) + # self_ref_flag = torch.ones_like(I1) + + outputs = [ + I_list, + I1reference_video, + I2reference_video, + flow_forward, + flow_backward, + mask_list, + placeholder, + self_ref_flag, + ] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + # return self.real_len + + +# 0930修改,原型VideosDataset_0618_tcvc, 2参考帧, DAVIS+VIDEOVO +class VideosDataset_0930_DavisVideovo(torch.utils.data.Dataset): + def __init__( + self, + flag_use_precompute_flo, + max_num_sequence, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.flag_use_precompute_flo = flag_use_precompute_flo + self.max_num_sequence = max_num_sequence + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs, self.n_imgs_pclip = parse_images_0930_DavidVideovo(self.data_root, self.max_num_sequence, self.flag_use_precompute_flo, self.epoch) + self.real_len = len(self.image_pairs) + print("##### parsing image clips in %s: %d clips, n_imgs_pclip %s total_iters_perEpoch %s ##### :" % (data_root, len(self.image_pairs), self.n_imgs_pclip, len(self.image_pairs))) + # self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # image1_name, + # image2_name, + # reference_video_name, + # flow_forward_name, + # flow_backward_name, + # # mask_name, + ) = self.image_pairs[index] + try: + # Input Images + I_list = [Image.open(image_name).convert('RGB') for image_name in img_list] + + # Reference Image + I1reference_video = Image.open(Ireference1_name).convert('RGB') + 
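# Ireference1_name and Ireference2_name were set in
# parse_images_0930_DavidVideovo to the first and the last frame of the
# sampled window, so the two images loaded here serve as the forward and
# backward color references between which chrominance is propagated.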
I2reference_video = Image.open(Ireference2_name).convert('RGB') + + + # Optical flow + flow_forward = [read_flow(flow_forward_name) for flow_forward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + flow_backward = [read_flow(flow_backward_name) for flow_backward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + + # calculate occlusion_mask + if self.flag_use_precompute_flo: + mask_list = [] + for i_idx in range(len(I_list)-1): + I1 = I_list[i_idx] + I2 = I_list[i_idx+1] + + # print(len(I_list), type(I1));assert 1==0 + + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'), flow_backward[i_idx]) + cmap_X = cmap_X.squeeze(0) + cmap_X = cmap_X.numpy() + mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + mask_list.append(mask) + + # transform + I_list = [self.image_transform(self.CenterPad(I1)) for I1 in I_list] + I1reference_video = self.image_transform(self.CenterPad(I1reference_video)) + I2reference_video = self.image_transform(self.CenterPad(I2reference_video)) + flow_forward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + flow_backward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + mask_list = [self.ToTensor(self.CenterCrop(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + placeholder = torch.zeros_like(I_list[0]) + self_ref_flag = torch.ones_like(I_list[0]) + + # I1 = self.image_transform(self.CenterPad(I1)) + # I2 = self.image_transform(self.CenterPad(I2)) + # I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + # flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + # flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + # mask = self.ToTensor(self.CenterCrop(mask)) + + # I_reference_output = I_reference_video + # placeholder = torch.zeros_like(I1) + # self_ref_flag = torch.ones_like(I1) + + outputs = [ + I_list, + I1reference_video, + I2reference_video, + flow_forward, + flow_backward, + mask_list, + placeholder, + self_ref_flag, + ] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + # return self.real_len + + +# 20230227修改,ntire23 +class VideosDataset_20230227_ntire23(torch.utils.data.Dataset): + def __init__( + self, + flag_use_precompute_flo, + max_num_sequence, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.flag_use_precompute_flo = flag_use_precompute_flo + self.max_num_sequence = max_num_sequence + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs, self.n_imgs_pclip = parse_images_20230227_ntire23(self.data_root, self.max_num_sequence, self.flag_use_precompute_flo) + self.real_len = len(self.image_pairs) + print("##### parsing image clips in %s: %d clips, n_imgs_pclip %s total_iters_perEpoch %s ##### :" % (data_root, len(self.image_pairs), self.n_imgs_pclip, len(self.image_pairs)*self.n_imgs_pclip)) + self.image_pairs *= epoch + 
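# Replicating the parsed clip list `epoch` times makes __len__ report
# real_len * epoch, so a single DataLoader pass over this dataset already
# corresponds to `epoch` passes over the underlying clips.
# Tiny illustrative example (values made up):
#
#   pairs = ["clip_a", "clip_b"]
#   pairs *= 3
#   # pairs -> ['clip_a', 'clip_b', 'clip_a', 'clip_b', 'clip_a', 'clip_b']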
self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # image1_name, + # image2_name, + # reference_video_name, + # flow_forward_name, + # flow_backward_name, + # # mask_name, + ) = self.image_pairs[index] + try: + # Input Images + I_list = [Image.open(image_name).convert('RGB') for image_name in img_list] + + # Reference Image + I1reference_video = Image.open(Ireference1_name).convert('RGB') + I2reference_video = Image.open(Ireference2_name).convert('RGB') + + + # Optical flow + flow_forward = [read_flow(flow_forward_name) for flow_forward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + flow_backward = [read_flow(flow_backward_name) for flow_backward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + + # calculate occlusion_mask + if self.flag_use_precompute_flo: + mask_list = [] + for i_idx in range(len(I_list)-1): + I1 = I_list[i_idx] + I2 = I_list[i_idx+1] + + # print(len(I_list), type(I1));assert 1==0 + + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'), flow_backward[i_idx]) + cmap_X = cmap_X.squeeze(0) + cmap_X = cmap_X.numpy() + mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + mask_list.append(mask) + + # transform + I_list = [self.image_transform(self.CenterPad(I1)) for I1 in I_list] + I1reference_video = self.image_transform(self.CenterPad(I1reference_video)) + I2reference_video = self.image_transform(self.CenterPad(I2reference_video)) + flow_forward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + flow_backward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + mask_list = [self.ToTensor(self.CenterCrop(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + placeholder = torch.zeros_like(I_list[0]) + self_ref_flag = torch.ones_like(I_list[0]) + + # I1 = self.image_transform(self.CenterPad(I1)) + # I2 = self.image_transform(self.CenterPad(I2)) + # I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + # flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + # flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + # mask = self.ToTensor(self.CenterCrop(mask)) + + # I_reference_output = I_reference_video + # placeholder = torch.zeros_like(I1) + # self_ref_flag = torch.ones_like(I1) + + outputs = [ + I_list, + I1reference_video, + I2reference_video, + flow_forward, + flow_backward, + mask_list, + placeholder, + self_ref_flag, + ] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + # return self.real_len + + + +# 0618修改,视频序列首尾两帧作为参考帧进行训练 +class VideosDataset_0618_tcvc(torch.utils.data.Dataset): + def __init__( + self, + flag_use_precompute_flo, + max_num_sequence, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.flag_use_precompute_flo = flag_use_precompute_flo + self.max_num_sequence = max_num_sequence + 
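# In parse_images_0618_tcvc a max_num_sequence >= 2 truncates every clip to
# its first max_num_sequence frames; any smaller value keeps the whole clip.
# The first and last frames of the resulting clip become the two reference
# frames used by this dataset.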
self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs, self.n_imgs_pclip = parse_images_0618_tcvc(self.data_root, self.max_num_sequence, self.flag_use_precompute_flo) + self.real_len = len(self.image_pairs) + print("##### parsing image clips in %s: %d clips, n_imgs_pclip %s total_iters_perEpoch %s ##### :" % (data_root, len(self.image_pairs), self.n_imgs_pclip, len(self.image_pairs)*self.n_imgs_pclip)) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + img_list, + Ireference1_name, + Ireference2_name, + flow_forward_list, + flow_backward_list, + # image1_name, + # image2_name, + # reference_video_name, + # flow_forward_name, + # flow_backward_name, + # # mask_name, + ) = self.image_pairs[index] + try: + # Input Images + I_list = [Image.open(image_name).convert('RGB') for image_name in img_list] + + # Reference Image + I1reference_video = Image.open(Ireference1_name).convert('RGB') + I2reference_video = Image.open(Ireference2_name).convert('RGB') + + + # Optical flow + flow_forward = [read_flow(flow_forward_name) for flow_forward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + flow_backward = [read_flow(flow_backward_name) for flow_backward_name in flow_forward_list] if self.flag_use_precompute_flo else [] + + # calculate occlusion_mask + if self.flag_use_precompute_flo: + mask_list = [] + for i_idx in range(len(I_list)-1): + I1 = I_list[i_idx] + I2 = I_list[i_idx+1] + + # print(len(I_list), type(I1));assert 1==0 + + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'), flow_backward[i_idx]) + cmap_X = cmap_X.squeeze(0) + cmap_X = cmap_X.numpy() + mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + mask_list.append(mask) + + # transform + I_list = [self.image_transform(self.CenterPad(I1)) for I1 in I_list] + I1reference_video = self.image_transform(self.CenterPad(I1reference_video)) + I2reference_video = self.image_transform(self.CenterPad(I2reference_video)) + flow_forward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_forward] if self.flag_use_precompute_flo else [] + flow_backward = [self.ToTensor(self.CenterCrop(flow)) for flow in flow_backward] if self.flag_use_precompute_flo else [] + mask_list = [self.ToTensor(self.CenterCrop(mask)) for mask in mask_list] if self.flag_use_precompute_flo else [] + placeholder = torch.zeros_like(I_list[0]) + self_ref_flag = torch.ones_like(I_list[0]) + + # I1 = self.image_transform(self.CenterPad(I1)) + # I2 = self.image_transform(self.CenterPad(I2)) + # I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + # flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + # flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + # mask = self.ToTensor(self.CenterCrop(mask)) + + # I_reference_output = I_reference_video + # placeholder = torch.zeros_like(I1) + # self_ref_flag = torch.ones_like(I1) + + outputs = [ + I_list, + I1reference_video, + I2reference_video, + flow_forward, + flow_backward, + mask_list, + placeholder, + self_ref_flag, + ] + + 
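# Each sample is: a list of per-frame Lab tensors, the two Lab reference
# frames, the (possibly empty) flow and mask lists, and two flag tensors.
# Hypothetical usage sketch (not from this repo; assumes a `dataset`
# instance and batch_size=1):
#
#   from torch.utils.data import DataLoader
#   loader = DataLoader(dataset, batch_size=1, shuffle=True)
#   for I_list, ref1, ref2, flows_fw, flows_bw, masks, placeholder, flag in loader:
#       pass  # I_list is a list of (1, 3, H, W) Lab tensors; ref1/ref2 likewise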
except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + # return self.real_len + +class VideosDataset_0626_video_interpolation(torch.utils.data.Dataset): + def __init__( + self, + data_root, + epoch, + image_size, + image_transform=None, + use_google_reference=False, + real_reference_probability=1, + nonzero_placeholder_probability=0.5, + ): + self.data_root = data_root + self.image_transform = image_transform + self.CenterPad = CenterPad(image_size) + self.ToTensor = ToTensor() + self.CenterCrop = CenterCrop(image_size) + + assert len(self.data_root) > 0, "find no dataroot" + self.epoch = epoch + self.image_pairs = parse_images(self.data_root) + self.real_len = len(self.image_pairs) + print("##### parsing image pairs in %s: %d pairs #####" % (data_root, self.real_len)) + self.image_pairs *= epoch + self.use_google_reference = use_google_reference + self.real_reference_probability = real_reference_probability + self.nonzero_placeholder_probability = nonzero_placeholder_probability + + def __getitem__(self, index): + ( + image1_name, + image2_name, + reference_video_name, + flow_forward_name, + flow_backward_name, + # mask_name, + + # image1_name, + # image2_name, + # reference_video_name, + # reference_name1, + # reference_name2, + # reference_name3, + # reference_name4, + # reference_name5, + # flow_forward_name, + # flow_backward_name, + # mask_name, + # reference_gt1, + # reference_gt2, + # reference_gt3, + # path, + ) = self.image_pairs[index] + try: + I1 = Image.open(image1_name) + I2 = Image.open(image2_name) + + # print(image1_name, np.shape(I2)) + + I_reference_video = Image.open(reference_video_name) + # I_reference_video = Image.open(image2_name) + + flow_forward = read_flow(flow_forward_name) # numpy + flow_backward = read_flow(flow_backward_name) # numpy + + # calculate occlusion_mask + cmap_X, warp_X0 = occlusion_mask(I1.convert('L') ,I2.convert('L'),flow_backward) + cmap_X = cmap_X.squeeze(0) + cmap_X = cmap_X.numpy() + mask = cmap_X + # mask = Image.open(mask_name) + + # binary mask + mask = np.array(mask) + # print(np.shape(mask)) + mask[mask < 240] = 0 + mask[mask >= 240] = 1 + + # transform + I1 = self.image_transform(self.CenterPad(I1)) + I2 = self.image_transform(self.CenterPad(I2)) + I_reference_video = self.image_transform(self.CenterPad(I_reference_video)) + flow_forward = self.ToTensor(self.CenterCrop(flow_forward)) + flow_backward = self.ToTensor(self.CenterCrop(flow_backward)) + mask = self.ToTensor(self.CenterCrop(mask)) + + I_reference_output = I_reference_video + + # placeholder = torch.zeros_like(I1) + placeholder = torch.zeros_like(I1) + placeholder = torch.cat([placeholder[1:,:,:], placeholder, placeholder], dim = 0) + + self_ref_flag = torch.ones_like(I1) + + outputs = [ + I1, + I2, + I_reference_output, + flow_forward, + flow_backward, + mask, + placeholder, + self_ref_flag, + ] + + except Exception as e: + print("problem in, ", path) + print(e) + return self.__getitem__(np.random.randint(0, len(self.image_pairs))) + return outputs + + def __len__(self): + return len(self.image_pairs) + + +def batch_lab2rgb_transpose_mc(img_l_mc, img_ab_mc, nrow=8): + if isinstance(img_l_mc, Variable): + img_l_mc = img_l_mc.data.cpu() + if isinstance(img_ab_mc, Variable): + img_ab_mc = img_ab_mc.data.cpu() + + if img_l_mc.is_cuda: + img_l_mc = img_l_mc.cpu() + if img_ab_mc.is_cuda: + img_ab_mc = img_ab_mc.cpu() + + assert 
img_l_mc.dim() == 4 and img_ab_mc.dim() == 4, "only for batch input" + + l_norm, ab_norm = 1.0, 1.0 + l_mean, ab_mean = 50.0, 0 + img_l = img_l_mc * l_norm + l_mean + img_ab = img_ab_mc * ab_norm + ab_mean + pred_lab = torch.cat((img_l, img_ab), dim=1) + grid_lab = vutils.make_grid(pred_lab, nrow=nrow).numpy().astype("float64") + return (np.clip(color.lab2rgb(grid_lab.transpose((1, 2, 0))), 0, 1) * 255).astype("uint8") diff --git a/BiSTNet-NTIRE2023/models/ColorVidNet.py b/BiSTNet-NTIRE2023/models/ColorVidNet.py new file mode 100644 index 0000000..3510b1f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/ColorVidNet.py @@ -0,0 +1,595 @@ +import torch +import torch.nn as nn +import torch.nn.parallel + +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmcv.runner import load_checkpoint + +from mmedit.models.common import (PixelShufflePack, ResidualBlockNoBN, + flow_warp, make_layer) +from mmedit.models.registry import BACKBONES +from mmedit.utils import get_root_logger + + +class ATB_block(nn.Module): + def __init__(self,channel=2): + super(ATB_block, self).__init__() + self.channel = channel + self.atb_forward = ATB(self.channel) + self.atb_backward = ATB(self.channel) + self.atb_fuse = ATB(self.channel) + self.atb_fuse_videointerp_atb = ATB(self.channel) + + def forward(self, colorvid1, colorvid2, flows_forward, flows_backward): + n, c, h, w = colorvid1[0].size() + t = len(colorvid1) + # print(n,t,c,h,w);assert 1==0 + + ab_fuse_videointerp = [] + ab_fuse_atb = [] + for i_idx in range(t): + # print('************************************** %s **********************************'%i_idx) + a = colorvid1[i_idx][:,1:,:,:] + b = colorvid2[i_idx][:,1:,:,:] + + t0 = 1 / (t-1) + ti = i_idx * t0 + # I_current_ab_predict = (1-t)*a + t*b + ab_fuse_videointerp.append((1-ti)*a + ti*b) + + # fused = self.atb_fuse_videointerp_atb(colorvid1[i_idx][:,1:,:,:], colorvid2[i_idx][:,1:,:,:]) + fused = self.atb_fuse_videointerp_atb(a, b) + # ab_fuse_atb.append(fused.detach()) + ab_fuse_atb.append(fused) + + # backward-time propgation + backward_propagation = [] + backward_propagation.append(colorvid2[-1][:,1:,:,:]) + lr_curr = colorvid2[-1][:,1:,:,:] + for i in range(t - 2, -1, -1): + flow = flows_backward[i] + feat_prop = flow_warp(lr_curr, flow.permute(0, 2, 3, 1), padding_mode='border') + feat_atb = self.atb_backward(colorvid2[i][:,1:,:,:], feat_prop) + + lr_curr = feat_atb + backward_propagation.append(feat_atb) + backward_propagation.reverse() + + # forward-time propagation and upsampling + result = [] + result.append(colorvid1[0][:,1:,:,:]) + forward_propagation = [] + forward_propagation.append(colorvid1[0][:,1:,:,:]) + lr_curr = colorvid1[0][:,1:,:,:] + for i in range(0, t-1): + flow = flows_forward[i] + feat_prop = flow_warp(lr_curr, flow.permute(0, 2, 3, 1), padding_mode='border') + feat_atb = self.atb_forward(colorvid1[i+1][:,1:,:,:], feat_prop) + + lr_curr = feat_atb + forward_propagation.append(feat_atb) + + if i < t-2: + # print(i, len(backward_propagation)) + feat_fuse = self.atb_fuse(feat_atb, backward_propagation[i+1]) + # result.append(feat_fuse.detach()) + result.append(feat_fuse) + + result.append(colorvid2[-1][:,1:,:,:]) + + + # combine ab_fuse_videointerp, ab_fuse_atb and result + result_fuse = [] + for i_idx in range(t): + result_fuse_i = torch.cat([result[i_idx], ab_fuse_videointerp[i_idx], ab_fuse_atb[i_idx]], dim=1) + result_fuse.append(result_fuse_i) + + + return result_fuse, ab_fuse_videointerp, ab_fuse_atb + + + +class ATB(nn.Module): + def __init__(self,channel=64): + 
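# ATB fuses a current feature map fea_x with a propagated/reference feature
# map fea_ref: their concatenation is mixed by ATB_preconv, split back into
# two halves, each half produces a sigmoid gate, fea_ref is scaled by the
# first gate, fea_x by the second, and the two gated maps are summed.
# Illustrative shape check (not from this repo):
#
#   atb = ATB(channel=2)
#   fea_x, fea_ref = torch.randn(1, 2, 64, 96), torch.randn(1, 2, 64, 96)
#   fused = atb(fea_x, fea_ref)   # -> torch.Size([1, 2, 64, 96])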
super(ATB, self).__init__() + self.channel = channel + self.ATB_preconv = nn.Conv2d(self.channel*2, self.channel*2, 3, 1, 1) + self.ATB_11 = nn.Conv2d(self.channel, self.channel, 3, 1, 1) + self.ATB_12 = nn.Conv2d(self.channel, self.channel, 3, 1, 1) + self.ATB_21 = nn.Conv2d(self.channel, self.channel, 3, 1, 1) + self.ATB_22 = nn.Conv2d(self.channel, self.channel, 3, 1, 1) + self.sigmoid = nn.Sigmoid() + self.lrelu = nn.LeakyReLU(negative_slope=0.1, inplace=True) + + def forward(self, fea_x, fea_ref): + feat_concat = torch.cat([fea_x, fea_ref], dim=1) + feat_fus = self.ATB_preconv(feat_concat) + feat_split = torch.split(feat_fus, [self.channel, self.channel], dim=1) + # up branch + feat_up = self.sigmoid(self.ATB_12(self.lrelu(self.ATB_11(feat_split[0])))) + feat_up = fea_ref * feat_up + # down branch + feat_down = self.sigmoid(self.ATB_21(self.lrelu(self.ATB_22(feat_split[1])))) + feat_down = fea_x * feat_down + # sum + feat_prop = feat_up + feat_down + return feat_prop + + +# 0711 v2: with ATB +class ColorVidNet_wBasicVSR_v2(nn.Module): + def __init__(self, ic, flag_propagation = False, mid_channels = 64): + super(ColorVidNet_wBasicVSR_v2, self).__init__() + self.conv1_1 = nn.Sequential(nn.Conv2d(ic, 32, 3, 1, 1), nn.ReLU(), nn.Conv2d(32, 64, 3, 1, 1)) + self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1) + self.conv1_2norm = nn.BatchNorm2d(64, affine=False) + self.conv1_2norm_ss = nn.Conv2d(64, 64, 1, 2, bias=False, groups=64) + self.conv2_1 = nn.Conv2d(64, 128, 3, 1, 1) + self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1) + self.conv2_2norm = nn.BatchNorm2d(128, affine=False) + self.conv2_2norm_ss = nn.Conv2d(128, 128, 1, 2, bias=False, groups=128) + self.conv3_1 = nn.Conv2d(128, 256, 3, 1, 1) + self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1) + self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1) + self.conv3_3norm = nn.BatchNorm2d(256, affine=False) + self.conv3_3norm_ss = nn.Conv2d(256, 256, 1, 2, bias=False, groups=256) + self.conv4_1 = nn.Conv2d(256, 512, 3, 1, 1) + self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1) + self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1) + self.conv4_3norm = nn.BatchNorm2d(512, affine=False) + self.conv5_1 = nn.Conv2d(512, 512, 3, 1, 2, 2) + self.conv5_2 = nn.Conv2d(512, 512, 3, 1, 2, 2) + self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 2, 2) + self.conv5_3norm = nn.BatchNorm2d(512, affine=False) + self.conv6_1 = nn.Conv2d(512, 512, 3, 1, 2, 2) + self.conv6_2 = nn.Conv2d(512, 512, 3, 1, 2, 2) + self.conv6_3 = nn.Conv2d(512, 512, 3, 1, 2, 2) + self.conv6_3norm = nn.BatchNorm2d(512, affine=False) + self.conv7_1 = nn.Conv2d(512, 512, 3, 1, 1) + self.conv7_2 = nn.Conv2d(512, 512, 3, 1, 1) + self.conv7_3 = nn.Conv2d(512, 512, 3, 1, 1) + self.conv7_3norm = nn.BatchNorm2d(512, affine=False) + self.conv8_1 = nn.ConvTranspose2d(512, 256, 4, 2, 1) + self.conv3_3_short = nn.Conv2d(256, 256, 3, 1, 1) + self.conv8_2 = nn.Conv2d(256, 256, 3, 1, 1) + self.conv8_3 = nn.Conv2d(256, 256, 3, 1, 1) + self.conv8_3norm = nn.BatchNorm2d(256, affine=False) + self.conv9_1 = nn.ConvTranspose2d(256, 128, 4, 2, 1) + self.conv2_2_short = nn.Conv2d(128, 128, 3, 1, 1) + self.conv9_2 = nn.Conv2d(128, 128, 3, 1, 1) + self.conv9_2norm = nn.BatchNorm2d(128, affine=False) + self.conv10_1 = nn.ConvTranspose2d(128, 128, 4, 2, 1) + self.conv1_2_short = nn.Conv2d(64, 128, 3, 1, 1) + self.conv10_2 = nn.Conv2d(128, 128, 3, 1, 1) + + self.flag_propagation = flag_propagation + self.mid_channels = mid_channels + # if self.flag_propagation: + # self.conv10_ab = nn.Conv2d(128, self.mid_channels, 1, 1) + # else: + # self.conv10_ab = nn.Conv2d(128, 
2, 1, 1) + + self.conv10_ab = nn.Conv2d(128, 2, 1, 1) + + # add self.relux_x + self.relu1_1 = nn.ReLU() + self.relu1_2 = nn.ReLU() + self.relu2_1 = nn.ReLU() + self.relu2_2 = nn.ReLU() + self.relu3_1 = nn.ReLU() + self.relu3_2 = nn.ReLU() + self.relu3_3 = nn.ReLU() + self.relu4_1 = nn.ReLU() + self.relu4_2 = nn.ReLU() + self.relu4_3 = nn.ReLU() + self.relu5_1 = nn.ReLU() + self.relu5_2 = nn.ReLU() + self.relu5_3 = nn.ReLU() + self.relu6_1 = nn.ReLU() + self.relu6_2 = nn.ReLU() + self.relu6_3 = nn.ReLU() + self.relu7_1 = nn.ReLU() + self.relu7_2 = nn.ReLU() + self.relu7_3 = nn.ReLU() + self.relu8_1_comb = nn.ReLU() + self.relu8_2 = nn.ReLU() + self.relu8_3 = nn.ReLU() + self.relu9_1_comb = nn.ReLU() + self.relu9_2 = nn.ReLU() + self.relu10_1_comb = nn.ReLU() + self.relu10_2 = nn.LeakyReLU(0.2, True) + + print("replace all deconv with [nearest + conv]") + self.conv8_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(512, 256, 3, 1, 1)) + self.conv9_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(256, 128, 3, 1, 1)) + self.conv10_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(128, 128, 3, 1, 1)) + + print("replace all batchnorm with instancenorm") + self.conv1_2norm = nn.InstanceNorm2d(64) + self.conv2_2norm = nn.InstanceNorm2d(128) + self.conv3_3norm = nn.InstanceNorm2d(256) + self.conv4_3norm = nn.InstanceNorm2d(512) + self.conv5_3norm = nn.InstanceNorm2d(512) + self.conv6_3norm = nn.InstanceNorm2d(512) + self.conv7_3norm = nn.InstanceNorm2d(512) + self.conv8_3norm = nn.InstanceNorm2d(256) + self.conv9_2norm = nn.InstanceNorm2d(128) + + def forward(self, x): + """ x: gray image (1 channel), ab(2 channel), ab_err, ba_err""" + conv1_1 = self.relu1_1(self.conv1_1(x)) + conv1_2 = self.relu1_2(self.conv1_2(conv1_1)) + conv1_2norm = self.conv1_2norm(conv1_2) + conv1_2norm_ss = self.conv1_2norm_ss(conv1_2norm) + conv2_1 = self.relu2_1(self.conv2_1(conv1_2norm_ss)) + conv2_2 = self.relu2_2(self.conv2_2(conv2_1)) + conv2_2norm = self.conv2_2norm(conv2_2) + conv2_2norm_ss = self.conv2_2norm_ss(conv2_2norm) + conv3_1 = self.relu3_1(self.conv3_1(conv2_2norm_ss)) + conv3_2 = self.relu3_2(self.conv3_2(conv3_1)) + conv3_3 = self.relu3_3(self.conv3_3(conv3_2)) + conv3_3norm = self.conv3_3norm(conv3_3) + conv3_3norm_ss = self.conv3_3norm_ss(conv3_3norm) + conv4_1 = self.relu4_1(self.conv4_1(conv3_3norm_ss)) + conv4_2 = self.relu4_2(self.conv4_2(conv4_1)) + conv4_3 = self.relu4_3(self.conv4_3(conv4_2)) + conv4_3norm = self.conv4_3norm(conv4_3) + conv5_1 = self.relu5_1(self.conv5_1(conv4_3norm)) + conv5_2 = self.relu5_2(self.conv5_2(conv5_1)) + conv5_3 = self.relu5_3(self.conv5_3(conv5_2)) + conv5_3norm = self.conv5_3norm(conv5_3) + conv6_1 = self.relu6_1(self.conv6_1(conv5_3norm)) + conv6_2 = self.relu6_2(self.conv6_2(conv6_1)) + conv6_3 = self.relu6_3(self.conv6_3(conv6_2)) + conv6_3norm = self.conv6_3norm(conv6_3) + conv7_1 = self.relu7_1(self.conv7_1(conv6_3norm)) + conv7_2 = self.relu7_2(self.conv7_2(conv7_1)) + conv7_3 = self.relu7_3(self.conv7_3(conv7_2)) + conv7_3norm = self.conv7_3norm(conv7_3) + conv8_1 = self.conv8_1(conv7_3norm) + conv3_3_short = self.conv3_3_short(conv3_3norm) + conv8_1_comb = self.relu8_1_comb(conv8_1 + conv3_3_short) + conv8_2 = self.relu8_2(self.conv8_2(conv8_1_comb)) + conv8_3 = self.relu8_3(self.conv8_3(conv8_2)) + conv8_3norm = self.conv8_3norm(conv8_3) + conv9_1 = self.conv9_1(conv8_3norm) + conv2_2_short = self.conv2_2_short(conv2_2norm) + conv9_1_comb = self.relu9_1_comb(conv9_1 + conv2_2_short) + 
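+        # Decoder skip-connection note (descriptive comment; shapes assume a 64x64 input and
+        # are for illustration only): conv9_1 nearest-upsamples the 256-channel 16x16 tensor
+        # to 128 channels at 32x32, conv2_2_short projects the matching encoder activation
+        # conv2_2norm to the same [n, 128, 32, 32] shape, and the two are fused by
+        # element-wise addition in relu9_1_comb, mirroring the conv8_* stage above.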
conv9_2 = self.relu9_2(self.conv9_2(conv9_1_comb)) + conv9_2norm = self.conv9_2norm(conv9_2) + conv10_1 = self.conv10_1(conv9_2norm) + conv1_2_short = self.conv1_2_short(conv1_2norm) + conv10_1_comb = self.relu10_1_comb(conv10_1 + conv1_2_short) + conv10_2 = self.relu10_2(self.conv10_2(conv10_1_comb)) + conv10_ab = self.conv10_ab(conv10_2) + + if self.flag_propagation: + return conv10_ab + else: + return torch.tanh(conv10_ab) * 128 + + +# 00810 v3—1: Add coarse to fine: +# resolution: 224x384 112x192 56x96 +class ColorVidNet_wBasicVSR_v3(nn.Module): + def __init__(self, ic, flag_propagation = False, mid_channels = 64): + super(ColorVidNet_wBasicVSR_v3, self).__init__() + self.size64 = ColorVidNet_wBasicVSR_v2(ic, flag_propagation = False, mid_channels = 64) + self.size128 = ColorVidNet_wBasicVSR_v2(ic + 2, flag_propagation = False, mid_channels = 64) + self.size256 = ColorVidNet_wBasicVSR_v2(ic + 2, flag_propagation = False, mid_channels = 64) + + # self.upconv1 = nn.ConvTranspose2d(2, 2, 3, stride=2, padding=1, + # output_padding=1, groups=1, bias=True, dilation=1) + # self.upconv2 = nn.ConvTranspose2d(2, 2, 3, stride=2, padding=1, + # output_padding=1, groups=1, bias=True, dilation=1) + self.upconv1 = nn.ConvTranspose2d(2, 2, 3, stride=2, padding=1, + output_padding=1, groups=1, bias=True, dilation=1) + self.upconv2 = nn.ConvTranspose2d(2, 2, 3, stride=2, padding=1, + output_padding=1, groups=1, bias=True, dilation=1) + + def forward(self, x): + + input_pyr_b1 = x[0] # 224 + input_pyr_b2 = x[1] # 112 + input_pyr_b3 = x[2] # 56 + # print(input_pyr_b1.shape, input_pyr_b2.shape, input_pyr_b3.shape) + + # first layer res64 + out_size64 = self.size64(input_pyr_b3) + + # second layer res128 + up_out_size64 = self.upconv1(out_size64) + input_size128 = torch.cat([input_pyr_b2, up_out_size64], dim=1) + out_size128 = self.size128(input_size128) + + # print(up_out_size64.shape, out_size64.shape, input_size128.shape);assert 1==0 + + # third layer res256 + up_out_size128 = self.upconv2(out_size128) + input_size256 = torch.cat([input_pyr_b1, up_out_size128], dim=1) + out_size256 = self.size128(input_size256) + + result = [out_size256, out_size128, out_size64] + return result + + +class ResidualBlocksWithInputConv_adaptive_for_colorization(nn.Module): + """Residual blocks with a convolution in front. + + Args: + in_channels (int): Number of input channels of the first conv. + out_channels (int): Number of channels of the residual blocks. + Default: 64. + num_blocks (int): Number of residual blocks. Default: 30. + """ + + def __init__(self, in_channels, out_channels=64, num_blocks=30): + super().__init__() + + main = [] + + # a convolution used to match the channels of the residual blocks + main.append(nn.Conv2d(in_channels, out_channels, 3, 1, 1, bias=True)) + main.append(nn.LeakyReLU(negative_slope=0.1, inplace=True)) + + # residual blocks + main.append( + make_layer( + ResidualBlockNoBN, num_blocks, mid_channels=out_channels)) + + main.append(nn.Conv2d(out_channels, 2, 1, 1)) + + self.main = nn.Sequential(*main) + + def forward(self, feat): + """ + Forward function for ResidualBlocksWithInputConv. + + Args: + feat (Tensor): Input feature with shape (n, in_channels, h, w) + + Returns: + Tensor: Output feature with shape (n, out_channels, h, w) + """ + + output = self.main(feat) + return torch.tanh(output) * 128 + + +class ResidualBlocksWithInputConv(nn.Module): + """Residual blocks with a convolution in front. + + Args: + in_channels (int): Number of input channels of the first conv. 
+ out_channels (int): Number of channels of the residual blocks. + Default: 64. + num_blocks (int): Number of residual blocks. Default: 30. + """ + + def __init__(self, in_channels, out_channels=64, num_blocks=30): + super().__init__() + + main = [] + + # a convolution used to match the channels of the residual blocks + main.append(nn.Conv2d(in_channels, out_channels, 3, 1, 1, bias=True)) + main.append(nn.LeakyReLU(negative_slope=0.1, inplace=True)) + + # residual blocks + main.append( + make_layer( + ResidualBlockNoBN, num_blocks, mid_channels=out_channels)) + + self.main = nn.Sequential(*main) + + def forward(self, feat): + """ + Forward function for ResidualBlocksWithInputConv. + + Args: + feat (Tensor): Input feature with shape (n, in_channels, h, w) + + Returns: + Tensor: Output feature with shape (n, out_channels, h, w) + """ + return self.main(feat) + + +class SPyNet(nn.Module): + """SPyNet network structure. + + The difference to the SPyNet in [tof.py] is that + 1. more SPyNetBasicModule is used in this version, and + 2. no batch normalization is used in this version. + + Paper: + Optical Flow Estimation using a Spatial Pyramid Network, CVPR, 2017 + + Args: + pretrained (str): path for pre-trained SPyNet. Default: None. + """ + + def __init__(self, pretrained): + super().__init__() + + self.basic_module = nn.ModuleList( + [SPyNetBasicModule() for _ in range(6)]) + + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=True, logger=logger) + elif pretrained is not None: + raise TypeError('[pretrained] should be str or None, ' + f'but got {type(pretrained)}.') + + self.register_buffer( + 'mean', + torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)) + self.register_buffer( + 'std', + torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)) + + def compute_flow(self, ref, supp): + """Compute flow from ref to supp. + + Note that in this function, the images are already resized to a + multiple of 32. + + Args: + ref (Tensor): Reference image with shape of (n, 3, h, w). + supp (Tensor): Supporting image with shape of (n, 3, h, w). + + Returns: + Tensor: Estimated optical flow: (n, 2, h, w). + """ + n, c, h, w = ref.size() + + # normalize the input images + ref = [(ref - self.mean) / self.std] + supp = [(supp - self.mean) / self.std] + + # generate downsampled frames + for level in range(5): + ref.append( + F.avg_pool2d( + input=ref[-1], + kernel_size=2, + stride=2, + count_include_pad=False)) + supp.append( + F.avg_pool2d( + input=supp[-1], + kernel_size=2, + stride=2, + count_include_pad=False)) + ref = ref[::-1] + supp = supp[::-1] + + # flow computation + flow = ref[0].new_zeros(n, 2, h // 32, w // 32) + for level in range(len(ref)): + if level == 0: + flow_up = flow + else: + flow_up = F.interpolate( + input=flow, + scale_factor=2, + mode='bilinear', + align_corners=True) * 2.0 + + # add the residue to the upsampled flow + flow = flow_up + self.basic_module[level]( + torch.cat([ + ref[level], + flow_warp( + supp[level], + flow_up.permute(0, 2, 3, 1), + padding_mode='border'), flow_up + ], 1)) + + return flow + + def forward(self, ref, supp): + """Forward function of SPyNet. + + This function computes the optical flow from ref to supp. + + Args: + ref (Tensor): Reference image with shape of (n, 3, h, w). + supp (Tensor): Supporting image with shape of (n, 3, h, w). + + Returns: + Tensor: Estimated optical flow: (n, 2, h, w). 
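+
+        Example (illustrative sketch only, using random inputs and no pretrained weights):
+            >>> spynet = SPyNet(pretrained=None)
+            >>> ref = torch.rand(1, 3, 180, 320)
+            >>> supp = torch.rand(1, 3, 180, 320)
+            >>> flow = spynet(ref, supp)  # (1, 2, 180, 320); computed at 192x320 internally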
+ """ + + # upsize to a multiple of 32 + h, w = ref.shape[2:4] + w_up = w if (w % 32) == 0 else 32 * (w // 32 + 1) + h_up = h if (h % 32) == 0 else 32 * (h // 32 + 1) + ref = F.interpolate( + input=ref, size=(h_up, w_up), mode='bilinear', align_corners=False) + supp = F.interpolate( + input=supp, + size=(h_up, w_up), + mode='bilinear', + align_corners=False) + + # compute flow, and resize back to the original resolution + flow = F.interpolate( + input=self.compute_flow(ref, supp), + size=(h, w), + mode='bilinear', + align_corners=False) + + # adjust the flow values + flow[:, 0, :, :] *= float(w) / float(w_up) + flow[:, 1, :, :] *= float(h) / float(h_up) + + return flow + + +class SPyNetBasicModule(nn.Module): + """Basic Module for SPyNet. + + Paper: + Optical Flow Estimation using a Spatial Pyramid Network, CVPR, 2017 + """ + + def __init__(self): + super().__init__() + + self.basic_module = nn.Sequential( + ConvModule( + in_channels=8, + out_channels=32, + kernel_size=7, + stride=1, + padding=3, + norm_cfg=None, + act_cfg=dict(type='ReLU')), + ConvModule( + in_channels=32, + out_channels=64, + kernel_size=7, + stride=1, + padding=3, + norm_cfg=None, + act_cfg=dict(type='ReLU')), + ConvModule( + in_channels=64, + out_channels=32, + kernel_size=7, + stride=1, + padding=3, + norm_cfg=None, + act_cfg=dict(type='ReLU')), + ConvModule( + in_channels=32, + out_channels=16, + kernel_size=7, + stride=1, + padding=3, + norm_cfg=None, + act_cfg=dict(type='ReLU')), + ConvModule( + in_channels=16, + out_channels=2, + kernel_size=7, + stride=1, + padding=3, + norm_cfg=None, + act_cfg=None)) + + def forward(self, tensor_input): + """ + Args: + tensor_input (Tensor): Input tensor with shape (b, 8, h, w). + 8 channels contain: + [reference image (3), neighbor image (3), initial flow (2)]. 
+ + Returns: + Tensor: Refined flow with shape (b, 2, h, w) + """ + return self.basic_module(tensor_input) + diff --git a/BiSTNet-NTIRE2023/models/ContextualLoss.py b/BiSTNet-NTIRE2023/models/ContextualLoss.py new file mode 100644 index 0000000..a37a8c6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/ContextualLoss.py @@ -0,0 +1,303 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision +from utils.util import feature_normalize + +postpa = torchvision.transforms.Compose( + [ + torchvision.transforms.Lambda(lambda x: x.mul_(1.0 / 255)), + torchvision.transforms.Normalize( + mean=[-0.40760392, -0.45795686, -0.48501961], std=[1, 1, 1] # add imagenet mean + ), + torchvision.transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]), # turn to RGB + ] +) +postpb = torchvision.transforms.Compose([torchvision.transforms.ToPILImage()]) + + +def post_processing(tensor): + t = postpa(tensor) # denormalize the image since the optimized tensor is the normalized one + t[t > 1] = 1 + t[t < 0] = 0 + img = postpb(t) + img = np.array(img) + return img + + +class ContextualLoss(nn.Module): + """ + input is Al, Bl, channel = 1, range ~ [0, 255] + """ + + def __init__(self): + super(ContextualLoss, self).__init__() + return None + + def forward(self, X_features, Y_features, h=0.1, feature_centering=True): + """ + X_features&Y_features are are feature vectors or feature 2d array + h: bandwidth + return the per-sample loss + """ + batch_size = X_features.shape[0] + feature_depth = X_features.shape[1] + feature_size = X_features.shape[2] + + # to normalized feature vectors + if feature_centering: + X_features = X_features - Y_features.view(batch_size, feature_depth, -1).mean(dim=-1).unsqueeze( + dim=-1 + ).unsqueeze(dim=-1) + Y_features = Y_features - Y_features.view(batch_size, feature_depth, -1).mean(dim=-1).unsqueeze( + dim=-1 + ).unsqueeze(dim=-1) + X_features = feature_normalize(X_features).view( + batch_size, feature_depth, -1 + ) # batch_size * feature_depth * feature_size^2 + Y_features = feature_normalize(Y_features).view( + batch_size, feature_depth, -1 + ) # batch_size * feature_depth * feature_size^2 + + # conine distance = 1 - similarity + X_features_permute = X_features.permute(0, 2, 1) # batch_size * feature_size^2 * feature_depth + d = 1 - torch.matmul(X_features_permute, Y_features) # batch_size * feature_size^2 * feature_size^2 + + # normalized distance: dij_bar + d_norm = d / (torch.min(d, dim=-1, keepdim=True)[0] + 1e-5) # batch_size * feature_size^2 * feature_size^2 + + # pairwise affinity + w = torch.exp((1 - d_norm) / h) + A_ij = w / torch.sum(w, dim=-1, keepdim=True) + + # contextual loss per sample + CX = torch.mean(torch.max(A_ij, dim=1)[0], dim=-1) + return -torch.log(CX) + + +class ContextualLoss_forward(nn.Module): + """ + input is Al, Bl, channel = 1, range ~ [0, 255] + """ + + def __init__(self): + super(ContextualLoss_forward, self).__init__() + return None + + def forward(self, X_features, Y_features, h=0.1, feature_centering=True): + """ + X_features&Y_features are are feature vectors or feature 2d array + h: bandwidth + return the per-sample loss + """ + batch_size = X_features.shape[0] + feature_depth = X_features.shape[1] + feature_size = X_features.shape[2] + + # to normalized feature vectors + if feature_centering: + X_features = X_features - Y_features.view(batch_size, feature_depth, -1).mean(dim=-1).unsqueeze( + dim=-1 + ).unsqueeze(dim=-1) + Y_features = Y_features - Y_features.view(batch_size, feature_depth, 
-1).mean(dim=-1).unsqueeze( + dim=-1 + ).unsqueeze(dim=-1) + X_features = feature_normalize(X_features).view( + batch_size, feature_depth, -1 + ) # batch_size * feature_depth * feature_size^2 + Y_features = feature_normalize(Y_features).view( + batch_size, feature_depth, -1 + ) # batch_size * feature_depth * feature_size^2 + + # conine distance = 1 - similarity + X_features_permute = X_features.permute(0, 2, 1) # batch_size * feature_size^2 * feature_depth + d = 1 - torch.matmul(X_features_permute, Y_features) # batch_size * feature_size^2 * feature_size^2 + + # normalized distance: dij_bar + d_norm = d / (torch.min(d, dim=-1, keepdim=True)[0] + 1e-5) # batch_size * feature_size^2 * feature_size^2 + + # pairwise affinity + w = torch.exp((1 - d_norm) / h) + A_ij = w / torch.sum(w, dim=-1, keepdim=True) + + # contextual loss per sample + CX = torch.mean(torch.max(A_ij, dim=-1)[0], dim=1) + return -torch.log(CX) + + +class ContextualLoss_complex(nn.Module): + """ + input is Al, Bl, channel = 1, range ~ [0, 255] + """ + + def __init__(self): + super(ContextualLoss_complex, self).__init__() + return None + + def forward(self, X_features, Y_features, h=0.1, patch_size=1, direction="forward"): + """ + X_features&Y_features are are feature vectors or feature 2d array + h: bandwidth + return the per-sample loss + """ + batch_size = X_features.shape[0] + feature_depth = X_features.shape[1] + feature_size = X_features.shape[2] + + # to normalized feature vectors + X_features = X_features - Y_features.view(batch_size, feature_depth, -1).mean(dim=-1).unsqueeze( + dim=-1 + ).unsqueeze(dim=-1) + Y_features = Y_features - Y_features.view(batch_size, feature_depth, -1).mean(dim=-1).unsqueeze( + dim=-1 + ).unsqueeze(dim=-1) + X_features = feature_normalize(X_features) # batch_size * feature_depth * feature_size^2 + Y_features = feature_normalize(Y_features) # batch_size * feature_depth * feature_size^2 + + # to normalized feature vectors + X_features = F.unfold( + X_features, kernel_size=(patch_size, patch_size), stride=(1, 1), padding=(patch_size // 2, patch_size // 2) + ) # batch_size * feature_depth_new * feature_size^2 + Y_features = F.unfold( + Y_features, kernel_size=(patch_size, patch_size), stride=(1, 1), padding=(patch_size // 2, patch_size // 2) + ) # batch_size * feature_depth_new * feature_size^2 + + # conine distance = 1 - similarity + X_features_permute = X_features.permute(0, 2, 1) # batch_size * feature_size^2 * feature_depth + d = 1 - torch.matmul(X_features_permute, Y_features) # batch_size * feature_size^2 * feature_size^2 + + # normalized distance: dij_bar + d_norm = d / (torch.min(d, dim=-1, keepdim=True)[0] + 1e-5) # batch_size * feature_size^2 * feature_size^2 + + # pairwise affinity + w = torch.exp((1 - d_norm) / h) + A_ij = w / torch.sum(w, dim=-1, keepdim=True) + + # contextual loss per sample + if direction == "forward": + CX = torch.mean(torch.max(A_ij, dim=-1)[0], dim=1) + else: + CX = torch.mean(torch.max(A_ij, dim=1)[0], dim=-1) + + return -torch.log(CX) + + +class ChamferDistance_patch_loss(nn.Module): + """ + input is Al, Bl, channel = 1, range ~ [0, 255] + """ + + def __init__(self): + super(ChamferDistance_patch_loss, self).__init__() + return None + + def forward(self, X_features, Y_features, patch_size=3, image_x=None, image_y=None, h=0.1, Y_features_in=None): + """ + X_features&Y_features are are feature vectors or feature 2d array + h: bandwidth + return the per-sample loss + """ + batch_size = X_features.shape[0] + feature_depth = X_features.shape[1] + feature_size = 
X_features.shape[2] + + # to normalized feature vectors + X_features = F.unfold( + X_features, kernel_size=(patch_size, patch_size), stride=(1, 1), padding=(patch_size // 2, patch_size // 2) + ) # batch_size, feature_depth_new * feature_size^2 + Y_features = F.unfold( + Y_features, kernel_size=(patch_size, patch_size), stride=(1, 1), padding=(patch_size // 2, patch_size // 2) + ) # batch_size, feature_depth_new * feature_size^2 + + if image_x is not None and image_y is not None: + image_x = torch.nn.functional.interpolate(image_x, size=(feature_size, feature_size), mode="bilinear").view( + batch_size, 3, -1 + ) + image_y = torch.nn.functional.interpolate(image_y, size=(feature_size, feature_size), mode="bilinear").view( + batch_size, 3, -1 + ) + + X_features_permute = X_features.permute(0, 2, 1) # batch_size * feature_size^2 * feature_depth + similarity_matrix = torch.matmul(X_features_permute, Y_features) # batch_size * feature_size^2 * feature_size^2 + NN_index = similarity_matrix.max(dim=-1, keepdim=True)[1].squeeze() + + if Y_features_in is not None: + loss = torch.mean((X_features - Y_features_in.detach()) ** 2) + Y_features_in = Y_features_in.detach() + else: + loss = torch.mean((X_features - Y_features[:, :, NN_index].detach()) ** 2) + Y_features_in = Y_features[:, :, NN_index].detach() + + # re-arrange image + if image_x is not None and image_y is not None: + image_y_rearrange = image_y[:, :, NN_index] + image_y_rearrange = image_y_rearrange.view(batch_size, 3, feature_size, feature_size) + image_x = image_x.view(batch_size, 3, feature_size, feature_size) + image_y = image_y.view(batch_size, 3, feature_size, feature_size) + + return loss + + +class ChamferDistance_loss(nn.Module): + """ + input is Al, Bl, channel = 1, range ~ [0, 255] + """ + + def __init__(self): + super(ChamferDistance_loss, self).__init__() + return None + + def forward(self, X_features, Y_features, image_x, image_y, h=0.1, Y_features_in=None): + """ + X_features&Y_features are are feature vectors or feature 2d array + h: bandwidth + return the per-sample loss + """ + batch_size = X_features.shape[0] + feature_depth = X_features.shape[1] + feature_size = X_features.shape[2] + + # to normalized feature vectors + X_features = feature_normalize(X_features).view( + batch_size, feature_depth, -1 + ) # batch_size * feature_depth * feature_size^2 + Y_features = feature_normalize(Y_features).view( + batch_size, feature_depth, -1 + ) # batch_size * feature_depth * feature_size^2 + image_x = torch.nn.functional.interpolate(image_x, size=(feature_size, feature_size), mode="bilinear").view( + batch_size, 3, -1 + ) + image_y = torch.nn.functional.interpolate(image_y, size=(feature_size, feature_size), mode="bilinear").view( + batch_size, 3, -1 + ) + + X_features_permute = X_features.permute(0, 2, 1) # batch_size * feature_size^2 * feature_depth + similarity_matrix = torch.matmul(X_features_permute, Y_features) # batch_size * feature_size^2 * feature_size^2 + NN_index = similarity_matrix.max(dim=-1, keepdim=True)[1].squeeze() + if Y_features_in is not None: + loss = torch.mean((X_features - Y_features_in.detach()) ** 2) + Y_features_in = Y_features_in.detach() + else: + loss = torch.mean((X_features - Y_features[:, :, NN_index].detach()) ** 2) + Y_features_in = Y_features[:, :, NN_index].detach() + + # re-arrange image + image_y_rearrange = image_y[:, :, NN_index] + image_y_rearrange = image_y_rearrange.view(batch_size, 3, feature_size, feature_size) + image_x = image_x.view(batch_size, 3, feature_size, feature_size) + 
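+        # Nearest-neighbour matching note (descriptive comment): similarity_matrix is
+        # (batch, HW, HW) over the normalized feature vectors, NN_index holds the index of
+        # the most similar Y feature for every X location, and indexing Y_features / image_y
+        # with it re-orders the reference to align with X before the squared-error loss above.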
image_y = image_y.view(batch_size, 3, feature_size, feature_size) + + return loss, Y_features_in, X_features + + +if __name__ == "__main__": + contextual_loss = ContextualLoss() + batch_size = 32 + feature_depth = 8 + feature_size = 16 + X_features = torch.zeros(batch_size, feature_depth, feature_size, feature_size) + Y_features = torch.zeros(batch_size, feature_depth, feature_size, feature_size) + + cx_loss = contextual_loss(X_features, Y_features, 1) + print(cx_loss) diff --git a/BiSTNet-NTIRE2023/models/FrameColor.py b/BiSTNet-NTIRE2023/models/FrameColor.py new file mode 100644 index 0000000..140ce4d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/FrameColor.py @@ -0,0 +1,322 @@ +import torch +from utils.util import * + +def warp_color_wBasicVSR(IA_l, IB_lab, features_B, vggnet, nonlocal_net, colornet, feature_noise=0, temperature=0.01): + IA_rgb_from_gray = torch.zeros_like(IA_l).repeat(1,1,3,1,1) + for i in range(IA_l.size(1)): + IA_rgb_from_gray[:, i, :, :, :] = gray2rgb_batch(IA_l[:, i, :, :, :]) + + # print(IA_rgb_from_gray.shape) + # assert 1==0 + nonlocal_BA_lab_list = [] + similarity_map_list = [] + B_relu1_1, B_relu2_1, B_relu3_1, B_relu4_1, B_relu5_1 = features_B + for i in range(IA_l.size(1)): + with torch.no_grad(): + A_relu1_1, A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1 = vggnet( + IA_rgb_from_gray[:, i, :, :, :], ["r12", "r22", "r32", "r42", "r52"], preprocess=True + ) + + # NOTE: output the feature before normalization + features_A = [A_relu1_1, A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1] + + A_relu2_1 = feature_normalize(A_relu2_1) + A_relu3_1 = feature_normalize(A_relu3_1) + A_relu4_1 = feature_normalize(A_relu4_1) + A_relu5_1 = feature_normalize(A_relu5_1) + B_relu2_1 = feature_normalize(B_relu2_1) + B_relu3_1 = feature_normalize(B_relu3_1) + B_relu4_1 = feature_normalize(B_relu4_1) + B_relu5_1 = feature_normalize(B_relu5_1) + + nonlocal_BA_lab_single, similarity_map_single = nonlocal_net( + IB_lab, + A_relu2_1, + A_relu3_1, + A_relu4_1, + A_relu5_1, + B_relu2_1, + B_relu3_1, + B_relu4_1, + B_relu5_1, + temperature=temperature, + ) + + nonlocal_BA_lab_list.append(nonlocal_BA_lab_single) + similarity_map_list.append(similarity_map_single) + nonlocal_BA_lab = torch.cat(nonlocal_BA_lab_list, dim=0) + similarity_map = torch.cat(similarity_map_list, dim=0) + + # print(nonlocal_BA_lab_single.shape, nonlocal_BA_lab.shape) + # print(similarity_map_single.shape, similarity_map.shape) + # assert 1==0 + return nonlocal_BA_lab, similarity_map, features_A + + +def warp_color(IA_l, IB_lab, features_B, vggnet, nonlocal_net, colornet, feature_noise=0, temperature=0.01): + IA_rgb_from_gray = gray2rgb_batch(IA_l) + with torch.no_grad(): + A_relu1_1, A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1 = vggnet( + IA_rgb_from_gray, ["r12", "r22", "r32", "r42", "r52"], preprocess=True + ) + B_relu1_1, B_relu2_1, B_relu3_1, B_relu4_1, B_relu5_1 = features_B + + # NOTE: output the feature before normalization + features_A = [A_relu1_1, A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1] + + A_relu2_1 = feature_normalize(A_relu2_1) + A_relu3_1 = feature_normalize(A_relu3_1) + A_relu4_1 = feature_normalize(A_relu4_1) + A_relu5_1 = feature_normalize(A_relu5_1) + B_relu2_1 = feature_normalize(B_relu2_1) + B_relu3_1 = feature_normalize(B_relu3_1) + B_relu4_1 = feature_normalize(B_relu4_1) + B_relu5_1 = feature_normalize(B_relu5_1) + + nonlocal_BA_lab, similarity_map = nonlocal_net( + IB_lab, + A_relu2_1, + A_relu3_1, + A_relu4_1, + A_relu5_1, + B_relu2_1, + B_relu3_1, + B_relu4_1, + B_relu5_1, + 
temperature=temperature, + ) + + return nonlocal_BA_lab, similarity_map, features_A + +def warp_color_v0_baseline_double(IA_l, IB_lab1, IB_lab2, features_B1, features_B2, vggnet, nonlocal_net, colornet, feature_noise=0, temperature=0.01): + IA_rgb_from_gray = gray2rgb_batch(IA_l) + with torch.no_grad(): + A_relu1_1, A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1 = vggnet( + IA_rgb_from_gray, ["r12", "r22", "r32", "r42", "r52"], preprocess=True + ) + B1_relu1_1, B1_relu2_1, B1_relu3_1, B1_relu4_1, B1_relu5_1 = features_B1 + B2_relu1_1, B2_relu2_1, B2_relu3_1, B2_relu4_1, B2_relu5_1 = features_B2 + + # NOTE: output the feature before normalization + features_A = [A_relu1_1, A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1] + + A_relu2_1 = feature_normalize(A_relu2_1) + A_relu3_1 = feature_normalize(A_relu3_1) + A_relu4_1 = feature_normalize(A_relu4_1) + A_relu5_1 = feature_normalize(A_relu5_1) + B1_relu2_1 = feature_normalize(B1_relu2_1) + B1_relu3_1 = feature_normalize(B1_relu3_1) + B1_relu4_1 = feature_normalize(B1_relu4_1) + B1_relu5_1 = feature_normalize(B1_relu5_1) + B2_relu2_1 = feature_normalize(B2_relu2_1) + B2_relu3_1 = feature_normalize(B2_relu3_1) + B2_relu4_1 = feature_normalize(B2_relu4_1) + B2_relu5_1 = feature_normalize(B2_relu5_1) + + nonlocal_BA_lab, similarity_map = nonlocal_net( + IB_lab1, + IB_lab2, + A_relu2_1, + A_relu3_1, + A_relu4_1, + A_relu5_1, + B1_relu2_1, + B1_relu3_1, + B1_relu4_1, + B1_relu5_1, + B2_relu2_1, + B2_relu3_1, + B2_relu4_1, + B2_relu5_1, + temperature=temperature, + ) + + return nonlocal_BA_lab, similarity_map, features_A + +def frame_colorization_wBasicVSR( + IA_lab, + IB_lab, + IA_last_lab, + features_B, + vggnet, + nonlocal_net, + colornet, + joint_training=True, + feature_noise=0, + luminance_noise=0, + temperature=0.01, +): + + IA_l = IA_lab[:, :, 0:1, :, :] + if luminance_noise: + IA_l = IA_l + torch.randn_like(IA_l, requires_grad=False) * luminance_noise + + with torch.autograd.set_grad_enabled(joint_training): + ''' + Args: + IA_lab (tensor): Current frame image in lab-channal with shape (n, t, c, h, w), torch.Size([1, 26, 3, 64, 64]) etc + nonlocal_BA_lab (tensor): warped ab-channal colors w/L-channal with shape (t, c, h, w), torch.Size([26, 3, 64, 64]) etc + similarity_map (tensor): confidence map with shape (t, 1, h, w), torch.Size([26, 1, 64, 64]) etc + color_input (tensor): ColorVid input with shape (n, t, 4, h, w), torch.Size([1, 26, 4, 64, 64]) etc + IA_ab_predict (tensor): ColorVid output with shape (n, t, 2, h, w), torch.Size([1, 26, 2, 64, 64]) etc + ''' + nonlocal_BA_lab, similarity_map, features_A_gray = warp_color_wBasicVSR( + IA_l, IB_lab, features_B, vggnet, nonlocal_net, colornet, feature_noise, temperature=temperature + ) + nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :] + + color_input = torch.cat((IA_l[0], nonlocal_BA_ab, similarity_map), dim=1).unsqueeze(0) + # color_input = torch.cat((IA_l, nonlocal_BA_ab, similarity_map, IA_last_lab), dim=1) + + IA_ab_predict = colornet(color_input) + + # print(IA_l.shape, nonlocal_BA_ab.shape, similarity_map.shape, color_input.shape, IA_ab_predict.shape) + # assert 1==0 + + return IA_ab_predict, nonlocal_BA_lab, features_A_gray + + +def frame_colorization( + IA_lab, + IB_lab, + IA_last_lab, + features_B, + vggnet, + nonlocal_net, + colornet, + joint_training=True, + feature_noise=0, + luminance_noise=0, + temperature=0.01, +): + + IA_l = IA_lab[:, 0:1, :, :] + if luminance_noise: + IA_l = IA_l + torch.randn_like(IA_l, requires_grad=False) * luminance_noise + + with 
torch.autograd.set_grad_enabled(joint_training): + nonlocal_BA_lab, similarity_map, features_A_gray = warp_color( + IA_l, IB_lab, features_B, vggnet, nonlocal_net, colornet, feature_noise, temperature=temperature + ) + nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :] + color_input = torch.cat((IA_l, nonlocal_BA_ab, similarity_map, IA_last_lab), dim=1) + IA_ab_predict = colornet(color_input) + + return IA_ab_predict, nonlocal_BA_lab, features_A_gray + +def frame_colorization_20230311_tcvc( + IA_lab, + IB_lab, + IA_last_lab, + features_B, + vggnet, + nonlocal_net, + colornet, + joint_training=True, + feature_noise=0, + luminance_noise=0, + temperature=0.01, +): + + IA_l = IA_lab[:, 0:1, :, :] + if luminance_noise: + IA_l = IA_l + torch.randn_like(IA_l, requires_grad=False) * luminance_noise + + with torch.no_grad(): + nonlocal_BA_lab, similarity_map, features_A_gray = warp_color( + IA_l, IB_lab, features_B, vggnet, nonlocal_net, colornet, feature_noise, temperature=temperature + ) + nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :] + + return nonlocal_BA_lab, similarity_map + +def frame_colorization_0618_tcvc( + IA_lab, + IB_lab, + IA_last_lab, + features_B, + vggnet, + nonlocal_net, + colornet, + joint_training=True, + feature_noise=0, + luminance_noise=0, + temperature=0.01, +): + + IA_l = IA_lab[:, 0:1, :, :] + if luminance_noise: + IA_l = IA_l + torch.randn_like(IA_l, requires_grad=False) * luminance_noise + + with torch.no_grad(): + nonlocal_BA_lab, similarity_map, features_A_gray = warp_color( + IA_l, IB_lab, features_B, vggnet, nonlocal_net, colornet, feature_noise, temperature=temperature + ) + nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :] + color_input = torch.cat((IA_l, nonlocal_BA_ab, similarity_map, IA_last_lab), dim=1) + + with torch.autograd.set_grad_enabled(joint_training): + IA_ab_predict = colornet(color_input) + + return IA_ab_predict, nonlocal_BA_lab, features_A_gray, similarity_map + +def frame_colorization_0618_tcvc_v0_baseline_double( + IA_lab, + IB_lab1, + IB_lab2, + IA_last_lab, + features_B1, + features_B2, + vggnet, + nonlocal_net, + colornet, + joint_training=True, + feature_noise=0, + luminance_noise=0, + temperature=0.01, +): + + IA_l = IA_lab[:, 0:1, :, :] + if luminance_noise: + IA_l = IA_l + torch.randn_like(IA_l, requires_grad=False) * luminance_noise + + with torch.no_grad(): + nonlocal_BA_lab, similarity_map, features_A_gray = warp_color_v0_baseline_double( + IA_l, IB_lab1, IB_lab2, features_B1, features_B2, vggnet, nonlocal_net, colornet, feature_noise, temperature=temperature + ) + nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :] + color_input = torch.cat((IA_l, nonlocal_BA_ab, similarity_map, IA_last_lab), dim=1) + + with torch.autograd.set_grad_enabled(joint_training): + IA_ab_predict = colornet(color_input) + + return IA_ab_predict, nonlocal_BA_lab, features_A_gray, similarity_map + + +# def frame_colorization_0618_tcvc( +# IA_lab, +# IB_lab, +# IA_last_lab, +# features_B, +# vggnet, +# nonlocal_net, +# colornet, +# joint_training=True, +# feature_noise=0, +# luminance_noise=0, +# temperature=0.01, +# ): + +# IA_l = IA_lab[:, 0:1, :, :] +# if luminance_noise: +# IA_l = IA_l + torch.randn_like(IA_l, requires_grad=False) * luminance_noise + +# with torch.autograd.set_grad_enabled(joint_training): +# nonlocal_BA_lab, similarity_map, features_A_gray = warp_color( +# IA_l, IB_lab, features_B, vggnet, nonlocal_net, colornet, feature_noise, temperature=temperature +# ) +# nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :] +# color_input = torch.cat((IA_l, 
nonlocal_BA_ab, similarity_map, IA_last_lab), dim=1) +# IA_ab_predict = colornet(color_input) + +# return IA_ab_predict, nonlocal_BA_lab, features_A_gray, similarity_map \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/GAN_models.py b/BiSTNet-NTIRE2023/models/GAN_models.py new file mode 100644 index 0000000..3dddc07 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/GAN_models.py @@ -0,0 +1,393 @@ +# DCGAN-like generator and discriminator +import torch +import torch.nn.functional as F +from torch import nn + +from models.spectral_normalization import SpectralNorm + + +class Generator(nn.Module): + def __init__(self, z_dim): + super(Generator, self).__init__() + self.z_dim = z_dim + + self.model = nn.Sequential( + nn.ConvTranspose2d(z_dim, 512, 4, stride=1), + nn.InstanceNorm2d(512), + nn.ReLU(), + nn.ConvTranspose2d(512, 256, 4, stride=2, padding=(1, 1)), + nn.InstanceNorm2d(256), + nn.ReLU(), + nn.ConvTranspose2d(256, 128, 4, stride=2, padding=(1, 1)), + nn.InstanceNorm2d(128), + nn.ReLU(), + nn.ConvTranspose2d(128, 64, 4, stride=2, padding=(1, 1)), + nn.InstanceNorm2d(64), + nn.ReLU(), + nn.ConvTranspose2d(64, channels, 3, stride=1, padding=(1, 1)), + nn.Tanh(), + ) + + def forward(self, z): + return self.model(z.view(-1, self.z_dim, 1, 1)) + + +channels = 3 +leak = 0.1 +w_g = 4 + + +class Discriminator(nn.Module): + def __init__(self): + super(Discriminator, self).__init__() + + self.conv1 = SpectralNorm(nn.Conv2d(channels, 64, 3, stride=1, padding=(1, 1))) + self.conv2 = SpectralNorm(nn.Conv2d(64, 64, 4, stride=2, padding=(1, 1))) + self.conv3 = SpectralNorm(nn.Conv2d(64, 128, 3, stride=1, padding=(1, 1))) + self.conv4 = SpectralNorm(nn.Conv2d(128, 128, 4, stride=2, padding=(1, 1))) + self.conv5 = SpectralNorm(nn.Conv2d(128, 256, 3, stride=1, padding=(1, 1))) + self.conv6 = SpectralNorm(nn.Conv2d(256, 256, 4, stride=2, padding=(1, 1))) + self.conv7 = SpectralNorm(nn.Conv2d(256, 256, 3, stride=1, padding=(1, 1))) + self.conv8 = SpectralNorm(nn.Conv2d(256, 512, 4, stride=2, padding=(1, 1))) + self.fc = SpectralNorm(nn.Linear(w_g * w_g * 512, 1)) + + def forward(self, x): + m = x + m = nn.LeakyReLU(leak)(self.conv1(m)) + m = nn.LeakyReLU(leak)(nn.InstanceNorm2d(64)(self.conv2(m))) + m = nn.LeakyReLU(leak)(nn.InstanceNorm2d(128)(self.conv3(m))) + m = nn.LeakyReLU(leak)(nn.InstanceNorm2d(128)(self.conv4(m))) + m = nn.LeakyReLU(leak)(nn.InstanceNorm2d(256)(self.conv5(m))) + m = nn.LeakyReLU(leak)(nn.InstanceNorm2d(256)(self.conv6(m))) + m = nn.LeakyReLU(leak)(nn.InstanceNorm2d(256)(self.conv7(m))) + m = nn.LeakyReLU(leak)(self.conv8(m)) + + return self.fc(m.view(-1, w_g * w_g * 512)) + + +class Self_Attention(nn.Module): + """ Self attention Layer""" + + def __init__(self, in_dim): + super(Self_Attention, self).__init__() + self.chanel_in = in_dim + + self.query_conv = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 1, kernel_size=1)) + self.key_conv = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 1, kernel_size=1)) + self.value_conv = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)) + self.gamma = nn.Parameter(torch.zeros(1)) + + self.softmax = nn.Softmax(dim=-1) # + + def forward(self, x): + """ + inputs : + x : input feature maps( B X C X W X H) + returns : + out : self attention value + input feature + attention: B X N X N (N is Width*Height) + """ + m_batchsize, C, width, height = x.size() + proj_query = self.query_conv(x).view(m_batchsize, -1, width * height).permute(0, 2, 1) # B X CX(N) + proj_key = 
self.key_conv(x).view(m_batchsize, -1, width * height) # B X C x (*W*H) + energy = torch.bmm(proj_query, proj_key) # transpose check + attention = self.softmax(energy) # BX (N) X (N) + proj_value = self.value_conv(x).view(m_batchsize, -1, width * height) # B X C X N + + out = torch.bmm(proj_value, attention.permute(0, 2, 1)) + out = out.view(m_batchsize, C, width, height) + + out = self.gamma * out + x + return out + + +class Discriminator_x64(nn.Module): + """ + Discriminative Network + """ + + def __init__(self, in_size=6, ndf=64): + super(Discriminator_x64, self).__init__() + self.in_size = in_size + self.ndf = ndf + + self.layer1 = nn.Sequential( + SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)), nn.LeakyReLU(0.2, inplace=True) + ) + self.layer2 = nn.Sequential( + SpectralNorm(nn.Conv2d(self.ndf, self.ndf, 4, 2, 1)), + nn.InstanceNorm2d(self.ndf), + nn.LeakyReLU(0.2, inplace=True), + ) + self.attention = Self_Attention(self.ndf) + self.layer3 = nn.Sequential( + SpectralNorm(nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1)), + nn.InstanceNorm2d(self.ndf * 2), + nn.LeakyReLU(0.2, inplace=True), + ) + self.layer4 = nn.Sequential( + SpectralNorm(nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1)), + nn.InstanceNorm2d(self.ndf * 4), + nn.LeakyReLU(0.2, inplace=True), + ) + self.layer5 = nn.Sequential( + SpectralNorm(nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1)), + nn.InstanceNorm2d(self.ndf * 8), + nn.LeakyReLU(0.2, inplace=True), + ) + self.layer6 = nn.Sequential( + SpectralNorm(nn.Conv2d(self.ndf * 8, self.ndf * 16, 4, 2, 1)), + nn.InstanceNorm2d(self.ndf * 16), + nn.LeakyReLU(0.2, inplace=True), + ) + + self.last = SpectralNorm(nn.Conv2d(self.ndf * 16, 1, [3, 6], 1, 0)) + + def forward(self, input): + feature1 = self.layer1(input) + feature2 = self.layer2(feature1) + feature_attention = self.attention(feature2) + feature3 = self.layer3(feature_attention) + feature4 = self.layer4(feature3) + feature5 = self.layer5(feature4) + feature6 = self.layer6(feature5) + output = self.last(feature6) + output = F.avg_pool2d(output, output.size()[2:]).view(output.size()[0], -1) + + return output, feature4 + + +# class Discriminator_WITHOUT_FC_x64_video(nn.Module): +# """ +# Discriminative Network +# """ + +# def __init__(self, in_size=6, ndf=64): +# super(Discriminator_WITHOUT_FC_x64_video, self).__init__() +# self.in_size = in_size +# self.ndf = ndf + +# self.layer1 = nn.Sequential( +# # input size is in_size x 64 x 64 +# SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)), +# nn.LeakyReLU(0.2, inplace=True), +# ) +# self.layer2 = nn.Sequential( +# # state size: ndf x 32 x 32 +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 2), +# nn.LeakyReLU(0.2, inplace=True), +# ) +# self.layer3 = nn.Sequential( +# # state size: (ndf * 2) x 16 x 16 +# SpectralNorm(nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 4), +# nn.LeakyReLU(0.2, inplace=True), +# ) +# self.layer4 = nn.Sequential( +# # state size: (ndf * 4) x 8 x 8 +# # Self_Attention(self.ndf * 4, 'relu'), +# SpectralNorm(nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 8), +# nn.LeakyReLU(0.2, inplace=True), +# ) + +# self.last = SpectralNorm(nn.Conv2d(self.ndf * 8, 1, [3, 6], 1, 0)) + +# def forward(self, input): +# feature1 = self.layer1(input) +# feature2 = self.layer2(feature1) +# feature3 = self.layer3(feature2) +# feature4 = self.layer4(feature3) +# output = self.last(feature4) +# output = F.avg_pool2d(output, 
output.size()[2:]).view(output.size()[0], -1) + +# return output, feature4 + + +# class Discriminator_WITHOUT_FC_x64(nn.Module): +# """ +# Discriminative Network +# """ + +# def __init__(self, in_size=3, ndf=64): +# super(Discriminator_WITHOUT_FC_x64, self).__init__() +# self.in_size = in_size +# self.ndf = ndf + +# self.main = nn.Sequential( +# # input size is in_size x 64 x 64 +# SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: ndf x 32 x 32 +# Self_Attention(self.ndf), +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 2), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: (ndf * 2) x 16 x 16 +# SpectralNorm(nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 4), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: (ndf * 4) x 8 x 8 +# SpectralNorm(nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 8), +# nn.LeakyReLU(0.2, inplace=True), +# # sate size: (ndf * 8) x 4 x 4 +# ) + +# self.last = SpectralNorm(nn.Conv2d(self.ndf * 8, 1, 4, 1, 0)) + +# def forward(self, input): + +# feature = self.main(input) +# output = self.last(feature) +# output = F.avg_pool2d(output, output.size()[2:]).view(output.size()[0], -1) + +# return output, feature + + +# class Discriminator_WITHOUT_FC_x64_BIG(nn.Module): +# """ +# Discriminative Network +# """ + +# def __init__(self, in_size=3, ndf=64): +# super(Discriminator_WITHOUT_FC_x64_BIG, self).__init__() +# self.in_size = in_size +# self.ndf = ndf + +# self.main = nn.Sequential( +# # 256 X 256 +# SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)), +# nn.LeakyReLU(0.2, inplace=True), +# # 128 X 128 +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf), +# nn.LeakyReLU(0.2, inplace=True), +# # 64 X 64 +# Self_Attention(self.ndf), +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 2), +# nn.LeakyReLU(0.2, inplace=True), +# # 32 X 32 +# SpectralNorm(nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 4), +# nn.LeakyReLU(0.2, inplace=True), +# # 16 X 16 +# SpectralNorm(nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 8), +# nn.LeakyReLU(0.2, inplace=True), +# # 8 X 8 +# SpectralNorm(nn.Conv2d(self.ndf * 8, self.ndf * 16, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 16), +# nn.LeakyReLU(0.2, inplace=True), +# ) +# # 4 X 4 +# self.last = SpectralNorm(nn.Conv2d(self.ndf * 16, 1, 4, 1, 0)) + +# def forward(self, input): + +# feature = self.main(input) +# output = self.last(feature) +# output = F.avg_pool2d(output, output.size()[2:]).view(output.size()[0], -1) + +# return output, feature + + +# class Discriminator_WITHOUT_FC_x128(nn.Module): +# """ +# Discriminative Network +# """ + +# def __init__(self, in_size=3, ndf=64): +# super(Discriminator_WITHOUT_FC_x128, self).__init__() +# self.in_size = in_size +# self.ndf = ndf + +# self.main = nn.Sequential( +# # input size is in_size x 128 x 128 +# SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: ndf x 64 x 64 +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: ndf x 32 x 32 +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 2), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: (ndf * 24) x 16 x 16 +# 
SpectralNorm(nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 4), +# nn.LeakyReLU(0.2, inplace=True), +# # sate size: (ndf * 4) x 8 x 8 +# SpectralNorm(nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 8), +# nn.LeakyReLU(0.2, inplace=True), +# # sate size: (ndf * 8) x 4 x 4 +# ) + +# self.last = SpectralNorm(nn.Conv2d(self.ndf * 8, 1, 4, 1, 0)) + +# def forward(self, input): + +# feature = self.main(input) +# output = self.last(feature) + +# return output, feature + + +# class Discriminator_WITHOUT_FC_x256(nn.Module): +# """ +# Discriminative Network +# """ + +# def __init__(self, in_size=3, ndf=64): +# super(Discriminator_WITHOUT_FC_x256, self).__init__() +# self.in_size = in_size +# self.ndf = ndf + +# self.main = nn.Sequential( +# # input size is in_size x 256 x 256 +# SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: ndf x 128 x 128 +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: ndf x 64 x 64 +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: (ndf * 1) x 32 x 32 +# SpectralNorm(nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 2), +# nn.LeakyReLU(0.2, inplace=True), +# # state size: (ndf * 2) x 16 x 16 +# SpectralNorm(nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 4), +# nn.LeakyReLU(0.2, inplace=True), +# # sate size: (ndf * 4) x 8 x 8 +# SpectralNorm(nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1)), +# nn.InstanceNorm2d(self.ndf * 8), +# nn.LeakyReLU(0.2, inplace=True), +# # sate size: (ndf * 8) x 4 x 4 +# ) + +# self.last = SpectralNorm(nn.Conv2d(self.ndf * 8, 1, 4, 1, 0)) + +# def forward(self, input): + +# feature = self.main(input) +# output = self.last(feature) + +# return output, feature + + +# if __name__ == "__main__": +# discriminator1 = Discriminator_WITHOUT_FC_x64() +# discriminator2 = Discriminator_WITHOUT_FC_x128() +# discriminator3 = Discriminator_WITHOUT_FC_x256() +# print(discriminator1) +# print(discriminator2) +# print(discriminator3) diff --git a/BiSTNet-NTIRE2023/models/NonlocalNet.py b/BiSTNet-NTIRE2023/models/NonlocalNet.py new file mode 100644 index 0000000..e4f7db5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/NonlocalNet.py @@ -0,0 +1,926 @@ +import sys + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision import models as torch_models +from utils.util import uncenter_l, vgg_preprocess + +from models.vgg19_gray import vgg19_gray, vgg19_gray_new + +from torchvision import utils as vutils + +def save_image_tensor(input_tensor: torch.Tensor, filename): + assert (len(input_tensor.shape) == 4 and input_tensor.shape[0] == 1) + input_tensor = input_tensor.clone().detach() + input_tensor = input_tensor.to(torch.device('cpu')) + vutils.save_image(input_tensor, filename) + +def print_wName(var_list): + for idx, var in enumerate(var_list): + my_var_name = [ k for k,v in locals().iteritems() if v == var][0] + print('idx: %s, %s: ,%s'%(my_var_name, var)) + + +def find_local_patch(x, patch_size): + N, C, H, W = x.shape + x_unfold = F.unfold( + x, kernel_size=(patch_size, patch_size), padding=(patch_size // 2, patch_size // 2), stride=(1, 1) + ) + + return x_unfold.view(N, x_unfold.shape[1], H, W) + + +class WeightedAverage(nn.Module): + def __init__( + self, + ): + 
super(WeightedAverage, self).__init__() + + def forward(self, x_lab, patch_size=3, alpha=1, scale_factor=1): + # alpha=0: less smooth; alpha=inf: smoother + x_lab = F.interpolate(x_lab, scale_factor=scale_factor) + l = x_lab[:, 0:1, :, :] + a = x_lab[:, 1:2, :, :] + b = x_lab[:, 2:3, :, :] + local_l = find_local_patch(l, patch_size) + local_a = find_local_patch(a, patch_size) + local_b = find_local_patch(b, patch_size) + local_difference_l = (local_l - l) ** 2 + correlation = nn.functional.softmax(-1 * local_difference_l / alpha, dim=1) + + return torch.cat( + ( + torch.sum(correlation * local_a, dim=1, keepdim=True), + torch.sum(correlation * local_b, dim=1, keepdim=True), + ), + 1, + ) + + +class WeightedAverage_color(nn.Module): + """ + smooth the image according to the color distance in the LAB space + """ + + def __init__( + self, + ): + super(WeightedAverage_color, self).__init__() + + def forward(self, x_lab, x_lab_predict, patch_size=3, alpha=1, scale_factor=1): + """ alpha=0: less smooth; alpha=inf: smoother """ + x_lab = F.interpolate(x_lab, scale_factor=scale_factor) + l = uncenter_l(x_lab[:, 0:1, :, :]) + a = x_lab[:, 1:2, :, :] + b = x_lab[:, 2:3, :, :] + a_predict = x_lab_predict[:, 1:2, :, :] + b_predict = x_lab_predict[:, 2:3, :, :] + local_l = find_local_patch(l, patch_size) + local_a = find_local_patch(a, patch_size) + local_b = find_local_patch(b, patch_size) + local_a_predict = find_local_patch(a_predict, patch_size) + local_b_predict = find_local_patch(b_predict, patch_size) + + local_color_difference = (local_l - l) ** 2 + (local_a - a) ** 2 + (local_b - b) ** 2 + correlation = nn.functional.softmax( + -1 * local_color_difference / alpha, dim=1 + ) # so that sum of weights equal to 1 + + return torch.cat( + ( + torch.sum(correlation * local_a_predict, dim=1, keepdim=True), + torch.sum(correlation * local_b_predict, dim=1, keepdim=True), + ), + 1, + ) + + +class NonlocalWeightedAverage(nn.Module): + def __init__( + self, + ): + super(NonlocalWeightedAverage, self).__init__() + + def forward(self, x_lab, feature, patch_size=3, alpha=0.1, scale_factor=1): + # alpha=0: less smooth; alpha=inf: smoother + # input feature is normalized feature + x_lab = F.interpolate(x_lab, scale_factor=scale_factor) + batch_size, channel, height, width = x_lab.shape + feature = F.interpolate(feature, size=(height, width)) + batch_size = x_lab.shape[0] + x_ab = x_lab[:, 1:3, :, :].view(batch_size, 2, -1) + x_ab = x_ab.permute(0, 2, 1) + + local_feature = find_local_patch(feature, patch_size) + local_feature = local_feature.view(batch_size, local_feature.shape[1], -1) + + correlation_matrix = torch.matmul(local_feature.permute(0, 2, 1), local_feature) + correlation_matrix = nn.functional.softmax(correlation_matrix / alpha, dim=-1) + + weighted_ab = torch.matmul(correlation_matrix, x_ab) + weighted_ab = weighted_ab.permute(0, 2, 1).contiguous() + weighted_ab = weighted_ab.view(batch_size, 2, height, width) + return weighted_ab + + +class CorrelationLayer(nn.Module): + def __init__(self, search_range): + super(CorrelationLayer, self).__init__() + self.search_range = search_range + + def forward(self, x1, x2, alpha=1, raw_output=False, metric="similarity"): + shape = list(x1.size()) + shape[1] = (self.search_range * 2 + 1) ** 2 + cv = torch.zeros(shape).to(torch.device("cuda")) + + for i in range(-self.search_range, self.search_range + 1): + for j in range(-self.search_range, self.search_range + 1): + if i < 0: + slice_h, slice_h_r = slice(None, i), slice(-i, None) + elif i > 0: + slice_h, 
slice_h_r = slice(i, None), slice(None, -i) + else: + slice_h, slice_h_r = slice(None), slice(None) + + if j < 0: + slice_w, slice_w_r = slice(None, j), slice(-j, None) + elif j > 0: + slice_w, slice_w_r = slice(j, None), slice(None, -j) + else: + slice_w, slice_w_r = slice(None), slice(None) + + if metric == "similarity": + cv[:, (self.search_range * 2 + 1) * i + j, slice_h, slice_w] = ( + x1[:, :, slice_h, slice_w] * x2[:, :, slice_h_r, slice_w_r] + ).sum(1) + else: # patchwise subtraction + cv[:, (self.search_range * 2 + 1) * i + j, slice_h, slice_w] = -( + (x1[:, :, slice_h, slice_w] - x2[:, :, slice_h_r, slice_w_r]) ** 2 + ).sum(1) + + # TODO sigmoid? + if raw_output: + return cv + else: + return nn.functional.softmax(cv / alpha, dim=1) + + +class Self_Attn(nn.Module): + """ Self attention Layer""" + + def __init__(self, in_dim, activation): + super(Self_Attn, self).__init__() + self.chanel_in = in_dim + self.activation = activation + + self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 4, kernel_size=1) + self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 4, kernel_size=1) + self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.gamma = nn.Parameter(torch.zeros(1)) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x): + """ + inputs : + x : input feature maps(B X C X W X H) + returns : + out : self attention value + input feature + attention: B X N X N (N is Width*Height) + """ + m_batchsize, C, width, height = x.size() + proj_query = self.query_conv(x).view(m_batchsize, -1, width * height).permute(0, 2, 1) # B X N X C + proj_key = self.key_conv(x).view(m_batchsize, -1, width * height) # B X C x N + energy = torch.bmm(proj_query, proj_key) # transpose check + attention = self.softmax(energy) # B X (N) X (N) + proj_value = self.value_conv(x).view(m_batchsize, -1, width * height) # B X C X N + + out = torch.bmm(proj_value, attention.permute(0, 1, 2)) + out = out.view(m_batchsize, C, width, height) + + out = self.gamma * out + x + return out + + +class VGG19_pytorch(nn.Module): + """ + NOTE: no need to pre-process the input; input tensor should range in [0,1] + """ + + def __init__(self, pool="max"): + super(VGG19_pytorch, self).__init__() + self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1) + self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1) + self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1) + self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1) + self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1) + self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1) + self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1) + self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1) + self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1) + self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1) + self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1) + self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1) + self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1) + self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1) + self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1) + self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1) + if pool == "max": + self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) + self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) + self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2) + self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2) + self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2) + 
elif pool == "avg": + self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2) + self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2) + self.pool3 = nn.AvgPool2d(kernel_size=2, stride=2) + self.pool4 = nn.AvgPool2d(kernel_size=2, stride=2) + self.pool5 = nn.AvgPool2d(kernel_size=2, stride=2) + + def forward(self, x, out_keys, preprocess=True): + """ + NOTE: input tensor should range in [0,1] + """ + out = {} + if preprocess: + x = vgg_preprocess(x) + out["r11"] = F.relu(self.conv1_1(x)) + out["r12"] = F.relu(self.conv1_2(out["r11"])) + out["p1"] = self.pool1(out["r12"]) + out["r21"] = F.relu(self.conv2_1(out["p1"])) + out["r22"] = F.relu(self.conv2_2(out["r21"])) + out["p2"] = self.pool2(out["r22"]) + out["r31"] = F.relu(self.conv3_1(out["p2"])) + out["r32"] = F.relu(self.conv3_2(out["r31"])) + out["r33"] = F.relu(self.conv3_3(out["r32"])) + out["r34"] = F.relu(self.conv3_4(out["r33"])) + out["p3"] = self.pool3(out["r34"]) + out["r41"] = F.relu(self.conv4_1(out["p3"])) + out["r42"] = F.relu(self.conv4_2(out["r41"])) + out["r43"] = F.relu(self.conv4_3(out["r42"])) + out["r44"] = F.relu(self.conv4_4(out["r43"])) + out["p4"] = self.pool4(out["r44"]) + out["r51"] = F.relu(self.conv5_1(out["p4"])) + out["r52"] = F.relu(self.conv5_2(out["r51"])) + out["r53"] = F.relu(self.conv5_3(out["r52"])) + out["r54"] = F.relu(self.conv5_4(out["r53"])) + out["p5"] = self.pool5(out["r54"]) + return [out[key] for key in out_keys] + + +class VGG19_feature_color(nn.Module): + def __init__(self): + super(VGG19_feature_color, self).__init__() + # self.select = ['0', '5', '10', '19', '28'] # Select conv1_1 ~ conv5_1 activation maps. + self.select = ["1", "6", "11", "20", "29"] # Select relu1_1 ~ relu5_1 activation maps. + self.vgg = torch_models.vgg19(pretrained=True).features + + def forward(self, x): + """Extract multiple convolutional feature maps.""" + features = [] + for name, layer in self.vgg._modules.items(): + if name in self.select: + x = layer(x) + features.append(x) + return features + + +class VGG19_feature(nn.Module): + # input: [LLL] channels, range=[0,255] + def __init__(self, gpu_ids): + super(VGG19_feature, self).__init__() + self.vgg19_gray = vgg19_gray().cuda() + + def forward(self, A_l, B_l): + A_relu3_1, A_relu4_1, A_relu5_1 = self.vgg19_gray(A_l) + B_relu3_1, B_relu4_1, B_relu5_1 = self.vgg19_gray(B_l) + return A_relu3_1, A_relu4_1, A_relu5_1, B_relu3_1, B_relu4_1, B_relu5_1 + + +class WTA_scale(torch.autograd.Function): + """ + We can implement our own custom autograd Functions by subclassing + torch.autograd.Function and implementing the forward and backward passes + which operate on Tensors. + """ + + @staticmethod + def forward(ctx, input, scale=1e-4): + """ + In the forward pass we receive a Tensor containing the input and return a + Tensor containing the output. You can cache arbitrary Tensors for use in the + backward pass using the save_for_backward method. + """ + activation_max, index_max = torch.max(input, -1, keepdim=True) + input_scale = input * scale # default: 1e-4 + # input_scale = input * scale # default: 1e-4 + output_max_scale = torch.where(input == activation_max, input, input_scale) + + mask = (input == activation_max).type(torch.float) + ctx.save_for_backward(input, mask) + return output_max_scale + + @staticmethod + def backward(ctx, grad_output): + """ + In the backward pass we receive a Tensor containing the gradient of the loss + with respect to the output, and we need to compute the gradient of the loss + with respect to the input. 
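+
+        In short: positions that held the maximum along the last dimension in the forward
+        pass receive the full gradient, while every other position has its gradient scaled
+        by a small constant (1e-4 below), mirroring the winner-take-all scaling of forward.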
+ """ + # import pdb + # pdb.set_trace() + input, mask = ctx.saved_tensors + mask_ones = torch.ones_like(mask) + mask_small_ones = torch.ones_like(mask) * 1e-4 + # mask_small_ones = torch.ones_like(mask) * 1e-4 + + grad_scale = torch.where(mask == 1, mask_ones, mask_small_ones) + grad_input = grad_output.clone() * grad_scale + return grad_input, None + + +class ResidualBlock(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, stride=1): + super(ResidualBlock, self).__init__() + self.padding1 = nn.ReflectionPad2d(padding) + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=0, stride=stride) + self.bn1 = nn.InstanceNorm2d(out_channels) + self.prelu = nn.PReLU() + self.padding2 = nn.ReflectionPad2d(padding) + self.conv2 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=0, stride=stride) + self.bn2 = nn.InstanceNorm2d(out_channels) + + def forward(self, x): + residual = x + out = self.padding1(x) + out = self.conv1(out) + out = self.bn1(out) + out = self.prelu(out) + out = self.padding2(out) + out = self.conv2(out) + out = self.bn2(out) + out += residual + out = self.prelu(out) + return out + +a = -1 +class WarpNet_debug(nn.Module): + """ input is Al, Bl, channel = 1, range~[0,255] """ + + def __init__(self, batch_size): + super(WarpNet_debug, self).__init__() + self.feature_channel = 64 + self.in_channels = self.feature_channel * 4 + self.inter_channels = 256 + # 44*44 + self.layer2_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(128, 128, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(128), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=2), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + ) + self.layer3_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(256, 128, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(128), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + ) + + # 22*22->44*44 + self.layer4_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(256), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + nn.Upsample(scale_factor=2), + ) + + # 11*11->44*44 + self.layer5_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(256), + nn.PReLU(), + nn.Upsample(scale_factor=2), + nn.ReflectionPad2d(1), + nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + nn.Upsample(scale_factor=2), + ) + + self.layer = nn.Sequential( + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ) + + self.theta = nn.Conv2d( + in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0 + ) + self.phi = nn.Conv2d( + in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0 + ) + + self.upsampling = nn.Upsample(scale_factor=4) + + 
def forward( + self, + B_lab_map, + A_relu2_1, + A_relu3_1, + A_relu4_1, + A_relu5_1, + B_relu2_1, + B_relu3_1, + B_relu4_1, + B_relu5_1, + temperature=0.001 * 5, + detach_flag=False, + WTA_scale_weight=1, + feature_noise=0, + ): + # flag_check = True + flag_check = False + check_idx = 0 + + '''print a=0 means current pic is: 00000.jpg, etc''' + if flag_check: + global a + a += 1 + print(a) + + + batch_size = B_lab_map.shape[0] + channel = B_lab_map.shape[1] + image_height = B_lab_map.shape[2] + image_width = B_lab_map.shape[3] + feature_height = int(image_height / 4) + feature_width = int(image_width / 4) + + # scale feature size to 44*44 + A_feature2_1 = self.layer2_1(A_relu2_1) + B_feature2_1 = self.layer2_1(B_relu2_1) + A_feature3_1 = self.layer3_1(A_relu3_1) + B_feature3_1 = self.layer3_1(B_relu3_1) + A_feature4_1 = self.layer4_1(A_relu4_1) + B_feature4_1 = self.layer4_1(B_relu4_1) + A_feature5_1 = self.layer5_1(A_relu5_1) + B_feature5_1 = self.layer5_1(B_relu5_1) + + # concatenate features + if A_feature5_1.shape[2] != A_feature2_1.shape[2] or A_feature5_1.shape[3] != A_feature2_1.shape[3]: + A_feature5_1 = F.pad(A_feature5_1, (0, 0, 1, 1), "replicate") + B_feature5_1 = F.pad(B_feature5_1, (0, 0, 1, 1), "replicate") + A_features = self.layer(torch.cat((A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1), 1)) + B_features = self.layer(torch.cat((B_feature2_1, B_feature3_1, B_feature4_1, B_feature5_1), 1)) + + if a == check_idx and flag_check: + # print(A_features.shape) + # print(B_features.shape) + vutils.save_image(A_features.detach().cpu().permute(1, 0, 2, 3), 'A_features.png', nrow=16, padding=20, normalize=True, pad_value=1) + vutils.save_image(B_features.detach().cpu().permute(1, 0, 2, 3), 'B_features.png', nrow=16, padding=20, normalize=True, pad_value=1) + + # pairwise cosine similarity + theta = self.theta(A_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + # # tmp1 = theta.mean(dim=-1, keepdim=True).clone().detach().cpu() + # tmp1 = theta.clone().detach().cpu() + theta = theta - theta.mean(dim=-1, keepdim=True) # center the feature + theta_norm = torch.norm(theta, 2, 1, keepdim=True) + sys.float_info.epsilon + theta = torch.div(theta, theta_norm) + theta_permute = theta.permute(0, 2, 1) # 2*(feature_height*feature_width)*256 + + '''Note! 
I choise to use gray feature extraction, so only self.theta should be considered''' + phi = self.theta(B_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + # phi = self.phi(B_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + # tmp2 = phi.mean(dim=-1, keepdim=True).clone().detach().cpu() + # tmp2 = phi.clone().detach().cpu() + phi = phi - phi.mean(dim=-1, keepdim=True) # center the feature + phi_norm = torch.norm(phi, 2, 1, keepdim=True) + sys.float_info.epsilon + phi = torch.div(phi, phi_norm) + f = torch.matmul(theta_permute, phi) # 2*(feature_height*feature_width)*(feature_height*feature_width) + if detach_flag: + f = f.detach() + + # print('check theta - phi equal') + # diff = theta - phi + # print(diff.sum()) + # print('check A_features - B_features equal') + # diff1 = A_features - A_features + # print(diff1.sum()) + # print('check tmp equal') + # diff2 = tmp1 - tmp2 + # print(diff2.sum()) + + f_similarity = f.unsqueeze_(dim=1) + similarity_map = torch.max(f_similarity, -1, keepdim=True)[0] + similarity_map = similarity_map.view(batch_size, 1, feature_height, feature_width) + + if a == check_idx and flag_check: + # similarity map / confidence map + print('aaaaaaaaaaaaaaaaaaaaaaaaa') + print(f_similarity.shape) + + if a == check_idx and flag_check: + print(similarity_map.max(), similarity_map.min()) + # vutils.save_image(f_similarity.detach().cpu().permute(1, 0, 2, 3), 'f_similarity.png', nrow=1, padding=20, normalize=True, pad_value=1) + + # f can be negative + f_WTA = f if WTA_scale_weight == 1 else WTA_scale.apply(f, WTA_scale_weight) + f_WTA = f_WTA / temperature + f_div_C = F.softmax(f_WTA.squeeze_(), dim=-1) # 2*1936*1936; + + # print('f_div_C_shape') + # print(f_div_C.shape) + # vutils.save_image(f_div_C.view(1, 2, feature_height, feature_width).detach().cpu().permute(1, 0, 2, 3), 'f_div_C.png', nrow=1, padding=20, normalize=True, pad_value=1) + + + # downsample the reference color + B_lab = F.avg_pool2d(B_lab_map, 4) + B_lab = B_lab.view(batch_size, channel, -1) + B_lab = B_lab.permute(0, 2, 1) # 2*1936*channel + # multiply the corr map with color + y = torch.matmul(f_div_C, B_lab) # 2*1936*channel + y = y.permute(0, 2, 1).contiguous() + y = y.view(batch_size, channel, feature_height, feature_width) # 2*3*44*44 + y = self.upsampling(y) + similarity_map = self.upsampling(similarity_map) + + if a == check_idx and flag_check: + print('bbbbbbbbbbbbbb') + print(similarity_map.shape) + save_image_tensor(similarity_map, 'similarity_map.png') + vutils.save_image(similarity_map.detach().cpu(), 'similarity_map_v2.png', nrow=1, padding=0, normalize=True, pad_value=1) + print('f_similarity: %s-%s similarity_map: %s-%s'%(f_similarity.max(), f_similarity.min(), similarity_map.max(), similarity_map.min())) + assert 1==0 + + return y, similarity_map + +a = -1 +class WarpNet_debug_v0_baseline_double(nn.Module): + """ input is Al, Bl, channel = 1, range~[0,255] """ + + def __init__(self, batch_size): + super(WarpNet_debug_v0_baseline_double, self).__init__() + self.feature_channel = 64 + self.in_channels = self.feature_channel * 4 + self.inter_channels = 256 + # 44*44 + self.layer2_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(128, 128, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(128), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=2), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + ) + self.layer3_1 = nn.Sequential( 
+ nn.ReflectionPad2d(1), + nn.Conv2d(256, 128, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(128), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + ) + + # 22*22->44*44 + self.layer4_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(256), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + nn.Upsample(scale_factor=2), + ) + + # 11*11->44*44 + self.layer5_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(256), + nn.PReLU(), + nn.Upsample(scale_factor=2), + nn.ReflectionPad2d(1), + nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + nn.Upsample(scale_factor=2), + ) + + self.layer = nn.Sequential( + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ) + + self.theta = nn.Conv2d( + in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0 + ) + self.phi = nn.Conv2d( + in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0 + ) + + self.upsampling = nn.Upsample(scale_factor=4) + + def forward( + self, + B_lab_map1, + B_lab_map2, + A_relu2_1, + A_relu3_1, + A_relu4_1, + A_relu5_1, + B1_relu2_1, + B1_relu3_1, + B1_relu4_1, + B1_relu5_1, + B2_relu2_1, + B2_relu3_1, + B2_relu4_1, + B2_relu5_1, + + temperature=0.001 * 5, + detach_flag=False, + WTA_scale_weight=1, + feature_noise=0, + ): + # flag_check = True + flag_check = False + check_idx = 0 + + '''print a=0 means current pic is: 00000.jpg, etc''' + if flag_check: + global a + a += 1 + print(a) + + + batch_size = B_lab_map1.shape[0] + channel = B_lab_map1.shape[1] + image_height = B_lab_map1.shape[2] + image_width = B_lab_map1.shape[3] + feature_height = int(image_height / 4) + feature_width = int(image_width / 4) + + # scale feature size to 44*44 + A_feature2_1 = self.layer2_1(A_relu2_1) + B1_feature2_1 = self.layer2_1(B1_relu2_1) + B2_feature2_1 = self.layer2_1(B2_relu2_1) + A_feature3_1 = self.layer3_1(A_relu3_1) + B1_feature3_1 = self.layer3_1(B1_relu3_1) + B2_feature3_1 = self.layer3_1(B2_relu3_1) + A_feature4_1 = self.layer4_1(A_relu4_1) + B1_feature4_1 = self.layer4_1(B1_relu4_1) + B2_feature4_1 = self.layer4_1(B2_relu4_1) + A_feature5_1 = self.layer5_1(A_relu5_1) + B1_feature5_1 = self.layer5_1(B1_relu5_1) + B2_feature5_1 = self.layer5_1(B2_relu5_1) + + # concatenate features + if A_feature5_1.shape[2] != A_feature2_1.shape[2] or A_feature5_1.shape[3] != A_feature2_1.shape[3]: + A_feature5_1 = F.pad(A_feature5_1, (0, 0, 1, 1), "replicate") + B1_feature5_1 = F.pad(B1_feature5_1, (0, 0, 1, 1), "replicate") + B2_feature5_1 = F.pad(B2_feature5_1, (0, 0, 1, 1), "replicate") + A_features = self.layer(torch.cat((A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1), 1)) + B1_features = self.layer(torch.cat((B1_feature2_1, B1_feature3_1, B1_feature4_1, B1_feature5_1), 1)) + B2_features = self.layer(torch.cat((B2_feature2_1, 
B2_feature3_1, B2_feature4_1, B2_feature5_1), 1)) + + if a == check_idx and flag_check: + # print(A_features.shape) + # print(B_features.shape) + vutils.save_image(A_features.detach().cpu().permute(1, 0, 2, 3), 'A_features.png', nrow=16, padding=20, normalize=True, pad_value=1) + vutils.save_image(B1_features.detach().cpu().permute(1, 0, 2, 3), 'B1_features.png', nrow=16, padding=20, normalize=True, pad_value=1) + vutils.save_image(B2_features.detach().cpu().permute(1, 0, 2, 3), 'B2_features.png', nrow=16, padding=20, normalize=True, pad_value=1) + + # pairwise cosine similarity + theta = self.theta(A_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + # # tmp1 = theta.mean(dim=-1, keepdim=True).clone().detach().cpu() + # tmp1 = theta.clone().detach().cpu() + theta = theta - theta.mean(dim=-1, keepdim=True) # center the feature + theta_norm = torch.norm(theta, 2, 1, keepdim=True) + sys.float_info.epsilon + theta = torch.div(theta, theta_norm) + theta_permute = theta.permute(0, 2, 1) # 2*(feature_height*feature_width)*256 + + '''Note! I choise to use gray feature extraction, so only self.theta should be considered''' + phi1 = self.theta(B1_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + phi2 = self.theta(B2_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + # phi = self.phi(B_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + # tmp2 = phi.mean(dim=-1, keepdim=True).clone().detach().cpu() + # tmp2 = phi.clone().detach().cpu() + + # ref image 1 + phi1 = phi1 - phi1.mean(dim=-1, keepdim=True) # center the feature + phi1_norm = torch.norm(phi1, 2, 1, keepdim=True) + sys.float_info.epsilon + phi1 = torch.div(phi1, phi1_norm) + phi2 = phi2 - phi2.mean(dim=-1, keepdim=True) # center the feature + phi2_norm = torch.norm(phi2, 2, 1, keepdim=True) + sys.float_info.epsilon + phi2 = torch.div(phi2, phi2_norm) + phi_total = torch.cat([phi1, phi2], dim = 2) + + f = torch.matmul(theta_permute, phi_total) # 2*(feature_height*feature_width)*(feature_height*feature_width) + if detach_flag: + f = f.detach() + f_similarity = f.unsqueeze_(dim=1) + similarity_map = torch.max(f_similarity, -1, keepdim=True)[0] + # print(theta_permute.size(), phi_total.size(), similarity_map.size());assert 1==0 + # torch.Size([1, 8064, 256]) torch.Size([1, 256, 16128]) torch.Size([1, 1, 8064, 1]) + similarity_map = similarity_map.view(batch_size, 1, feature_height, feature_width) + + # f can be negative + f_WTA = f if WTA_scale_weight == 1 else WTA_scale.apply(f, WTA_scale_weight) + f_WTA = f_WTA / temperature + # print(f_WTA.size()) # torch.Size([1, 1, 8064, 16128]) + f_div_C = F.softmax(f_WTA.squeeze_(), dim=-1) # 2*1936*1936; + # print(f_div_C.size());assert 1==0 # torch.Size([8064, 16128]) + # total_len = f_div_C.size()[1] + f_div_C1 = f_div_C[:,:f_div_C.size()[0]] + f_div_C2 = f_div_C[:,f_div_C.size()[0]:] + # print(f_div_C1.size(), f_div_C2.size());assert 1==0 + + + # downsample the reference color + B_lab1 = F.avg_pool2d(B_lab_map1, 4) + B_lab1 = B_lab1.view(batch_size, channel, -1) + B_lab1 = B_lab1.permute(0, 2, 1) # 2*1936*channel + B_lab2 = F.avg_pool2d(B_lab_map2, 4) + B_lab2 = B_lab2.view(batch_size, channel, -1) + B_lab2 = B_lab2.permute(0, 2, 1) # 2*1936*channel + + # multiply the corr map with color + y = torch.matmul(f_div_C1, B_lab1) + torch.matmul(f_div_C2, B_lab2) # 2*1936*channel + y = y.permute(0, 2, 1).contiguous() + y = y.view(batch_size, 
channel, feature_height, feature_width) # 2*3*44*44 + y = self.upsampling(y) + similarity_map = self.upsampling(similarity_map) + + return y, similarity_map + + +class WarpNet(nn.Module): + """ input is Al, Bl, channel = 1, range~[0,255] """ + + def __init__(self, batch_size): + super(WarpNet, self).__init__() + self.feature_channel = 64 + self.in_channels = self.feature_channel * 4 + self.inter_channels = 256 + # 44*44 + self.layer2_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(128, 128, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(128), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=2), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + ) + self.layer3_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(256, 128, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(128), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + ) + + # 22*22->44*44 + self.layer4_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(256), + nn.PReLU(), + nn.ReflectionPad2d(1), + nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + nn.Upsample(scale_factor=2), + ) + + # 11*11->44*44 + self.layer5_1 = nn.Sequential( + nn.ReflectionPad2d(1), + nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(256), + nn.PReLU(), + nn.Upsample(scale_factor=2), + nn.ReflectionPad2d(1), + nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1), + nn.InstanceNorm2d(self.feature_channel), + nn.PReLU(), + nn.Upsample(scale_factor=2), + ) + + self.layer = nn.Sequential( + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1), + ) + + self.theta = nn.Conv2d( + in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0 + ) + self.phi = nn.Conv2d( + in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0 + ) + + self.upsampling = nn.Upsample(scale_factor=4) + + def forward( + self, + B_lab_map, + A_relu2_1, + A_relu3_1, + A_relu4_1, + A_relu5_1, + B_relu2_1, + B_relu3_1, + B_relu4_1, + B_relu5_1, + temperature=0.001 * 5, + detach_flag=False, + WTA_scale_weight=1, + feature_noise=0, + ): + batch_size = B_lab_map.shape[0] + channel = B_lab_map.shape[1] + image_height = B_lab_map.shape[2] + image_width = B_lab_map.shape[3] + feature_height = int(image_height / 4) + feature_width = int(image_width / 4) + + # scale feature size to 44*44 + A_feature2_1 = self.layer2_1(A_relu2_1) + B_feature2_1 = self.layer2_1(B_relu2_1) + A_feature3_1 = self.layer3_1(A_relu3_1) + B_feature3_1 = self.layer3_1(B_relu3_1) + A_feature4_1 = self.layer4_1(A_relu4_1) + B_feature4_1 = self.layer4_1(B_relu4_1) + A_feature5_1 = self.layer5_1(A_relu5_1) + B_feature5_1 = self.layer5_1(B_relu5_1) + + # concatenate features + if A_feature5_1.shape[2] != A_feature2_1.shape[2] or A_feature5_1.shape[3] != A_feature2_1.shape[3]: + A_feature5_1 = F.pad(A_feature5_1, (0, 0, 1, 1), "replicate") + B_feature5_1 = F.pad(B_feature5_1, (0, 0, 1, 
1), "replicate") + A_features = self.layer(torch.cat((A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1), 1)) + B_features = self.layer(torch.cat((B_feature2_1, B_feature3_1, B_feature4_1, B_feature5_1), 1)) + + # pairwise cosine similarity + theta = self.theta(A_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + theta = theta - theta.mean(dim=-1, keepdim=True) # center the feature + theta_norm = torch.norm(theta, 2, 1, keepdim=True) + sys.float_info.epsilon + theta = torch.div(theta, theta_norm) + theta_permute = theta.permute(0, 2, 1) # 2*(feature_height*feature_width)*256 + phi = self.phi(B_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width) + phi = phi - phi.mean(dim=-1, keepdim=True) # center the feature + phi_norm = torch.norm(phi, 2, 1, keepdim=True) + sys.float_info.epsilon + phi = torch.div(phi, phi_norm) + f = torch.matmul(theta_permute, phi) # 2*(feature_height*feature_width)*(feature_height*feature_width) + if detach_flag: + f = f.detach() + + f_similarity = f.unsqueeze_(dim=1) + similarity_map = torch.max(f_similarity, -1, keepdim=True)[0] + similarity_map = similarity_map.view(batch_size, 1, feature_height, feature_width) + + # f can be negative + f_WTA = f if WTA_scale_weight == 1 else WTA_scale.apply(f, WTA_scale_weight) + f_WTA = f_WTA / temperature + f_div_C = F.softmax(f_WTA.squeeze_(), dim=-1) # 2*1936*1936; + + # downsample the reference color + B_lab = F.avg_pool2d(B_lab_map, 4) + B_lab = B_lab.view(batch_size, channel, -1) + B_lab = B_lab.permute(0, 2, 1) # 2*1936*channel + + # multiply the corr map with color + y = torch.matmul(f_div_C, B_lab) # 2*1936*channel + y = y.permute(0, 2, 1).contiguous() + y = y.view(batch_size, channel, feature_height, feature_width) # 2*3*44*44 + y = self.upsampling(y) + similarity_map = self.upsampling(similarity_map) + + return y, similarity_map diff --git a/BiSTNet-NTIRE2023/models/hed.py b/BiSTNet-NTIRE2023/models/hed.py new file mode 100644 index 0000000..abb9e4d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/hed.py @@ -0,0 +1,100 @@ +import torch +import torchvision +import torchvision.transforms as transforms +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +########################################################## +class Network(torch.nn.Module): + def __init__(self): + super().__init__() + + arguments_strModel = 'bsds500' + + self.netVggOne = torch.nn.Sequential( + torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netVggTwo = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netVggThr = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + 
self.netVggFou = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netVggFiv = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netScoreOne = torch.nn.Conv2d(in_channels=64, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreTwo = torch.nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreThr = torch.nn.Conv2d(in_channels=256, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreFou = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreFiv = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0) + + self.netCombine = torch.nn.Sequential( + torch.nn.Conv2d(in_channels=5, out_channels=1, kernel_size=1, stride=1, padding=0), + torch.nn.Sigmoid() + ) + + self.load_state_dict({ strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.hub.load_state_dict_from_url(url='http://content.sniklaus.com/github/pytorch-hed/network-' + arguments_strModel + '.pytorch', file_name='hed-' + arguments_strModel).items() }) + # end + + def forward(self, tenInput): + tenInput = tenInput * 255.0 + tenInput = tenInput - torch.tensor(data=[104.00698793, 116.66876762, 122.67891434], dtype=tenInput.dtype, device=tenInput.device).view(1, 3, 1, 1) + + tenVggOne = self.netVggOne(tenInput) + tenVggTwo = self.netVggTwo(tenVggOne) + tenVggThr = self.netVggThr(tenVggTwo) + tenVggFou = self.netVggFou(tenVggThr) + tenVggFiv = self.netVggFiv(tenVggFou) + + tenScoreOne = self.netScoreOne(tenVggOne) + tenScoreTwo = self.netScoreTwo(tenVggTwo) + tenScoreThr = self.netScoreThr(tenVggThr) + tenScoreFou = self.netScoreFou(tenVggFou) + tenScoreFiv = self.netScoreFiv(tenVggFiv) + + tenScoreOne = torch.nn.functional.interpolate(input=tenScoreOne, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreTwo = torch.nn.functional.interpolate(input=tenScoreTwo, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreThr = torch.nn.functional.interpolate(input=tenScoreThr, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreFou = torch.nn.functional.interpolate(input=tenScoreFou, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreFiv = torch.nn.functional.interpolate(input=tenScoreFiv, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + + return self.netCombine(torch.cat([ tenScoreOne, tenScoreTwo, tenScoreThr, tenScoreFou, tenScoreFiv ], 1)) + # end +# end \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/Cityscapes/val/image/examples.png 
b/BiSTNet-NTIRE2023/models/protoseg_core/Cityscapes/val/image/examples.png new file mode 100644 index 0000000..8d71b31 Binary files /dev/null and b/BiSTNet-NTIRE2023/models/protoseg_core/Cityscapes/val/image/examples.png differ diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4.json new file mode 100644 index 0000000..187fd81 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4.json @@ -0,0 +1,151 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + 
"ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4_RMI.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4_RMI.json new file mode 100644 index 0000000..1c70d3f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4_RMI.json @@ -0,0 +1,156 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_aux_rmi_loss", + "params": { + "num_classes": 150, + "rmi_radius": 3, + "rmi_pool_way": 0, + "rmi_pool_size": 3, + "rmi_pool_stride": 3, + "loss_weight_lambda": 0.5, + "loss_weight": 1.0, + "lambda_way": 1, + "use_sigmoid": false + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4_TEST.json 
b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4_TEST.json new file mode 100644 index 0000000..e78b22d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_48_D_4_TEST.json @@ -0,0 +1,160 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 8, + "mode": "ms_test", + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 8, + "mode": "ms_test", + "min_side_length": 520, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", 
+ "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_SEGFIX.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_SEGFIX.json new file mode 100644 index 0000000..dd4f381 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/H_SEGFIX.json @@ -0,0 +1,153 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8, + "pred_dt_offset": true + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "fit_stride": 8, + "input_size": [512, 512], + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 512 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize", "random_crop"], + "resize": { + "min_side_length": 512 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [512, 512], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/M_V2_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/M_V2_D_8.json new file mode 100644 index 
0000000..7a23086 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/M_V2_D_8.json @@ -0,0 +1,154 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ms_test", + "min_side_length": 500, + "max_side_length": 1000, + "fit_stride": 8, + "scale_search": [0.6, 0.8, 1, 1.2, 1.4, 1.6] + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "mobilenetv2_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_101_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_101_D_8.json new file mode 100644 index 0000000..854aeff --- 
/dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_101_D_8.json @@ -0,0 +1,151 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_101_D_8_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_101_D_8_TEST.json new file mode 100644 index 0000000..e3f2342 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_101_D_8_TEST.json @@ -0,0 +1,160 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": 
"~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 4, + "mode": "ms_test", + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ms_test", + "min_side_length": 520, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 16]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_50_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_50_D_8.json new file mode 100644 index 0000000..e5aff5a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/R_50_D_8.json @@ -0,0 +1,156 @@ +{ + "dataset": 
"ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 4, + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/TransUNet.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/TransUNet.json new file mode 100644 index 0000000..69b233c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/TransUNet.json @@ -0,0 +1,151 @@ +{ + 
"dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 32, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 32, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 512 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 512 + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.0001, + "metric": "iters", + "lr_policy": "warmup_cosine", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "adamw", + "adamw": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/W_38_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/W_38_D_8.json new file mode 100644 index 0000000..668a41b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/ade20k/W_38_D_8.json @@ -0,0 +1,154 @@ +{ + "dataset": "ade20k", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 150, + "reduce_zero_label": true, + "data_dir": "~/DataSet/ADE20K", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": 
{ + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ms_test", + "min_side_length": 500, + "max_side_length": 1000, + "fit_stride": 8, + "scale_search": [0.6, 0.8, 1, 1.2, 1.4, 1.6] + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_ade20k_seg", + "checkpoints_dir": "./checkpoints/ade20k", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_ade20k_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.02, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4.json new file mode 100644 index 0000000..080ac8f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4.json @@ -0,0 +1,146 @@ +{ + "dataset": "camvid", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 12, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 
16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [480, 360], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [480, 360], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [480, 360], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [480, 360], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":100000, + "ohem_thresh": 0.7 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 0.1, + "use_rmi": false + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4_MEM.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4_MEM.json new file mode 100644 index 0000000..a8f3765 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4_MEM.json @@ -0,0 +1,149 @@ +{ + "dataset": "camvid", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 12, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [480, 360], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ms_test", + "scale_search": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { 
+ "size_mode": "fix_size", + "input_size": [480, 360], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [480, 360], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [480, 360], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 3000, + "loss_weight": 0.1, + "use_rmi": false, + "use_lovasz": false, + "with_memory": true, + "memory_size": 5000, + "pixel_update_freq": 10 + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4_TEST.json new file mode 100644 index 0000000..9c5e380 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/camvid/H_48_D_4_TEST.json @@ -0,0 +1,153 @@ +{ + "dataset": "camvid", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 12, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [480, 360], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "mode": "ss_test", + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "align_method": "only_pad", + "input_size": [480, 360] + } + }, + "test": { + "mode": "ms_test", + "batch_size": 4, + "scale_search": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "fix_size", + 
"align_method": "only_pad", + "input_size": [480, 360] + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [480, 360], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":100000, + "ohem_thresh": 0.9 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 3000, + "loss_weight": 0.1, + "use_rmi": false, + "use_lovasz": false, + "with_memory": true, + "memory_size": 5000, + "pixel_update_freq": 10 + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/celeba/H_48_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/celeba/H_48_D_4.json new file mode 100644 index 0000000..efde451 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/celeba/H_48_D_4.json @@ -0,0 +1,141 @@ +{ + "dataset": "celeba", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "reduce_zero_label": false, + "data_dir": "~/DataSet/celeba", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + 
"target_size": [512, 512] + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize":{ + "target_size": [512, 512] + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/celeba/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 2000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ohem_minkeep":100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/celeba/H_48_D_4_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/celeba/H_48_D_4_TEST.json new file mode 100644 index 0000000..45b6577 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/celeba/H_48_D_4_TEST.json @@ -0,0 +1,153 @@ +{ + "dataset": "celeba", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "reduce_zero_label": true, + "data_dir": "~/DataSet/celeba_mask", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ms_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ms_test", + "min_side_length": 520, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + 
"crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/celeba/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 150000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4.json new file mode 100644 index 0000000..c5da94a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4.json @@ -0,0 +1,147 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 
0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.1, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 0.1, + "use_rmi": false + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_MEM.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_MEM.json new file mode 100644 index 0000000..c8fb17e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_MEM.json @@ -0,0 +1,151 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": 
"fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 2000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 1, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 1, + "use_rmi": false, + "use_lovasz": false, + "with_memory": true, + "memory_size": 5000, + "pixel_update_freq": 10 + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_RMI.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_RMI.json new file mode 100644 index 0000000..dee48a4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_RMI.json @@ -0,0 +1,138 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 8, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + 
"checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_aux_rmi_loss", + "params": { + "num_classes": 19, + "rmi_radius": 3, + "rmi_pool_way": 0, + "rmi_pool_size": 3, + "rmi_pool_stride": 3, + "loss_weight_lambda": 0.5, + "loss_weight": 1.0, + "lambda_way": 1, + "use_sigmoid": false + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_TEST.json new file mode 100644 index 0000000..4a72ebe --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_TEST.json @@ -0,0 +1,151 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "scale_search": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + "mode": "ms_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "scale_search": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + "mode": "ms_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + 
"bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 0.1, + "use_rmi": false, + "with_memory": true, + "memory_size": 5000, + "pixel_update_freq": 10 + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_TEST_DEPTH.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_TEST_DEPTH.json new file mode 100644 index 0000000..904c607 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_TEST_DEPTH.json @@ -0,0 +1,147 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "scale_search": [0.75, 1.0, 1.25, 1.5, 1.75, 2], + "mode": "ms_test_depth", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "scale_search": [0.75, 1.0, 1.25, 1.5, 1.75, 2], + "mode": "ms_test_depth", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 
153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_proto.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_proto.json new file mode 100644 index 0000000..ec60606 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_48_D_4_proto.json @@ -0,0 +1,148 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 0 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1920, 1088], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "save_prob": "True", + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { 
+ "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "critical", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 2000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + }, + "protoseg": { + "gamma": 0.999, + "loss_ppc_weight": 0.01, + "loss_ppd_weight": 0.001, + "num_prototype": 10, + "pretrain_prototype": false, + "use_rmi": false, + "use_prototype": true, + "update_prototype": true, + "warmup_iters": 0 + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_64_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_64_D_4.json new file mode 100644 index 0000000..ccb4776 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_64_D_4.json @@ -0,0 +1,145 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 
0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_SEGFIX.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_SEGFIX.json new file mode 100644 index 0000000..830b434 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/H_SEGFIX.json @@ -0,0 +1,152 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8, + "pred_dt_offset": true + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 16, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + 
"random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["random_crop"], + "random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.04, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_16.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_16.json new file mode 100644 index 0000000..53e4a25 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_16.json @@ -0,0 +1,126 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [769, 769], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "input_size": [2048, 1024], + "scale_search": [0.75, 1.0, 1.25], + "mode": "ms_test" + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": 
{ + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [769, 769], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated16", + "multi_grid": [1, 2, 4], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 16, + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8.json new file mode 100644 index 0000000..aba636a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8.json @@ -0,0 +1,147 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 8, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] 
+ }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.1, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 0.1, + "use_rmi": false + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_EDGE_VOID.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_EDGE_VOID.json new file mode 100644 index 0000000..238ac55 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_EDGE_VOID.json @@ -0,0 +1,144 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8, + "label_edge2void": true + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [769, 769], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [769, 769], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + 
"div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_NON_EDGE_VOID.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_NON_EDGE_VOID.json new file mode 100644 index 0000000..6bb93e9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_NON_EDGE_VOID.json @@ -0,0 +1,138 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8, + "label_non_edge2void": true + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [769, 769], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": 
[0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [769, 769], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_TEST.json new file mode 100644 index 0000000..7a2baee --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_101_D_8_TEST.json @@ -0,0 +1,139 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [769, 769], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "scale_search": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + "mode": "ms_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "scale_search": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + "mode": "ms_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + 
"scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [769, 769], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_18_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_18_D_8.json new file mode 100644 index 0000000..6b6e5b8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_18_D_8.json @@ -0,0 +1,131 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [769, 769], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 12, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [769, 769], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + 
"normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet50_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_50_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_50_D_8.json new file mode 100644 index 0000000..639e52b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/R_50_D_8.json @@ -0,0 +1,140 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [769, 769], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 12, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [769, 769], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 
60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet50_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/W_38_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/W_38_D_8.json new file mode 100644 index 0000000..3e648d6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/W_38_D_8.json @@ -0,0 +1,131 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "RGB", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [800, 800], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 12, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [800, 800], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": 
"deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.2, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.002, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxohemce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 200000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/X_65_D_16.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/X_65_D_16.json new file mode 100644 index 0000000..f464087 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/X_65_D_16.json @@ -0,0 +1,145 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [768, 768], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [768, 768], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255, + "mean_value": [0.5, 0.5, 0.5], + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + 
"loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/X_65_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/X_65_D_8.json new file mode 100644 index 0000000..f7821af --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/cityscapes/X_65_D_8.json @@ -0,0 +1,145 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [768, 768], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_resize", "random_crop", "random_hflip", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [769, 769], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": [] + }, + "normalize": { + "div_value": 255, + "mean_value": [0.5, 0.5, 0.5], + "mean": [0.5, 0.5, 0.5], + "std": [0.5, 0.5, 0.5] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/cityscapes", + "save_iters": 1000 + 
}, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4.json new file mode 100644 index 0000000..b5d3207 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4.json @@ -0,0 +1,172 @@ +{ + "dataset": "coco_stuff", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 171, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182], + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { 
+ "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_coco_stuff_seg", + "checkpoints_dir": "./checkpoints/coco_stuff", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 0.1, + "use_rmi": false + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4_RMI.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4_RMI.json new file mode 100644 index 0000000..c318862 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4_RMI.json @@ -0,0 +1,166 @@ +{ + "dataset": "coco_stuff", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 171, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182], + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + 
"batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_coco_stuff_seg", + "checkpoints_dir": "./checkpoints/coco_stuff", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_aux_rmi_loss", + "params": { + "num_classes": 171, + "rmi_radius": 3, + "rmi_pool_way": 0, + "rmi_pool_size": 3, + "rmi_pool_stride": 3, + "loss_weight_lambda": 0.5, + "loss_weight": 1.0, + "lambda_way": 1, + "use_sigmoid": false + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4_TEST.json new file mode 100644 index 0000000..b8eb479 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/H_48_D_4_TEST.json @@ -0,0 +1,167 @@ +{ + "dataset": "coco_stuff", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 171, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 
74, 75, 76, 77, + 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182], + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ms_test", + "min_side_length": 520, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 8, + "mode": "ms_test", + "min_side_length": 520, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_coco_stuff_seg", + "checkpoints_dir": "./checkpoints/coco_stuff", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 50000, + "ohem_thresh": 0.7 + } + } +} 
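# Illustrative sketch, not code from this diff: the configs above repeat two conventions
# worth spelling out. The "normalize" block either keeps BGR images in the 0-255 range
# ("div_value": 1.0 with per-channel means near [103, 116, 123]) or rescales to 0-1 and
# applies ImageNet statistics ("div_value": 255.0), and the "lr" block's "lambda_poly"
# policy decays base_lr polynomially over max_iters. The helper names below are invented
# for illustration, and the poly formula is the conventional (1 - iter/max_iters) ** power
# form, assumed rather than copied from this repository.
import numpy as np

def normalize_image(img_bgr, norm_cfg):
    """Apply a config's "normalize" block to an HxWx3 BGR uint8 image."""
    img = img_bgr.astype(np.float32) / norm_cfg["div_value"]
    mean = np.asarray(norm_cfg["mean"], dtype=np.float32)
    std = np.asarray(norm_cfg["std"], dtype=np.float32)
    return (img - mean) / std

def poly_lr(base_lr, cur_iter, max_iters, power=0.9):
    """Assumed "lambda_poly" schedule: base_lr * (1 - iter/max_iters) ** power."""
    return base_lr * (1.0 - cur_iter / float(max_iters)) ** power

# Values taken from the Cityscapes R_101_D_8 config above.
norm_cfg = {"div_value": 1.0, "mean": [102.9801, 115.9465, 122.7717], "std": [1.0, 1.0, 1.0]}
black = np.zeros((769, 769, 3), dtype=np.uint8)
print(normalize_image(black, norm_cfg).mean())          # about -113.9 for an all-black frame
print(poly_lr(0.01, cur_iter=20000, max_iters=40000))   # about 0.0054 halfway through training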
diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/R_101_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/R_101_D_8.json new file mode 100644 index 0000000..181cde5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/R_101_D_8.json @@ -0,0 +1,161 @@ +{ + "dataset": "coco_stuff", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 171, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182], + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ss_test", + "batch_size": 4, + "crop_size": [520, 520], + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2], + "data_transformer": { + "size_mode": "diverse_size" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_coco_stuff_seg", + "checkpoints_dir": "./checkpoints/coco_stuff", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + 
"freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 50000, + "ohem_thresh": 0.9 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/R_101_D_8_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/R_101_D_8_TEST.json new file mode 100644 index 0000000..6ddee3e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/coco_stuff/R_101_D_8_TEST.json @@ -0,0 +1,165 @@ +{ + "dataset": "coco_stuff", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 171, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182], + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ms_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ms_test", + "min_side_length": 520, + "fit_stride": 8, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": 
"fs_aspocnet_coco_stuff_seg", + "checkpoints_dir": "./checkpoints/coco_stuff", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 50000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4.json new file mode 100644 index 0000000..7e94668 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4.json @@ -0,0 +1,135 @@ +{ + "dataset": "lip", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 20, + "reduce_zero_label": false, + "data_dir": "~/datasets/LIP", + "workers": 16 + }, + "train": { + "batch_size": 32, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad" + } + }, + "test": { + "mode": "ss_test", + "min_side_length": 473, + "max_side_length": 846, + "scale_search": [0.75, 1.0, 1.25] + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "target_size": [473, 473] + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [[14, 15], [16, 17], [18, 19]] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 1.5], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [473, 473], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize":{ + "target_size": [473, 473] + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_lip_seg", + "checkpoints_dir": "./checkpoints/lip", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated16", + "multi_grid": [1, 1, 1], + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 16, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/lip/fs_aspocnet_lip_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + 
"lr": { + "base_lr": 0.007, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 110000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4_RMI.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4_RMI.json new file mode 100644 index 0000000..ca6ee31 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4_RMI.json @@ -0,0 +1,142 @@ +{ + "dataset": "lip", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 20, + "reduce_zero_label": false, + "data_dir": "~/datasets/LIP", + "workers": 16 + }, + "train": { + "batch_size": 32, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad" + } + }, + "test": { + "mode": "ss_test", + "min_side_length": 473, + "max_side_length": 846, + "scale_search": [0.75, 1.0, 1.25] + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "target_size": [473, 473] + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [[14, 15], [16, 17], [18, 19]] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 1.5], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [473, 473], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize":{ + "target_size": [473, 473] + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_lip_seg", + "checkpoints_dir": "./checkpoints/lip", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated16", + "multi_grid": [1, 1, 1], + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 16, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/lip/fs_aspocnet_lip_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.007, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 110000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_aux_rmi_loss", + 
"params": { + "num_classes": 20, + "rmi_radius": 3, + "rmi_pool_way": 0, + "rmi_pool_size": 3, + "rmi_pool_stride": 3, + "loss_weight_lambda": 0.5, + "loss_weight": 1.0, + "lambda_way": 1, + "use_sigmoid": false + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4_TEST.json new file mode 100644 index 0000000..c0e08d8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/H_48_D_4_TEST.json @@ -0,0 +1,146 @@ +{ + "dataset": "lip", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 20, + "reduce_zero_label": false, + "data_dir": "~/datasets/LIP", + "workers": 16 + }, + "train": { + "batch_size": 32, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "mode": "ms_test", + "batch_size": 4, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5], + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ms_test", + "batch_size": 4, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5], + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "target_size": [473, 473] + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [[14, 15], [16, 17], [18, 19]] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 1.5], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [473, 473], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize":{ + "target_size": [473, 473] + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[0,0,0], [128,0,0], [255,0,0], [0,85,0], [170,0,51], + [255,85,0], [0,0,85], [0,119,221], [85,85,0], [0,85,85], + [85,51,0], [52,86,128], [0,128,0], [0,0,255], [51,170,221], + [0,255,255], [85,255,170], [170,255,85], [255,255,0], [255,170,0]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_lip_seg", + "checkpoints_dir": "./checkpoints/lip", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated16", + "multi_grid": [1, 1, 1], + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 16, + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/lip/fs_aspocnet_lip_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.007, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 110000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": 
"fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/R_101_D_16.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/R_101_D_16.json new file mode 100644 index 0000000..e94fdbc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/R_101_D_16.json @@ -0,0 +1,142 @@ +{ + "dataset": "lip", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 20, + "reduce_zero_label": false, + "data_dir": "~/datasets/LIP", + "workers": 16, + "use_edge": "ce2p" + }, + "train": { + "batch_size": 32, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "only_small": true, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad" + } + }, + "test": { + "mode": "ss_test", + "min_side_length": 473, + "max_side_length": 846, + "scale_search": [0.75, 1.0, 1.25] + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "target_size": [473, 473] + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [[14, 15], [16, 17], [18, 19]] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 1.5], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [473, 473], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize":{ + "target_size": [473, 473] + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [104.00698793, 116.66876762, 122.67891434], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[0,0,0], [128,0,0], [255,0,0], [0,85,0], [170,0,51], + [255,85,0], [0,0,85], [0,119,221], [85,85,0], [0,85,85], + [85,51,0], [52,86,128], [0,128,0], [0,0,255], [51,170,221], + [0,255,255], [85,255,170], [170,255,85], [255,255,0], [255,170,0]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_lip_seg", + "checkpoints_dir": "./checkpoints/lip", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated16", + "multi_grid": [1, 1, 1], + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 16, + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/lip/fs_aspocnet_lip_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.007, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 110000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "ce2p_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/R_101_D_8.json 
b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/R_101_D_8.json new file mode 100644 index 0000000..24632c9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/R_101_D_8.json @@ -0,0 +1,142 @@ +{ + "dataset": "lip", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 20, + "reduce_zero_label": false, + "data_dir": "~/datasets/LIP", + "workers": 16, + "use_edge": true + }, + "train": { + "batch_size": 32, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "only_small": true, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad" + } + }, + "test": { + "mode": "ss_test", + "min_side_length": 473, + "max_side_length": 846, + "scale_search": [0.75, 1.0, 1.25] + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "target_size": [473, 473] + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [[14, 15], [16, 17], [18, 19]] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 1.5], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [473, 473], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize":{ + "target_size": [473, 473] + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [104.00698793, 116.66876762, 122.67891434], + "std": [1.0, 1.0, 1.0] + }, + "details": { + "color_list": [[0,0,0], [128,0,0], [255,0,0], [0,85,0], [170,0,51], + [255,85,0], [0,0,85], [0,119,221], [85,85,0], [0,85,85], + [85,51,0], [52,86,128], [0,128,0], [0,0,255], [51,170,221], + [0,255,255], [85,255,170], [170,255,85], [255,255,0], [255,170,0]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_lip_seg", + "checkpoints_dir": "./checkpoints/lip", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated16", + "multi_grid": [1, 1, 1], + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 16, + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/lip/fs_aspocnet_lip_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.007, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 110000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "ce2p_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/W_38_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/W_38_D_8.json new file mode 100644 index 0000000..430c0e0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/lip/W_38_D_8.json @@ -0,0 +1,143 @@ +{ + "dataset": "lip", + "method": 
"fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "RGB", + "num_classes": 20, + "reduce_zero_label": false, + "data_dir": "~/datasets/LIP", + "workers": 16, + "use_edge": true + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "only_small": true, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [473, 473], + "align_method": "only_pad" + } + }, + "test": { + "mode": "ss_test", + "min_side_length": 473, + "max_side_length": 846, + "scale_search": [0.75, 1.0, 1.25] + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "target_size": [473, 473] + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [[14, 15], [16, 17], [18, 19]] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 1.5], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [473, 473], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize":{ + "target_size": [473, 473] + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_lip_seg", + "checkpoints_dir": "./checkpoints/lip", + "save_iters": 1000 + }, + "network":{ + "backbone": "wide_resnet38_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/lip/fs_aspocnet_lip_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.007, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 10000, + "max_iters": 110000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1 + } + } +} + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/mapillary/H_48_D_4_1024x1024.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/mapillary/H_48_D_4_1024x1024.json new file mode 100644 index 0000000..b6675b5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/mapillary/H_48_D_4_1024x1024.json @@ -0,0 +1,151 @@ +{ + "dataset": "mapillary", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 65, + "label_list": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64], + "data_dir": "/msravcshare/dataset/mapillary-vista-v1.1", + "workers": 8 + }, + "train": { + "batch_size": 8, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [1024, 1024], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ss_test", + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 8, + "mode": "ms_test", + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["resize", "random_resize", "random_crop", "random_hflip", "random_brightness"], + "resize": { + "min_side_length": 1024 + }, + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [1024, 1024], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 1024 + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_mapillary_seg", + "checkpoints_dir": "./checkpoints/mapillary", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/mapillary/fs_baseocnet_mapillary_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.01, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + }, + "lambda_poly": { + "power": 0.9 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 1000000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4.json new file mode 100644 index 0000000..38fdf8d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4.json @@ -0,0 +1,161 @@ +{ + "dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 59, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + 
"workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":100000, + "ohem_thresh": 0.7 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 0.1, + "use_rmi": false + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4_RMI.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4_RMI.json new file mode 100644 index 0000000..c619edf --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4_RMI.json @@ -0,0 +1,155 @@ +{ + "dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 59, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + 
"train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 8, + "mode": "ms_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "batch_size": 8, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_aux_rmi_loss", + "params": { + "num_classes": 59, + "rmi_radius": 3, + "rmi_pool_way": 0, + "rmi_pool_size": 3, + "rmi_pool_stride": 3, + "loss_weight_lambda": 0.5, + "loss_weight": 1.0, + "lambda_way": 1, + "use_sigmoid": false + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4_TEST.json new file mode 100644 index 0000000..9282529 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/H_48_D_4_TEST.json @@ -0,0 +1,167 @@ +{ + "dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 59, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + 
"pad_mode": "random" + } + }, + "val": { + "mode": "ss_test", + "batch_size": 4, + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ms_test", + "batch_size": 4, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":100000, + "ohem_thresh": 0.9 + } + }, + "contrast": { + "proj_dim": 256, + "temperature": 0.07, + "base_temperature": 0.07, + "max_samples": 1024, + "max_views": 100, + "stride": 8, + "warmup_iters": 5000, + "loss_weight": 0.1, + "use_rmi": false, + "with_memory": true, + "memory_size": 5000, + "pixel_update_freq": 10 + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/R_101_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/R_101_D_8.json new file mode 100644 index 0000000..6932b82 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/R_101_D_8.json @@ -0,0 +1,152 @@ +{ + "dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 59, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { 
+ "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ms_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ss_test", + "batch_size": 4, + "crop_size": [520, 520], + "scale_search": [0.6, 0.8, 1, 1.2, 1.4, 1.6], + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":50000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/R_101_D_8_TEST.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/R_101_D_8_TEST.json new file mode 100644 index 0000000..05cae65 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/R_101_D_8_TEST.json @@ -0,0 +1,155 @@ +{ + "dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 59, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + 
"val": { + "mode": "ms_test", + "batch_size": 4, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ms_test", + "batch_size": 4, + "min_side_length": 520, + "scale_search": [0.5, 0.75, 1, 1.25, 1.5, 1.75, 2.0], + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 8, + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 1.0, + "mean_value": [103, 116, 123], + "mean": [102.9801, 115.9465, 122.7717], + "std": [1.0, 1.0, 1.0] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/TransUNet.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/TransUNet.json new file mode 100644 index 0000000..c099763 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/TransUNet.json @@ -0,0 +1,149 @@ +{ + "dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 59, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + 
"size_mode": "diverse_size", + "fit_stride": 32, + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "diverse_size", + "fit_stride": 32, + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 512 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 512 + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.0001, + "metric": "iters", + "lr_policy": "warmup_cosine", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "adamw", + "adamw": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":100000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/W_38_D_8.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/W_38_D_8.json new file mode 100644 index 0000000..1cb45d3 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/pascal_context/W_38_D_8.json @@ -0,0 +1,152 @@ +{ + "dataset": "pascal_context", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 59, + "reduce_zero_label": true, + "data_dir": "~/DataSet/pascal_context", + "workers": 8 + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [520, 520], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ms_test", + "data_transformer": { + "size_mode": "diverse_size", + "align_method": "only_pad", + "pad_mode": "pad_right_down" + } + }, + "test": { + "mode": "ss_test", + "batch_size": 4, + "crop_size": [520, 520], + "scale_search": [0.6, 0.8, 1, 1.2, 1.4, 1.6], + "data_transformer": { + "size_mode": "diverse_size", + "align_method": 
"only_pad", + "pad_mode": "pad_right_down" + } + }, + "train_trans": { + "trans_seq": ["random_hflip", "resize", "random_resize", "random_crop", "random_brightness"], + "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "resize": { + "min_side_length": 520 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_rotate": { + "ratio": 1.0, + "rotate_degree": 10 + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [520, 520], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["resize"], + "resize": { + "min_side_length": 520 + }, + "random_crop": { + "ratio": 1.0, + "crop_size": [520, 520], + "method": "center", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "checkpoints": { + "checkpoints_name": "fs_aspocnet_pascal_context_seg", + "checkpoints_dir": "./checkpoints/pascal_context", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "model_name": "asp_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/ade20k/fs_aspocnet_pascal_context_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.001, + "metric": "iters", + "lr_policy": "lambda_poly", + "warm": { + "warm_iters": 1000, + "freeze_backbone": false + }, + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 5000, + "max_iters": 30000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd":{ + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep":50000, + "ohem_thresh": 0.7 + } + } +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/configs/segfix/H_SEGFIX.json b/BiSTNet-NTIRE2023/models/protoseg_core/configs/segfix/H_SEGFIX.json new file mode 100644 index 0000000..b9db3ae --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/configs/segfix/H_SEGFIX.json @@ -0,0 +1,152 @@ +{ + "dataset": "cityscapes", + "method": "fcn_segmentor", + "data": { + "image_tool": "cv2", + "input_mode": "BGR", + "num_classes": 19, + "label_list": [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + "data_dir": "/msravcshare/dataset/cityscapes", + "workers": 8, + "pred_dt_offset": true + }, + "train": { + "batch_size": 16, + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad", + "pad_mode": "random" + } + }, + "val": { + "batch_size": 4, + "mode": "ss_test", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [512, 512], + "align_method": "only_pad" + } + }, + "test": { + "batch_size": 16, + "mode": "ss_test", + "out_dir": "/msravcshare/dataset/seg_result/cityscapes", + "data_transformer": { + "size_mode": "fix_size", + "input_size": [2048, 1024], + "align_method": "only_pad" + } + }, + "train_trans": { + "trans_seq": ["random_crop", "random_hflip", "random_brightness"], 
+ "random_brightness": { + "ratio": 1.0, + "shift_value": 10 + }, + "random_hflip": { + "ratio": 0.5, + "swap_pair": [] + }, + "random_resize": { + "ratio": 1.0, + "method": "random", + "scale_range": [0.5, 2.0], + "aspect_range": [0.9, 1.1] + }, + "random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "val_trans": { + "trans_seq": ["random_crop"], + "random_crop":{ + "ratio": 1.0, + "crop_size": [512, 512], + "method": "random", + "allow_outside_center": false + } + }, + "normalize": { + "div_value": 255.0, + "mean_value": [0.485, 0.456, 0.406], + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + }, + "details": { + "color_list": [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], [190, 153, 153], + [153, 153, 153], [250, 170, 30], [220, 220, 0], [107, 142, 35], [152, 251, 152], + [70, 130, 180], [220, 20, 60], [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + }, + "checkpoints": { + "checkpoints_name": "fs_baseocnet_cityscapes_seg", + "checkpoints_dir": "./checkpoints/segfix/", + "save_iters": 1000 + }, + "network":{ + "backbone": "deepbase_resnet101_dilated8", + "multi_grid": [1, 1, 1], + "model_name": "base_ocnet", + "bn_type": "torchsyncbn", + "stride": 8, + "factors": [[8, 8]], + "loss_weights": { + "corr_loss": 0.01, + "aux_loss": 0.4, + "seg_loss": 1.0 + } + }, + "logging": { + "logfile_level": "info", + "stdout_level": "info", + "log_file": "./log/cityscapes/fs_baseocnet_cityscapes_seg.log", + "log_format": "%(asctime)s %(levelname)-7s %(message)s", + "rewrite": true + }, + "lr": { + "base_lr": 0.04, + "metric": "iters", + "lr_policy": "lambda_poly", + "step": { + "gamma": 0.5, + "step_size": 100 + } + }, + "solver": { + "display_iter": 10, + "test_interval": 1000, + "max_iters": 40000 + }, + "optim": { + "optim_method": "sgd", + "adam": { + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0001 + }, + "sgd": { + "weight_decay": 0.0005, + "momentum": 0.9, + "nesterov": false + } + }, + "loss": { + "loss_type": "fs_auxce_loss", + "params": { + "weight_": [0.8194, 0.8946, 0.9416, 1.0091, 0.9925, 0.9740, 1.0804, + 1.0192, 0.8528, 0.9771, 0.9139, 0.9744, 1.1098, 0.8883, + 1.0639, 1.2476, 1.0729, 1.1323, 1.0365], + "ce_weight": [0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507], + "ce_reduction": "elementwise_mean", + "ce_ignore_index": -1, + "ohem_minkeep": 100000, + "ohem_thresh": 0.9 + } + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/data_loader.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/data_loader.py new file mode 100644 index 0000000..bfed14a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/data_loader.py @@ -0,0 +1,223 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Donny You, RainbowSecret, JingyiXie +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found 
in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb +import torch +from torch.utils import data + +import models.protoseg_core.lib.datasets.tools.transforms as trans +import models.protoseg_core.lib.datasets.tools.cv2_aug_transforms as cv2_aug_trans +import models.protoseg_core.lib.datasets.tools.pil_aug_transforms as pil_aug_trans +from models.protoseg_core.lib.datasets.loader.default_loader import DefaultLoader, CSDataTestLoader +from models.protoseg_core.lib.datasets.loader.ade20k_loader import ADE20KLoader +from models.protoseg_core.lib.datasets.loader.lip_loader import LipLoader +from models.protoseg_core.lib.datasets.loader.offset_loader import DTOffsetLoader +from models.protoseg_core.lib.datasets.tools.collate import collate +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +from models.protoseg_core.lib.utils.distributed import get_world_size, get_rank, is_distributed + + +class DataLoader(object): + + def __init__(self, configer): + self.configer = configer + + from models.protoseg_core.lib.datasets.tools import cv2_aug_transforms + self.aug_train_transform = cv2_aug_transforms.CV2AugCompose(self.configer, split='train') + self.aug_val_transform = cv2_aug_transforms.CV2AugCompose(self.configer, split='val') + + # div_value=self.configer.get('normalize', 'div_value') + # mean=self.configer.get('normalize', 'mean') + # std=self.configer.get('normalize', 'std') + # print(div_value, mean, std);assert 1==0 + + self.img_transform = trans.Compose([ + trans.ToTensor(), + trans.Normalize(div_value=self.configer.get('normalize', 'div_value'), + mean=self.configer.get('normalize', 'mean'), + std=self.configer.get('normalize', 'std')), ]) + + self.label_transform = trans.Compose([ + trans.ToLabel(), + trans.ReLabel(255, -1), ]) + + def get_dataloader_sampler(self, klass, split, dataset): + + from models.protoseg_core.lib.datasets.loader.multi_dataset_loader import MultiDatasetLoader, MultiDatasetTrainingSampler + + root_dir = self.configer.get('data', 'data_dir') + if isinstance(root_dir, list) and len(root_dir) == 1: + root_dir = root_dir[0] + + kwargs = dict( + dataset=dataset, + aug_transform=(self.aug_train_transform if split == 'train' else self.aug_val_transform), + img_transform=self.img_transform, + label_transform=self.label_transform, + configer=self.configer + ) + + if isinstance(root_dir, str): + loader = klass(root_dir, **kwargs) + multi_dataset = False + elif isinstance(root_dir, list): + loader = MultiDatasetLoader(root_dir, klass, **kwargs) + multi_dataset = True + Log.info('use multi-dataset for {}...'.format(dataset)) + else: + raise RuntimeError('Unknown root dir {}'.format(root_dir)) + + if split == 'train': + if is_distributed() and multi_dataset: + raise RuntimeError('Currently multi dataset doesn\'t support distributed.') + + if is_distributed(): + sampler = torch.utils.data.distributed.DistributedSampler(loader) + elif multi_dataset: + sampler = MultiDatasetTrainingSampler(loader) + else: + sampler = None + + elif split == 'val': + + if is_distributed(): + sampler = torch.utils.data.distributed.DistributedSampler(loader) + else: + sampler = None + + return loader, sampler + + def get_trainloader(self): + if self.configer.exists('data', 'use_edge') and self.configer.get('data', 'use_edge') == 'ce2p': + """ + ce2p manner: + load 
both the ground-truth label and edge. + """ + Log.info('use edge (follow ce2p) for train...') + klass = LipLoader + + elif self.configer.exists('data', 'use_dt_offset') or self.configer.exists('data', 'pred_dt_offset'): + """ + dt-offset manner: + load both the ground-truth label and offset (based on distance transform). + """ + Log.info('use distance transform offset loader for train...') + klass = DTOffsetLoader + + elif self.configer.exists('train', 'loader') and \ + (self.configer.get('train', 'loader') == 'ade20k' + or self.configer.get('train', 'loader') == 'pascal_context' + or self.configer.get('train', 'loader') == 'pascal_voc' + or self.configer.get('train', 'loader') == 'coco_stuff' + or self.configer.get('train', 'loader') == 'camvid'): + """ + ADE20KLoader manner: + support input images of different shapes. + """ + Log.info('use ADE20KLoader (diverse input shape) for train...') + klass = ADE20KLoader + else: + """ + Default manner: + + support input images of the same shapes. + + support distributed training (the performance is more un-stable than non-distributed manner) + """ + Log.info('use the DefaultLoader for train...') + klass = DefaultLoader + loader, sampler = self.get_dataloader_sampler(klass, 'train', 'train') + trainloader = data.DataLoader( + loader, + batch_size=self.configer.get('train', 'batch_size') // get_world_size(), pin_memory=True, + num_workers=self.configer.get('data', 'workers') // get_world_size(), + sampler=sampler, + shuffle=(sampler is None), + drop_last=self.configer.get('data', 'drop_last'), + collate_fn=lambda *args: collate( + *args, trans_dict=self.configer.get('train', 'data_transformer') + ) + ) + return trainloader + + def get_valloader(self, dataset=None): + dataset = 'val' if dataset is None else dataset + + if self.configer.exists('data', 'use_dt_offset') or self.configer.exists('data', 'pred_dt_offset'): + """ + dt-offset manner: + load both the ground-truth label and offset (based on distance transform). + """ + Log.info('use distance transform based offset loader for val ...') + klass = DTOffsetLoader + + elif self.configer.get('method') == 'fcn_segmentor': + """ + default manner: + load the ground-truth label. 
+ """ + Log.info('use DefaultLoader for val ...') + klass = DefaultLoader + else: + Log.error('Method: {} loader is invalid.'.format(self.configer.get('method'))) + return None + + loader, sampler = self.get_dataloader_sampler(klass, 'val', dataset) + valloader = data.DataLoader( + loader, + sampler=sampler, + batch_size=self.configer.get('val', 'batch_size') // get_world_size(), pin_memory=True, + num_workers=self.configer.get('data', 'workers'), shuffle=False, + collate_fn=lambda *args: collate( + *args, trans_dict=self.configer.get('val', 'data_transformer') + ) + ) + return valloader + + def get_testloader(self, dataset=None): + dataset = 'test' if dataset is None else dataset + if self.configer.exists('data', 'use_sw_offset') or self.configer.exists('data', 'pred_sw_offset'): + Log.info('use sliding window based offset loader for test ...') + test_loader = data.DataLoader( + SWOffsetTestLoader(root_dir=self.configer.get('data', 'data_dir'), dataset=dataset, + img_transform=self.img_transform, + configer=self.configer), + batch_size=self.configer.get('test', 'batch_size'), pin_memory=True, + num_workers=self.configer.get('data', 'workers'), shuffle=False, + collate_fn=lambda *args: collate( + *args, trans_dict=self.configer.get('test', 'data_transformer') + ) + ) + return test_loader + + elif self.configer.get('method') == 'fcn_segmentor': + Log.info('use CSDataTestLoader for test ...') + + root_dir = self.configer.get('data', 'data_dir') + if isinstance(root_dir, list) and len(root_dir) == 1: + root_dir = root_dir[0] + test_loader = data.DataLoader( + CSDataTestLoader(root_dir=root_dir, dataset=dataset, + img_transform=self.img_transform, + configer=self.configer), + batch_size=self.configer.get('test', 'batch_size'), pin_memory=True, + num_workers=self.configer.get('data', 'workers'), shuffle=False, + collate_fn=lambda *args: collate( + *args, trans_dict=self.configer.get('test', 'data_transformer') + ) + ) + return test_loader + + +if __name__ == "__main__": + pass diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/ade20k_loader.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/ade20k_loader.py new file mode 100644 index 0000000..42ec396 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/ade20k_loader.py @@ -0,0 +1,194 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: DonnyYou +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import random + +import numpy as np +import torch +from torch.utils import data + +from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper +from models.protoseg_core.lib.extensions.parallel.data_container import DataContainer +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class ADE20KLoader(data.Dataset): + def __init__(self, root_dir, aug_transform=None, dataset=None, + img_transform=None, label_transform=None, configer=None): + self.configer = configer + self.aug_transform = aug_transform + 
self.img_transform = img_transform + self.label_transform = label_transform + self.img_list, self.label_list, self.size_list = self.__list_dirs(root_dir, dataset) + + def __len__(self): + return len(self.img_list) + + def _get_batch_per_gpu(self, cur_index): + img = ImageHelper.read_image(self.img_list[cur_index], + tool=self.configer.get('data', 'image_tool'), + mode=self.configer.get('data', 'input_mode')) + labelmap = ImageHelper.read_image(self.label_list[cur_index], + tool=self.configer.get('data', 'image_tool'), mode='P') + img_size = self.size_list[cur_index] + img_out = [img] + label_out = [labelmap] + for i in range(self.configer.get('train', 'batch_per_gpu')-1): + while True: + cur_index = (cur_index + random.randint(1, len(self.img_list) - 1)) % len(self.img_list) + now_img_size = self.size_list[cur_index] + now_mark = 0 if now_img_size[0] > now_img_size[1] else 1 + mark = 0 if img_size[0] > img_size[1] else 1 + if now_mark == mark: + img = ImageHelper.read_image(self.img_list[cur_index], + tool=self.configer.get('data', 'image_tool'), + mode=self.configer.get('data', 'input_mode')) + img_out.append(img) + labelmap = ImageHelper.read_image(self.label_list[cur_index], + tool=self.configer.get('data', 'image_tool'), mode='P') + label_out.append(labelmap) + break + + return img_out, label_out + + def __getitem__(self, index): + img_out, label_out = self._get_batch_per_gpu(index) + img_list = [] + labelmap_list = [] + for img, labelmap in zip(img_out, label_out): + if self.configer.exists('data', 'label_list'): + labelmap = self._encode_label(labelmap) + + if self.configer.exists('data', 'reduce_zero_label'): + labelmap = self._reduce_zero_label(labelmap) + + # process for the pascal-voc dataset + # ori_target = ImageHelper.tonp(labelmap) + # ori_target[ori_target == 255] = -1 + + if self.aug_transform is not None: + img, labelmap = self.aug_transform(img, labelmap=labelmap) + + if self.img_transform is not None: + img = self.img_transform(img) + + if self.label_transform is not None: + labelmap = self.label_transform(labelmap) + + img_list.append(img) + labelmap_list.append(labelmap) + + border_width = [sample.size(2) for sample in img_list] + border_height = [sample.size(1) for sample in img_list] + target_width, target_height = max(border_width), max(border_height) + if 'fit_stride' in self.configer.get('train', 'data_transformer'): + stride = self.configer.get('train', 'data_transformer')['fit_stride'] + pad_w = 0 if (target_width % stride == 0) else stride - (target_width % stride) # right + pad_h = 0 if (target_height % stride == 0) else stride - (target_height % stride) # down + target_width = target_width + pad_w + target_height = target_height + pad_h + + batch_images = torch.zeros(self.configer.get('train', 'batch_per_gpu'), 3, target_height, target_width) + batch_labels = torch.ones(self.configer.get('train', 'batch_per_gpu'), target_height, target_width) + batch_labels = (batch_labels * -1).long() + for i, (img, labelmap) in enumerate(zip(img_list, labelmap_list)): + pad_width = target_width - img.size(2) + pad_height = target_height - img.size(1) + if self.configer.get('train', 'data_transformer')['pad_mode'] == 'random': + left_pad = random.randint(0, pad_width) # pad_left + up_pad = random.randint(0, pad_height) # pad_up + else: + left_pad = 0 + up_pad = 0 + + batch_images[i, :, up_pad:up_pad+img.size(1), left_pad:left_pad+img.size(2)] = img + batch_labels[i, up_pad:up_pad+labelmap.size(0), left_pad:left_pad+labelmap.size(1)] = labelmap + + return dict( + 
img=DataContainer(batch_images, stack=False), + labelmap=DataContainer(batch_labels, stack=False), + ) + + def _reduce_zero_label(self, labelmap): + if not self.configer.get('data', 'reduce_zero_label'): + return labelmap + + labelmap = np.array(labelmap) + encoded_labelmap = labelmap - 1 + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def _encode_label(self, labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones(shape=(shape[0], shape[1]), dtype=np.float32) * 255 + for i in range(len(self.configer.get('data', 'label_list'))): + class_id = self.configer.get('data', 'label_list')[i] + encoded_labelmap[labelmap == class_id] = i + + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def __list_dirs(self, root_dir, dataset): + img_list = list() + label_list = list() + size_list = list() + image_dir = os.path.join(root_dir, dataset, 'image') + label_dir = os.path.join(root_dir, dataset, 'label') + img_extension = os.listdir(image_dir)[0].split('.')[-1] + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + img = ImageHelper.read_image(img_path, + tool=self.configer.get('data', 'image_tool'), + mode=self.configer.get('data', 'input_mode')) + size_list.append(ImageHelper.get_size(img)) + + if dataset == 'train' and self.configer.get('data', 'include_val'): + image_dir = os.path.join(root_dir, 'val/image') + label_dir = os.path.join(root_dir, 'val/label') + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + img = ImageHelper.read_image(img_path, + tool=self.configer.get('data', 'image_tool'), + mode=self.configer.get('data', 'input_mode')) + size_list.append(ImageHelper.get_size(img)) + + return img_list, label_list, size_list + + +if __name__ == "__main__": + # Test cityscapes loader. 
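+    # A minimal smoke test would look roughly like the commented sketch below; the dataset
+    # path is hypothetical, and a fully populated configer (built from one of the JSON configs
+    # in this change) is required before __getitem__ can run:
+    #   loader = ADE20KLoader('~/datasets/ADE20K', dataset='train', configer=configer)
+    #   sample = loader[0]  # dict with 'img' / 'labelmap' DataContainers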
+ pass diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/default_loader.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/default_loader.py new file mode 100644 index 0000000..0c54ae2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/default_loader.py @@ -0,0 +1,329 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: JingyiXie, LangHuang, DonnyYou, RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pdb + +import numpy as np +from torch.utils import data + +from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper +from models.protoseg_core.lib.extensions.parallel.data_container import DataContainer +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class DefaultLoader(data.Dataset): + def __init__(self, root_dir, aug_transform=None, dataset=None, + img_transform=None, label_transform=None, configer=None): + self.configer = configer + self.aug_transform = aug_transform + self.img_transform = img_transform + self.label_transform = label_transform + self.img_list, self.label_list, self.name_list = self.__list_dirs(root_dir, dataset) + size_mode = self.configer.get(dataset, 'data_transformer')['size_mode'] + self.is_stack = size_mode != 'diverse_size' + + Log.info('{} {}'.format(dataset, len(self.img_list))) + + def __len__(self): + return len(self.img_list) + + def __getitem__(self, index): + img = ImageHelper.read_image(self.img_list[index], + tool=self.configer.get('data', 'image_tool'), + mode=self.configer.get('data', 'input_mode')) + # Log.info('{}'.format(self.img_list[index])) + img_size = ImageHelper.get_size(img) + labelmap = ImageHelper.read_image(self.label_list[index], + tool=self.configer.get('data', 'image_tool'), mode='P') + if self.configer.exists('data', 'label_list'): + labelmap = self._encode_label(labelmap) + + if self.configer.exists('data', 'reduce_zero_label'): + labelmap = self._reduce_zero_label(labelmap) + + ori_target = ImageHelper.tonp(labelmap) + ori_target[ori_target == 255] = -1 + + if self.aug_transform is not None: + img, labelmap = self.aug_transform(img, labelmap=labelmap) + + border_size = ImageHelper.get_size(img) + + if self.img_transform is not None: + img = self.img_transform(img) + + if self.label_transform is not None: + labelmap = self.label_transform(labelmap) + + meta = dict( + ori_img_size=img_size, + border_size=border_size, + ori_target=ori_target + ) + return dict( + img=DataContainer(img, stack=self.is_stack), + labelmap=DataContainer(labelmap, stack=self.is_stack), + meta=DataContainer(meta, stack=False, cpu_only=True), + name=DataContainer(self.name_list[index], stack=False, cpu_only=True), + ) + + def _reduce_zero_label(self, labelmap): + if not self.configer.get('data', 'reduce_zero_label'): + return labelmap + + labelmap = np.array(labelmap) + encoded_labelmap = labelmap - 1 + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def _encode_label(self, labelmap): + labelmap = np.array(labelmap) + + shape = 
labelmap.shape + encoded_labelmap = np.ones(shape=(shape[0], shape[1]), dtype=np.float32) * 255 + for i in range(len(self.configer.get('data', 'label_list'))): + class_id = self.configer.get('data', 'label_list')[i] + encoded_labelmap[labelmap == class_id] = i + + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def __list_dirs(self, root_dir, dataset): + img_list = list() + label_list = list() + name_list = list() + image_dir = os.path.join(root_dir, dataset, 'image') + label_dir = os.path.join(root_dir, dataset, 'label') + + # only change the ground-truth labels of training set + if self.configer.exists('data', 'label_edge2void'): + label_dir = os.path.join(root_dir, dataset, 'label_edge_void') + elif self.configer.exists('data', 'label_non_edge2void'): + label_dir = os.path.join(root_dir, dataset, 'label_non_edge_void') + + img_extension = os.listdir(image_dir)[0].split('.')[-1] + lab_extension = img_extension + # support the argument to pass the file list used for training/testing + file_list_txt = os.environ.get('use_file_list') + if file_list_txt is None: + files = sorted(os.listdir(image_dir)) + else: + Log.info("Using file list {} for training".format(file_list_txt)) + with open(os.path.join(root_dir, dataset, 'file_list', file_list_txt)) as f: + files = [x.strip() for x in f] + + for file_name in files: + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}'.format(file_name)) + # label_path = os.path.join(label_dir, image_name + '.png') + label_path = os.path.join(label_dir, image_name + '.' + lab_extension) + # Log.info('{} {} {}'.format(image_name, img_path, label_path)) + + # print(image_dir, label_dir, img_extension, files, image_name, img_path, label_path);assert 1==0 + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} {} not exists.'.format(label_path, img_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_val'): + Log.info("Use validation dataset for training.") + image_dir = os.path.join(root_dir, 'val/image') + label_dir = os.path.join(root_dir, 'val/label') + + # we only use trainval set for training if set include_val + if self.configer.get('dataset') == 'pascal_voc': + image_dir = os.path.join(root_dir, 'trainval/image') + label_dir = os.path.join(root_dir, 'trainval/label') + img_list.clear() + label_list.clear() + name_list.clear() + + if self.configer.exists('data', 'label_edge2void'): + label_dir = os.path.join(root_dir, 'val/label_edge_void') + elif self.configer.exists('data', 'label_non_edge2void'): + label_dir = os.path.join(root_dir, 'val/label_non_edge_void') + + if file_list_txt is None: + files = sorted(os.listdir(image_dir)) + else: + Log.info("Using file list {} for validation".format(file_list_txt)) + with open(os.path.join(root_dir, 'val', 'file_list', file_list_txt)) as f: + files = [x.strip() for x in f] + + for file_name in files: + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}'.format(file_name)) + label_path = os.path.join(label_dir, image_name + '.png') + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} {} not exists.'.format(label_path, img_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + 
name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_coarse'): + Log.info("Use Coarse labeled dataset for training.") + image_dir = os.path.join(root_dir, 'coarse/image') + label_dir = os.path.join(root_dir, 'coarse/label') + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_atr'): + Log.info("Use ATR dataset for training.") + image_dir = os.path.join(root_dir, 'atr/image') + label_dir = os.path.join(root_dir, 'atr/label') + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'only_coarse'): + Log.info("Only use Coarse labeled dataset for training.") + image_dir = os.path.join(root_dir, 'coarse/image') + label_dir = os.path.join(root_dir, 'coarse/label') + + img_list.clear() + label_list.clear() + name_list.clear() + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'only_mapillary'): + Log.info("Only use mapillary labeled dataset for training.") + image_dir = os.path.join(root_dir, 'mapillary/image') + label_dir = os.path.join(root_dir, 'mapillary/label') + + img_list.clear() + label_list.clear() + name_list.clear() + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, "jpg")) + label_path = os.path.join(label_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + name_list.append(image_name) + + return img_list, label_list, name_list + + +class CSDataTestLoader(data.Dataset): + def __init__(self, root_dir, dataset=None, img_transform=None, configer=None): + self.configer = configer + self.img_transform = img_transform + self.img_list, self.name_list, self.subfolder_list = self.__list_dirs(root_dir, dataset) + + size_mode = self.configer.get(dataset, 'data_transformer')['size_mode'] + self.is_stack = (size_mode != 'diverse_size') + + def __len__(self): + return len(self.img_list) + + def __getitem__(self, index): + img = ImageHelper.read_image(self.img_list[index], + tool=self.configer.get('data', 'image_tool'), + 
mode=self.configer.get('data', 'input_mode')) + img_size = ImageHelper.get_size(img) + if self.img_transform is not None: + img = self.img_transform(img) + meta = dict( + ori_img_size=img_size, + border_size=img_size, + ) + return dict( + img=DataContainer(img, stack=self.is_stack), + meta=DataContainer(meta, stack=False, cpu_only=True), + name=DataContainer(self.name_list[index], stack=False, cpu_only=True), + subfolder=DataContainer(self.subfolder_list[index], stack=False, cpu_only=True), + ) + + def __list_dirs(self, root_dir, dataset): + img_list = list() + name_list = list() + subfolder_list = list() + image_dir = os.path.join(root_dir, dataset) + img_extension = os.listdir(image_dir)[0].split('.')[-1] + + if self.configer.get('dataset') == 'cityscapes' or self.configer.get('dataset') == 'camvid' or \ + self.configer.get('dataset') == 'autonue21': + for item in os.listdir(image_dir): + sub_image_dir = os.path.join(image_dir, item) + for file_name in os.listdir(sub_image_dir): + image_name = file_name.split('.')[0] + img_path = os.path.join(sub_image_dir, file_name) + if not os.path.exists(img_path): + Log.error('Image Path: {} not exists.'.format(img_path)) + continue + img_list.append(img_path) + name_list.append(image_name) + subfolder_list.append(item) + else: + for file_name in os.listdir(image_dir): + image_name = file_name.split('.')[0] + img_path = os.path.join(image_dir, file_name) + if not os.path.exists(img_path): + Log.error('Image Path: {} not exists.'.format(img_path)) + continue + img_list.append(img_path) + name_list.append(image_name) + subfolder_list.append('') + + return img_list, name_list, subfolder_list + +if __name__ == "__main__": + # Test cityscapes loader. + pass diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/lip_loader.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/lip_loader.py new file mode 100644 index 0000000..3cabcd1 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/lip_loader.py @@ -0,0 +1,194 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Jianyuan Guo, RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import cv2 + +import numpy as np +from torch.utils import data + +from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper +from models.protoseg_core.lib.extensions.parallel.data_container import DataContainer +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class LipLoader(data.Dataset): + def __init__(self, root_dir, aug_transform=None, dataset=None, + img_transform=None, label_transform=None, configer=None): + self.configer = configer + self.aug_transform = aug_transform + self.img_transform = img_transform + self.label_transform = label_transform + self.img_list, self.label_list, self.edge_list, self.name_list = self.__list_dirs(root_dir, dataset) + self.root_dir = root_dir + self.dataset = dataset + + def __len__(self): + return len(self.img_list) + + def __getitem__(self, index): + img = ImageHelper.read_image(self.img_list[index], + tool=self.configer.get('data', 'image_tool'), + mode=self.configer.get('data', 
'input_mode')) + img_size = ImageHelper.get_size(img) + labelmap = ImageHelper.read_image(self.label_list[index], + tool=self.configer.get('data', 'image_tool'), mode='P') + edgemap = ImageHelper.read_image(self.edge_list[index], + tool=self.configer.get('data', 'image_tool'), mode='P') + + edgemap[edgemap==255] = 1 + edgemap = cv2.resize(edgemap, (labelmap.shape[-1], labelmap.shape[-2]), interpolation = cv2.INTER_NEAREST) + + if self.configer.exists('data', 'label_list'): + labelmap = self._encode_label(labelmap) + + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', 'reduce_zero_label') == 'True': + labelmap = self._reduce_zero_label(labelmap) + + ori_target = ImageHelper.tonp(labelmap) + ori_target[ori_target == 255] = -1 + + if self.aug_transform is not None: + img, labelmap, edgemap = self.aug_transform(img, labelmap=labelmap, maskmap=edgemap) + + border_size = ImageHelper.get_size(img) + + if self.img_transform is not None: + img = self.img_transform(img) + + if self.label_transform is not None: + labelmap = self.label_transform(labelmap) + edgemap = self.label_transform(edgemap) + + meta = dict( + ori_img_size=img_size, + border_size=border_size, + ori_target=ori_target + ) + return dict( + img=DataContainer(img, stack=True), + labelmap=DataContainer(labelmap, stack=True), + maskmap=DataContainer(edgemap, stack=True), + meta=DataContainer(meta, stack=False, cpu_only=True), + name=DataContainer(self.name_list[index], stack=False, cpu_only=True), + ) + + def _reduce_zero_label(self, labelmap): + if not self.configer.get('data', 'reduce_zero_label'): + return labelmap + + labelmap = np.array(labelmap) + encoded_labelmap = labelmap - 1 + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def _encode_label(self, labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones(shape=(shape[0], shape[1]), dtype=np.float32) * 255 + for i in range(len(self.configer.get('data', 'label_list'))): + class_id = self.configer.get('data', 'label_list')[i] + encoded_labelmap[labelmap == class_id] = i + + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def __list_dirs(self, root_dir, dataset): + img_list = list() + label_list = list() + edge_list = list() + name_list = list() + image_dir = os.path.join(root_dir, dataset, 'image') + label_dir = os.path.join(root_dir, dataset, 'label') + edge_dir = os.path.join(root_dir, dataset, 'edge') + img_extension = os.listdir(image_dir)[0].split('.')[-1] + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + edge_path = os.path.join(edge_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + edge_list.append(edge_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_val'): + image_dir = os.path.join(root_dir, 'val/image') + label_dir = os.path.join(root_dir, 'val/label') + edge_dir = os.path.join(root_dir, 'val/edge') + for file_name in os.listdir(label_dir): + image_name = 
'.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + edge_path = os.path.join(edge_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + edge_list.append(edge_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_atr'): + image_dir = os.path.join(root_dir, 'atr/image') + label_dir = os.path.join(root_dir, 'atr/label') + edge_dir = os.path.join(root_dir, 'atr/edge') + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + edge_path = os.path.join(edge_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + edge_list.append(edge_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_cihp'): + image_dir = os.path.join(root_dir, 'cihp/single_person/image') + label_dir = os.path.join(root_dir, 'cihp/single_person/label') + edge_dir = os.path.join(root_dir, 'cihp/single_person/edge') + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + edge_path = os.path.join(edge_dir, file_name) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + edge_list.append(edge_path) + name_list.append(image_name) + + return img_list, label_list, edge_list, name_list + + +if __name__ == "__main__": + pass \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/multi_dataset_loader.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/multi_dataset_loader.py new file mode 100644 index 0000000..947a7d8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/multi_dataset_loader.py @@ -0,0 +1,154 @@ +# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# Created by: JingyiXie +# Microsoft Research +# hsfzxjy@gmail.com +# Copyright (c) 2020 +## +# This source code is licensed under the MIT-style license found in the +# LICENSE file in the root directory of this source tree +# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pdb + +import numpy as np +import torch +from torch.utils import data +from models.protoseg_core.lib.utils.tools.configer import Configer +from models.protoseg_core.lib.datasets.tools.cv2_aug_transforms import CV2AugCompose + +class MultiDatasetLoader(data.Dataset): + """ + A meta dataloader that can serve data from multiple datasets. + + `root_dirs` is list of strings representing root directory of each dataset. + `base_class` is an task-specific dataloader class, such as `DefaultLoader`, `OffsetLoader`. 
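+ + A minimal construction sketch (the two root paths below are illustrative, not shipped defaults): + + loader = MultiDatasetLoader(['/data/cityscapes', '/data/camvid'], DefaultLoader, + dataset='train', configer=configer) + + One child loader of type `base_class` is created per root directory, each configured from + the matching entry of `child_config_files` in the configer (see `_get_child_loaders`).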
+ + During training, this object will serve `N * MAX` items within an epoch, where `N` is + number of datasets, and `MAX` is maximum items number among the `N` datasets. + + Items with index `N * j + i` is guaranteed to be the j-th item from the i-th dataset, + for j = 0..MAX-1, i=0..N-1. For dataset with length less than `MAX`, we will repeat its + items to get a list of length `MAX`. + + During training, this object should be used with `MultiDatasetTrainingSampler` to get a + balance sampling among the `N` datasets. + + During validation, this object serve a list of items that is the concatenation of items from + all datasets. + """ + def __init__(self, root_dirs, base_class, aug_transform=None, dataset=None, + img_transform=None, label_transform=None, configer=None): + self.configer = configer + self.base_class = base_class + self.dataset = dataset + self.aug_transform = aug_transform + self.img_transform = img_transform + self.label_transform = label_transform + self.child_loaders = self._get_child_loaders(root_dirs, base_class) + self.num_datasets = len(self.child_loaders) + self.selected_dataset_index = -1 + + def _get_child_configer_transform(self, child_config_file): + + dataset_configer = Configer(configs=child_config_file) + child_configer = self.configer.clone() + + child_configer.params_root['data'].update(dataset_configer.get('data')) + + if self.configer.exists('use_adaptive_transform') or self.dataset == 'val': + child_configer.params_root.update({ + 'train_trans': dataset_configer.params_root['train_trans'], + 'val_trans': dataset_configer.params_root['val_trans'], + }) + + return child_configer, CV2AugCompose(split=self.dataset, configer=child_configer) + + def _get_child_loaders(self, root_dirs, base_class): + child_config_files = self.configer.get('child_config_files') + child_loaders = [] + for i, root_dir in enumerate(root_dirs): + child_configer, child_aug_transform = self._get_child_configer_transform( + child_config_files[i] + ) + print(child_aug_transform) + child_loaders.append( + base_class( + root_dir, child_aug_transform, self.dataset, + self.img_transform, self.label_transform, + child_configer + ) + ) + return child_loaders + + def __len__(self): + if self.dataset == 'train': + return self.num_datasets * max(len(loader) for loader in self.child_loaders) + elif self.dataset == 'val': + if self.selected_dataset_index >= 0: + return len(self.child_loaders[self.selected_dataset_index]) + return sum(len(loader) for loader in self.child_loaders) + + def __getitem__(self, idx): + + if self.dataset == 'train': + loader = self.child_loaders[idx % self.num_datasets] + return loader[(idx // self.num_datasets) % len(loader)] + + elif self.dataset == 'val': + + if self.selected_dataset_index >= 0: + return self.child_loaders[self.selected_dataset_index][idx] + + current_loader = None + for loader in self.child_loaders: + if idx < len(loader): + current_loader = loader + break + idx -= len(loader) + return current_loader[idx] + + def select(self, dataset_idx): + assert 0 <= dataset_idx < self.num_datasets + self.selected_dataset_index = dataset_idx + + +class MultiDatasetTrainingSampler(torch.utils.data.Sampler): + + def __init__(self, data_source): + assert isinstance(data_source, MultiDatasetLoader) + assert data_source.dataset == 'train' + self.data_source = data_source + + @property + def num_samples(self): + return len(self.data_source) + + def __iter__(self): + n = len(self.data_source) // self.data_source.num_datasets + return get_multi_randperm(n, 
self.data_source.num_datasets) + + def __len__(self): + return self.num_samples + + +def get_multi_randperm(n, m): + """ + Return an iterator of length n * m. + + Say x_i is the i-th element yielded, i = 0...n * m - 1. + x_i will always have the same remainder as i, modulo m. + Fix i, {x_{m * j + i}} forms a permutation of [0..n-1]. + """ + for idx_group in zip(*[torch.randperm(n).tolist() for _ in range(m)]): + for loader_id, idx in enumerate(idx_group): + yield idx * m + loader_id + + +if __name__ == '__main__': + print(list(get_multi_randperm(4, 3))) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/offset_loader.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/offset_loader.py new file mode 100644 index 0000000..22d12a1 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/loader/offset_loader.py @@ -0,0 +1,507 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: JingyiXie, RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import cv2 +import torch +import numpy as np +import scipy.io as io +from torch.utils import data + +from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper +from models.protoseg_core.lib.extensions.parallel.data_container import DataContainer +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.utils.helpers.offset_helper import DTOffsetHelper + +class DTOffsetLoader(data.Dataset): + """ + Load [image, label, offset, boundary, name] + """ + def __init__(self, root_dir, aug_transform=None, dataset=None, + img_transform=None, label_transform=None, configer=None): + self.configer = configer + self.aug_transform = aug_transform + self.img_transform = img_transform + self.label_transform = label_transform + self.img_list, self.label_list, self.offset_list, self.name_list = self.__list_dirs(root_dir, dataset) + self.root_dir = root_dir + self.dataset = dataset + # check whether or not stack the data + size_mode = self.configer.get(self.dataset, 'data_transformer')['size_mode'] + self.is_stack = size_mode != 'diverse_size' + + def __len__(self): + return len(self.img_list) + + def _load_maps(self, filename, labelmap): + dct = self._load_mat(filename) + distance_map = dct['depth'].astype(np.int32) + dir_deg = dct['dir_deg'].astype(np.float) # in [0, 360 / deg_reduce] + deg_reduce = dct['deg_reduce'][0][0] + + dir_deg = deg_reduce * dir_deg - 180 # in [-180, 180] + + return distance_map, dir_deg + + def load_boundary(self, fn): + if fn.endswith('mat'): + mat = io.loadmat(fn) + if 'depth' in mat: + dist_map, _ = self._load_maps(fn, None) + boundary_map = DTOffsetHelper.distance_to_mask_label(dist_map, np.zeros_like(dist_map)).astype(np.float32) + else: + boundary_map = mat['mat'].transpose(1, 2, 0) + else: + boundary_map = ImageHelper.read_image(fn, + tool=self.configer.get('data', 'image_tool'), mode='P') + boundary_map = boundary_map.astype(np.float32) / 255 + + return boundary_map + + def __getitem__(self, index): + img = ImageHelper.read_image(self.img_list[index], + tool=self.configer.get('data', 'image_tool'), + 
mode=self.configer.get('data', 'input_mode')) + img_size = ImageHelper.get_size(img) + labelmap = ImageHelper.read_image(self.label_list[index], + tool=self.configer.get('data', 'image_tool'), mode='P') + if self.configer.exists('data', 'label_list'): + labelmap = self._encode_label(labelmap) + distance_map, angle_map = self._load_maps(self.offset_list[index], labelmap) + + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', 'reduce_zero_label') == True: + labelmap = self._reduce_zero_label(labelmap) + + ori_target = ImageHelper.tonp(labelmap).astype(np.int) + ori_target[ori_target == 255] = -1 + ori_distance_map = np.array(distance_map) + ori_angle_map = np.array(angle_map) + + if self.aug_transform is not None: + img, labelmap, distance_map, angle_map = self.aug_transform(img, labelmap=labelmap, distance_map=distance_map, angle_map=angle_map) + + old_img = img + border_size = ImageHelper.get_size(img) + + if self.img_transform is not None: + img = self.img_transform(img) + + if self.label_transform is not None: + labelmap = self.label_transform(labelmap) + distance_map = torch.from_numpy(distance_map) + angle_map = torch.from_numpy(angle_map) + + if set(self.configer.get('val_trans', 'trans_seq')) & set(['random_crop', 'crop']): + ori_target = labelmap.numpy() + ori_distance_map = distance_map.numpy() + ori_angle_map = angle_map.numpy() + img_size = ori_target.shape[:2][::-1] + + meta = dict( + ori_img_size=img_size, + border_size=border_size, + ori_target=ori_target, + ori_distance_map=ori_distance_map, + ori_angle_map=ori_angle_map, + basename=os.path.basename(self.label_list[index]) + ) + + return dict( + img=DataContainer(img, stack=self.is_stack), + labelmap=DataContainer(labelmap, stack=self.is_stack), + distance_map=DataContainer(distance_map, stack=self.is_stack), + angle_map=DataContainer(angle_map, stack=self.is_stack), + meta=DataContainer(meta, stack=False, cpu_only=True), + name=DataContainer(self.name_list[index], stack=False, cpu_only=True), + ) + + def _load_mat(self, filename): + return io.loadmat(filename) + + def _replace_ext(self, filename, ext): + return '.'.join([filename.rpartition('.')[0], ext]) + + def _reduce_zero_label(self, labelmap): + if not self.configer.get('data', 'reduce_zero_label'): + return labelmap + + labelmap = np.array(labelmap) + encoded_labelmap = labelmap - 1 + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def _encode_label(self, labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones(shape=(shape[0], shape[1]), dtype=np.float32) * 255 + for i in range(len(self.configer.get('data', 'label_list'))): + class_id = self.configer.get('data', 'label_list')[i] + encoded_labelmap[labelmap == class_id] = i + + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def __list_dirs(self, root_dir, dataset): + + if os.environ.get('use_cityscapes_style'): + if 'GTA5_small' in root_dir: + root_dir = root_dir.replace('GTA5_small', 'GTA5_Cityscapes') + else: + root_dir = root_dir.replace('GTA5', 'GTA5_Cityscapes') + Log.info_once('Using Cityscapes style, switch to {}'.format(root_dir)) + else: + Log.info_once('Using default root dir: {}'.format(root_dir)) + + img_list = list() + label_list = list() + offset_list = list() + name_list = list() + + image_subdir = 
os.environ.get('image_subdir', 'image') + label_subdir = os.environ.get('label_dir', 'label') + Log.info_once('Using label dir: {}'.format(label_subdir)) + offset_subdir = os.environ.get('offset_dir', 'dt_offset') + Log.info_once('Using distance transform based offset: {}'.format(offset_subdir)) + + image_dir = os.path.join(root_dir, dataset, image_subdir) + label_dir = os.path.join(root_dir, dataset, label_subdir) + offset_dir = os.path.join(root_dir, dataset, offset_subdir) + + img_extension = os.listdir(image_dir)[0].split('.')[-1] + + file_list_txt = os.environ.get('use_file_list') + if file_list_txt is None: + Log.info_once('Using file list: all') + files = sorted(os.listdir(label_dir)) + else: + Log.info_once('Using file list: {}'.format(file_list_txt)) + with open(os.path.join(root_dir, dataset, 'file_list', file_list_txt)) as f: + files = [x.strip() for x in f] + + if os.environ.get('chunk'): + n, i = map(int, os.environ.get('chunk').split('_')) + step = len(files) // n + 4 + files = files[step * i: step * (i + 1)] + + for file_name in files: + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + offset_path = os.path.join(offset_dir, self._replace_ext(file_name, 'mat')) + + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + offset_list.append(offset_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_val'): + Log.info_once('Include val set for training ...') + + image_dir = os.path.join(root_dir, 'val', image_subdir) + label_dir = os.path.join(root_dir, 'val', label_subdir) + offset_dir = os.path.join(root_dir, 'val', offset_subdir) + + if file_list_txt is None: + files = sorted(os.listdir(label_dir)) + else: + with open(os.path.join(root_dir, 'val', 'file_list', file_list_txt)) as f: + files = [x.strip() for x in f] + + for file_name in files: + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + offset_path = os.path.join(offset_dir, self._replace_ext(file_name, 'mat')) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + offset_list.append(offset_path) + name_list.append(image_name) + + return img_list, label_list, offset_list, name_list + + +class SWOffsetLoader(data.Dataset): + def __init__(self, root_dir, aug_transform=None, dataset=None, + img_transform=None, label_transform=None, configer=None): + self.configer = configer + self.aug_transform = aug_transform + self.img_transform = img_transform + self.label_transform = label_transform + self.img_list, self.label_list, self.offset_h_list, self.offset_w_list, self.name_list = self.__list_dirs(root_dir, dataset) + self.root_dir = root_dir + self.dataset = dataset + # check whether or not stack the data + size_mode = self.configer.get(dataset, 'data_transformer')['size_mode'] + self.is_stack = size_mode != 'diverse_size' + + def __len__(self): + return len(self.img_list) + + def __getitem__(self, index): + img = ImageHelper.read_image(self.img_list[index], + tool=self.configer.get('data', 'image_tool'), + 
mode=self.configer.get('data', 'input_mode')) + img_size = ImageHelper.get_size(img) + labelmap = ImageHelper.read_image(self.label_list[index], + tool=self.configer.get('data', 'image_tool'), mode='P') + offsetmap_h = self._load_mat(self.offset_h_list[index]) + offsetmap_w = self._load_mat(self.offset_w_list[index]) + + if os.environ.get('train_no_offset') and self.dataset == 'train': + offsetmap_h = np.zeros_like(offsetmap_h) + offsetmap_w = np.zeros_like(offsetmap_w) + + if self.configer.exists('data', 'label_list'): + labelmap = self._encode_label(labelmap) + + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', 'reduce_zero_label') == True: + labelmap = self._reduce_zero_label(labelmap) + + # Log.info('use dataset {}'.format(self.configer.get('dataset'))) + ori_target = ImageHelper.tonp(labelmap).astype(np.int) + ori_target[ori_target == 255] = -1 + ori_offset_h = np.array(offsetmap_h) + ori_offset_w = np.array(offsetmap_w) + + if self.aug_transform is not None: + img, labelmap, offsetmap_h, offsetmap_w = self.aug_transform(img, labelmap=labelmap, offset_h_map=offsetmap_h, offset_w_map=offsetmap_w) + + border_size = ImageHelper.get_size(img) + + if self.img_transform is not None: + img = self.img_transform(img) + + if self.label_transform is not None: + labelmap = self.label_transform(labelmap) + offsetmap_h = torch.from_numpy(np.array(offsetmap_h)).long() + offsetmap_w = torch.from_numpy(np.array(offsetmap_w)).long() + + meta = dict( + ori_img_size=img_size, + border_size=border_size, + ori_target=ori_target, + ori_offset_h=ori_offset_h, + ori_offset_w=ori_offset_w, + ) + + return dict( + img=DataContainer(img, stack=self.is_stack), + labelmap=DataContainer(labelmap, stack=self.is_stack), + offsetmap_h=DataContainer(offsetmap_h, stack=self.is_stack), + offsetmap_w=DataContainer(offsetmap_w, stack=self.is_stack), + meta=DataContainer(meta, stack=False, cpu_only=True), + name=DataContainer(self.name_list[index], stack=False, cpu_only=True), + ) + + def _load_mat(self, filename): + return io.loadmat(filename)['mat'] + + def _replace_ext(self, filename, ext): + return '.'.join([filename.rpartition('.')[0], ext]) + + def _reduce_zero_label(self, labelmap): + if not self.configer.get('data', 'reduce_zero_label'): + return labelmap + + labelmap = np.array(labelmap) + encoded_labelmap = labelmap - 1 + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def _encode_label(self, labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones(shape=(shape[0], shape[1]), dtype=np.float32) * 255 + for i in range(len(self.configer.get('data', 'label_list'))): + class_id = self.configer.get('data', 'label_list')[i] + encoded_labelmap[labelmap == class_id] = i + + if self.configer.get('data', 'image_tool') == 'pil': + encoded_labelmap = ImageHelper.np2img(encoded_labelmap.astype(np.uint8)) + + return encoded_labelmap + + def __list_dirs(self, root_dir, dataset): + img_list = list() + label_list = list() + offset_h_list = list() + offset_w_list = list() + name_list = list() + image_dir = os.path.join(root_dir, dataset, 'image') + label_dir = os.path.join(root_dir, dataset, 'label') + offset_h_dir = None + offset_w_dir = None + + subdir = os.environ.get('offset_dir') + if subdir is not None: + Log.info_once('Using offset dir: {}'.format(subdir)) + offset_h_dir = os.path.join(root_dir, dataset, subdir, 'h') + offset_w_dir = 
os.path.join(root_dir, dataset, subdir, 'w') + else: + offset_type = self.configer.get('data', 'offset_type') + assert(offset_type is not None) + offset_h_dir = os.path.join(root_dir, dataset, offset_type, 'h') + offset_w_dir = os.path.join(root_dir, dataset, offset_type, 'w') + + img_extension = os.listdir(image_dir)[0].split('.')[-1] + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + offset_h_path = os.path.join(offset_h_dir, self._replace_ext(file_name, 'mat')) + offset_w_path = os.path.join(offset_w_dir, self._replace_ext(file_name, 'mat')) + + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + offset_h_list.append(offset_h_path) + offset_w_list.append(offset_w_path) + name_list.append(image_name) + + if dataset == 'train' and self.configer.get('data', 'include_val'): + image_dir = os.path.join(root_dir, 'val/image') + label_dir = os.path.join(root_dir, 'val/label') + + subdir = os.environ.get('offset_dir') + if subdir is not None: + Log.info_once('Using offset dir: {}'.format(subdir)) + offset_h_dir = os.path.join(root_dir, 'val', subdir, 'h') + offset_w_dir = os.path.join(root_dir, 'val', subdir, 'w') + else: + offset_type = self.configer.get('data', 'offset_type') + assert(offset_type is not None) + offset_h_dir = os.path.join(root_dir, 'val', offset_type, 'h') + offset_w_dir = os.path.join(root_dir, 'val', offset_type, 'w') + + for file_name in os.listdir(label_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + label_path = os.path.join(label_dir, file_name) + offset_h_path = os.path.join(offset_h_dir, self._replace_ext(file_name, 'mat')) + offset_w_path = os.path.join(offset_w_dir, self._replace_ext(file_name, 'mat')) + if not os.path.exists(label_path) or not os.path.exists(img_path): + Log.error('Label Path: {} not exists.'.format(label_path)) + continue + + img_list.append(img_path) + label_list.append(label_path) + offset_h_list.append(offset_h_path) + offset_w_list.append(offset_w_path) + name_list.append(image_name) + + return img_list, label_list, offset_h_list, offset_w_list, name_list + + +class SWOffsetTestLoader(data.Dataset): + def __init__(self, root_dir, dataset='val', img_transform=None, configer=None): + self.configer = configer + self.img_transform = img_transform + self.img_list, self.offset_h_list, self.offset_w_list, self.name_list = self.__list_dirs(root_dir, dataset) + + size_mode = self.configer.get(dataset, 'data_transformer')['size_mode'] + self.is_stack = (size_mode != 'diverse_size') + + def __len__(self): + return len(self.img_list) + + def __getitem__(self, index): + img = ImageHelper.read_image(self.img_list[index], + tool=self.configer.get('data', 'image_tool'), + mode=self.configer.get('data', 'input_mode')) + offsetmap_h = self._load_mat(self.offset_h_list[index]) + offsetmap_w = self._load_mat(self.offset_w_list[index]) + img_size = ImageHelper.get_size(img) + if self.img_transform is not None: + img = self.img_transform(img) + meta = dict( + ori_img_size=img_size, + border_size=img_size, + ) + return dict( + img=DataContainer(img, stack=self.is_stack), + offsetmap_h=DataContainer(offsetmap_h, stack=self.is_stack), + 
offsetmap_w=DataContainer(offsetmap_w, stack=self.is_stack), + meta=DataContainer(meta, stack=False, cpu_only=True), + name=DataContainer(self.name_list[index], stack=False, cpu_only=True), + ) + + def _load_mat(self, filename): + return io.loadmat(filename)['mat'] + + def _replace_ext(self, filename, ext): + return '.'.join([filename.rpartition('.')[0], ext]) + + def __list_dirs(self, root_dir, dataset): + img_list = list() + offset_h_list = list() + offset_w_list = list() + name_list = list() + image_dir = os.path.join(root_dir, dataset, 'image') + + offset_h_dir = None + offset_w_dir = None + + offset_type = self.configer.get('data', 'offset_type') + assert(offset_type is not None) + offset_h_dir = os.path.join(root_dir, dataset, offset_type, 'h') + offset_w_dir = os.path.join(root_dir, dataset, offset_type, 'w') + img_extension = os.listdir(image_dir)[0].split('.')[-1] + + for file_name in os.listdir(image_dir): + image_name = '.'.join(file_name.split('.')[:-1]) + img_path = os.path.join(image_dir, '{}.{}'.format(image_name, img_extension)) + offset_h_path = os.path.join(offset_h_dir, self._replace_ext(file_name, 'mat')) + offset_w_path = os.path.join(offset_w_dir, self._replace_ext(file_name, 'mat')) + + if not os.path.exists(img_path): + Log.error('Image Path: {} not exists.'.format(img_path)) + continue + img_list.append(img_path) + offset_h_list.append(offset_h_path) + offset_w_list.append(offset_w_path) + name_list.append(image_name) + + return img_list, offset_h_list, offset_w_list, name_list + + +def load_mat(filename): + return io.loadmat(filename)['mat'] + +def replace_ext(filename, ext): + return '.'.join([filename.rpartition('.')[0], ext]) + + +if __name__ == "__main__": + pass \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/README.md b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/README.md new file mode 100644 index 0000000..e33aeb7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/README.md @@ -0,0 +1,42 @@ +### Data Format for Semantic Segmentation + +The raw data will be processed by generator shell scripts. There will be two subdirs ('train' & 'val'). + +``` +train or val dir { + image: contains the images for train or val. + label: contains the label png files (mode='P') for train or val. + mask: contains the mask png files (mode='P') for train or val. +} +``` + + +### Data Format for Instance Segmentation + +The raw data will be processed by generator shell scripts. There will be two subdirs ('train' & 'val'). + +``` +train or val dir { + image: contains the images for train or val. + json: contains the json files for train or val. +} +``` + +The json format for Instance Segmentation is shown below. + +``` +{ + "width": 640, + "height": 480, + "objects": [ + { + "bbox": [x_left_up, y_left_up, x_right_bottom, y_right_bottom], + "label": class_num, + "segm": [[polygon1], [...], ...] or rle + }, + { + ... 
+ } + ] +} +``` diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/ade20k_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/ade20k_generator.py new file mode 100644 index 0000000..fff8948 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/ade20k_generator.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# COCO det data generator. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os +import argparse +import shutil + + +LABEL_DIR = 'label' +IMAGE_DIR = 'image' + + +class ADE20KGenerator(object): + def __init__(self, args, image_dir=IMAGE_DIR, label_dir=LABEL_DIR): + self.args = args + self.train_label_dir = os.path.join(self.args.save_dir, 'train', label_dir) + self.val_label_dir = os.path.join(self.args.save_dir, 'val', label_dir) + if not os.path.exists(self.train_label_dir): + os.makedirs(self.train_label_dir) + + if not os.path.exists(self.val_label_dir): + os.makedirs(self.val_label_dir) + + self.train_image_dir = os.path.join(self.args.save_dir, 'train', image_dir) + self.val_image_dir = os.path.join(self.args.save_dir, 'val', image_dir) + if not os.path.exists(self.train_image_dir): + os.makedirs(self.train_image_dir) + + if not os.path.exists(self.val_image_dir): + os.makedirs(self.val_image_dir) + + def generate_label(self): + train_img_folder = os.path.join(self.args.ori_root_dir, 'images/training') + train_mask_folder = os.path.join(self.args.ori_root_dir, 'annotations/training') + + val_img_folder = os.path.join(self.args.ori_root_dir, 'images/validation') + val_mask_folder = os.path.join(self.args.ori_root_dir, 'annotations/validation') + + for filename in os.listdir(train_img_folder): + basename, _ = os.path.splitext(filename) + if filename.endswith(".jpg"): + imgpath = os.path.join(train_img_folder, filename) + maskname = basename + '.png' + maskpath = os.path.join(train_mask_folder, maskname) + if os.path.isfile(maskpath): + shutil.copy(imgpath, + os.path.join(self.train_image_dir, filename)) + shutil.copy(maskpath, + os.path.join(self.train_label_dir, maskname)) + else: + print('cannot find the mask:', maskpath) + + for filename in os.listdir(val_img_folder): + basename, _ = os.path.splitext(filename) + if filename.endswith(".jpg"): + imgpath = os.path.join(val_img_folder, filename) + maskname = basename + '.png' + maskpath = os.path.join(val_mask_folder, maskname) + if os.path.isfile(maskpath): + shutil.copy(imgpath, os.path.join(self.val_image_dir, filename)) + shutil.copy(maskpath, os.path.join(self.val_label_dir, maskname)) + else: + print('cannot find the mask:', maskpath) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument('--save_dir', default=None, type=str, + dest='save_dir', help='The directory to save the data.') + parser.add_argument('--ori_root_dir', default=None, type=str, + dest='ori_root_dir', help='The directory of the cityscapes data.') + + 
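+ # Example invocation (paths are placeholders; the companion ade20k_generator.sh below shows a concrete call): + # python ade20k_generator.py --ori_root_dir /path/to/ADEChallengeData2016 --save_dir /path/to/ade20k + 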
args = parser.parse_args() + + ade20k_generator = ADE20KGenerator(args) + ade20k_generator.generate_label() \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/ade20k_generator.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/ade20k_generator.sh new file mode 100644 index 0000000..671c044 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/ade20k_generator.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Generate train & val data. + + +ORI_ROOT_DIR='/cluster/work/cvl/tiazhou/data/ADE20K/ADEChallengeData2016' +SAVE_DIR='/cluster/work/cvl/tiazhou/data/ADE20K/ADEChallengeData2016' + + +python ade20k_generator.py --ori_root_dir $ORI_ROOT_DIR \ + --save_dir $SAVE_DIR \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/dt_offset_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/dt_offset_generator.py new file mode 100644 index 0000000..dc1b206 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/dt_offset_generator.py @@ -0,0 +1,138 @@ +import os +import sys +import cv2 +import torch +import argparse +import subprocess +import numpy as np +from glob import glob +from PIL import Image +import os.path as osp +import scipy.io as io +import multiprocessing as mp +import numpy.linalg as linalg +import matplotlib.pyplot as plt +import multiprocessing.pool as mpp +from scipy.ndimage.morphology import distance_transform_edt, distance_transform_cdt + +script_path = osp.abspath(osp.join(osp.dirname(__file__))) +os.chdir(osp.join(script_path, '..', '..', '..', '..')) +sys.path.insert(0, os.getcwd()) +os.environ['PYTHONPATH'] = os.getcwd() + ':' + os.environ.get('PYTHONPATH', '') + +DATA_ROOT = subprocess.check_output( + ['bash', '-c', "source config.profile; echo $DATA_ROOT"] +).decode().strip() + + +def sobel_kernel(shape, axis): + """ + shape must be odd: eg. 
(5,5) + axis is the direction, with 0 to positive x and 1 to positive y + """ + k = np.zeros(shape) + p = [ + (j, i) + for j in range(shape[0]) + for i in range(shape[1]) + if not (i == (shape[1] - 1) / 2.0 and j == (shape[0] - 1) / 2.0) + ] + + for j, i in p: + j_ = int(j - (shape[0] - 1) / 2.0) + i_ = int(i - (shape[1] - 1) / 2.0) + k[j, i] = (i_ if axis == 0 else j_) / float(i_ * i_ + j_ * j_) + return torch.from_numpy(k).unsqueeze(0) + + +label_list = list(range(1, 151)) + + +def _encode_label(labelmap): + encoded_labelmap = np.ones_like(labelmap, dtype=np.uint16) * 255 + for i, class_id in enumerate(label_list): + encoded_labelmap[labelmap == class_id] = i + + return encoded_labelmap + + +def process(inp): + (indir, outdir, basename) = inp + print(inp) + labelmap = np.array(Image.open(osp.join(indir, basename) + ).convert("P")).astype(np.int16) + labelmap = _encode_label(labelmap) + depth_map = np.zeros(labelmap.shape, dtype=np.float32) + dir_map = np.zeros((*labelmap.shape, 2), dtype=np.float32) + + for id in range(255): + labelmap_i = (labelmap == id).astype(np.uint8) + + if labelmap_i.sum() < 100: + continue + + if args.metric == 'euc': + depth_i = distance_transform_edt(labelmap_i) + elif args.metric == 'taxicab': + depth_i = distance_transform_cdt(labelmap_i, metric='taxicab') + else: + raise RuntimeError + depth_map += depth_i + + dir_i_before = dir_i = np.zeros_like(dir_map) + dir_i = torch.nn.functional.conv2d(torch.from_numpy(depth_i).float().view( + 1, 1, *depth_i.shape), sobel_ker, padding=ksize//2).squeeze().permute(1, 2, 0).numpy() + + # The following line is necessary + dir_i[(labelmap_i == 0), :] = 0 + + dir_map += dir_i + + depth_map[depth_map > 250] = 250 + depth_map = depth_map.astype(np.uint8) + deg_reduce = 2 + dir_deg_map = np.degrees(np.arctan2( + dir_map[:, :, 0], dir_map[:, :, 1])) + 180 + dir_deg_map = (dir_deg_map / deg_reduce) + print(dir_deg_map.min(), dir_deg_map.max()) + dir_deg_map = dir_deg_map.astype(np.uint8) + + io.savemat( + osp.join(outdir, basename.replace("png", "mat")), + {"dir_deg": dir_deg_map, "depth": depth_map, 'deg_reduce': deg_reduce}, + do_compression=True, + ) + + try: + io.loadmat(osp.join(outdir, basename.replace("png", "mat")),) + except Exception as e: + print(e) + io.savemat( + osp.join(outdir, basename.replace("png", "mat")), + {"dir_deg": dir_deg_map, "depth": depth_map, 'deg_reduce': deg_reduce}, + do_compression=False, + ) + + +parser = argparse.ArgumentParser() +parser.add_argument("--datadir", dest='datadir', + default=osp.join(DATA_ROOT, 'ade20k')) +parser.add_argument("--outname", default='offset_gt/dt_offset') +parser.add_argument('--split', nargs='+', default=['val', 'train']) +parser.add_argument("--ksize", type=int, default=5) +parser.add_argument('--metric', default='euc', choices=['euc', 'taxicab']) +args = parser.parse_args() + +ksize = args.ksize + +sobel_x, sobel_y = (sobel_kernel((ksize, ksize), i) for i in (0, 1)) +sobel_ker = torch.cat([sobel_y, sobel_x], dim=0).view( + 2, 1, ksize, ksize).float() + +for dataset in args.split: + indir = osp.join(args.datadir, dataset, 'label') + outdir = osp.join(args.datadir, dataset, args.outname) + os.makedirs(outdir, exist_ok=True) + args_to_apply = [(indir, outdir, osp.basename(basename)) + for basename in glob(osp.join(indir, "*.png"))] + mpp.Pool(processes=mp.cpu_count() // 2).map(process, args_to_apply) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/prepare_ade20k.py 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/prepare_ade20k.py new file mode 100644 index 0000000..cfa7a1f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/ade20k/prepare_ade20k.py @@ -0,0 +1,124 @@ +"""Prepare ADE20K dataset""" +import os +import shutil +import argparse +import zipfile +import requests +import hashlib +from tqdm import tqdm + + +_TARGET_DIR = '/cluster/work/cvl/tiazhou/data/' + +def check_sha1(filename, sha1_hash): + """Check whether the sha1 hash of the file content matches the expected hash. + Parameters + ---------- + filename : str + Path to the file. + sha1_hash : str + Expected sha1 hash in hexadecimal digits. + Returns + ------- + bool + Whether the file content matches the expected hash. + """ + sha1 = hashlib.sha1() + with open(filename, 'rb') as f: + while True: + data = f.read(1048576) + if not data: + break + sha1.update(data) + + return sha1.hexdigest() == sha1_hash + +def download(url, path=None, overwrite=False, sha1_hash=None): + """Download an given URL + Parameters + ---------- + url : str + URL to download + path : str, optional + Destination path to store downloaded file. By default stores to the + current directory with same name as in url. + overwrite : bool, optional + Whether to overwrite destination file if already exists. + sha1_hash : str, optional + Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified + but doesn't match. + Returns + ------- + str + The file path of the downloaded file. + """ + if path is None: + fname = url.split('/')[-1] + else: + path = os.path.expanduser(path) + if os.path.isdir(path): + fname = os.path.join(path, url.split('/')[-1]) + else: + fname = path + + if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)): + dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) + if not os.path.exists(dirname): + os.makedirs(dirname) + + print('Downloading %s from %s...'%(fname, url)) + r = requests.get(url, stream=True) + if r.status_code != 200: + raise RuntimeError("Failed downloading url %s"%url) + total_length = r.headers.get('content-length') + with open(fname, 'wb') as f: + if total_length is None: # no content length header + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + else: + total_length = int(total_length) + for chunk in tqdm(r.iter_content(chunk_size=1024), + total=int(total_length / 1024. + 0.5), + unit='KB', unit_scale=False, dynamic_ncols=True): + f.write(chunk) + + if sha1_hash and not check_sha1(fname, sha1_hash): + raise UserWarning('File {} is downloaded but the content hash does not match. ' \ + 'The repo may be outdated or download may be incomplete. 
' \ + 'If the "repo_url" is overridden, consider switching to ' \ + 'the default repo.'.format(fname)) + + return fname + +def parse_args(): + parser = argparse.ArgumentParser( + description='Initialize ADE20K dataset.', + epilog='Example: python prepare_ade20k.py', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--download-dir', default=None, help='dataset directory on disk') + args = parser.parse_args() + return args + +def download_ade(path, overwrite=False): + _AUG_DOWNLOAD_URLS = [ + ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'), + ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', 'e05747892219d10e9243933371a497e905a4860c'),] + download_dir = os.path.join(path, 'ADE20K') + os.makedirs(download_dir, exist_ok=True) + for url, checksum in _AUG_DOWNLOAD_URLS: + filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) + # extract + with zipfile.ZipFile(filename,"r") as zip_ref: + zip_ref.extractall(path=path) + + +if __name__ == '__main__': + args = parse_args() + if args.download_dir is not None: + if os.path.isdir(_TARGET_DIR): + os.remove(_TARGET_DIR) + # make symlink + os.symlink(args.download_dir, _TARGET_DIR) + else: + download_ade(_TARGET_DIR, overwrite=False) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/camvid/camvid_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/camvid/camvid_generator.py new file mode 100644 index 0000000..50ca282 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/camvid/camvid_generator.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# CityScape Seg data generator. 
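+# Note: this generator reorganises a CamVid dataset split (train/val/test, each with +# image/ and label/ subdirectories) into the save_dir layout read by the loaders in +# lib/datasets/loader; the --coarse branch follows Cityscapes-style leftImg8bit/gtCoarse paths.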
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import argparse +import shutil + + +IMAGE_DIR = 'image' +LABEL_DIR = 'label' + +def str2bool(v): + """ Usage: + parser.add_argument('--pretrained', type=str2bool, nargs='?', const=True, + dest='pretrained', help='Whether to use pretrained models.') + """ + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Unsupported value encountered.') + + +class CamVidGenerator(object): + + def __init__(self, args, image_dir=IMAGE_DIR, label_dir=LABEL_DIR): + self.args = args + self.train_label_dir = os.path.join(self.args.save_dir, 'train', label_dir) + self.val_label_dir = os.path.join(self.args.save_dir, 'val', label_dir) + self.test_label_dir = os.path.join(self.args.save_dir, 'test', label_dir) + if not os.path.exists(self.train_label_dir): + os.makedirs(self.train_label_dir) + + if not os.path.exists(self.val_label_dir): + os.makedirs(self.val_label_dir) + + if not os.path.exists(self.test_label_dir): + os.makedirs(self.test_label_dir) + + self.train_image_dir = os.path.join(self.args.save_dir, 'train', image_dir) + self.val_image_dir = os.path.join(self.args.save_dir, 'val', image_dir) + self.test_image_dir = os.path.join(self.args.save_dir, 'test', image_dir) + + if not os.path.exists(self.train_image_dir): + os.makedirs(self.train_image_dir) + + if not os.path.exists(self.val_image_dir): + os.makedirs(self.val_image_dir) + + if not os.path.exists(self.test_image_dir): + os.makedirs(self.test_image_dir) + + def generate_label(self): + if not self.args.coarse: + ori_train_img_dir = os.path.join(self.args.ori_root_dir, 'train/image') + ori_train_label_dir = os.path.join(self.args.ori_root_dir, 'train/label') + ori_val_img_dir = os.path.join(self.args.ori_root_dir, 'val/image') + ori_val_label_dir = os.path.join(self.args.ori_root_dir, 'val/label') + ori_test_img_dir = os.path.join(self.args.ori_root_dir, 'test/image') + ori_test_label_dir = os.path.join(self.args.ori_root_dir, 'test/label') + + for image_file in self.__list_dir(ori_train_img_dir): + image_name = '_'.join(image_file.split('_')) + label_file = '{}'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_img_dir, image_file), + os.path.join(self.train_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_train_label_dir, label_file), + os.path.join(self.train_label_dir, '{}_1_1.png'.format(shotname))) + + for image_file in self.__list_dir(ori_val_img_dir): + image_name = '_'.join(image_file.split('_')) + label_file = '{}'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_val_img_dir, image_file), + os.path.join(self.val_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_val_label_dir, label_file), + os.path.join(self.val_label_dir, '{}_1_1.png'.format(shotname))) + + for image_file in self.__list_dir(ori_test_img_dir): + image_name = '_'.join(image_file.split('_')) + label_file = '{}'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_test_img_dir, image_file), + os.path.join(self.test_image_dir, '{}_1_1_1{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_test_label_dir, label_file), + os.path.join(self.test_label_dir, 
'{}_1_1_1.png'.format(shotname))) + + else: + + ori_train_img_dir = os.path.join(self.args.ori_root_dir, 'train/image') + ori_train_label_dir = os.path.join(self.args.ori_root_dir, 'train/label') + ori_train_extra_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/train_extra') + ori_train_extra_label_dir = os.path.join(self.args.ori_root_dir, 'gtCoarse/train_extra') + ori_val_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/val') + ori_val_label_dir = os.path.join(self.args.ori_root_dir, 'gtCoarse/val') + + for image_file in self.__list_dir(ori_train_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtCoarse_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_img_dir, image_file), + os.path.join(self.train_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_train_label_dir, label_file), + os.path.join(self.train_label_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_train_extra_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtCoarse_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_extra_img_dir, image_file), + os.path.join(self.coarse_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_train_extra_label_dir, label_file), + os.path.join(self.coarse_label_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_val_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtCoarse_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_val_img_dir, image_file), + os.path.join(self.val_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_val_label_dir, label_file), + os.path.join(self.val_label_dir, '{}.png'.format(shotname))) + + + def __list_dir(self, dir_name): + filename_list = list() + for item in os.listdir(dir_name): + if os.path.isdir(os.path.join(dir_name, item)): + for filename in os.listdir(os.path.join(dir_name, item)): + filename_list.append('{}/{}'.format(item, filename)) + else: + filename_list.append(item) + + return filename_list + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument('--coarse', type=str2bool, nargs='?', default=False, + dest='coarse', help='Whether is the coarse data.') + parser.add_argument('--save_dir', default=None, type=str, + dest='save_dir', help='The directory to save the data.') + parser.add_argument('--ori_root_dir', default=None, type=str, + dest='ori_root_dir', help='The directory of the cityscapes data.') + + args = parser.parse_args() + + cityscapes_generator = CamVidGenerator(args) + cityscapes_generator.generate_label() + +# /root/miniconda3/bin/python cityscapes_generator.py --coarse True \ +# --save_dir /msravcshare/dataset/cityscapes/ --ori_root_dir \ +# /msravcshare/yuyua/code/segmentation/deeplab_v3/dataset/cityscapes/ \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/cityscapes_generator.py 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/cityscapes_generator.py new file mode 100644 index 0000000..95e5999 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/cityscapes_generator.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# CityScape Seg data generator. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import argparse +import shutil + + +IMAGE_DIR = 'image' +LABEL_DIR = 'label' + +def str2bool(v): + """ Usage: + parser.add_argument('--pretrained', type=str2bool, nargs='?', const=True, + dest='pretrained', help='Whether to use pretrained models.') + """ + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Unsupported value encountered.') + + +class CityscapesGenerator(object): + + def __init__(self, args, image_dir=IMAGE_DIR, label_dir=LABEL_DIR): + self.args = args + self.train_label_dir = os.path.join(self.args.save_dir, 'train', label_dir) + self.val_label_dir = os.path.join(self.args.save_dir, 'val', label_dir) + self.coarse_label_dir = os.path.join(self.args.save_dir, 'coarse', label_dir) + if not os.path.exists(self.train_label_dir): + os.makedirs(self.train_label_dir) + + if not os.path.exists(self.val_label_dir): + os.makedirs(self.val_label_dir) + + if not os.path.exists(self.coarse_label_dir): + os.makedirs(self.coarse_label_dir) + + self.train_image_dir = os.path.join(self.args.save_dir, 'train', image_dir) + self.val_image_dir = os.path.join(self.args.save_dir, 'val', image_dir) + self.coarse_image_dir = os.path.join(self.args.save_dir, 'coarse', image_dir) + + if not os.path.exists(self.train_image_dir): + os.makedirs(self.train_image_dir) + + if not os.path.exists(self.val_image_dir): + os.makedirs(self.val_image_dir) + + if not os.path.exists(self.coarse_image_dir): + os.makedirs(self.coarse_image_dir) + + def generate_label(self): + if not self.args.coarse: + ori_train_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/train') + ori_train_label_dir = os.path.join(self.args.ori_root_dir, 'gtFine/train') + ori_val_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/val') + ori_val_label_dir = os.path.join(self.args.ori_root_dir, 'gtFine/val') + + for image_file in self.__list_dir(ori_train_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtFine_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_img_dir, image_file), + os.path.join(self.train_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_train_label_dir, label_file), + os.path.join(self.train_label_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_val_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtFine_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_val_img_dir, image_file), + os.path.join(self.val_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_val_label_dir, label_file), + os.path.join(self.val_label_dir, '{}.png'.format(shotname))) + + else: + + ori_train_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/train') + 
ori_train_label_dir = os.path.join(self.args.ori_root_dir, 'gtCoarse/train') + ori_train_extra_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/train_extra') + ori_train_extra_label_dir = os.path.join(self.args.ori_root_dir, 'gtCoarse/train_extra') + ori_val_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/val') + ori_val_label_dir = os.path.join(self.args.ori_root_dir, 'gtCoarse/val') + + for image_file in self.__list_dir(ori_train_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtCoarse_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_img_dir, image_file), + os.path.join(self.train_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_train_label_dir, label_file), + os.path.join(self.train_label_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_train_extra_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtCoarse_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_extra_img_dir, image_file), + os.path.join(self.coarse_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_train_extra_label_dir, label_file), + os.path.join(self.coarse_label_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_val_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + label_file = '{}_gtCoarse_labelIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_val_img_dir, image_file), + os.path.join(self.val_image_dir, '{}{}'.format(shotname, extension))) + shutil.copy(os.path.join(ori_val_label_dir, label_file), + os.path.join(self.val_label_dir, '{}.png'.format(shotname))) + + + def __list_dir(self, dir_name): + filename_list = list() + for item in os.listdir(dir_name): + if os.path.isdir(os.path.join(dir_name, item)): + for filename in os.listdir(os.path.join(dir_name, item)): + filename_list.append('{}/{}'.format(item, filename)) + else: + filename_list.append(item) + + return filename_list + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument('--coarse', type=str2bool, nargs='?', default=False, + dest='coarse', help='Whether is the coarse data.') + parser.add_argument('--save_dir', default=None, type=str, + dest='save_dir', help='The directory to save the data.') + parser.add_argument('--ori_root_dir', default=None, type=str, + dest='ori_root_dir', help='The directory of the cityscapes data.') + + args = parser.parse_args() + + cityscapes_generator = CityscapesGenerator(args) + cityscapes_generator.generate_label() + +# /root/miniconda3/bin/python cityscapes_generator.py --coarse True \ +# --save_dir /msravcshare/dataset/cityscapes/ --ori_root_dir \ +# /msravcshare/yuyua/code/segmentation/deeplab_v3/dataset/cityscapes/ \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/cityscapes_instance_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/cityscapes_instance_generator.py new file mode 100644 index 0000000..44c46dc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/cityscapes_instance_generator.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny 
You(youansheng@gmail.com) +# Updated by: Lang Huang(laynehuang@outlook.com) +# CityScape Seg data generator. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import argparse +import shutil + + +IMAGE_DIR = 'image' +LABEL_DIR = 'label' +INSTANCE_DIR = 'instance' + +def str2bool(v): + """ Usage: + parser.add_argument('--pretrained', type=str2bool, nargs='?', const=True, + dest='pretrained', help='Whether to use pretrained models.') + """ + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Unsupported value encountered.') + + +class CityscapesInstanceGenerator(object): + + def __init__(self, args, image_dir=IMAGE_DIR, label_dir=LABEL_DIR, instance_dir=INSTANCE_DIR): + self.args = args + self.train_instance_dir = os.path.join(self.args.save_dir, 'train', instance_dir) + self.val_instance_dir = os.path.join(self.args.save_dir, 'val', instance_dir) + self.coarse_instance_dir = os.path.join(self.args.save_dir, 'coarse', instance_dir) + if not os.path.exists(self.train_instance_dir): + os.makedirs(self.train_instance_dir) + if not os.path.exists(self.val_instance_dir): + os.makedirs(self.val_instance_dir) + if not os.path.exists(self.coarse_instance_dir): + os.makedirs(self.coarse_instance_dir) + + def generate_instance(self): + if not self.args.coarse: + ori_train_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/train') + ori_train_label_dir = os.path.join(self.args.ori_root_dir, 'gtFine/train') + ori_val_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/val') + ori_val_label_dir = os.path.join(self.args.ori_root_dir, 'gtFine/val') + + for image_file in self.__list_dir(ori_train_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + print(image_name) + instance_file = '{}_gtFine_instanceIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_label_dir, instance_file), + os.path.join(self.train_instance_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_val_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + print(image_name) + instance_file = '{}_gtFine_instanceIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_val_label_dir, instance_file), + os.path.join(self.val_instance_dir, '{}.png'.format(shotname))) + + else: + ori_train_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/train') + ori_train_label_dir = os.path.join(self.args.ori_root_dir, 'gtFine/train') + ori_train_extra_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/train_extra') + ori_train_extra_label_dir = os.path.join(self.args.ori_root_dir, 'gtCoarse/train_extra') + ori_val_img_dir = os.path.join(self.args.ori_root_dir, 'leftImg8bit/val') + ori_val_label_dir = os.path.join(self.args.ori_root_dir, 'gtFine/val') + + for image_file in self.__list_dir(ori_train_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + print(image_name) + instance_file = '{}_gtFine_instanceIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_label_dir, instance_file), + os.path.join(self.train_instance_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_train_extra_img_dir): + image_name = 
'_'.join(image_file.split('_')[:-1]) + print(image_name) + instance_file = '{}_gtCoarse_instanceIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_train_extra_label_dir, instance_file), + os.path.join(self.coarse_instance_dir, '{}.png'.format(shotname))) + + for image_file in self.__list_dir(ori_val_img_dir): + image_name = '_'.join(image_file.split('_')[:-1]) + print(image_name) + instance_file = '{}_gtFine_instanceIds.png'.format(image_name) + shotname, extension = os.path.splitext(image_file.split('/')[-1]) + shutil.copy(os.path.join(ori_val_label_dir, instance_file), + os.path.join(self.val_instance_dir, '{}.png'.format(shotname))) + + def __list_dir(self, dir_name): + filename_list = list() + for item in os.listdir(dir_name): + if os.path.isdir(os.path.join(dir_name, item)): + for filename in os.listdir(os.path.join(dir_name, item)): + filename_list.append('{}/{}'.format(item, filename)) + else: + filename_list.append(item) + return filename_list + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument('--coarse', type=str2bool, nargs='?', default=False, + dest='coarse', help='Whether is the coarse data.') + parser.add_argument('--save_dir', default=None, type=str, + dest='save_dir', help='The directory to save the data.') + parser.add_argument('--ori_root_dir', default=None, type=str, + dest='ori_root_dir', help='The directory of the cityscapes data.') + + args = parser.parse_args() + + cityscapes_generator = CityscapesInstanceGenerator(args) + cityscapes_generator.generate_instance() + +# /root/miniconda3/bin/python cityscapes_instance_generator.py --coarse True \ +# --save_dir /msravcshare/dataset/cityscapes/ --ori_root_dir \ +# /msravcshare/yuyua/code/segmentation/deeplab_v3/dataset/cityscapes/ diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/dt_offset_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/dt_offset_generator.py new file mode 100644 index 0000000..348bcb5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/dt_offset_generator.py @@ -0,0 +1,132 @@ +import os +import sys +import cv2 +import torch +import argparse +import subprocess +import numpy as np +from glob import glob +from PIL import Image +import os.path as osp +import scipy.io as io +import multiprocessing as mp +import numpy.linalg as linalg +import matplotlib.pyplot as plt +import multiprocessing.pool as mpp +from scipy.ndimage.morphology import distance_transform_edt, distance_transform_cdt + +script_path = osp.abspath(osp.join(osp.dirname(__file__))) +os.chdir(osp.join(script_path, '..', '..', '..', '..')) +sys.path.insert(0, os.getcwd()) +os.environ['PYTHONPATH'] = os.getcwd() + ':' + os.environ.get('PYTHONPATH', '') + +DATA_ROOT = subprocess.check_output( + ['bash', '-c', "source config.profile; echo $DATA_ROOT"] +).decode().strip() + + +def sobel_kernel(shape, axis): + """ + shape must be odd: eg. 
(5,5) + axis is the direction, with 0 to positive x and 1 to positive y + """ + k = np.zeros(shape) + p = [ + (j, i) + for j in range(shape[0]) + for i in range(shape[1]) + if not (i == (shape[1] - 1) / 2.0 and j == (shape[0] - 1) / 2.0) + ] + + for j, i in p: + j_ = int(j - (shape[0] - 1) / 2.0) + i_ = int(i - (shape[1] - 1) / 2.0) + k[j, i] = (i_ if axis == 0 else j_) / float(i_ * i_ + j_ * j_) + return torch.from_numpy(k).unsqueeze(0) + +label_list = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] + +def _encode_label(labelmap): + encoded_labelmap = np.ones_like(labelmap, dtype=np.uint16) * 255 + for i, class_id in enumerate(label_list): + encoded_labelmap[labelmap == class_id] = i + + return encoded_labelmap + +def process(inp): + (indir, outdir, basename) = inp + print(inp) + labelmap = np.array(Image.open(osp.join(indir, basename)).convert("P")).astype(np.int16) + labelmap = _encode_label(labelmap) + labelmap = labelmap + 1 + depth_map = np.zeros(labelmap.shape, dtype=np.float32) + dir_map = np.zeros((*labelmap.shape, 2), dtype=np.float32) + + for id in range(1, 20): + labelmap_i = labelmap.copy() + labelmap_i[labelmap_i != id] = 0 + labelmap_i[labelmap_i == id] = 1 + + if labelmap_i.sum() < 100: + continue + + if args.metric == 'euc': + depth_i = distance_transform_edt(labelmap_i) + elif args.metric == 'taxicab': + depth_i = distance_transform_cdt(labelmap_i, metric='taxicab') + else: + raise RuntimeError + depth_map += depth_i + + dir_i_before = dir_i = np.zeros_like(dir_map) + dir_i = torch.nn.functional.conv2d(torch.from_numpy(depth_i).float().view(1, 1, *depth_i.shape), sobel_ker, padding=ksize//2).squeeze().permute(1, 2, 0).numpy() + + # The following line is necessary + dir_i[(labelmap_i == 0), :] = 0 + + dir_map += dir_i + + depth_map[depth_map > 250] = 250 + depth_map = depth_map.astype(np.uint8) + deg_reduce = 2 + dir_deg_map = np.degrees(np.arctan2(dir_map[:, :, 0], dir_map[:, :, 1])) + 180 + dir_deg_map = (dir_deg_map / deg_reduce) + print(dir_deg_map.min(), dir_deg_map.max()) + dir_deg_map = dir_deg_map.astype(np.uint8) + + io.savemat( + osp.join(outdir, basename.replace("png", "mat")), + {"dir_deg": dir_deg_map, "depth": depth_map, 'deg_reduce': deg_reduce}, + do_compression=True, + ) + + try: + io.loadmat(osp.join(outdir, basename.replace("png", "mat")),) + except Exception as e: + print(e) + io.savemat( + osp.join(outdir, basename.replace("png", "mat")), + {"dir_deg": dir_deg_map, "depth": depth_map, 'deg_reduce': deg_reduce}, + do_compression=False, + ) + +parser = argparse.ArgumentParser() +parser.add_argument("--datadir", dest='datadir', default=osp.join(DATA_ROOT, 'cityscapes')) +parser.add_argument("--outname", default='offset_gt/dt_offset') +parser.add_argument('--split', nargs='+', default=['val', 'train']) +parser.add_argument("--ksize", type=int, default=5) +parser.add_argument('--metric', default='euc', choices=['euc', 'taxicab']) +args = parser.parse_args() + +ksize = args.ksize + +sobel_x, sobel_y = (sobel_kernel((ksize, ksize), i) for i in (0, 1)) +sobel_ker = torch.cat([sobel_y, sobel_x], dim=0).view(2, 1, ksize, ksize).float() + +for dataset in args.split: + indir = osp.join(args.datadir, dataset, 'label') + outdir = osp.join(args.datadir, dataset, args.outname) + os.makedirs(outdir, exist_ok=True) + args_to_apply = [(indir, outdir, osp.basename(basename)) for basename in glob(osp.join(indir, "*.png"))] + mpp.Pool(processes=mp.cpu_count() // 2).map(process, args_to_apply) + diff --git 
a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/edge_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/edge_generator.py new file mode 100644 index 0000000..bdb5881 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/edge_generator.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: RainbowSecrete +# Generate the edge files and convert the labels of edge pixels / non-edge pixels to void. + +# /root/miniconda3/bin/python generate_edge.py + +import os +import cv2 +import pdb +import glob + +import numpy as np + +from PIL import Image +from shutil import copyfile + + +def generate_edge(label, edge_width=3): + h, w = label.shape + edge = np.zeros(label.shape, dtype=np.uint8) + + # right + edge_right = edge[1:h, :] + edge_right[(label[1:h, :] != label[:h - 1, :]) & (label[1:h, :] != 255) + & (label[:h - 1, :] != 255)] = 255 + + # up + edge_up = edge[:, :w - 1] + edge_up[(label[:, :w - 1] != label[:, 1:w]) + & (label[:, :w - 1] != 255) + & (label[:, 1:w] != 255)] = 255 + + # upright + edge_upright = edge[:h - 1, :w - 1] + edge_upright[(label[:h - 1, :w - 1] != label[1:h, 1:w]) + & (label[:h - 1, :w - 1] != 255) + & (label[1:h, 1:w] != 255)] = 255 + + # bottomright + edge_bottomright = edge[:h - 1, 1:w] + edge_bottomright[(label[:h - 1, 1:w] != label[1:h, :w - 1]) + & (label[:h - 1, 1:w] != 255) + & (label[1:h, :w - 1] != 255)] = 255 + + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (edge_width, edge_width)) + edge = cv2.dilate(edge, kernel) + + return edge + + +def generate_train_val_edge(label_path, edge_path, kernel_size=10): + for label_file in os.listdir(label_path): + print(label_file) + label = np.array(Image.open(label_path + label_file).convert('P')) + edge = generate_edge(label, kernel_size) + + im_edge = Image.fromarray(edge, 'P') + + edge_file = label_file.replace('label', 'edge') + im_edge.save(edge_path + edge_file) + + out_edge = np.array(Image.open(edge_path + edge_file).convert('P')) + + +def label_edge2void(label_path, edge_path, dest_label_path): + ''' + Set the pixels along the edge as void label. + Used to train the models without supervision on the edge pixels. + ''' + for label_file in os.listdir(label_path): + print(label_file) + edge_file = label_file.replace('label', 'edge') + + label = np.array(Image.open(label_path + label_file).convert('P')) + edge = np.array(Image.open(edge_path + edge_file).convert('P')) + + label[edge == 255] = 255 + label_update = Image.fromarray(label) + + label_update.save(dest_label_path + label_file) + + +def label_nedge2void(label_path, edge_path, dest_label_path): + ''' + Set the pixels except the edge as void label. + Used to evaluate the performance of various models on the edge pixels. + ''' + for label_file in os.listdir(label_path): + print(label_file) + edge_file = label_file.replace('label', 'edge') + + label = np.array(Image.open(label_path + label_file).convert('P')) + edge = np.array(Image.open(edge_path + edge_file).convert('P')) + + label[edge == 0] = 255 + label_update = Image.fromarray(label) + + label_update.save(dest_label_path + label_file) + + +def calculate_edge(edge_path): + ''' + Set the pixels except the edge as void label. + Used to evaluate the performance of various models on the edge pixels. 
+ ''' + edge_cnt = 0.0 + non_edge_cnt = 0.0 + + print("ratio: {:f}".format(1/2)) + + for label_file in os.listdir(label_path): + print(label_file) + edge_file = label_file.replace('label', 'edge') + edge = np.array(Image.open(edge_path + edge_file).convert('P')) + + edge_cnt += np.sum(edge == 255) + non_edge_cnt += np.sum(edge == 0) + + print("ratio: {:f}".format(edge_cnt/non_edge_cnt)) + + +if __name__ == "__main__": + label_path = "/msravcshare/dataset/cityscapes/train/label/" + edge_path = "/msravcshare/dataset/cityscapes/train/edge/" + # generate_train_val_edge(label_path, edge_path, 10) + + # label_edge2void_path = "/msravcshare/dataset/cityscapes/train/label_non_edge_void/" + label_nedge2void_path = "/msravcshare/dataset/cityscapes/train/label_non_edge_void/" + + # label_edge2void(label_path, edge_path, label_edge2void_path) + label_nedge2void(label_path, edge_path, label_nedge2void_path) + + # calculate_edge(edge_path) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/instance_dt_offset_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/instance_dt_offset_generator.py new file mode 100644 index 0000000..4fb490c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/instance_dt_offset_generator.py @@ -0,0 +1,150 @@ +import os +import cv2 +import sys +import torch +import argparse +import subprocess +import numpy as np +from PIL import Image +from glob import glob +import os.path as osp +import scipy.io as io +import multiprocessing as mp +import numpy.linalg as linalg +import matplotlib.pyplot as plt +import multiprocessing.pool as mpp +from scipy.ndimage.morphology import distance_transform_edt, distance_transform_cdt + +script_path = osp.abspath(osp.join(osp.dirname(__file__))) +os.chdir(osp.join(script_path, '..', '..', '..', '..')) +sys.path.insert(0, os.getcwd()) +os.environ['PYTHONPATH'] = os.getcwd() + ':' + os.environ.get('PYTHONPATH', '') + +DATA_ROOT = subprocess.check_output( + ['bash', '-c', "source config.profile; echo $DATA_ROOT"] +).decode().strip() + + +def sobel_kernel(shape, axis): + """ + shape must be odd: eg. 
(5,5) + axis is the direction, with 0 to positive x and 1 to positive y + """ + k = np.zeros(shape) + p = [ + (j, i) + for j in range(shape[0]) + for i in range(shape[1]) + if not (i == (shape[1] - 1) / 2.0 and j == (shape[0] - 1) / 2.0) + ] + + for j, i in p: + j_ = int(j - (shape[0] - 1) / 2.0) + i_ = int(i - (shape[1] - 1) / 2.0) + k[j, i] = (i_ if axis == 0 else j_) / float(i_ * i_ + j_ * j_) + return torch.from_numpy(k).unsqueeze(0) + +label_list = set([7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33]) +stuff_label_list = set(range(24)) +inst_label_list = label_list - stuff_label_list + +from lib.utils.helpers.offset_helper import DTOffsetHelper + +def _vis_offset(_offset, image_name, out_dir, image=None, color=(0, 0, 255)): + if image is None: + color = 255 + image = np.zeros_like(_offset[:, :, 0], dtype=np.uint8) + + step = 3 + coord_map = torch.stack(torch.meshgrid([torch.arange( + length) for length in _offset.shape[:-1]]), dim=-1).numpy().astype(np.int) + offset = (_offset * 5 + coord_map).astype(np.int) + for i in range(step//2, offset.shape[0], step): + for j in range(step//2, offset.shape[1], step): + if (_offset[i, j] == 0).all(): + continue + cv2.arrowedLine(img=image, pt1=tuple( + coord_map[i, j][::-1]), pt2=tuple(offset[i, j][::-1]), color=color, thickness=1) + cv2.imwrite(os.path.join(out_dir, '{}.png'.format(image_name)), image) + +def process(inp): + (indir, outdir, basename) = inp + print(inp) + labelmap = np.array(Image.open(osp.join(indir, basename))) + depth_map = np.ones(labelmap.shape) * 0 + dir_map = np.zeros((*labelmap.shape, 2)) + + ignore_id_list = set(range(256)) - label_list + + for id in ignore_id_list: + labelmap[labelmap == id] = 255 + + labelmap_flattened = np.unique(labelmap) + print(labelmap_flattened) + + for id in labelmap_flattened: + labelmap_i = labelmap.copy() + labelmap_i[labelmap != id] = 0 + labelmap_i[labelmap == id] = 1 + + # if labelmap_i.sum() < 100: + # continue + + if args.metric == 'euc': + depth_i = distance_transform_edt(labelmap_i) + elif args.metric == 'taxicab': + depth_i = distance_transform_cdt(labelmap_i, metric='taxicab') + else: + raise RuntimeError + depth_map[labelmap_i == 1] = depth_i[labelmap_i == 1] + + dir_i_before = dir_i = np.zeros_like(dir_map) + dir_i = torch.nn.functional.conv2d(torch.from_numpy(depth_i).float().view(1, 1, *depth_i.shape), sobel_ker, padding=ksize//2).squeeze().permute(1, 2, 0).numpy() + + # The following line is necessary + dir_i[(labelmap_i == 0), :] = 0 + + dir_map += dir_i + depth_map[depth_map > 250] = 250 + depth_map = depth_map.astype(np.uint8) + # print(np.unique(depth_map)) + deg_reduce = 2 + dir_deg_map = np.degrees(np.arctan2(dir_map[:, :, 0], dir_map[:, :, 1])) + 180 + dir_deg_map = (dir_deg_map / deg_reduce) + print(dir_deg_map.min(), dir_deg_map.max()) + dir_deg_map = dir_deg_map.astype(np.uint8) + dct = {"dir_deg": dir_deg_map, "depth": depth_map, 'deg_reduce': deg_reduce} + safe_savemat( + osp.join(outdir, basename.replace("png", "mat")), + dct + ) + + +def safe_savemat(fn, dct): + io.savemat(fn, dct, do_compression=True) + try: + io.loadmat(fn) + except Exception as e: + print(e) + io.savemat(fn, dct, do_compression=False) + +parser = argparse.ArgumentParser() +parser.add_argument("--datadir", dest='datadir', default=osp.join(DATA_ROOT,'cityscapes')) +parser.add_argument("--outname", default='offset_gt/dt_offset_inst_w_stuff') +parser.add_argument('--split', nargs='+', default=['val', 'train']) +parser.add_argument("--ksize", type=int, default=5) 
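+# '--metric' selects the distance transform used in process(): 'euc' applies scipy's
+# Euclidean distance_transform_edt, while 'taxicab' applies distance_transform_cdt(metric='taxicab').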
+parser.add_argument('--metric', default='euc', choices=['euc', 'taxicab']) +args = parser.parse_args() + +ksize = args.ksize + +sobel_x, sobel_y = (sobel_kernel((ksize, ksize), i) for i in (0, 1)) +sobel_ker = torch.cat([sobel_y, sobel_x], dim=0).view(2, 1, ksize, ksize).float() + +for dataset in args.split: + indir = osp.join(args.datadir, dataset, 'instance') + outdir = osp.join(args.datadir, dataset, args.outname) + os.makedirs(outdir, exist_ok=True) + args_to_apply = [(indir, outdir, osp.basename(basename)) for basename in glob(osp.join(indir, "*.png"))] + mpp.Pool(processes=mp.cpu_count() // 2).map(process, args_to_apply) + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/instance_edge_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/instance_edge_generator.py new file mode 100644 index 0000000..2b87d2d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/cityscapes/instance_edge_generator.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Lang Huang +# Generate the edge files and convert the labels of edge pixels / non-edge pixels to void. +# Small objects will be ignored. + +import os +import cv2 +import pdb +import glob + +import numpy as np + +from PIL import Image +from shutil import copyfile + + +def _generate_edge(label): + h, w = label.shape + edge = np.zeros(label.shape, dtype=np.uint8) + + # right + edge_right = edge[1:h, :] + edge_right[(label[1:h, :] != label[:h - 1, :]) & (label[1:h, :] != 255) + & (label[:h - 1, :] != 255)] = 255 + + # up + edge_up = edge[:, :w - 1] + edge_up[(label[:, :w - 1] != label[:, 1:w]) + & (label[:, :w - 1] != 255) + & (label[:, 1:w] != 255)] = 255 + + # upright + edge_upright = edge[:h - 1, :w - 1] + edge_upright[(label[:h - 1, :w - 1] != label[1:h, 1:w]) + & (label[:h - 1, :w - 1] != 255) + & (label[1:h, 1:w] != 255)] = 255 + + # bottomright + edge_bottomright = edge[:h - 1, 1:w] + edge_bottomright[(label[:h - 1, 1:w] != label[1:h, :w - 1]) + & (label[:h - 1, 1:w] != 255) + & (label[1:h, :w - 1] != 255)] = 255 + + return edge + + +# def generate_edge(label, edge_width=10): +# area_thrs = 4900 +# edge = np.zeros_like(label, dtype=np.uint8) +# valid_contour = [] +# for i in np.unique(label): +# temp = (label == i).astype(np.uint8) +# _, contours, _ = cv2.findContours(temp, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) + +# for contour in contours: +# # check the area +# area = cv2.contourArea(contour) +# if area < area_thrs: +# continue + +# # check the minimum height/width +# rect = cv2.minAreaRect(contour) +# w, h = rect[1] +# if w < edge_width * 2 or h < edge_width * 2: +# continue + +# valid_contour.append(contour) + +# # draw valid contours as edge +# cv2.drawContours(edge, valid_contour, -1, 255, thickness=1) + +# # dilation on edge +# kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (edge_width, edge_width)) +# edge = cv2.dilate(edge, kernel) +# return edge + + +def _get_bbox(img): + rows = np.any(img, axis=1) + cols = np.any(img, axis=0) + rmin, rmax = np.where(rows)[0][[0, -1]] + cmin, cmax = np.where(cols)[0][[0, -1]] + + return max(0, rmin - 1), min(rmax + 1, img.shape[0] - 1), max(0, cmin - 1), min(cmax + 1, img.shape[1] - 1) + +def generate_edge(label, edge_width=10, area_thrs=200): + label_list = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] + edge = np.zeros_like(label, dtype=np.uint8) + for i in np.unique(label): + # have no instance + if i < 1000 or (i // 1000) not in 
label_list: + continue + + # filter out small objects + mask = (label == i).astype(np.uint8) + if mask.sum() < area_thrs: + continue + + rmin, rmax, cmin, cmax = _get_bbox(mask) + mask_edge = _generate_edge(mask[rmin:rmax+1, cmin:cmax+1]) + edge[rmin:rmax+1, cmin:cmax+1][mask_edge > 0] = 255 + + # dilation on edge + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (edge_width, edge_width)) + edge = cv2.dilate(edge, kernel) + return edge + + +def generate_train_val_edge(label_path, edge_path, kernel_size=10, area_thrs=200): + for label_file in os.listdir(label_path): + print(label_file) + label = np.array(Image.open(label_path + label_file)) + edge = generate_edge(label, kernel_size, area_thrs=area_thrs) + + im_edge = Image.fromarray(edge, 'P') + + edge_file = label_file.replace('label', 'edge') + im_edge.save(edge_path + edge_file) + + # out_edge = np.array(Image.open(edge_path + edge_file).convert('P')) + +def get_cityscapes_colors(): + """ Returns the color map for visualizing the segmentation mask. + Args: + num_cls: Number of classes + Returns: + The color map + """ + num_cls = 20 + colors = [0] * (num_cls * 3) + colors[0:3] = (128, 64, 128) # 0: 'road' + colors[3:6] = (244, 35,232) # 1 'sidewalk' + colors[6:9] = (70, 70, 70) # 2''building' + colors[9:12] = (102,102,156) # 3 wall + colors[12:15] = (190,153,153) # 4 fence + colors[15:18] = (153,153,153) # 5 pole + colors[18:21] = (250,170, 30) # 6 'traffic light' + colors[21:24] = (220,220, 0) # 7 'traffic sign' + colors[24:27] = (107,142, 35) # 8 'vegetation' + colors[27:30] = (152,251,152) # 9 'terrain' + colors[30:33] = ( 70,130,180) # 10 sky + colors[33:36] = (220, 20, 60) # 11 person + colors[36:39] = (255, 0, 0) # 12 rider + colors[39:42] = (0, 0, 142) # 13 car + colors[42:45] = (0, 0, 70) # 14 truck + colors[45:48] = (0, 60,100) # 15 bus + colors[48:51] = (0, 80,100) # 16 train + colors[51:54] = (0, 0,230) # 17 'motorcycle' + colors[54:57] = (119, 11, 32) # 18 'bicycle' + colors[57:60] = (105, 105, 105) + return colors + +def _encode_label(labelmap): + label_list = [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] + encoded_labelmap = np.ones_like(labelmap, dtype=np.uint8) * 255 + for i, class_id in enumerate(label_list): + encoded_labelmap[labelmap == class_id] = i + + return encoded_labelmap + +def label_edge2void(label_path, edge_path, dest_label_path): + ''' + Set the pixels along the edge as void label. + Used to train the models without supervision on the edge pixels. + ''' + for label_file in os.listdir(label_path): + print(label_file) + edge_file = label_file.replace('label', 'edge') + + label = np.array(Image.open(label_path + label_file).convert('P')) + edge = np.array(Image.open(edge_path + edge_file).convert('P')) + + label[edge == 255] = 255 + # label = _encode_label(label) + label_update = Image.fromarray(label) + label_update.putpalette(get_cityscapes_colors()) + + label_update.save(dest_label_path + label_file) + + +def label_nedge2void(label_path, edge_path, dest_label_path): + ''' + Set the pixels except the edge as void label. + Used to evaluate the performance of various models on the edge pixels. 
+ ''' + for label_file in os.listdir(label_path): + print(label_file) + edge_file = label_file.replace('label', 'edge') + + label = np.array(Image.open(label_path + label_file).convert('P')) + edge = np.array(Image.open(edge_path + edge_file).convert('P')) + + label[edge == 0] = 255 + label_update = Image.fromarray(label) + + label_update.save(dest_label_path + label_file) + + +def calculate_edge(edge_path): + ''' + Set the pixels except the edge as void label. + Used to evaluate the performance of various models on the edge pixels. + ''' + edge_cnt = 0.0 + non_edge_cnt = 0.0 + + print("ratio: {:f}".format(1/2)) + + for label_file in os.listdir(label_path): + print(label_file) + edge_file = label_file.replace('label', 'edge') + edge = np.array(Image.open(edge_path + edge_file).convert('P')) + + edge_cnt += np.sum(edge == 255) + non_edge_cnt += np.sum(edge == 0) + + print("ratio: {:f}".format(edge_cnt/non_edge_cnt)) + + +if __name__ == "__main__": + label_path = "/home/huanglang/datasets/Cityscape/val/label/" + instance_path = "/home/huanglang/datasets/Cityscape/val/instance/" + edge_path = "/home/huanglang/datasets/Cityscape/val/edge_instance/" + if not os.path.exists(edge_path): + os.makedirs(edge_path) + + generate_train_val_edge(instance_path, edge_path, 10, area_thrs=3600) + + label_edge2void_path = "/home/huanglang/datasets/Cityscape/edge_inst_width10/val/label_edge_void/" + label_nedge2void_path = "/home/huanglang/datasets/Cityscape/edge_inst_width10/val/label_non_edge_void/" + if not os.path.exists(label_edge2void_path): + os.makedirs(label_edge2void_path) + if not os.path.exists(label_nedge2void_path): + os.makedirs(label_nedge2void_path) + + label_edge2void(label_path, edge_path, label_edge2void_path) + label_nedge2void(label_path, edge_path, label_nedge2void_path) + + calculate_edge(edge_path) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/coco_stuff/coco_stuff_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/coco_stuff/coco_stuff_generator.py new file mode 100644 index 0000000..2beece9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/coco_stuff/coco_stuff_generator.py @@ -0,0 +1,65 @@ +import os +import json +import shutil +import argparse +from pathlib import Path +import multiprocessing as mp +import multiprocessing.pool as mpp +from collections import defaultdict + +import numpy as np +import scipy.io as io +from PIL import Image +import pycocotools.mask as mask_util + + +class COCOProcessor: + def build(self, name): + in_label = args.ori_root_dir / 'annotations' / (name + '.mat') + return io.loadmat(str(in_label))['S'].astype(np.uint8) + + +def process(inputs): + split, name = inputs + print('Processing', name, split) + in_img = args.ori_root_dir / 'images' / (name + '.jpg') + out_img: Path = args.save_dir / split / 'images' / (name + '.jpg') + out_img.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(str(in_img), str(out_img)) + + out_label: Path = args.save_dir / split / 'label' / (name + '.png') + labelmap = coco.build(name) + + if args.validate_dir is not None: + validate_label = args.validate_dir / split / 'label' / (name + '.png') + validate_labelmap = np.array(Image.open(str(validate_label))).astype( + np.uint8) + diff = (validate_labelmap != labelmap).sum() / labelmap.size * 100 + if diff > 1: + print('{:.6f}%'.format(diff)) + equal = (np.unique(validate_labelmap) == np.unique(labelmap)) + assert equal if isinstance(equal, bool) else equal.all() + + 
out_label.parent.mkdir(parents=True, exist_ok=True) + Image.fromarray(labelmap).save(str(out_label)) + + +def input_args(): + with (args.ori_root_dir / 'imageLists' / 'test.txt').open() as f: + for name in f: + yield ('val', name.strip()) + + with (args.ori_root_dir / 'imageLists' / 'train.txt').open() as f: + for name in f: + yield ('train', name.strip()) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--ori_root_dir', type=Path) + parser.add_argument('--save_dir', type=Path) + parser.add_argument('--validate_dir', type=lambda x: x and Path(x)) + args = parser.parse_args() + + coco = COCOProcessor() + mpp.Pool(processes=None).map(process, input_args()) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_color.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_color.py new file mode 100644 index 0000000..59d8b8d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_color.py @@ -0,0 +1,39 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- +# Reference: https://github.com/switchablenorms/CelebAMask-HQ/blob/master/face_parsing/Data_preprocessing/g_mask.py +# + +import os +from PIL import Image +import glob +import numpy as np + +def make_folder(path): + if not os.path.exists(os.path.join(path)): + os.makedirs(os.path.join(path)) + +if __name__ == "__main__": + color_list = [[0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], + [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], [102, 204, 0], [255, 255, 0], + [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0]] + root_path = '/home/yuhui/teamdrive/dataset/face_parse/CelebAMask-HQ/' + + folder_base = root_path + 'CelebAMask-HQ-mask' + folder_save = root_path + 'CelebAMask-HQ-mask-color' + + img_num = 10 + + make_folder(folder_save) + + for k in range(img_num): + filename = os.path.join(folder_base, str(k) + '.png') + if (os.path.exists(filename)): + im_base = np.zeros((512, 512, 3)) + im = Image.open(filename) + im = np.array(im) + for idx, color in enumerate(color_list): + im_base[im == idx] = color + filename_save = os.path.join(folder_save, str(k) + '.png') + result = Image.fromarray((im_base).astype(np.uint8)) + print (filename_save) + result.save(filename_save) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_label_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_label_generator.py new file mode 100644 index 0000000..ed57481 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_label_generator.py @@ -0,0 +1,84 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- +# Reference: https://github.com/switchablenorms/CelebAMask-HQ/blob/master/face_parsing/Data_preprocessing/g_mask.py +# + +# other resource: +# https://github.com/switchablenorms/CelebAMask-HQ +# https://github.com/zllrunning/face-parsing.PyTorch +# https://github.com/JACKYLUO1991/FaceParsing + + + +import os +import cv2 +import glob +import numpy as np + +label_list = ['skin', 'nose', 'eye_g', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'l_ear', 'r_ear', + 'mouth', 'u_lip', 'l_lip', 'hair', 'hat', 'ear_r', 'neck_l', 'neck', 'cloth'] + +def make_folder(path): + if not os.path.exists(os.path.join(path)): + os.makedirs(os.path.join(path)) + +if __name__ == "__main__": + root_path = 
'/home/yuhui/teamdrive/dataset/face_parse/CelebAMask-HQ/' + folder_base = root_path + 'CelebAMask-HQ-mask-anno' + folder_save = root_path + 'CelebAMask-HQ-mask' + img_num = 30000 + make_folder(folder_save) + + for k in range(14700, img_num): + folder_num = k // 2000 + im_base = np.zeros((512, 512)) + for idx, label in enumerate(label_list): + filename = os.path.join(folder_base, str(folder_num), str(k).rjust(5, '0') + '_' + label + '.png') + if (os.path.exists(filename)): + print (label, idx+1) + im = cv2.imread(filename) + im = im[:, :, 0] + im_base[im != 0] = (idx + 1) + + filename_save = os.path.join(folder_save, str(k) + '.png') + print (filename_save) + cv2.imwrite(filename_save, im_base) + + +''' +# based on https://raw.githubusercontent.com/zllrunning/face-parsing.PyTorch/master/prepropess_data.py +import os.path as osp +import os +import cv2 +from PIL import Image +import numpy as np +root_path = '/home/yuhui/teamdrive/dataset/face_parse/CelebAMask-HQ/' +face_data = root_path + 'CelebA-HQ-img' +face_sep_mask = root_path + 'CelebAMask-HQ-mask-anno' +mask_path = root_path + 'CelebAMaskHQ-mask' +counter = 0 +total = 0 +for i in range(15): + + atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r', + 'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat'] + + for j in range(i * 2000, (i + 1) * 2000): + + mask = np.zeros((512, 512)) + + for l, att in enumerate(atts, 1): + total += 1 + file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png']) + path = osp.join(face_sep_mask, str(i), file_name) + + if os.path.exists(path): + counter += 1 + sep_mask = np.array(Image.open(path).convert('P')) + # print(np.unique(sep_mask)) + mask[sep_mask == 225] = l + cv2.imwrite('{}/{}.png'.format(mask_path, j), mask) + print(j) +print(counter, total) +''' + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_partition.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_partition.py new file mode 100644 index 0000000..4636b88 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_partition.py @@ -0,0 +1,71 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- +# Reference: https://github.com/switchablenorms/CelebAMask-HQ/blob/master/face_parsing/Data_preprocessing/g_mask.py +# + +import os +import pdb +import shutil +import pandas as pd +from shutil import copyfile + +def make_folder(path): + if not os.path.exists(os.path.join(path)): + os.makedirs(os.path.join(path)) + +if __name__ == "__main__": + root_path = '/home/yuhui/teamdrive/dataset/face_parse/CelebAMask-HQ/' + #### source data path + s_label = root_path + 'CelebAMask-HQ-mask' + s_img = root_path + 'CelebA-HQ-img' + #### destination training data path + d_train_label = root_path + 'train/label' + d_train_img = root_path + 'train/image' + #### destination testing data path + d_test_label = root_path + 'test/label' + d_test_img = root_path + 'test/image' + #### val data path + d_val_label = root_path + 'val/label' + d_val_img = root_path + 'val/image' + + #### make folderYY + make_folder(d_train_label) + make_folder(d_train_img) + make_folder(d_test_label) + make_folder(d_test_img) + make_folder(d_val_label) + make_folder(d_val_img) + + #### calculate data counts in destination folder + train_count = 0 + test_count = 0 + val_count = 0 + + image_list = pd.read_csv(root_path + 'CelebA-HQ-to-CelebA-mapping.txt', delim_whitespace=True, header=None) + # f_train = open('train_list.txt', 'w') 
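+    # (the mapping file links each CelebAMask-HQ index to its original CelebA index;
+    # the loop below assigns train / val / test using the standard CelebA split
+    # boundaries 162771 and 182638)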
+ # f_val = open('val_list.txt', 'w') + # f_test = open('test_list.txt', 'w') + + for idx, x in enumerate(image_list.loc[:, 1]): + print (idx, x) + # if idx < 14700: + # continue + # pdb.set_trace() + if x >= 162771 and x < 182638: + # copyfile(os.path.join(s_label, str(idx)+'.png'), os.path.join(d_val_label, str(val_count)+'.png')) + # copyfile(os.path.join(s_img, str(idx)+'.jpg'), os.path.join(d_val_img, str(val_count)+'.jpg')) + val_count += 1 + elif x >= 182638: + copyfile(os.path.join(s_label, str(idx)+'.png'), os.path.join(d_test_label, str(test_count)+'.png')) + copyfile(os.path.join(s_img, str(idx)+'.jpg'), os.path.join(d_test_img, str(test_count)+'.jpg')) + test_count += 1 + else: + # copyfile(os.path.join(s_label, str(idx)+'.png'), os.path.join(d_train_label, str(train_count)+'.png')) + # copyfile(os.path.join(s_img, str(idx)+'.jpg'), os.path.join(d_train_img, str(train_count)+'.jpg')) + train_count += 1 + + print (train_count + test_count + val_count) + #### close the file + # f_train.close() + # f_val.close() + # f_test.close() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_resize.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_resize.py new file mode 100644 index 0000000..db58433 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/celebmask_resize.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- encoding: utf-8 -*- +# Reference: https://github.com/switchablenorms/CelebAMask-HQ/blob/master/face_parsing/Data_preprocessing/g_mask.py +# + +# other resource: +# https://github.com/switchablenorms/CelebAMask-HQ +# https://github.com/zllrunning/face-parsing.PyTorch +# https://github.com/JACKYLUO1991/FaceParsing + + + +import os +import sys +import cv2 +import glob +import numpy as np + +from PIL import Image + +label_list = ['skin', 'nose', 'eye_g', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'l_ear', 'r_ear', + 'mouth', 'u_lip', 'l_lip', 'hair', 'hat', 'ear_r', 'neck_l', 'neck', 'cloth'] + +def make_folder(path): + if not os.path.exists(os.path.join(path)): + os.makedirs(os.path.join(path)) + +def resize_and_move(ori_path, dest_path): + dirs = os.listdir(ori_path) + for item in dirs: + print(item) + if os.path.isfile(ori_path+item): + im = Image.open(ori_path+item) + imResize = im.resize((512,512), Image.ANTIALIAS) + imResize.save(dest_path+item, 'JPEG', quality=90) + +if __name__ == "__main__": + root_path = '/home/yuhui/teamdrive/dataset/face_parse/CelebAMask-HQ/' + val_folder = root_path + 'val/image/' + resized_val_folder = root_path + 'val/image_resize/' + make_folder(resized_val_folder) + resize_and_move(val_folder, resized_val_folder) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/prepare_celeb.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/prepare_celeb.sh new file mode 100644 index 0000000..f546e6c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/face/prepare_celeb.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +PYTHON="/data/anaconda/envs/pytorch1.6.0/bin/python" + +# $PYTHON celebmask_label_generator.py +# $PYTHON celebmask_partition.py +$PYTHON celebmask_resize.py diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/lip/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/lip/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/lip/lip.py 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/lip/lip.py new file mode 100644 index 0000000..a2f6921 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/lip/lip.py @@ -0,0 +1,200 @@ +import os +import os.path as osp +import numpy as np +import random +import collections +import torch +import torchvision +import cv2 +from torch.utils import data +from PIL import Image as PILImage + + +class LIPParsingEdgeDataSet(data.Dataset): + def __init__(self, root, list_path, max_iters=None, crop_size=(473, 473), + scale=True, mirror=True, ignore_label=255, network="resnet101"): + self.root = root + self.list_path = list_path + self.crop_h, self.crop_w = crop_size + self.scale = scale + self.ignore_label = ignore_label + self.is_mirror = mirror + + self.img_ids = [i_id.strip().split() for i_id in open(list_path)] + if not max_iters==None: + self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids))) + + self.files = [] + for item in self.img_ids: + image_path, label_path, label_rev_path, edge_path = item + name = osp.splitext(osp.basename(label_path))[0] + img_file = osp.join(self.root, image_path) + label_file = osp.join(self.root, label_path) + label_rev_file = osp.join(self.root, label_rev_path) + edge_file = osp.join(self.root, edge_path) + self.files.append({ + "img": img_file, + "label": label_file, + "label_rev": label_rev_file, + "edge": edge_file, + "name": name + }) + + def __len__(self): + return len(self.files) + + def generate_scale_label(self, image, label, edge): + f_scale = 0.5 + random.randint(0, 11) / 10.0 + image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) + label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) + edge = cv2.resize(edge, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_NEAREST) + + return image, label, edge + + def __getitem__(self, index): + datafiles = self.files[index] + + name = datafiles["name"] + + image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) + label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) + edge = cv2.imread(datafiles["edge"], cv2.IMREAD_GRAYSCALE) + edge[edge==255] = 1 + label_rev = cv2.imread(datafiles["label_rev"], cv2.IMREAD_GRAYSCALE) + + size = image.shape + if self.is_mirror: + flip = np.random.choice(2) * 2 - 1 + image = image[:, ::flip, :] + edge = edge[:, ::flip] + if flip == -1: + label = label_rev + + if self.scale: + image, label, edge = self.generate_scale_label(image, label, edge) + + image = np.asarray(image, np.float32) + + if self.network == "resnet101": + mean = (102.9801, 115.9465, 122.7717) + image = image[:,:,::-1] + image -= mean + else: #define other data pre-processing method + pass + + img_h, img_w = label.shape + pad_h = max(self.crop_h - img_h, 0) + pad_w = max(self.crop_w - img_w, 0) + + if pad_h > 0 or pad_w > 0: + img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, + pad_w, cv2.BORDER_CONSTANT, + value=(0.0, 0.0, 0.0)) + label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, + pad_w, cv2.BORDER_CONSTANT, + value=(self.ignore_label,)) + edge_pad = cv2.copyMakeBorder(edge, 0, pad_h, 0, + pad_w, cv2.BORDER_CONSTANT, + value=(0.0,)) + else: + img_pad, label_pad, edge_pad = image, label, edge + + img_h, img_w = label_pad.shape + h_off = random.randint(0, img_h - self.crop_h) + w_off = random.randint(0, img_w - self.crop_w) + image = np.asarray(img_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) + label = np.asarray(label_pad[h_off : 
h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) + edge = np.asarray(edge_pad[h_off : h_off+self.crop_h, w_off : w_off+self.crop_w], np.float32) + + image = image.transpose((2, 0, 1)) + return image.copy(), label.copy(), edge.copy(), np.array(size), name + + +class LIPDataValSet(data.Dataset): + def __init__(self, root, list_path, crop_size=(473, 473)): + self.root = root + self.list_path = list_path + self.crop_h, self.crop_w = crop_size + self.img_ids = [i_id.strip().split() for i_id in open(list_path)] + self.files = [] + + for item in self.img_ids: + image_path, label_path = item + name = osp.splitext(osp.basename(image_path))[0] + img_file = osp.join(self.root, image_path) + label_file = osp.join(self.root, label_path) + self.files.append({ + "img": img_file, + "label": label_file, + "name": name + }) + def generate_scale_image(self, image, f_scale): + image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation = cv2.INTER_LINEAR) + return image + + def resize_image(self, image, size): + image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR) + return image + + def __len__(self): + return len(self.files) + + def __getitem__(self, index): + datafiles = self.files[index] + image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) + label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE) + ori_size = image.shape + image = self.resize_image(image, (self.crop_h, self.crop_w)) + + name = datafiles["name"] + image = np.asarray(image, np.float32) + if self.network == "resnet101": + mean = (102.9801, 115.9465, 122.7717) + image = image[:,:,::-1] + image -= mean + else: #define other data pre-processing method + pass + + image = image.transpose((2, 0, 1)) + return image, label, np.array(ori_size), name + + +class LIPDataTestSet(data.Dataset): + def __init__(self, root, list_path, crop_size=(473, 473)): + self.root = root + self.list_path = list_path + self.crop_h, self.crop_w = crop_size + self.img_ids = [i_id.strip().split()[0] for i_id in open(list_path)] + self.files = [] + for image_path in self.img_ids: + name = osp.splitext(osp.basename(image_path))[0] + img_file = osp.join(self.root, image_path) + self.files.append({ + "img": img_file + }) + + def __len__(self): + return len(self.files) + + def resize_image(self, image, size): + image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR) + return image + + def __getitem__(self, index): + datafiles = self.files[index] + name = osp.splitext(osp.basename(datafiles["img"]))[0] + image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR) + ori_size = image.shape + image = self.resize_image(image, (self.crop_h, self.crop_w)) + + image = np.asarray(image, np.float32) + if self.network == "resnet101": + mean = (102.9801, 115.9465, 122.7717) + image = image[:,:,::-1] + image -= mean + else: #define other data pre-processing method + pass + image = image.transpose((2, 0, 1)) + + return image, np.array(ori_size), name + \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/mapillary/mapillary_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/mapillary/mapillary_generator.py new file mode 100644 index 0000000..c8107eb --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/mapillary/mapillary_generator.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: LayneH +# COCO det data generator. 
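+# Note: the header comment above appears to be copied from another script; this file
+# actually remaps Mapillary Vistas annotations to the Cityscapes 19-class label ids
+# (see get_trans_idx below) and writes the converted masks under save_dir/{train,val}/label.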
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb +import json +import os +import argparse +import shutil +import numpy as np +import PIL.Image as Image +import cv2 + + +LABEL_DIR = 'label' +IMAGE_DIR = 'image' + + +class MapillaryGenerator(object): + def __init__(self, args, image_dir=IMAGE_DIR, label_dir=LABEL_DIR): + self.args = args + self.train_label_dir = os.path.join(self.args.save_dir, 'train', label_dir) + self.val_label_dir = os.path.join(self.args.save_dir, 'val', label_dir) + if not os.path.exists(self.train_label_dir): + os.makedirs(self.train_label_dir) + + if not os.path.exists(self.val_label_dir): + os.makedirs(self.val_label_dir) + + def generate_label(self): + trans_idx = self.get_trans_idx() + + # train_img_folder = os.path.join(self.args.ori_root_dir, 'images/training') + train_mask_folder = os.path.join(self.args.ori_root_dir, 'train/label') + + # val_img_folder = os.path.join(self.args.ori_root_dir, 'images/validation') + val_mask_folder = os.path.join(self.args.ori_root_dir, 'val/label') + + for filename in os.listdir(train_mask_folder): + print(filename) + if filename.endswith(".png"): + maskpath = os.path.join(train_mask_folder, filename) + if os.path.isfile(maskpath): + mask = np.asarray(Image.open(maskpath)) + mask = trans_idx[mask] + cv2.imwrite(os.path.join(self.train_label_dir, filename), mask.astype(np.uint8)) + else: + print('cannot find the mask:', maskpath) + + for filename in os.listdir(val_mask_folder): + print(filename) + if filename.endswith(".png"): + maskpath = os.path.join(val_mask_folder, filename) + if os.path.isfile(maskpath): + mask = np.asarray(Image.open(maskpath)) + mask = trans_idx[mask] + cv2.imwrite(os.path.join(self.val_label_dir, filename), mask.astype(np.uint8)) + else: + print('cannot find the mask:', maskpath) + + + def get_trans_idx(self): + # class name and index of cityscapes dataset + # [7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33], + class_name_dict = {7:'road', 8:'sidewalk', 11:'building', 12:'wall', 13:'fence', 17:'pole', + 19:'trafficlight', 20:'trafficsign', 21:'vegetation', 22:'terrain', 23:'sky', + 24:'person', 25:'rider', 26:'car', 27:'truck', 28:'bus', 31:'train', + 32:'motorcycle', 33:'bicycle'} + # class_name_dict = {0:'road', 1:'sidewalk', 2:'building', 3:'wall', 4:'fence', 5:'pole', + # 6:'trafficlight', 7:'trafficsign', 8:'vegetation', 9:'terrain', 10:'sky', + # 11:'person', 12:'rider', 13:'car', 14:'truck', 15:'bus', 16:'train', + # 17:'motorcycle', 18:'bicycle'} + class_name_dict = {v: k for k, v in class_name_dict.items()} + + # class name and index of mapillary dataset + with open(os.path.join(self.args.ori_root_dir, 'config.json')) as config_file: + labels = json.load(config_file)['labels'] + + print("Following classes are mapped to corresponding classes in cityscapes:") + mapillary2city = [255] * len(labels) + ignored = [] + + for label_id, label in enumerate(labels): + name = label["readable"].lower().replace(' ', '').replace('-', '') + if name in class_name_dict.keys(): + mapillary2city[label_id] = class_name_dict[name] + print("{} => {}: {} => {}".format(name, name, label_id, class_name_dict[name])) + elif "trafficsign" in name or "front" in name or "back" in name: + mapillary2city[label_id] = class_name_dict["trafficsign"] + print("{} => {}: {} => {}".format(name, "traffic sign", label_id, class_name_dict["trafficsign"])) + elif "onrail" in name: + mapillary2city[label_id] = class_name_dict["train"] 
+ print("{} => {}: {} => {}".format(name, "train", label_id, class_name_dict["train"])) + elif "cyclist" in name or "rider" in name: + mapillary2city[label_id] = class_name_dict["rider"] + print("{} => {}: {} => {}".format(name, "rider", label_id, class_name_dict["rider"])) + elif "pole" in name or "streetlight" in name: + mapillary2city[label_id] = class_name_dict["pole"] + print("{} => {}: {} => {}".format(name, "pole", label_id, class_name_dict["pole"])) + elif "curb" in name or "pedestrianarea" in name: + mapillary2city[label_id] = class_name_dict["sidewalk"] + print("{} => {}: {} => {}".format(name, "sidewalk", label_id, class_name_dict["sidewalk"])) + elif "crosswalkplain" in name or "parking" in name or "bikelane" in name or "servicelane" in name or "lanemarking" in name: + mapillary2city[label_id] = class_name_dict["road"] + print("{} => {}: {} => {}".format(name, "road", label_id, class_name_dict["road"])) + else: + ignored.append(name) + + print("\nFollowing classes are mapped to void class:") + print(ignored) + return np.asarray(mapillary2city, dtype=np.uint8) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument('--save_dir', default=None, type=str, + dest='save_dir', help='The directory to save the data.') + parser.add_argument('--ori_root_dir', default=None, type=str, + dest='ori_root_dir', help='The directory of the cityscapes data.') + + args = parser.parse_args() + + Mapillary_generator = MapillaryGenerator(args) + Mapillary_generator.generate_label() \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/mapillary/mapillary_generator.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/mapillary/mapillary_generator.sh new file mode 100644 index 0000000..8594bb5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/mapillary/mapillary_generator.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# check the enviroment info +nvidia-smi +# PYTHON="/root/miniconda3/bin/python" +PYTHON="/data/anaconda/envs/py35/bin/python" + +ORI_ROOT_DIR='/msravcshare/dataset/mapillary-vista-v1.1' +SAVE_DIR='/msravcshare/dataset/cityscapes/mapillary' + +mkdir -p ${SAVE_DIR} + +# directly copy images +# mkdir -p ${SAVE_DIR}/train +# cp -r ${ORI_ROOT_DIR}/training/images ${SAVE_DIR}/train/image + +# mkdir -p ${SAVE_DIR}/val +# cp -r ${ORI_ROOT_DIR}/validation/images ${SAVE_DIR}/val/image + + +${PYTHON} mapillary_generator.py --ori_root_dir $ORI_ROOT_DIR \ + --save_dir $SAVE_DIR \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_context/pascal_context_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_context/pascal_context_generator.py new file mode 100644 index 0000000..21f2c0f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_context/pascal_context_generator.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Lang Huang(layenhuang@outlook.com) +# Pascal Context aug data generator. 
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os +import argparse +import shutil +import scipy.io as sio +import cv2 +import numpy as np +import torch + + +LABEL_DIR = 'label' +IMAGE_DIR = 'image' + + +class PContextGenerator(object): + def __init__(self, args, image_dir=IMAGE_DIR, label_dir=LABEL_DIR): + self.args = args + self.train_label_dir = os.path.join(self.args.save_dir, 'train', label_dir) + self.val_label_dir = os.path.join(self.args.save_dir, 'val', label_dir) + if not os.path.exists(self.train_label_dir): + os.makedirs(self.train_label_dir) + + if not os.path.exists(self.val_label_dir): + os.makedirs(self.val_label_dir) + + self.train_image_dir = os.path.join(self.args.save_dir, 'train', image_dir) + self.val_image_dir = os.path.join(self.args.save_dir, 'val', image_dir) + if not os.path.exists(self.train_image_dir): + os.makedirs(self.train_image_dir) + + if not os.path.exists(self.val_image_dir): + os.makedirs(self.val_image_dir) + + self.train_mask = torch.load(os.path.join( "train.pth")) + self.val_mask = torch.load(os.path.join("val.pth")) + + + def generate_label(self): + train_img_folder = os.path.join(self.args.ori_root_dir, 'JPEGImages') + val_img_folder = os.path.join(self.args.ori_root_dir, 'JPEGImages') + + for basename, mask in self.train_mask.items(): + basename = str(basename) + print(basename) + basename = basename[:4] + "_" + basename[4:] + filename = basename + ".jpg" + imgpath = os.path.join(train_img_folder, filename) + shutil.copy(imgpath, + os.path.join(self.train_image_dir, filename)) + mask = np.asarray(mask) + cv2.imwrite(os.path.join(self.train_label_dir, basename + ".png"), mask) + + for basename, mask in self.val_mask.items(): + basename = str(basename) + print(basename) + basename = basename[:4] + "_" + basename[4:] + filename = basename + ".jpg" + imgpath = os.path.join(val_img_folder, filename) + shutil.copy(imgpath, + os.path.join(self.val_image_dir, filename)) + mask = np.asarray(mask) + cv2.imwrite(os.path.join(self.val_label_dir, basename + ".png"), mask) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument('--save_dir', default=None, type=str, + dest='save_dir', help='The directory to save the data.') + parser.add_argument('--ori_root_dir', default=None, type=str, + dest='ori_root_dir', help='The directory of the cityscapes data.') + + args = parser.parse_args() + + pcontext_generator = PContextGenerator(args) + pcontext_generator.generate_label() \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_context/pascal_context_generator.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_context/pascal_context_generator.sh new file mode 100644 index 0000000..5795809 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_context/pascal_context_generator.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# -*- coding:utf-8 -*- +# Author: Lang Huang(layenhuang@outlook.com) +# Pascal context aug data generator. + +PYTHON="/root/miniconda3/envs/pytorch1.0/bin/python" +ORI_ROOT_DIR='/msravcshare/dataset/pascal_context/' #'/msravcshare/dataset/pcontext/' +SAVE_DIR='/msravcshare/dataset/pascal_context/' #'/msravcshare/dataset/pcontext/' +SCRIPT_DIR='/msravcshare/yuyua/code/segmentation/openseg.pytorch/lib/datasets/preprocess/pascal_context' + +cd ${ORI_ROOT_DIR} + +# if [ ! 
-f train.pth ]; then +# echo "Download training annotations" +# wget https://hangzh.s3.amazonaws.com/encoding/data/pcontext/train.pth +# fi + +# if [ ! -f val.pth ]; then +# echo "Download val annotations" +# wget https://hangzh.s3.amazonaws.com/encoding/data/pcontext/val.pth +# fi + +cd ${SCRIPT_DIR} +echo "Start generation..." + +python pascal_context_generator.py --ori_root_dir ${ORI_ROOT_DIR} \ + --save_dir ${SAVE_DIR} + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_voc/pascal_voc_generator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_voc/pascal_voc_generator.py new file mode 100644 index 0000000..8a633ed --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/pascal_voc/pascal_voc_generator.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Lang Huang(layenhuang@outlook.com) +# Pascal Context aug data generator. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os +import argparse +import shutil +import scipy.io as sio +import cv2 +import numpy as np +import torch + + +LABEL_DIR = 'label' +IMAGE_DIR = 'image' + + +class PascalVOCGenerator(object): + def __init__(self, args, image_dir=IMAGE_DIR, label_dir=LABEL_DIR): + self.args = args + self.train_label_dir = os.path.join(self.args.save_dir, 'train', label_dir) + self.val_label_dir = os.path.join(self.args.save_dir, 'val', label_dir) + if not os.path.exists(self.train_label_dir): + os.makedirs(self.train_label_dir) + + if not os.path.exists(self.val_label_dir): + os.makedirs(self.val_label_dir) + + self.train_image_dir = os.path.join(self.args.save_dir, 'train', image_dir) + self.val_image_dir = os.path.join(self.args.save_dir, 'val', image_dir) + if not os.path.exists(self.train_image_dir): + os.makedirs(self.train_image_dir) + + if not os.path.exists(self.val_image_dir): + os.makedirs(self.val_image_dir) + + self.train_mask = torch.load(os.path.join(self.args.ori_root_dir, "PytorchEncoding/train.pth")) + self.val_mask = torch.load(os.path.join(self.args.ori_root_dir, "PytorchEncoding/val.pth")) + + + def generate_label(self): + train_img_folder = os.path.join(self.args.ori_root_dir, 'JPEGImages') + val_img_folder = os.path.join(self.args.ori_root_dir, 'JPEGImages') + + for basename, mask in self.train_mask.items(): + basename = str(basename) + print(basename) + basename = basename[:4] + "_" + basename[4:] + filename = basename + ".jpg" + imgpath = os.path.join(train_img_folder, filename) + shutil.copy(imgpath, + os.path.join(self.train_image_dir, filename)) + mask = np.asarray(mask) + cv2.imwrite(os.path.join(self.train_label_dir, basename + ".png"), mask) + + for basename, mask in self.val_mask.items(): + basename = str(basename) + print(basename) + basename = basename[:4] + "_" + basename[4:] + filename = basename + ".jpg" + imgpath = os.path.join(val_img_folder, filename) + shutil.copy(imgpath, + os.path.join(self.val_image_dir, filename)) + mask = np.asarray(mask) + cv2.imwrite(os.path.join(self.val_label_dir, basename + ".png"), mask) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument('--save_dir', default=None, type=str, + dest='save_dir', help='The directory to save the data.') + parser.add_argument('--ori_root_dir', default=None, type=str, + dest='ori_root_dir', help='The directory of the cityscapes data.') + + args = parser.parse_args() + + pcontext_generator = 
PContextGenerator(args) + pcontext_generator.generate_label() \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/FV.json b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/FV.json new file mode 100644 index 0000000..7565aa6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/FV.json @@ -0,0 +1,29 @@ +{ + "extrinsic": { + "quaternion": [ + 0.592188269837962, + -0.584690916322556, + 0.39504292969920435, + -0.3890895387065559 + ], + "translation": [ + 3.7484, + 0.0, + 0.68133 + ] + }, + "intrinsic": { + "aspect_ratio": 1.0, + "cx_offset": 3.942, + "cy_offset": -3.093, + "height": 966.0, + "k1": 339.749, + "k2": -31.988, + "k3": 48.275, + "k4": -7.201, + "model": "radial_poly", + "poly_order": 4, + "width": 1280.0 + }, + "name": "FV" +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/MVL.json b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/MVL.json new file mode 100644 index 0000000..71d6ddc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/MVL.json @@ -0,0 +1,29 @@ +{ + "extrinsic": { + "quaternion": [ + 0.9597923475490652, + 0.24268656748912834, + -0.004160525068239742, + -0.14101265750901937 + ], + "translation": [ + 1.9445999999999999, + 0.9572999999999999, + 0.9549 + ] + }, + "intrinsic": { + "aspect_ratio": 1.0, + "cx_offset": 1.829, + "cy_offset": -0.49, + "height": 966.0, + "k1": 335.497, + "k2": -11.41, + "k3": 22.009, + "k4": 2.539, + "model": "radial_poly", + "poly_order": 4, + "width": 1280.0 + }, + "name": "MVL" +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/MVR.json b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/MVR.json new file mode 100644 index 0000000..c2d2024 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/MVR.json @@ -0,0 +1,29 @@ +{ + "extrinsic": { + "quaternion": [ + 0.23466017209146606, + 0.9622497661995224, + -0.13751608301948323, + -0.00996584135669057 + ], + "translation": [ + 1.9445999999999999, + -0.9572999999999999, + 0.9753099999999999 + ] + }, + "intrinsic": { + "aspect_ratio": 1.0, + "cx_offset": 3.468, + "cy_offset": -1.705, + "height": 966.0, + "k1": 337.657, + "k2": -16.126, + "k3": 27.487, + "k4": 0.888, + "model": "radial_poly", + "poly_order": 4, + "width": 1280.0 + }, + "name": "MVR" +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/RV.json b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/RV.json new file mode 100644 index 0000000..205516c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/RV.json @@ -0,0 +1,29 @@ +{ + "extrinsic": { + "quaternion": [ + 0.636314098922813, + 0.6310055318192165, + -0.30810493642236725, + -0.3193864970185268 + ], + "translation": [ + -1.0678, + 0.0545, + 0.90934 + ] + }, + "intrinsic": { + "aspect_ratio": 1.0, + "cx_offset": 6.67, + "cy_offset": -3.291, + "height": 966.0, + "k1": 339.039, + "k2": -29.815, + "k3": 46.483, + "k4": -6.655, + "model": "radial_poly", + "poly_order": 4, + "width": 1280.0 + }, + "name": "RV" +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/calibrate.py 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/calibrate.py new file mode 100644 index 0000000..261250b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/calibrate.py @@ -0,0 +1,64 @@ +import os +import numpy as np +import cv2, os, glob +from scipy.spatial.transform import Rotation as SciRot +from projection import Camera, RadialPolyCamProjection, CylindricalProjection, read_cam_from_json, \ + create_img_projection_maps + +woodscape_dir = '/raid/Research/dataset/tfzhou/woodscape' +save_dir = os.path.join(woodscape_dir, 'cylindrical') +os.makedirs(save_dir, exist_ok=True) + +sets = ['test'] + +for set in sets: + os.makedirs(os.path.join(save_dir, set, 'image'), exist_ok=True) + if set in ['train', 'val']: + os.makedirs(os.path.join(save_dir, set, 'label'), exist_ok=True) + + +def make_cylindrical_cam(cam: Camera): + """generates a cylindrical camera with a centered horizon""" + assert isinstance(cam.lens, RadialPolyCamProjection) + # creates a cylindrical projection + lens = CylindricalProjection(cam.lens.coefficients[0]) + rot_zxz = SciRot.from_matrix(cam.rotation).as_euler('zxz') + # adjust all angles to multiples of 90 degree + rot_zxz = np.round(rot_zxz / (np.pi / 2)) * (np.pi / 2) + # center horizon + rot_zxz[1] = np.pi / 2 + # noinspection PyArgumentList + return Camera( + rotation=SciRot.from_euler(angles=rot_zxz, seq='zxz').as_matrix(), + translation=cam.translation, + lens=lens, + size=cam.size, principle_point=(cam.cx_offset, cam.cy_offset), + aspect_ratio=cam.aspect_ratio + ) + + +for set in sets: + if set == 'test': + imagefiles = glob.glob(os.path.join(woodscape_dir, set, '*.png')) + else: + imagefiles = glob.glob(os.path.join(woodscape_dir, set, 'image/*.png')) + for imagefile in imagefiles: + print(set, imagefile) + basename = os.path.basename(imagefile) + splits = basename.split('_') + type = splits[-1][:-4] + + fisheye_cam = read_cam_from_json('{}.json'.format(type)) + cylindrical_cam = make_cylindrical_cam(fisheye_cam) + + fisheye_image = cv2.imread(imagefile) + + map1, map2 = create_img_projection_maps(fisheye_cam, cylindrical_cam) + cylindrical_image = cv2.remap(fisheye_image, map1, map2, cv2.INTER_CUBIC) + cv2.imwrite(os.path.join(save_dir, set, 'image', basename), cylindrical_image) + if set in ['train', 'val']: + fisheye_label = cv2.imread(imagefile.replace('/image/', '/label/'), 0) + cylindrical_label = cv2.remap(fisheye_label, map1, map2, cv2.INTER_NEAREST) + cv2.imwrite(os.path.join(save_dir, set, 'label', basename), cylindrical_label) + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/calibrate_to_rect.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/calibrate_to_rect.py new file mode 100644 index 0000000..54a1e22 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/calibrate_to_rect.py @@ -0,0 +1,64 @@ +import os +import numpy as np +import cv2, os, glob +from scipy.spatial.transform import Rotation as SciRot +from projection import Camera, RadialPolyCamProjection, CylindricalProjection, read_cam_from_json, \ + create_img_projection_maps, PinholeLens + +woodscape_dir = '/raid/Research/dataset/tfzhou/woodscape' +save_dir = os.path.join(woodscape_dir, 'rect') +os.makedirs(save_dir, exist_ok=True) + +sets = ['test'] + +for set in sets: + os.makedirs(os.path.join(save_dir, set, 'image'), exist_ok=True) + if set in ['train', 'val']: + os.makedirs(os.path.join(save_dir, set, 'label'), 
exist_ok=True) + + +def make_rect_cam(cam: Camera): + """generates a cylindrical camera with a centered horizon""" + assert isinstance(cam.lens, RadialPolyCamProjection) + # creates a cylindrical projection + lens = PinholeLens(cam.lens.coefficients[0]) + rot_zxz = SciRot.from_matrix(cam.rotation).as_euler('zxz') + # adjust all angles to multiples of 90 degree + rot_zxz = np.round(rot_zxz / (np.pi / 2)) * (np.pi / 2) + # center horizon + rot_zxz[1] = np.pi / 2 + # noinspection PyArgumentList + return Camera( + rotation=SciRot.from_euler(angles=rot_zxz, seq='zxz').as_matrix(), + translation=cam.translation, + lens=lens, + size=cam.size, principle_point=(cam.cx_offset, cam.cy_offset), + aspect_ratio=cam.aspect_ratio + ) + + +for set in sets: + if set == 'test': + imagefiles = glob.glob(os.path.join(woodscape_dir, set, '*.png')) + else: + imagefiles = glob.glob(os.path.join(woodscape_dir, set, 'image/*.png')) + for imagefile in imagefiles: + print(set, imagefile) + basename = os.path.basename(imagefile) + splits = basename.split('_') + type = splits[-1][:-4] + + fisheye_cam = read_cam_from_json('{}.json'.format(type)) + cylindrical_cam = make_rect_cam(fisheye_cam) + + fisheye_image = cv2.imread(imagefile) + + map1, map2 = create_img_projection_maps(fisheye_cam, cylindrical_cam) + cylindrical_image = cv2.remap(fisheye_image, map1, map2, cv2.INTER_CUBIC) + cv2.imwrite(os.path.join(save_dir, set, 'image', basename), cylindrical_image) + if set in ['train', 'val']: + fisheye_label = cv2.imread(imagefile.replace('/image/', '/label/'), 0) + cylindrical_label = cv2.remap(fisheye_label, map1, map2, cv2.INTER_NEAREST) + cv2.imwrite(os.path.join(save_dir, set, 'label', basename), cylindrical_label) + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/cylindrical_to_fisheye.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/cylindrical_to_fisheye.py new file mode 100644 index 0000000..bb76ff2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/cylindrical_to_fisheye.py @@ -0,0 +1,55 @@ +import os +import numpy as np +import cv2, os, glob +from scipy.spatial.transform import Rotation as SciRot +from projection import Camera, RadialPolyCamProjection, CylindricalProjection, read_cam_from_json, \ + create_img_projection_maps + +woodscape_dir = '/raid/Research/dataset/tfzhou/woodscape/cylindrical' +save_dir = os.path.join(woodscape_dir, 'cylindrical_to_fisheye') +os.makedirs(save_dir, exist_ok=True) + +sets = ['test'] + +for set in sets: + os.makedirs(os.path.join(save_dir, set), exist_ok=True) + + +def make_cylindrical_cam(cam: Camera): + """generates a cylindrical camera with a centered horizon""" + assert isinstance(cam.lens, RadialPolyCamProjection) + # creates a cylindrical projection + lens = CylindricalProjection(cam.lens.coefficients[0]) + rot_zxz = SciRot.from_matrix(cam.rotation).as_euler('zxz') + # adjust all angles to multiples of 90 degree + rot_zxz = np.round(rot_zxz / (np.pi / 2)) * (np.pi / 2) + # center horizon + rot_zxz[1] = np.pi / 2 + # noinspection PyArgumentList + return Camera( + rotation=SciRot.from_euler(angles=rot_zxz, seq='zxz').as_matrix(), + translation=cam.translation, + lens=lens, + size=cam.size, principle_point=(cam.cx_offset, cam.cy_offset), + aspect_ratio=cam.aspect_ratio + ) + + +for set in sets: + imagefiles = glob.glob(os.path.join(woodscape_dir, set, '*.png')) + for imagefile in imagefiles: + print(set, imagefile) + basename = os.path.basename(imagefile) + splits = 
basename.split('_') + type = splits[-1][:-4] + + fisheye_cam = read_cam_from_json('{}.json'.format(type)) + cylindrical_cam = make_cylindrical_cam(fisheye_cam) + + cylindrical_image = cv2.imread(imagefile) + + map1, map2 = create_img_projection_maps(cylindrical_cam, fisheye_cam) + fisheye_image = cv2.remap(cylindrical_image, map1, map2, cv2.INTER_CUBIC) + cv2.imwrite(os.path.join(save_dir, set, basename), fisheye_image) + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/projection.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/projection.py new file mode 100644 index 0000000..21f185d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/preprocess/woodscape/projection.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +# Copyright 2021 Valeo Schalter und Sensoren GmbH and contributors +# +# Author: Christian Witt +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
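+
+# The lens models below map between 3D camera rays and 2D image coordinates. The fisheye
+# calibration uses a radial polynomial, rho = k1*theta + k2*theta^2 + k3*theta^3 + k4*theta^4,
+# where theta is the angle of incidence and rho the image radius (see RadialPolyCamProjection
+# and read_cam_from_json). Typical usage, mirroring calibrate.py above -- a minimal sketch
+# assuming the FV.json calibration and some fisheye frame fisheye.png:
+#
+#     fisheye_cam = read_cam_from_json('FV.json')
+#     cyl_cam = Camera(lens=CylindricalProjection(fisheye_cam.lens.coefficients[0]),
+#                      rotation=fisheye_cam.rotation, translation=fisheye_cam.translation,
+#                      size=fisheye_cam.size,
+#                      principle_point=(fisheye_cam.cx_offset, fisheye_cam.cy_offset),
+#                      aspect_ratio=fisheye_cam.aspect_ratio)
+#     map1, map2 = create_img_projection_maps(fisheye_cam, cyl_cam)
+#     cylindrical = cv2.remap(cv2.imread('fisheye.png'), map1, map2, cv2.INTER_CUBIC)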
+ +import json +import typing + +import numpy as np +import cv2 +from scipy.spatial.transform import Rotation as SciRot + + +def ensure_point_list(points, dim, concatenate=True, crop=True): + if isinstance(points, list): + points = np.array(points) + assert isinstance(points, np.ndarray) + assert points.ndim == 2 + + if crop: + for test_dim in range(4, dim, -1): + if points.shape[1] == test_dim: + new_shape = test_dim - 1 + assert np.array_equal(points[:, new_shape], np.ones(points.shape[0])) + points = points[:, 0:new_shape] + + if concatenate and points.shape[1] == (dim - 1): + points = np.concatenate((np.array(points), np.ones((points.shape[0], 1))), axis=1) + + if points.shape[1] != dim: + raise AssertionError('points.shape[1] == dim failed ({} != {})'.format(points.shape[1], dim)) + return points + + +class Projection(object): + def project_3d_to_2d(self, cam_points: np.ndarray, invalid_value=np.nan): + raise NotImplementedError() + + def project_2d_to_3d(self, lens_points: np.ndarray, norm: np.ndarray): + raise NotImplementedError() + + +class PinholeLens(Projection): + def __init__(self, focal_length: typing.Union[float, list]): + self.focal_length = focal_length if isinstance(focal_length, (float, int)) else focal_length[0] + self.K = np.array([self.focal_length, self.focal_length, 1], dtype=float) + + def project_3d_to_2d(self, cam_points, invalid_value=np.nan): + camera_points = ensure_point_list(cam_points, dim=3) + camera_points = camera_points * self.K + zs = camera_points[:, 2][:, np.newaxis] + uv = np.divide(camera_points[:, 0:2], zs, where=(zs != 0)) + + # mark points behind the camera (z <= 0) as invalid + uv[camera_points[:, 2] <= 0] = [invalid_value, invalid_value] + return uv + + def project_2d_to_3d(self, image_points: np.ndarray, norms: np.ndarray): + image_points = ensure_point_list(image_points, dim=3) + norms = ensure_point_list(norms, dim=1) + + xy_normed = image_points / self.K + xy_normed_norm = np.linalg.norm(np.array(xy_normed), axis=1) + z = norms / xy_normed_norm[:, np.newaxis] + return z * xy_normed + + +class CylindricalProjection(Projection): + def __init__(self, focal_length: typing.Union[float, list]): + self.focal_length = focal_length if isinstance(focal_length, (float, int)) else focal_length[0] + + def project_3d_to_2d(self, cam_points, invalid_value=np.nan): + camera_points = ensure_point_list(cam_points, dim=3) + + theta = np.arctan2(camera_points.T[0], camera_points.T[2]) + chi = np.sqrt(camera_points.T[0] * camera_points.T[0] + camera_points.T[2] * camera_points.T[2]) + + uv = np.zeros((camera_points.shape[0], 2)) + uv.T[0] = self.focal_length * theta + uv.T[1] = self.focal_length * camera_points.T[1] * np.divide(1, chi, where=(chi != 0)) + uv[chi == 0] = invalid_value + return uv + + def project_2d_to_3d(self, image_points: np.ndarray, norms: np.ndarray): + image_points = ensure_point_list(image_points, dim=2) + norms = ensure_point_list(norms, dim=1) + + outs = np.zeros((image_points.shape[0], 3)) + + theta = image_points.T[0] / self.focal_length + scale = np.divide(norms.flat, + np.sqrt(image_points.T[1] * image_points.T[1] + self.focal_length * self.focal_length)) + outs.T[0] = self.focal_length * np.sin(theta) * scale + outs.T[1] = image_points.T[1] * scale + outs.T[2] = self.focal_length * np.cos(theta) * scale + return outs + + +class RadialPolyCamProjection(Projection): + def __init__(self, distortion_params: list): + self.coefficients = np.asarray(distortion_params) + self.power = np.array([np.arange(start=1, stop=self.coefficients.size 
+ 1)]).T + + def project_3d_to_2d(self, cam_points, invalid_value=np.nan): + camera_points = ensure_point_list(cam_points, dim=3) + chi = np.sqrt(camera_points.T[0] * camera_points.T[0] + camera_points.T[1] * camera_points.T[1]) + theta = np.pi / 2.0 - np.arctan2(camera_points.T[2], chi) + rho = self._theta_to_rho(theta) + lens_points = np.divide(rho, chi, where=(chi != 0))[:, np.newaxis] * camera_points[:, 0:2] + + # set (0, 0, 0) = np.nan + lens_points[(chi == 0) & (cam_points[:, 2] == 0)] = invalid_value + return lens_points + + def project_2d_to_3d(self, lens_points: np.ndarray, norms: np.ndarray): + lens_points = ensure_point_list(lens_points, dim=2) + norms = ensure_point_list(norms, dim=1).reshape(norms.size) + + rhos = np.linalg.norm(lens_points, axis=1) + thetas = self._rho_to_theta(rhos) + chis = norms * np.sin(thetas) + zs = norms * np.cos(thetas) + xy = np.divide(chis, rhos, where=(rhos != 0))[:, np.newaxis] * lens_points + xyz = np.hstack((xy, zs[:, np.newaxis])) + return xyz + + def _theta_to_rho(self, theta): + return np.dot(self.coefficients, np.power(np.array([theta]), self.power)) + + def _rho_to_theta(self, rho): + coeff = list(reversed(self.coefficients)) + results = np.zeros_like(rho) + for i, _r in enumerate(rho): + theta = np.roots([*coeff, -_r]) + theta = np.real(theta[theta.imag == 0]) + theta = theta[np.where(np.abs(theta) < np.pi)] + theta = np.min(theta) if theta.size > 0 else 0 + results[i] = theta + return results + + +class Camera(object): + def __init__(self, lens: Projection, translation, rotation, size, principle_point, + aspect_ratio: float = 1.0): + self.lens = lens + pose = np.eye(4) + pose[0:3, 3] = translation + pose[0:3, 0:3] = rotation + self._pose = np.asarray(pose, dtype=float) + self._inv_pose = np.linalg.inv(self._pose) + self._size = np.array([size[0], size[1]], dtype=int) + self._principle_point = 0.5 * self._size + np.array([principle_point[0], principle_point[1]], dtype=float) - 0.5 + self._aspect_ratio = np.array([1, aspect_ratio], dtype=float) + + size = property(lambda self: self._size) + width = property(lambda self: self._size[0]) + height = property(lambda self: self._size[1]) + cx = property(lambda self: self._principle_point[0]) + cy = property(lambda self: self._principle_point[1]) + cx_offset = property(lambda self: self._principle_point[0] - 0.5 * self._size[0] + 0.5) + cy_offset = property(lambda self: self._principle_point[1] - 0.5 * self._size[1] + 0.5) + aspect_ratio = property(lambda self: self._aspect_ratio[1]) + + rotation = property(lambda self: self._pose[0:3, 0:3]) + translation = property(lambda self: self._pose[0:3, 3]) + + def project_3d_to_2d(self, world_points: np.ndarray, do_clip=False, invalid_value=np.nan): + world_points = ensure_point_list(world_points, dim=4) + + camera_points = world_points @ self._inv_pose.T + lens_points = self.lens.project_3d_to_2d(camera_points[:, 0:3], invalid_value=invalid_value) + screen_points = (lens_points * self._aspect_ratio) + self._principle_point + return self._apply_clip(screen_points, screen_points) if do_clip else screen_points + + def project_2d_to_3d(self, screen_points: np.ndarray, norm: np.ndarray, do_clip=False): + screen_points = ensure_point_list(screen_points, dim=2, concatenate=False, crop=False) + norm = ensure_point_list(norm[:, np.newaxis], dim=1, concatenate=False, crop=False) + lens_points = (screen_points - self._principle_point) / self._aspect_ratio + lens_points = self._apply_clip(lens_points, screen_points) if do_clip else lens_points + + camera_points = 
self.lens.project_2d_to_3d(lens_points, norm) + + camera_points = ensure_point_list(camera_points, dim=4) + world_points = camera_points @ self._pose.T + return world_points[:, 0:3] + + def _apply_clip(self, points, clip_source) -> np.ndarray: + if self._size[0] == 0 or self._size[1] == 0: + raise RuntimeError('clipping without a size is not possible') + mask = (clip_source[:, 0] < 0) | (clip_source[:, 0] >= self._size[0]) | \ + (clip_source[:, 1] < 0) | (clip_source[:, 1] >= self._size[1]) + + points[mask] = [np.nan] + return points + + +def create_img_projection_maps(source_cam: Camera, destination_cam: Camera): + """generates maps for cv2.remap to remap from one camera to another""" + u_map = np.zeros((destination_cam.height, destination_cam.width, 1), dtype=np.float32) + v_map = np.zeros((destination_cam.height, destination_cam.width, 1), dtype=np.float32) + + destination_points_b = np.arange(destination_cam.height) + + for u_px in range(destination_cam.width): + destination_points_a = np.ones(destination_cam.height) * u_px + destination_points = np.vstack((destination_points_a, destination_points_b)).T + + source_points = source_cam.project_3d_to_2d( + destination_cam.project_2d_to_3d(destination_points, norm=np.array([1]))) + + u_map.T[0][u_px] = source_points.T[0] + v_map.T[0][u_px] = source_points.T[1] + + map1, map2 = cv2.convertMaps(u_map, v_map, dstmap1type=cv2.CV_16SC2, nninterpolation=False) + return map1, map2 + + +def read_cam_from_json(path): + """generates a Camera object from a json file""" + with open(path) as f: + config = json.load(f) + + intrinsic = config['intrinsic'] + coefficients = [intrinsic['k1'], intrinsic['k2'], intrinsic['k3'], intrinsic['k4']] + + cam = Camera( + rotation=SciRot.from_quat(config['extrinsic']['quaternion']).as_matrix(), + translation=config['extrinsic']['translation'], + lens=RadialPolyCamProjection(coefficients), + size=(intrinsic['width'], intrinsic['height']), + principle_point=(intrinsic['cx_offset'], intrinsic['cy_offset']), + aspect_ratio=intrinsic['aspect_ratio'] + ) + + return cam diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/collate.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/collate.py new file mode 100644 index 0000000..6279b29 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/collate.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import random + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from models.protoseg_core.lib.extensions.parallel.data_container import DataContainer +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +def stack(batch, data_key=None, return_dc=False): + if isinstance(batch[0][data_key], DataContainer): + if batch[0][data_key].stack: + assert isinstance(batch[0][data_key].data, torch.Tensor) + samples = [sample[data_key].data for sample in batch] + return default_collate(samples) + + elif not return_dc: + return [sample[data_key].data for sample in batch] + + else: + return DataContainer([sample[data_key].data for sample in batch]) + + else: + return 
default_collate([sample[data_key] for sample in batch])
+
+
+def collate(batch, trans_dict):
+    # Bring every sample in the batch to a common spatial size (by scaling and/or
+    # padding, depending on trans_dict) so that the per-sample tensors can be stacked.
+    data_keys = batch[0].keys()
+    if trans_dict['size_mode'] == 'diverse_size':
+        target_widths = [batch[i]['img'].size(2) for i in range(len(batch))]
+        target_heights = [batch[i]['img'].size(1) for i in range(len(batch))]
+
+    elif trans_dict['size_mode'] == 'fix_size':
+        target_width, target_height = trans_dict['input_size']
+        target_widths, target_heights = [target_width] * len(batch), [target_height] * len(batch)
+
+    elif trans_dict['size_mode'] == 'multi_size':
+        ms_input_size = trans_dict['ms_input_size']
+        target_width, target_height = ms_input_size[random.randint(0, len(ms_input_size) - 1)]
+        target_widths, target_heights = [target_width] * len(batch), [target_height] * len(batch)
+
+    elif trans_dict['size_mode'] == 'max_size':
+        border_width = [sample['img'].size(2) for sample in batch]
+        border_height = [sample['img'].size(1) for sample in batch]
+        target_width, target_height = max(border_width), max(border_height)
+        target_widths, target_heights = [target_width] * len(batch), [target_height] * len(batch)
+
+    else:
+        raise NotImplementedError('Size Mode {} is invalid!'.format(trans_dict['size_mode']))
+
+    if 'fit_stride' in trans_dict:
+        stride = trans_dict['fit_stride']
+        for i in range(len(batch)):
+            target_width, target_height = target_widths[i], target_heights[i]
+            pad_w = 0 if (target_width % stride == 0) else stride - (target_width % stride)  # right
+            pad_h = 0 if (target_height % stride == 0) else stride - (target_height % stride)  # down
+            target_widths[i] = target_width + pad_w
+            target_heights[i] = target_height + pad_h
+
+    for i in range(len(batch)):
+        target_width, target_height = target_widths[i], target_heights[i]
+
+        if 'meta' in data_keys:
+            batch[i]['meta'].data['input_size'] = [target_width, target_height]
+
+        channels, height, width = batch[i]['img'].size()
+        if height == target_height and width == target_width:
+            continue
+
+        scaled_size = [width, height]
+        # Log.info('{} {} {} {} {}'.format(target_width, target_height, scaled_size[0], scaled_size[1], trans_dict['align_method']))
+
+        if trans_dict['align_method'] in ['only_scale', 'scale_and_pad']:
+            w_scale_ratio = target_width / width
+            h_scale_ratio = target_height / height
+            if trans_dict['align_method'] == 'scale_and_pad':
+                w_scale_ratio = min(w_scale_ratio, h_scale_ratio)
+                h_scale_ratio = w_scale_ratio
+
+            scaled_size = (int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)))
+            if 'meta' in data_keys and 'border_size' in batch[i]['meta'].data:
+                batch[i]['meta'].data['border_size'] = scaled_size
+
+            scaled_size_hw = (scaled_size[1], scaled_size[0])
+            batch[i]['img'] = DataContainer(F.interpolate(batch[i]['img'].data.unsqueeze(0),
+                                                          scaled_size_hw, mode='bilinear', align_corners=True).squeeze(0), stack=True)
+            if 'labelmap' in data_keys:
+                labelmap = batch[i]['labelmap'].data.unsqueeze(0).unsqueeze(0).float()
+                labelmap = F.interpolate(labelmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0)
+                batch[i]['labelmap'] = DataContainer(labelmap, stack=True)
+
+            if 'maskmap' in data_keys:
+                maskmap = batch[i]['maskmap'].data.unsqueeze(0).unsqueeze(0).float()
+                maskmap = F.interpolate(maskmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0)
+                batch[i]['maskmap'] = DataContainer(maskmap, stack=True)
+
+        pad_width = target_width - scaled_size[0]
+        pad_height = target_height - scaled_size[1]
+        assert pad_height >= 0 and pad_width >= 0
+        # Pad up to the target size; where the padding is placed is controlled by
+        # trans_dict['pad_mode'] (random / pad_left_up / pad_right_down / pad_center / pad_border).
+        if pad_width > 0 or pad_height > 0:
assert trans_dict['align_method'] in ['only_pad', 'scale_and_pad'] + left_pad = 0 + up_pad = 0 + if 'pad_mode' not in trans_dict or trans_dict['pad_mode'] == 'random': + left_pad = random.randint(0, pad_width) # pad_left + up_pad = random.randint(0, pad_height) # pad_up + + elif trans_dict['pad_mode'] == 'pad_left_up': + left_pad = pad_width + up_pad = pad_height + + elif trans_dict['pad_mode'] == 'pad_right_down': + left_pad = 0 + up_pad = 0 + + elif trans_dict['pad_mode'] == 'pad_center': + left_pad = pad_width // 2 + up_pad = pad_height // 2 + + elif trans_dict['pad_mode'] == 'pad_border': + if random.randint(0, 1) == 0: + left_pad = pad_width + up_pad = pad_height + else: + left_pad = 0 + up_pad = 0 + else: + Log.error('Invalid pad mode: {}'.format(trans_dict['pad_mode'])) + exit(1) + + pad = (left_pad, pad_width-left_pad, up_pad, pad_height-up_pad) + + batch[i]['img'] = DataContainer(F.pad(batch[i]['img'].data, pad=pad, value=0), stack=batch[i]['img'].stack) + + if 'labelmap' in data_keys: + batch[i]['labelmap'] = DataContainer(F.pad(batch[i]['labelmap'].data, pad=pad, value=-1), stack=batch[i]['labelmap'].stack) + + if 'maskmap' in data_keys: + batch[i]['maskmap'] = DataContainer(F.pad(batch[i]['maskmap'].data, pad=pad, value=0), stack=batch[i]['maskmap'].stack) + + if 'distance_map' in data_keys: + batch[i]['distance_map'] = DataContainer(F.pad(batch[i]['distance_map'].data, pad=pad, value=255), stack=batch[i]['distance_map'].stack) + + if 'angle_map' in data_keys: + batch[i]['angle_map'] = DataContainer(F.pad(batch[i]['angle_map'].data, pad=pad, value=0), stack=batch[i]['angle_map'].stack) + + if 'mask_label_map' in data_keys: + batch[i]['mask_label_map'] = DataContainer(F.pad(batch[i]['mask_label_map'].data, pad=pad, value=-1), stack=batch[i]['mask_label_map'].stack) + + if 'direction_label_map' in data_keys: + batch[i]['direction_label_map'] = DataContainer(F.pad(batch[i]['direction_label_map'].data, pad=pad, value=-1), stack=batch[i]['direction_label_map'].stack) + + if 'multi_label_direction_map' in data_keys: + batch[i]['multi_label_direction_map'] = DataContainer(F.pad(batch[i]['multi_label_direction_map'].data, pad=pad, value=-1), stack=batch[i]['multi_label_direction_map'].stack) + + if 'energy_label_map' in data_keys: + batch[i]['energy_label_map'] = DataContainer(F.pad(batch[i]['energy_label_map'].data, pad=pad, value=-1), stack=batch[i]['energy_label_map'].stack) + + if 'offsetmap_h' in data_keys: + batch[i]['offsetmap_h'] = DataContainer(F.pad(batch[i]['offsetmap_h'].data, pad=pad, value=0), stack=batch[i]['offsetmap_h'].stack) + + if 'offsetmap_w' in data_keys: + batch[i]['offsetmap_w'] = DataContainer(F.pad(batch[i]['offsetmap_w'].data, pad=pad, value=0), stack=batch[i]['offsetmap_w'].stack) + + return dict({key: stack(batch, data_key=key) for key in data_keys}) + + + + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/cv2_aug_transforms.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/cv2_aug_transforms.py new file mode 100644 index 0000000..007b58e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/cv2_aug_transforms.py @@ -0,0 +1,903 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Jingyi Xie + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import math +import random + +import cv2 +import numpy as np + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from 
models.protoseg_core.lib.datasets.tools.transforms import DeNormalize + + +class _BaseTransform(object): + DATA_ITEMS = ( + 'labelmap', 'maskmap', + 'distance_map', 'angle_map', 'multi_label_direction_map', + 'boundary_map', 'offsetmap', + # 'offsetmap_h', 'offsetmap_w', + 'region_indexmap' + ) + + def __call__(self, img, **kwargs): + + data_dict = collections.defaultdict(lambda: None) + data_dict.update(kwargs) + + return img, data_dict + + def _process(self, img, data_dict, skip_condition, *args, **kwargs): + assert isinstance(img, np.ndarray), \ + "img should be numpy array, got {}.".format(type(img)) + if not skip_condition: + img = self._process_img(img, *args, **kwargs) + + ret_dict = collections.defaultdict(lambda: None) + for name in self.DATA_ITEMS: + func_name = '_process_' + name + x = data_dict[name] + + assert isinstance(x, np.ndarray) or x is None, \ + "{} should be numpy array or None, got {}.".format( + name, type(x)) + + if hasattr(self, func_name) and x is not None and not skip_condition: + ret_dict[name] = getattr(self, func_name)(x, *args, **kwargs) + else: + ret_dict[name] = x + + return img, ret_dict + + +class Padding(_BaseTransform): + """ Padding the Image to proper size. + Args: + stride: the stride of the network. + pad_value: the value that pad to the image border. + img: Image object as input. + Returns:: + img: Image object. + """ + + def __init__(self, pad=None, pad_ratio=0.5, mean=(104, 117, 123), allow_outside_center=True): + self.pad = pad + self.ratio = pad_ratio + self.mean = mean + self.allow_outside_center = allow_outside_center + + def _pad(self, x, pad_value, height, width, target_size, offset_left, offset_up): + expand_x = np.zeros(( + max(height, target_size[1]) + abs(offset_up), + max(width, target_size[0]) + abs(offset_left), + *x.shape[2:] + ), dtype=x.dtype) + expand_x[:, :] = pad_value + expand_x[ + abs(min(offset_up, 0)):abs(min(offset_up, 0)) + height, + abs(min(offset_left, 0)):abs(min(offset_left, 0)) + width] = x + x = expand_x[ + max(offset_up, 0):max(offset_up, 0) + target_size[1], + max(offset_left, 0):max(offset_left, 0) + target_size[0] + ] + return x + + def _process_img(self, img, *args): + return self._pad(img, self.mean, *args) + + def _process_labelmap(self, x, *args): + return self._pad(x, 255, *args) + + def _process_region_indexmap(self, x, *args): + return self._pad(x, 0, *args) + + def _process_maskmap(self, x, *args): + return self._pad(x, 1, *args) + + def _process_distance_map(self, x, *args): + return self._pad(x, 255, *args) + + def _process_angle_map(self, x, *args): + return self._pad(x, 0, *args) + + def _process_boundary_map(self, x, *args): + return self._pad(x, 0, *args) + + def _process_multi_label_direction_map(self, x, *args): + return self._pad(x, 0, *args) + + # def _process_offsetmap_h(self, x, *args): + # return self._pad(x, 0, *args) + + # def _process_offsetmap_w(self, x, *args): + # return self._pad(x, 0, *args) + + def _process_offsetmap(self, x, *args): + return self._pad(x, 0, *args) + + def __call__(self, img, **kwargs): + img, data_dict = super().__call__(img, **kwargs) + + height, width, channels = img.shape + left_pad, up_pad, right_pad, down_pad = self.pad + + target_size = [width + left_pad + + right_pad, height + up_pad + down_pad] + offset_left = -left_pad + offset_up = -up_pad + + return self._process( + img, data_dict, + random.random() > self.ratio, + height, width, target_size, offset_left, offset_up + ) + + +class RandomHFlip(_BaseTransform): + def __init__(self, swap_pair=None, 
flip_ratio=0.5): + self.swap_pair = swap_pair + self.ratio = flip_ratio + + def _process_img(self, img): + return cv2.flip(img, 1) + + def _process_labelmap(self, labelmap): + labelmap = cv2.flip(labelmap, 1) + # to handle datasets with left/right annatations + if self.swap_pair is not None: + assert isinstance(self.swap_pair, (tuple, list)) + temp = labelmap.copy() + for pair in self.swap_pair: + assert isinstance(pair, (tuple, list)) and len(pair) == 2 + labelmap[temp == pair[0]] = pair[1] + labelmap[temp == pair[1]] = pair[0] + + return labelmap + + def _process_region_indexmap(self, labelmap): + return cv2.flip(labelmap, 1) + + def _process_maskmap(self, x): + return cv2.flip(x, 1) + + def _process_distance_map(self, x): + return cv2.flip(x, 1) + + def _process_angle_map(self, angle_map): + ret_angle_map = angle_map.copy() + mask = (angle_map > 0) & (angle_map < 180) + ret_angle_map[mask] = 180 - angle_map[mask] + mask = (angle_map < 0) & (angle_map > -180) + ret_angle_map[mask] = - (180 + angle_map[mask]) + ret_angle_map = cv2.flip(ret_angle_map, 1) + return ret_angle_map + + def _process_boundary_map(self, x): + return cv2.flip(x, 1) + + def _process_multi_label_direction_map(self, multi_label_direction_map): + perm = [4, 3, 2, 1, 0, 7, 6, 5] + multi_label_direction_map = cv2.flip(multi_label_direction_map, 1) + multi_label_direction_map = multi_label_direction_map[..., perm] + return multi_label_direction_map + + # def _process_offsetmap_h(self, x): + # return cv2.flip(x, 1) + + # def _process_offsetmap_w(self, x): + # return -cv2.flip(x, 1) + + def _process_offsetmap_w(self, x): + x = cv2.flip(x, 1) + x[..., 1] = -x[..., 1] + return x + + def __call__(self, img, **kwargs): + img, data_dict = super().__call__(img, **kwargs) + + return self._process( + img, data_dict, + random.random() > self.ratio + ) + + +class RandomSaturation(_BaseTransform): + def __init__(self, lower=0.5, upper=1.5, saturation_ratio=0.5): + self.lower = lower + self.upper = upper + self.ratio = saturation_ratio + assert self.upper >= self.lower, "saturation upper must be >= lower." + assert self.lower >= 0, "saturation lower must be non-negative." 
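+        # __call__ applies the jitter with probability `saturation_ratio`; _process_img
+        # below scales the HSV saturation channel by a random factor in [lower, upper]
+        # and clips the result back to [0, 255].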
+ + def _process_img(self, img): + img = img.astype(np.float32) + img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + img[:, :, 1] *= random.uniform(self.lower, self.upper) + img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR) + img = np.clip(img, 0, 255).astype(np.uint8) + return img + + def __call__(self, img, **kwargs): + img, data_dict = super().__call__(img, **kwargs) + + return self._process( + img, data_dict, + random.random() > self.ratio + ) + + +class RandomHue(_BaseTransform): + def __init__(self, delta=18, hue_ratio=0.5): + assert 0 <= delta <= 360 + self.delta = delta + self.ratio = hue_ratio + + def _process_img(self, img): + img = img.astype(np.float32) + img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + img[:, :, 0] += random.uniform(-self.delta, self.delta) + img[:, :, 0][img[:, :, 0] > 360] -= 360 + img[:, :, 0][img[:, :, 0] < 0] += 360 + img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR) + img = np.clip(img, 0, 255).astype(np.uint8) + return img + + def __call__(self, img, **kwargs): + img, data_dict = super().__call__(img, **kwargs) + + return self._process( + img, data_dict, + random.random() > self.ratio + ) + + +class RandomPerm(_BaseTransform): + def __init__(self, perm_ratio=0.5): + self.ratio = perm_ratio + self.perms = ((0, 1, 2), (0, 2, 1), + (1, 0, 2), (1, 2, 0), + (2, 0, 1), (2, 1, 0)) + + def _process_img(self, img): + swap = self.perms[random.randint(0, len(self.perms) - 1)] + img = img[:, :, swap].astype(np.uint8) + return img + + def __call__(self, img, **kwargs): + img, data_dict = super().__call__(img, **kwargs) + + return self._process( + img, data_dict, + random.random() > self.ratio + ) + + +class RandomContrast(_BaseTransform): + def __init__(self, lower=0.5, upper=1.5, contrast_ratio=0.5): + self.lower = lower + self.upper = upper + self.ratio = contrast_ratio + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." + + def _process_img(self, img): + img = img.astype(np.float32) + img *= random.uniform(self.lower, self.upper) + img = np.clip(img, 0, 255).astype(np.uint8) + return img + + def __call__(self, img, **kwargs): + img, data_dict = super().__call__(img, **kwargs) + + return self._process( + img, data_dict, + random.random() > self.ratio + ) + + +class RandomBrightness(_BaseTransform): + def __init__(self, shift_value=30, brightness_ratio=0.5): + self.shift_value = shift_value + self.ratio = brightness_ratio + + def _process_img(self, img): + img = img.astype(np.float32) + shift = random.randint(-self.shift_value, self.shift_value) + img[:, :, :] += shift + img = np.around(img) + img = np.clip(img, 0, 255).astype(np.uint8) + return img + + def __call__(self, img, **kwargs): + img, data_dict = super().__call__(img, **kwargs) + + return self._process( + img, data_dict, + random.random() > self.ratio + ) + + +class RandomResize(_BaseTransform): + """Resize the given numpy.ndarray to random size and aspect ratio. + + Args: + scale_min: the min scale to resize. + scale_max: the max scale to resize. 
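+        aspect_range: range of the random aspect-ratio jitter applied together with the scale.
+        method: 'random' samples a scale from scale_range; 'bound' picks the scale that fits
+            the image within resize_bound.
+        resize_ratio: probability of applying the random resize at all.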
+ """ + + def __init__(self, scale_range=(0.75, 1.25), aspect_range=(0.9, 1.1), target_size=None, + resize_bound=None, method='random', max_side_bound=None, scale_list=None, resize_ratio=0.5): + self.scale_range = scale_range + self.aspect_range = aspect_range + self.resize_bound = resize_bound + self.max_side_bound = max_side_bound + self.scale_list = scale_list + self.method = method + self.ratio = resize_ratio + + if target_size is not None: + if isinstance(target_size, int): + self.input_size = (target_size, target_size) + elif isinstance(target_size, (list, tuple)) and len(target_size) == 2: + self.input_size = target_size + else: + raise TypeError( + 'Got inappropriate size arg: {}'.format(target_size)) + else: + self.input_size = None + + def get_scale(self, img_size): + if self.method == 'random': + scale_ratio = random.uniform( + self.scale_range[0], self.scale_range[1]) + return scale_ratio + + elif self.method == 'bound': + scale1 = self.resize_bound[0] / min(img_size) + scale2 = self.resize_bound[1] / max(img_size) + scale = min(scale1, scale2) + return scale + + else: + Log.error('Resize method {} is invalid.'.format(self.method)) + exit(1) + + def _process_img(self, img, converted_size, *args): + return cv2.resize(img, converted_size, interpolation=cv2.INTER_CUBIC).astype(np.uint8) + + def _process_labelmap(self, x, converted_size, *args): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + def _process_region_indexmap(self, x, converted_size, *args): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + def _process_maskmap(self, x, converted_size, *args): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + def _process_distance_map(self, x, converted_size, *args): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + def _process_angle_map(self, x, converted_size, *args): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + def _process_boundary_map(self, x, converted_size, *args): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + def _process_multi_label_direction_map(self, x, converted_size, *args): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + # def _process_offsetmap_h(self, x, converted_size, h_scale_ratio, w_scale_ratio): + # return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) * h_scale_ratio + + # def _process_offsetmap_w(self, x, converted_size, h_scale_ratio, w_scale_ratio): + # return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) * w_scale_ratio + + def _process_offsetmap(self, x, converted_size, h_scale_ratio, w_scale_ratio): + return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) + + def __call__(self, img, **kwargs): + """ + Args: + img (Image): Image to be resized. + maskmap (Image): Mask to be resized. + kpt (list): keypoints to be resized. + center: (list): center points to be resized. + + Returns: + Image: Randomly resize image. + Image: Randomly resize maskmap. + list: Randomly resize keypoints. + list: Randomly resize center points. 
+ """ + img, data_dict = super().__call__(img, **kwargs) + + height, width, _ = img.shape + if self.scale_list is None: + scale_ratio = self.get_scale([width, height]) + else: + scale_ratio = self.scale_list[random.randint( + 0, len(self.scale_list) - 1)] + + aspect_ratio = random.uniform(*self.aspect_range) + w_scale_ratio = math.sqrt(aspect_ratio) * scale_ratio + h_scale_ratio = math.sqrt(1.0 / aspect_ratio) * scale_ratio + if self.max_side_bound is not None and max(height * h_scale_ratio, width * w_scale_ratio) > self.max_side_bound: + d_ratio = self.max_side_bound / max(height * h_scale_ratio, width * w_scale_ratio) + w_scale_ratio *= d_ratio + h_scale_ratio *= d_ratio + + converted_size = (int(width * w_scale_ratio), + int(height * h_scale_ratio)) + return self._process( + img, data_dict, + random.random() > self.ratio, + converted_size, h_scale_ratio, w_scale_ratio + ) + + +class RandomRotate(_BaseTransform): + """Rotate the input numpy.ndarray and points to the given degree. + + Args: + degree (number): Desired rotate degree. + """ + + def __init__(self, max_degree, rotate_ratio=0.5, mean=(104, 117, 123)): + assert isinstance(max_degree, int), '{}'.format(type(max_degree)) + self.max_degree = max_degree + self.ratio = rotate_ratio + self.mean = mean + Log.warn( + 'Currently `RandomRotate` is only implemented for `img`, `labelmap` and `maskmap`.') + + def _warp(self, x, border_value, rotate_mat, new_width, new_height): + return cv2.warpAffine(x, rotate_mat, (new_width, new_height), borderValue=border_value) + + def _process_img(self, x, *args): + return self._warp(x, self.mean, *args).astype(np.uint8) + + def _process_labelmap(self, x, *args): + return self._warp(x, (255, 255, 255), *args).astype(np.uint8) + + def _process_maskmap(self, x, *args): + return self._warp(x, (1, 1, 1), *args).astype(np.uint8) + + def __call__(self, img, **kwargs): + """ + Args: + img (Image): Image to be rotated. + maskmap (Image): Mask to be rotated. + kpt (list): Keypoints to be rotated. + center (list): Center points to be rotated. + + Returns: + Image: Rotated image. + list: Rotated key points. + """ + img, data_dict = super().__call__(img, **kwargs) + + rotate_degree = random.uniform(-self.max_degree, self.max_degree) + height, width, _ = img.shape + img_center = (width / 2.0, height / 2.0) + rotate_mat = cv2.getRotationMatrix2D(img_center, rotate_degree, 1.0) + cos_val = np.abs(rotate_mat[0, 0]) + sin_val = np.abs(rotate_mat[0, 1]) + new_width = int(height * sin_val + width * cos_val) + new_height = int(height * cos_val + width * sin_val) + rotate_mat[0, 2] += (new_width / 2.) - img_center[0] + rotate_mat[1, 2] += (new_height / 2.) - img_center[1] + + return self._process( + img, data_dict, + random.random() > self.ratio, + rotate_mat, new_width, new_height + ) + + +class RandomCrop(_BaseTransform): + """Crop the given numpy.ndarray and at a random location. 
+ + Args: + size (int or tuple): Desired output size of the crop.(w, h) + """ + + def __init__(self, crop_size, crop_ratio=0.5, method='random', grid=None, allow_outside_center=True): + self.ratio = crop_ratio + self.method = method + self.grid = grid + self.allow_outside_center = allow_outside_center + + if isinstance(crop_size, float): + self.size = (crop_size, crop_size) + elif isinstance(crop_size, collections.Iterable) and len(crop_size) == 2: + self.size = crop_size + else: + raise TypeError('Got inappropriate size arg: {}'.format(crop_size)) + + def get_lefttop(self, crop_size, img_size): + if self.method == 'center': + return [(img_size[0] - crop_size[0]) // 2, (img_size[1] - crop_size[1]) // 2] + + elif self.method == 'random': + x = random.randint(0, img_size[0] - crop_size[0]) + y = random.randint(0, img_size[1] - crop_size[1]) + return [x, y] + + elif self.method == 'grid': + grid_x = random.randint(0, self.grid[0] - 1) + grid_y = random.randint(0, self.grid[1] - 1) + x = grid_x * ((img_size[0] - crop_size[0]) // (self.grid[0] - 1)) + y = grid_y * ((img_size[1] - crop_size[1]) // (self.grid[1] - 1)) + return [x, y] + + else: + Log.error('Crop method {} is invalid.'.format(self.method)) + exit(1) + + def _crop(self, x, offset_up, offset_left, target_size): + return x[offset_up:offset_up + target_size[1], offset_left:offset_left + target_size[0]] + + def _process_img(self, img, *args): + return self._crop(img, *args) + + def _process_labelmap(self, x, *args): + return self._crop(x, *args) + + def _process_region_indexmap(self, x, *args): + return self._crop(x, *args) + + def _process_maskmap(self, x, *args): + return self._crop(x, *args) + + def _process_distance_map(self, x, *args): + return self._crop(x, *args) + + def _process_angle_map(self, x, *args): + return self._crop(x, *args) + + def _process_boundary_map(self, x, *args): + return self._crop(x, *args) + + def _process_multi_label_direction_map(self, x, *args): + return self._crop(x, *args) + + # def _process_offsetmap_h(self, x, *args): + # return self._crop(x, *args) + + # def _process_offsetmap_w(self, x, *args): + # return self._crop(x, *args) + + def _process_offsetmap(self, x, *args): + return self._crop(x, *args) + + def __call__(self, img, **kwargs): + """ + Args: + img (Image): Image to be cropped. + maskmap (Image): Mask to be cropped. + + Returns: + Image: Cropped image. + Image: Cropped maskmap. + list: Cropped keypoints. + list: Cropped center points. + """ + img, data_dict = super().__call__(img, **kwargs) + + height, width, _ = img.shape + target_size = [min(self.size[0], width), min(self.size[1], height)] + + offset_left, offset_up = self.get_lefttop(target_size, [width, height]) + return self._process( + img, data_dict, + random.random() > self.ratio, + offset_up, offset_left, target_size + ) + + +class Resize(RandomResize): + """Resize the given numpy.ndarray to random size and aspect ratio. + Args: + scale_min: the min scale to resize. + scale_max: the max scale to resize. 
+ """ + + def __init__(self, target_size=None, min_side_length=None, max_side_length=None, max_side_bound=None): + self.target_size = target_size + self.min_side_length = min_side_length + self.max_side_length = max_side_length + self.max_side_bound = max_side_bound + + def __call__(self, img, **kwargs): + img, data_dict = super(RandomResize, self).__call__(img, **kwargs) + + height, width, _ = img.shape + if self.target_size is not None: + target_size = self.target_size + w_scale_ratio = self.target_size[0] / width + h_scale_ratio = self.target_size[1] / height + + elif self.min_side_length is not None: + scale_ratio = self.min_side_length / min(width, height) + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + target_size = [int(round(width * w_scale_ratio)), + int(round(height * h_scale_ratio))] + + else: + scale_ratio = self.max_side_length / max(width, height) + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + target_size = [int(round(width * w_scale_ratio)), + int(round(height * h_scale_ratio))] + + if self.max_side_bound is not None and max(target_size) > self.max_side_bound: + d_ratio = self.max_side_bound / max(target_size) + w_scale_ratio = d_ratio * w_scale_ratio + h_scale_ratio = d_ratio * h_scale_ratio + target_size = [int(round(width * w_scale_ratio)), + int(round(height * h_scale_ratio))] + + target_size = tuple(target_size) + return self._process( + img, data_dict, + False, + target_size, h_scale_ratio, w_scale_ratio + ) + + +class CV2AugCompose(object): + """Composes several transforms together. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> CV2AugCompose([ + >>> RandomCrop(), + >>> ]) + """ + + def __init__(self, configer, split='train'): + self.configer = configer + self.split = split + + if self.split == 'train': + shuffle_train_trans = [] + if self.configer.exists('train_trans', 'shuffle_trans_seq'): + if isinstance(self.configer.get('train_trans', 'shuffle_trans_seq')[0], list): + train_trans_seq_list = self.configer.get( + 'train_trans', 'shuffle_trans_seq') + for train_trans_seq in train_trans_seq_list: + shuffle_train_trans += train_trans_seq + + else: + shuffle_train_trans = self.configer.get( + 'train_trans', 'shuffle_trans_seq') + trans_seq = self.configer.get( + 'train_trans', 'trans_seq') + shuffle_train_trans + trans_key = 'train_trans' + else: + trans_seq = self.configer.get('val_trans', 'trans_seq') + trans_key = 'val_trans' + + self.transforms = dict() + self.trans_config = self.configer.get(trans_key) + for trans_name in trans_seq: + specs = TRANSFORM_SPEC[trans_name] + config = self.configer.get(trans_key, trans_name) + for spec in specs: + if 'when' not in spec: + break + choose_this = True + for cond_key, cond_value in spec['when'].items(): + choose_this = choose_this and ( + config[cond_key] == cond_value) + if choose_this: + break + else: + raise RuntimeError("Not support!") + + kwargs = {} + for arg_name, arg_path in spec["args"].items(): + if isinstance(arg_path, str): + arg_value = config.get(arg_path, None) + elif isinstance(arg_path, list): + arg_value = self.configer.get(*arg_path) + kwargs[arg_name] = arg_value + + klass = TRANSFORM_MAPPING[trans_name] + self.transforms[trans_name] = klass(**kwargs) + + def __call__(self, img, **data_dict): + + orig_key_list = list(data_dict) + + if self.configer.get('data', 'input_mode') == 'RGB': + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + + if self.split == 'train': + shuffle_trans_seq = [] + if 
self.configer.exists('train_trans', 'shuffle_trans_seq'): + if isinstance(self.configer.get('train_trans', 'shuffle_trans_seq')[0], list): + shuffle_trans_seq_list = self.configer.get('train_trans', 'shuffle_trans_seq') + shuffle_trans_seq = shuffle_trans_seq_list[random.randint(0, len(shuffle_trans_seq_list))] + else: + shuffle_trans_seq = self.configer.get('train_trans', 'shuffle_trans_seq') + random.shuffle(shuffle_trans_seq) + trans_seq = shuffle_trans_seq + self.configer.get('train_trans', 'trans_seq') + else: + trans_seq = self.configer.get('val_trans', 'trans_seq') + + for trans_key in trans_seq: + img, data_dict = self.transforms[trans_key](img, **data_dict) + + if self.configer.get('data', 'input_mode') == 'RGB': + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + return (img, *[data_dict[key] for key in orig_key_list]) + + def __repr__(self): + import pprint + return 'CV2AugCompose({})'.format(pprint.pformat(self.trans_config)) + + +TRANSFORM_MAPPING = { + "random_saturation": RandomSaturation, + "random_hue": RandomHue, + "random_perm": RandomPerm, + "random_contrast": RandomContrast, + "padding": Padding, + "random_brightness": RandomBrightness, + "random_hflip": RandomHFlip, + "random_resize": RandomResize, + "random_crop": RandomCrop, + "random_rotate": RandomRotate, + "resize": Resize, +} + +TRANSFORM_SPEC = { + "random_style": [{ + "args": { + "style_ratio": "ratio" + } + }], + "random_saturation": [{ + "args": { + "lower": "lower", + "upper": "upper", + "saturation_ratio": "ratio" + } + }], + "random_hue": [{ + "args": { + "delta": "delta", + "hue_ratio": "ratio" + } + }], + "ramdom_perm": [{ + "args": { + "perm_ratio": "ratio" + } + }], + "random_contrast": [{ + "args": { + "lower": "lower", + "upper": "upper", + "contrast_ratio": "ratio" + } + }], + "padding": [{ + "args": { + "pad": "pad", + "pad_ratio": "ratio", + "mean": ["normalize", "mean_value"], + "allow_outside_center": "allow_outside_center" + } + }], + "random_brightness": [{ + "args": { + "shift_value": "shift_value", + "brightness_ratio": "ratio" + } + }], + "random_hflip": [{ + "args": { + "swap_pair": "swap_pair", + "flip_ratio": "ratio" + } + }], + "random_resize": [ + { + "args": { + "method": "method", + "scale_range": "scale_range", + "aspect_range": "aspect_range", + "max_side_bound": "max_side_bound", + "resize_ratio": "ratio" + }, + "when": { + "method": "random" + } + }, + { + "args": { + "method": "method", + "scale_range": "scale_range", + "aspect_range": "aspect_range", + "target_size": "target_size", + "resize_ratio": "ratio" + }, + "when": { + "method": "focus" + } + }, + { + "args": { + "method": "method", + "aspect_range": "aspect_range", + "resize_bound": "resize_bound", + "resize_ratio": "ratio" + }, + "when": { + "method": "bound" + } + }, + ], + "random_crop": [ + { + "args": { + "crop_size": "crop_size", + "method": "method", + "crop_ratio": "ratio", + "allow_outside_center": "allow_outside_center" + }, + "when": { + "method": "random" + } + }, + { + "args": { + "crop_size": "crop_size", + "method": "method", + "crop_ratio": "ratio", + "allow_outside_center": "allow_outside_center" + }, + "when": { + "method": "center" + } + }, + { + "args": { + "crop_size": "crop_size", + "method": "method", + "crop_ratio": "ratio", + "grid": "grid", + "allow_outside_center": "allow_outside_center" + }, + "when": { + "method": "grid" + } + }, + ], + "random_rotate": [{ + "args": { + "max_degree": "rotate_degree", + "rotate_ratio": "ratio", + "mean": ["normalize", "mean_value"] + } + }], + "resize": [{ + 
"args": { + "target_size": "target_size", + "min_side_length": "min_side_length", + "max_side_bound": "max_side_bound", + "max_side_length": "max_side_length" + } + }], +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/pil_aug_transforms.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/pil_aug_transforms.py new file mode 100644 index 0000000..78c5256 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/pil_aug_transforms.py @@ -0,0 +1,954 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import math +import random + +import cv2 +import matplotlib +import numpy as np +from PIL import Image, ImageFilter, ImageOps + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class Padding(object): + """ Padding the Image to proper size. + Args: + stride: the stride of the network. + pad_value: the value that pad to the image border. + img: Image object as input. + Returns:: + img: Image object. + """ + def __init__(self, pad=None, pad_ratio=0.5, mean=(104, 117, 123), allow_outside_center=True): + self.pad = pad + self.ratio = pad_ratio + self.mean = tuple(mean) + self.allow_outside_center = allow_outside_center + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + width, height = img.size + left_pad, up_pad, right_pad, down_pad = self.pad + target_size = [width + left_pad + right_pad, height + up_pad + down_pad] + offset_left = -left_pad + offset_up = -up_pad + + img = ImageOps.expand(img, border=tuple(self.pad), fill=tuple(self.mean)) + if maskmap is not None: + maskmap = ImageOps.expand(maskmap, border=tuple(self.pad), fill=1) + + if labelmap is not None: + labelmap = ImageOps.expand(labelmap, border=tuple(self.pad), fill=255) + + return img, labelmap, maskmap + + +class RandomHFlip(object): + def __init__(self, swap_pair=None, flip_ratio=0.5): + self.swap_pair = swap_pair + self.ratio = flip_ratio + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + img = img.transpose(Image.FLIP_LEFT_RIGHT) + if labelmap is not None: + labelmap = labelmap.transpose(Image.FLIP_LEFT_RIGHT) + + if maskmap is not None: + maskmap = maskmap.transpose(Image.FLIP_LEFT_RIGHT) + + return img, labelmap, maskmap + + +class RandomSaturation(object): + def __init__(self, lower=0.5, upper=1.5, saturation_ratio=0.5): + self.lower = lower + self.upper = upper + self.ratio = saturation_ratio + assert self.upper >= self.lower, "saturation upper must be >= lower." + assert self.lower >= 0, "saturation lower must be non-negative." 
+ + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + img = np.array(img).astype(np.float32) + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + + img[:, :, 1] *= random.uniform(self.lower, self.upper) + img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB) + img = np.clip(img, 0, 255) + return Image.fromarray(img.astype(np.uint8)), labelmap, maskmap + + +class RandomHue(object): + def __init__(self, delta=18, hue_ratio=0.5): + assert 0 <= delta <= 360 + self.delta = delta + self.ratio = hue_ratio + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + img = np.array(img).astype(np.float32) + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + + img[:, :, 0] += random.uniform(-self.delta, self.delta) + img[:, :, 0][img[:, :, 0] > 360] -= 360 + img[:, :, 0][img[:, :, 0] < 0] += 360 + img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB) + img = np.clip(img, 0, 255) + return Image.fromarray(img.astype(np.uint8)), labelmap, maskmap + + +class RandomPerm(object): + def __init__(self, perm_ratio=0.5): + self.ratio = perm_ratio + self.perms = ((0, 1, 2), (0, 2, 1), + (1, 0, 2), (1, 2, 0), + (2, 0, 1), (2, 1, 0)) + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + swap = self.perms[random.randint(0, len(self.perms)-1)] + img = np.array(img) + img = img[:, :, swap] + return Image.fromarray(img.astype(np.uint8)), labelmap, maskmap + + +class RandomContrast(object): + def __init__(self, lower=0.5, upper=1.5, contrast_ratio=0.5): + self.lower = lower + self.upper = upper + self.ratio = contrast_ratio + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." 
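+    # Applied with probability `contrast_ratio`: every pixel value is multiplied
+    # by a factor drawn uniformly from [lower, upper] and clipped to [0, 255].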
+ + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + img = np.array(img).astype(np.float32) + img *= random.uniform(self.lower, self.upper) + img = np.clip(img, 0, 255) + + return Image.fromarray(img.astype(np.uint8)), labelmap, maskmap + + +class RandomBrightness(object): + def __init__(self, shift_value=30, brightness_ratio=0.5): + self.shift_value = shift_value + self.ratio = brightness_ratio + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + shift = np.random.uniform(-self.shift_value, self.shift_value, size=1) + image = np.array(img).astype(np.float32) + image[:, :, :] += shift + image = np.around(image) + image = np.clip(image, 0, 255) + image = image.astype(np.uint8) + image = Image.fromarray(image) + + return image, labelmap, maskmap + + +class RandomGaussBlur(object): + def __init__(self, max_blur=4, blur_ratio=0.5): + self.max_blur = max_blur + self.ratio = blur_ratio + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + blur_value = np.random.uniform(0, self.max_blur) + img = img.filter(ImageFilter.GaussianBlur(radius=blur_value)) + return img, labelmap, maskmap + + +class RandomHSV(object): + """ + Args: + h_range (float tuple): random ratio of the hue channel, + new_h range from h_range[0]*old_h to h_range[1]*old_h. + s_range (float tuple): random ratio of the saturation channel, + new_s range from s_range[0]*old_s to s_range[1]*old_s. + v_range (int tuple): random bias of the value channel, + new_v range from old_v-v_range to old_v+v_range. 
+ Notice: + h range: 0-1 + s range: 0-1 + v range: 0-255 + """ + + def __init__(self, h_range, s_range, v_range, hsv_ratio=0.5): + assert isinstance(h_range, (list, tuple)) and \ + isinstance(s_range, (list, tuple)) and \ + isinstance(v_range, (list, tuple)) + self.h_range = h_range + self.s_range = s_range + self.v_range = v_range + self.ratio = hsv_ratio + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + img = np.array(img) + img_hsv = matplotlib.colors.rgb_to_hsv(img) + img_h, img_s, img_v = img_hsv[:, :, 0], img_hsv[:, :, 1], img_hsv[:, :, 2] + h_random = np.random.uniform(min(self.h_range), max(self.h_range)) + s_random = np.random.uniform(min(self.s_range), max(self.s_range)) + v_random = np.random.uniform(min(self.v_range), max(self.v_range)) + img_h = np.clip(img_h * h_random, 0, 1) + img_s = np.clip(img_s * s_random, 0, 1) + img_v = np.clip(img_v * v_random, 0, 255) + img_hsv = np.stack([img_h, img_s, img_v], axis=2) + img_new = matplotlib.colors.hsv_to_rgb(img_hsv) + + return Image.fromarray(img_new.astype(np.uint8)), labelmap, maskmap + + +class RandomResize(object): + """Resize the given numpy.ndarray to random size and aspect ratio. + + Args: + scale_min: the min scale to resize. + scale_max: the max scale to resize. + """ + + def __init__(self, scale_range=(0.75, 1.25), aspect_range=(0.9, 1.1), target_size=None, + resize_bound=None, method='random', max_side_bound=None, scale_list=None, resize_ratio=0.5): + self.scale_range = scale_range + self.aspect_range = aspect_range + self.resize_bound = resize_bound + self.max_side_bound = max_side_bound + self.scale_list = scale_list + self.method = method + self.ratio = resize_ratio + + if target_size is not None: + if isinstance(target_size, int): + self.input_size = (target_size, target_size) + elif isinstance(target_size, (list, tuple)) and len(target_size) == 2: + self.input_size = target_size + else: + raise TypeError('Got inappropriate size arg: {}'.format(target_size)) + else: + self.input_size = None + + def get_scale(self, img_size): + if self.method == 'random': + scale_ratio = random.uniform(self.scale_range[0], self.scale_range[1]) + return scale_ratio + + elif self.method == 'bound': + scale1 = self.resize_bound[0] / min(img_size) + scale2 = self.resize_bound[1] / max(img_size) + scale = min(scale1, scale2) + return scale + + else: + Log.error('Resize method {} is invalid.'.format(self.method)) + exit(1) + + def __call__(self, img, labelmap=None, maskmap=None): + """ + Args: + img (Image): Image to be resized. + maskmap (Image): Mask to be resized. + kpt (list): keypoints to be resized. + center: (list): center points to be resized. + + Returns: + Image: Randomly resize image. + Image: Randomly resize maskmap. + list: Randomly resize keypoints. + list: Randomly resize center points. 
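+
+        Example (illustrative sketch; the arguments are assumed values, the
+        first two happening to match this class's defaults):
+            >>> resize = RandomResize(scale_range=(0.75, 1.25), aspect_range=(0.9, 1.1), resize_ratio=1.0)
+            >>> img, labelmap, maskmap = resize(img, labelmap, maskmap)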
+ """ + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + width, height = img.size + if random.random() < self.ratio: + if self.scale_list is None: + scale_ratio = self.get_scale([width, height]) + else: + scale_ratio = self.scale_list[random.randint(0, len(self.scale_list)-1)] + + aspect_ratio = random.uniform(*self.aspect_range) + w_scale_ratio = math.sqrt(aspect_ratio) * scale_ratio + h_scale_ratio = math.sqrt(1.0 / aspect_ratio) * scale_ratio + if self.max_side_bound is not None and max(height*h_scale_ratio, width*w_scale_ratio) > self.max_side_bound: + d_ratio = self.max_side_bound / max(height * h_scale_ratio, width * w_scale_ratio) + w_scale_ratio *= d_ratio + h_scale_ratio *= d_ratio + else: + w_scale_ratio, h_scale_ratio = 1.0, 1.0 + + converted_size = (int(width * w_scale_ratio), int(height * h_scale_ratio)) + + img = img.resize(converted_size, Image.BILINEAR) + if labelmap is not None: + labelmap = labelmap.resize(converted_size, Image.NEAREST) + if maskmap is not None: + maskmap = maskmap.resize(converted_size, Image.NEAREST) + + return img, labelmap, maskmap + + +class RandomRotate(object): + """Rotate the input numpy.ndarray and points to the given degree. + + Args: + degree (number): Desired rotate degree. + """ + + def __init__(self, max_degree, rotate_ratio=0.5, mean=(104, 117, 123)): + assert isinstance(max_degree, int) + self.max_degree = max_degree + self.ratio = rotate_ratio + self.mean = tuple(mean) + + def __call__(self, img, labelmap=None, maskmap=None): + """ + Args: + img (Image): Image to be rotated. + maskmap (Image): Mask to be rotated. + kpt (np.array): Keypoints to be rotated. + center (list): Center points to be rotated. + + Returns: + Image: Rotated image. + list: Rotated key points. + """ + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() < self.ratio: + rotate_degree = random.uniform(-self.max_degree, self.max_degree) + else: + return img, labelmap, maskmap + + img = np.array(img) + height, width, _ = img.shape + + img_center = (width / 2.0, height / 2.0) + + rotate_mat = cv2.getRotationMatrix2D(img_center, rotate_degree, 1.0) + cos_val = np.abs(rotate_mat[0, 0]) + sin_val = np.abs(rotate_mat[0, 1]) + new_width = int(height * sin_val + width * cos_val) + new_height = int(height * cos_val + width * sin_val) + rotate_mat[0, 2] += (new_width / 2.) - img_center[0] + rotate_mat[1, 2] += (new_height / 2.) - img_center[1] + img = cv2.warpAffine(img, rotate_mat, (new_width, new_height), borderValue=self.mean) + img = Image.fromarray(img.astype(np.uint8)) + if labelmap is not None: + labelmap = np.array(labelmap) + labelmap = cv2.warpAffine(labelmap, rotate_mat, (new_width, new_height), + borderValue=(255, 255, 255), flags=cv2.INTER_NEAREST) + labelmap = Image.fromarray(labelmap.astype(np.uint8)) + + if maskmap is not None: + maskmap = np.array(maskmap) + maskmap = cv2.warpAffine(maskmap, rotate_mat, (new_width, new_height), + borderValue=(1, 1, 1), flags=cv2.INTER_NEAREST) + maskmap = Image.fromarray(maskmap.astype(np.uint8)) + + return img, labelmap, maskmap + + +class RandomCrop(object): + """Crop the given numpy.ndarray and at a random location. 
+ + Args: + size (int or tuple): Desired output size of the crop.(w, h) + """ + + def __init__(self, crop_size, crop_ratio=0.5, method='focus', grid=None, allow_outside_center=True): + self.ratio = crop_ratio + self.method = method + self.grid = grid + self.allow_outside_center = allow_outside_center + + if isinstance(crop_size, float): + self.size = (crop_size, crop_size) + elif isinstance(crop_size, collections.Iterable) and len(crop_size) == 2: + self.size = crop_size + else: + raise TypeError('Got inappropriate size arg: {}'.format(crop_size)) + + def get_lefttop(self, crop_size, img_size): + if self.method == 'center': + return [(img_size[0] - crop_size[0]) // 2, (img_size[1] - crop_size[1]) // 2] + + elif self.method == 'random': + x = random.randint(0, img_size[0] - crop_size[0]) + y = random.randint(0, img_size[1] - crop_size[1]) + return [x, y] + + elif self.method == 'grid': + grid_x = random.randint(0, self.grid[0] - 1) + grid_y = random.randint(0, self.grid[1] - 1) + x = grid_x * ((img_size[0] - crop_size[0]) // (self.grid[0] - 1)) + y = grid_y * ((img_size[1] - crop_size[1]) // (self.grid[1] - 1)) + return [x, y] + + else: + Log.error('Crop method {} is invalid.'.format(self.method)) + exit(1) + + def __call__(self, img, labelmap=None, maskmap=None): + """ + Args: + img (Image): Image to be cropped. + maskmap (Image): Mask to be cropped. + kpts (np.array): keypoints to be cropped. + bboxes (np.array): bounding boxes. + + Returns: + Image: Cropped image. + Image: Cropped maskmap. + np.array: Cropped keypoints. + np.ndarray: Cropped center points. + """ + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + if random.random() > self.ratio: + return img, labelmap, maskmap + + target_size = (min(self.size[0], img.size[0]), min(self.size[1], img.size[1])) + + offset_left, offset_up = self.get_lefttop(target_size, img.size) + + img = img.crop((offset_left, offset_up, offset_left + target_size[0], offset_up + target_size[1])) + + if maskmap is not None: + maskmap = maskmap.crop((offset_left, offset_up, offset_left + target_size[0], offset_up + target_size[1])) + + if labelmap is not None: + labelmap = labelmap.crop((offset_left, offset_up, offset_left + target_size[0], offset_up + target_size[1])) + + return img, labelmap, maskmap + + +class Resize(object): + def __init__(self, target_size=None, min_side_length=None, max_side_length=None, max_side_bound=None): + self.target_size = target_size + self.min_side_length = min_side_length + self.max_side_length = max_side_length + self.max_side_bound = max_side_bound + + def __call__(self, img, labelmap=None, maskmap=None): + assert isinstance(img, Image.Image) + assert labelmap is None or isinstance(labelmap, Image.Image) + assert maskmap is None or isinstance(maskmap, Image.Image) + + width, height = img.size + if self.target_size is not None: + target_size = self.target_size + w_scale_ratio = self.target_size[0] / width + h_scale_ratio = self.target_size[1] / height + + elif self.min_side_length is not None: + scale_ratio = self.min_side_length / min(width, height) + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + target_size = [int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio))] + + else: + scale_ratio = self.max_side_length / max(width, height) + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + target_size = [int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio))] + + if 
self.max_side_bound is not None and max(target_size) > self.max_side_bound: + d_ratio = self.max_side_bound / max(target_size) + w_scale_ratio = d_ratio * w_scale_ratio + h_scale_ratio = d_ratio * h_scale_ratio + target_size = [int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio))] + + img = img.resize(target_size, Image.BILINEAR) + if labelmap is not None: + labelmap = labelmap.resize(target_size, Image.NEAREST) + + if maskmap is not None: + maskmap = maskmap.resize(target_size, Image.NEAREST) + + return img, labelmap, maskmap + + +class PILAugCompose(object): + """Composes several transforms together. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> PILAugCompose([ + >>> RandomCrop(), + >>> ]) + """ + + def __init__(self, configer, split='train'): + self.configer = configer + self.split = split + + self.transforms = dict() + if self.split == 'train': + shuffle_train_trans = [] + if self.configer.exists('train_trans', 'shuffle_trans_seq'): + if isinstance(self.configer.get('train_trans', 'shuffle_trans_seq')[0], list): + train_trans_seq_list = self.configer.get('train_trans', 'shuffle_trans_seq') + for train_trans_seq in train_trans_seq_list: + shuffle_train_trans += train_trans_seq + + else: + shuffle_train_trans = self.configer.get('train_trans', 'shuffle_trans_seq') + + if 'random_saturation' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_saturation'] = RandomSaturation( + lower=self.configer.get('train_trans', 'random_saturation')['lower'], + upper=self.configer.get('train_trans', 'random_saturation')['upper'], + saturation_ratio=self.configer.get('train_trans', 'random_saturation')['ratio'] + ) + + if 'random_hue' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_hue'] = RandomHue( + delta=self.configer.get('train_trans', 'random_hue')['delta'], + hue_ratio=self.configer.get('train_trans', 'random_hue')['ratio'] + ) + + if 'random_perm' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_perm'] = RandomPerm( + perm_ratio=self.configer.get('train_trans', 'random_perm')['ratio'] + ) + + if 'random_contrast' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_contrast'] = RandomContrast( + lower=self.configer.get('train_trans', 'random_contrast')['lower'], + upper=self.configer.get('train_trans', 'random_contrast')['upper'], + contrast_ratio=self.configer.get('train_trans', 'random_contrast')['ratio'] + ) + + if 'padding' in self.configer.get('train_trans', 'trans_seq'): + self.transforms['padding'] = Padding( + pad=self.configer.get('train_trans', 'padding')['pad'], + pad_ratio=self.configer.get('train_trans', 'padding')['ratio'], + mean=self.configer.get('normalize', 'mean_value'), + allow_outside_center=self.configer.get('train_trans', 'padding')['allow_outside_center'] + ) + + if 'random_brightness' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_brightness'] = RandomBrightness( + shift_value=self.configer.get('train_trans', 'random_brightness')['shift_value'], + brightness_ratio=self.configer.get('train_trans', 'random_brightness')['ratio'] + ) + + if 'random_hsv' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_hsv'] = RandomHSV( + h_range=self.configer.get('train_trans', 'random_hsv')['h_range'], + 
s_range=self.configer.get('train_trans', 'random_hsv')['s_range'], + v_range=self.configer.get('train_trans', 'random_hsv')['v_range'], + hsv_ratio=self.configer.get('train_trans', 'random_hsv')['ratio'] + ) + + if 'random_gauss_blur' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_gauss_blur'] = RandomGaussBlur( + max_blur=self.configer.get('train_trans', 'random_gauss_blur')['max_blur'], + blur_ratio=self.configer.get('train_trans', 'random_gauss_blur')['ratio'] + ) + + if 'random_hflip' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_hflip'] = RandomHFlip( + swap_pair=self.configer.get('train_trans', 'random_hflip')['swap_pair'], + flip_ratio=self.configer.get('train_trans', 'random_hflip')['ratio'] + ) + + if 'random_resize' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + if self.configer.get('train_trans', 'random_resize')['method'] == 'random': + if 'scale_list' not in self.configer.get('train_trans', 'random_resize'): + if 'max_side_bound' in self.configer.get('train_trans', 'random_resize'): + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('train_trans', 'random_resize')['method'], + scale_range=self.configer.get('train_trans', 'random_resize')['scale_range'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + max_side_bound=self.configer.get('train_trans', 'random_resize')['max_side_bound'], + resize_ratio=self.configer.get('train_trans', 'random_resize')['ratio'] + ) + else: + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('train_trans', 'random_resize')['method'], + scale_range=self.configer.get('train_trans', 'random_resize')['scale_range'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + resize_ratio=self.configer.get('train_trans', 'random_resize')['ratio'] + ) + else: + if 'max_side_bound' in self.configer.get('train_trans', 'random_resize'): + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('train_trans', 'random_resize')['method'], + scale_list=self.configer.get('train_trans', 'random_resize')['scale_list'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + max_side_bound=self.configer.get('train_trans', 'random_resize')['max_side_bound'], + resize_ratio=self.configer.get('train_trans', 'random_resize')['ratio'] + ) + else: + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('train_trans', 'random_resize')['method'], + scale_list=self.configer.get('train_trans', 'random_resize')['scale_list'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + resize_ratio=self.configer.get('train_trans', 'random_resize')['ratio'] + ) + + elif self.configer.get('train_trans', 'random_resize')['method'] == 'focus': + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('train_trans', 'random_resize')['method'], + scale_range=self.configer.get('train_trans', 'random_resize')['scale_range'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + target_size=self.configer.get('train_trans', 'random_resize')['target_size'], + resize_ratio=self.configer.get('train_trans', 'random_resize')['ratio'] + ) + + elif self.configer.get('train_trans', 'random_resize')['method'] == 'bound': + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('train_trans', 
'random_resize')['method'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + resize_bound=self.configer.get('train_trans', 'random_resize')['resize_bound'], + resize_ratio=self.configer.get('train_trans', 'random_resize')['ratio'] + ) + + else: + Log.error('Not Support Resize Method!') + exit(1) + + if 'random_crop' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + if self.configer.get('train_trans', 'random_crop')['method'] == 'random': + self.transforms['random_crop'] = RandomCrop( + crop_size=self.configer.get('train_trans', 'random_crop')['crop_size'], + method=self.configer.get('train_trans', 'random_crop')['method'], + crop_ratio=self.configer.get('train_trans', 'random_crop')['ratio'], + allow_outside_center=self.configer.get('train_trans', 'random_crop')['allow_outside_center'] + ) + + elif self.configer.get('train_trans', 'random_crop')['method'] == 'center': + self.transforms['random_crop'] = RandomCrop( + crop_size=self.configer.get('train_trans', 'random_crop')['crop_size'], + method=self.configer.get('train_trans', 'random_crop')['method'], + crop_ratio=self.configer.get('train_trans', 'random_crop')['ratio'], + allow_outside_center=self.configer.get('train_trans', 'random_crop')['allow_outside_center'] + ) + + elif self.configer.get('train_trans', 'random_crop')['method'] == 'grid': + self.transforms['random_crop'] = RandomCrop( + crop_size=self.configer.get('train_trans', 'random_crop')['crop_size'], + method=self.configer.get('train_trans', 'random_crop')['method'], + grid=self.configer.get('train_trans', 'random_crop')['grid'], + crop_ratio=self.configer.get('train_trans', 'random_crop')['ratio'], + allow_outside_center=self.configer.get('train_trans', 'random_crop')['allow_outside_center'] + ) + + else: + Log.error('Not Support Crop Method!') + exit(1) + + if 'random_rotate' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + self.transforms['random_rotate'] = RandomRotate( + max_degree=self.configer.get('train_trans', 'random_rotate')['rotate_degree'], + rotate_ratio=self.configer.get('train_trans', 'random_rotate')['ratio'], + mean=self.configer.get('normalize', 'mean_value') + ) + + if 'resize' in self.configer.get('train_trans', 'trans_seq') + shuffle_train_trans: + if 'target_size' in self.configer.get('train_trans', 'resize'): + self.transforms['resize'] = Resize( + target_size=self.configer.get('train_trans', 'resize')['target_size'] + ) + if 'min_side_length' in self.configer.get('train_trans', 'resize'): + if 'max_side_bound' in self.configer.get('train_trans', 'resize'): + self.transforms['resize'] = Resize( + min_side_length=self.configer.get('train_trans', 'resize')['min_side_length'], + max_side_bound=self.configer.get('train_trans', 'resize')['max_side_bound'], + ) + else: + self.transforms['resize'] = Resize( + min_side_length=self.configer.get('train_trans', 'resize')['min_side_length'] + ) + if 'max_side_length' in self.configer.get('train_trans', 'resize'): + self.transforms['resize'] = Resize( + max_side_length=self.configer.get('train_trans', 'resize')['max_side_length'] + ) + + else: + if 'random_saturation' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_saturation'] = RandomSaturation( + lower=self.configer.get('val_trans', 'random_saturation')['lower'], + upper=self.configer.get('val_trans', 'random_saturation')['upper'], + saturation_ratio=self.configer.get('val_trans', 'random_saturation')['ratio'] + ) + + if 'random_hue' in 
self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_hue'] = RandomHue( + delta=self.configer.get('val_trans', 'random_hue')['delta'], + hue_ratio=self.configer.get('val_trans', 'random_hue')['ratio'] + ) + + if 'random_perm' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_perm'] = RandomPerm( + perm_ratio=self.configer.get('val_trans', 'random_perm')['ratio'] + ) + + if 'random_contrast' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_contrast'] = RandomContrast( + lower=self.configer.get('val_trans', 'random_contrast')['lower'], + upper=self.configer.get('val_trans', 'random_contrast')['upper'], + contrast_ratio=self.configer.get('val_trans', 'random_contrast')['ratio'] + ) + + if 'padding' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['padding'] = Padding( + pad=self.configer.get('val_trans', 'padding')['pad'], + pad_ratio=self.configer.get('val_trans', 'padding')['ratio'], + mean=self.configer.get('normalize', 'mean_value'), + allow_outside_center=self.configer.get('val_trans', 'padding')['allow_outside_center'] + ) + + if 'random_brightness' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_brightness'] = RandomBrightness( + shift_value=self.configer.get('val_trans', 'random_brightness')['shift_value'], + brightness_ratio=self.configer.get('val_trans', 'random_brightness')['ratio'] + ) + + if 'random_hsv' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_hsv'] = RandomHSV( + h_range=self.configer.get('val_trans', 'random_hsv')['h_range'], + s_range=self.configer.get('val_trans', 'random_hsv')['s_range'], + v_range=self.configer.get('val_trans', 'random_hsv')['v_range'], + hsv_ratio=self.configer.get('val_trans', 'random_hsv')['ratio'] + ) + + if 'random_gauss_blur' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_gauss_blur'] = RandomGaussBlur( + max_blur=self.configer.get('val_trans', 'random_gauss_blur')['max_blur'], + blur_ratio=self.configer.get('val_trans', 'random_gauss_blur')['ratio'] + ) + + if 'random_hflip' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_hflip'] = RandomHFlip( + swap_pair=self.configer.get('val_trans', 'random_hflip')['swap_pair'], + flip_ratio=self.configer.get('val_trans', 'random_hflip')['ratio'] + ) + + if 'random_resize' in self.configer.get('val_trans', 'trans_seq'): + if self.configer.get('train_trans', 'random_resize')['method'] == 'random': + if 'scale_list' not in self.configer.get('val_trans', 'random_resize'): + if 'max_side_bound' in self.configer.get('val_trans', 'random_resize'): + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('val_trans', 'random_resize')['method'], + scale_range=self.configer.get('val_trans', 'random_resize')['scale_range'], + aspect_range=self.configer.get('val_trans', 'random_resize')['aspect_range'], + max_side_bound=self.configer.get('val_trans', 'random_resize')['max_side_bound'], + resize_ratio=self.configer.get('val_trans', 'random_resize')['ratio'] + ) + else: + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('val_trans', 'random_resize')['method'], + scale_range=self.configer.get('val_trans', 'random_resize')['scale_range'], + aspect_range=self.configer.get('val_trans', 'random_resize')['aspect_range'], + resize_ratio=self.configer.get('val_trans', 'random_resize')['ratio'] + ) + else: + if 'max_side_bound' in self.configer.get('val_trans', 'random_resize'): + 
self.transforms['random_resize'] = RandomResize( + method=self.configer.get('val_trans', 'random_resize')['method'], + scale_list=self.configer.get('val_trans', 'random_resize')['scale_list'], + aspect_range=self.configer.get('val_trans', 'random_resize')['aspect_range'], + max_side_bound=self.configer.get('val_trans', 'random_resize')['max_side_bound'], + resize_ratio=self.configer.get('val_trans', 'random_resize')['ratio'] + ) + else: + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('val_trans', 'random_resize')['method'], + scale_list=self.configer.get('val_trans', 'random_resize')['scale_list'], + aspect_range=self.configer.get('val_trans', 'random_resize')['aspect_range'], + resize_ratio=self.configer.get('val_trans', 'random_resize')['ratio'] + ) + + elif self.configer.get('val_trans', 'random_resize')['method'] == 'focus': + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('val_trans', 'random_resize')['method'], + scale_range=self.configer.get('val_trans', 'random_resize')['scale_range'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + target_size=self.configer.get('val_trans', 'random_resize')['target_size'], + resize_ratio=self.configer.get('val_trans', 'random_resize')['ratio'] + ) + + elif self.configer.get('val_trans', 'random_resize')['method'] == 'bound': + self.transforms['random_resize'] = RandomResize( + method=self.configer.get('val_trans', 'random_resize')['method'], + aspect_range=self.configer.get('train_trans', 'random_resize')['aspect_range'], + resize_bound=self.configer.get('val_trans', 'random_resize')['resize_bound'], + resize_ratio=self.configer.get('val_trans', 'random_resize')['ratio'] + ) + + else: + Log.error('Not Support Resize Method!') + exit(1) + + if 'random_crop' in self.configer.get('val_trans', 'trans_seq'): + if self.configer.get('val_trans', 'random_crop')['method'] == 'random': + self.transforms['random_crop'] = RandomCrop( + crop_size=self.configer.get('val_trans', 'random_crop')['crop_size'], + method=self.configer.get('val_trans', 'random_crop')['method'], + crop_ratio=self.configer.get('val_trans', 'random_crop')['ratio'], + allow_outside_center=self.configer.get('val_trans', 'random_crop')['allow_outside_center'] + ) + + elif self.configer.get('val_trans', 'random_crop')['method'] == 'center': + self.transforms['random_crop'] = RandomCrop( + crop_size=self.configer.get('val_trans', 'random_crop')['crop_size'], + method=self.configer.get('val_trans', 'random_crop')['method'], + crop_ratio=self.configer.get('val_trans', 'random_crop')['ratio'], + allow_outside_center=self.configer.get('val_trans', 'random_crop')['allow_outside_center'] + ) + + elif self.configer.get('val_trans', 'random_crop')['method'] == 'grid': + self.transforms['random_crop'] = RandomCrop( + crop_size=self.configer.get('val_trans', 'random_crop')['crop_size'], + method=self.configer.get('val_trans', 'random_crop')['method'], + grid=self.configer.get('val_trans', 'random_crop')['grid'], + crop_ratio=self.configer.get('val_trans', 'random_crop')['ratio'], + allow_outside_center=self.configer.get('val_trans', 'random_crop')['allow_outside_center'] + ) + + else: + Log.error('Not Support Crop Method!') + exit(1) + + if 'random_rotate' in self.configer.get('val_trans', 'trans_seq'): + self.transforms['random_rotate'] = RandomRotate( + max_degree=self.configer.get('val_trans', 'random_rotate')['rotate_degree'], + rotate_ratio=self.configer.get('val_trans', 'random_rotate')['ratio'], + 
mean=self.configer.get('normalize', 'mean_value') + ) + + if 'resize' in self.configer.get('val_trans', 'trans_seq'): + if 'target_size' in self.configer.get('val_trans', 'resize'): + self.transforms['resize'] = Resize( + target_size=self.configer.get('val_trans', 'resize')['target_size'] + ) + if 'min_side_length' in self.configer.get('val_trans', 'resize'): + if 'max_side_bound' in self.configer.get('val_trans', 'resize'): + self.transforms['resize'] = Resize( + min_side_length=self.configer.get('val_trans', 'resize')['min_side_length'], + max_side_bound=self.configer.get('val_trans', 'resize')['max_side_bound'], + ) + else: + self.transforms['resize'] = Resize( + min_side_length=self.configer.get('val_trans', 'resize')['min_side_length'] + ) + if 'max_side_length' in self.configer.get('val_trans', 'resize'): + self.transforms['resize'] = Resize( + max_side_length=self.configer.get('val_trans', 'resize')['max_side_length'] + ) + + def __check_none(self, key_list, value_list): + for key, value in zip(key_list, value_list): + if value == 'y' and key is None: + return False + + if value == 'n' and key is not None: + return False + + return True + + def __call__(self, img, labelmap=None, maskmap=None): + + if self.split == 'train': + shuffle_trans_seq = [] + if self.configer.exists('train_trans', 'shuffle_trans_seq'): + if isinstance(self.configer.get('train_trans', 'shuffle_trans_seq')[0], list): + shuffle_trans_seq_list = self.configer.get('train_trans', 'shuffle_trans_seq') + shuffle_trans_seq = shuffle_trans_seq_list[random.randint(0, len(shuffle_trans_seq_list))] + else: + shuffle_trans_seq = self.configer.get('train_trans', 'shuffle_trans_seq') + random.shuffle(shuffle_trans_seq) + + for trans_key in (shuffle_trans_seq + self.configer.get('train_trans', 'trans_seq')): + img, labelmap, maskmap = self.transforms[trans_key](img, labelmap, maskmap) + + else: + for trans_key in self.configer.get('val_trans', 'trans_seq'): + img, labelmap, maskmap = self.transforms[trans_key](img, labelmap, maskmap) + + if self.__check_none([labelmap, maskmap], ['n', 'n']): + return img + + if self.__check_none([labelmap, maskmap], ['y', 'n']): + return img, labelmap + + if self.__check_none([labelmap, maskmap], ['n', 'y']): + return img, maskmap + + if self.__check_none([labelmap, maskmap], ['y', 'y']): + return img, labelmap, maskmap + + Log.error('Params is not valid.') + exit(1) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/transforms.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/transforms.py new file mode 100644 index 0000000..5b4dbf4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/datasets/tools/transforms.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import torch +from PIL import Image + + +class Normalize(object): + """Normalize a ``torch.tensor`` + + Args: + inputs (torch.tensor): tensor to be normalized. + mean: (list): the mean of RGB + std: (list): the std of RGB + + Returns: + Tensor: Normalized tensor. 
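+
+    Example (illustrative sketch; the div_value/mean/std below are common
+    ImageNet-style values used as assumptions, not values read from this
+    repo's configs):
+        >>> normalize = Normalize(div_value=255.0, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        >>> tensor = normalize(ToTensor()(img))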
+ """ + def __init__(self, div_value, mean, std): + self.div_value = div_value + self.mean = mean + self.std =std + + def __call__(self, inputs): + inputs = inputs.div(self.div_value) + for t, m, s in zip(inputs, self.mean, self.std): + t.sub_(m).div_(s) + + return inputs + + +class DeNormalize(object): + """DeNormalize a ``torch.tensor`` + + Args: + inputs (torch.tensor): tensor to be normalized. + mean: (list): the mean of RGB + std: (list): the std of RGB + + Returns: + Tensor: Normalized tensor. + """ + def __init__(self, div_value, mean, std): + self.div_value = div_value + self.mean = mean + self.std =std + + def __call__(self, inputs): + result = inputs.clone() + for i in range(result.size(0)): + result[i, :, :] = result[i, :, :] * self.std[i] + self.mean[i] + + return result.mul_(self.div_value) + + +class ToTensor(object): + """Convert a ``numpy.ndarray or Image`` to tensor. + + See ``ToTensor`` for more details. + + Args: + inputs (numpy.ndarray or Image): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + def __call__(self, inputs): + if isinstance(inputs, Image.Image): + channels = len(inputs.mode) + inputs = np.array(inputs) + inputs = inputs.reshape(inputs.shape[0], inputs.shape[1], channels) + inputs = torch.from_numpy(inputs.transpose(2, 0, 1)) + else: + inputs = torch.from_numpy(inputs.transpose(2, 0, 1)) + + return inputs.float() + + +class ToLabel(object): + def __call__(self, inputs): + return torch.from_numpy(np.array(inputs)).long() + + +class ReLabel(object): + """ + 255 indicate the background, relabel 255 to some value. + """ + def __init__(self, olabel, nlabel): + self.olabel = olabel + self.nlabel = nlabel + + def __call__(self, inputs): + assert isinstance(inputs, torch.LongTensor), 'tensor needs to be LongTensor' + + inputs[inputs == self.olabel] = self.nlabel + return inputs + + +class Compose(object): + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, inputs): + for t in self.transforms: + inputs = t(inputs) + + return inputs + + + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/__init__.py new file mode 100644 index 0000000..dc570f5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/__init__.py @@ -0,0 +1 @@ +from .functions import PAM_Module, CrissCrossAttention, CrossAttention, ca_weight, ca_map \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/_ext/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/_ext/__init__.py new file mode 100644 index 0000000..ec10cfb --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/_ext/__init__.py @@ -0,0 +1,15 @@ + +from torch.utils.ffi import _wrap_function +from .__ext import lib as _lib, ffi as _ffi + +__all__ = [] +def _import_symbols(locals): + for symbol in dir(_lib): + fn = getattr(_lib, symbol) + if callable(fn): + locals[symbol] = _wrap_function(fn, _ffi) + else: + locals[symbol] = fn + __all__.append(symbol) + +_import_symbols(locals()) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/build.py 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/build.py new file mode 100644 index 0000000..d1aff23 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/build.py @@ -0,0 +1,24 @@ +import os + +from torch.utils.ffi import create_extension + +sources = ['src/lib_cffi.cpp'] +headers = ['src/lib_cffi.h'] +extra_objects = ['src/ca.o'] +with_cuda = True + +this_file = os.path.dirname(os.path.realpath(__file__)) +extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] + +ffi = create_extension( + '_ext', + headers=headers, + sources=sources, + relative_to=__file__, + with_cuda=with_cuda, + extra_objects=extra_objects, + extra_compile_args=["-std=c++11"] +) + +if __name__ == '__main__': + ffi.build() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/build.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/build.sh new file mode 100644 index 0000000..57ae088 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/build.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Configuration +CUDA_GENCODE="\ +-gencode=arch=compute_60,code=sm_60 \ +-gencode=arch=compute_61,code=sm_61 \ +-gencode=arch=compute_52,code=sm_52 \ +-gencode=arch=compute_50,code=sm_50" + + +cd src +/usr/local/cuda-8.0/bin/nvcc -I/usr/local/cuda/include --expt-extended-lambda -O3 -c -o ca.o ca.cu -x cu -Xcompiler -fPIC -std=c++11 ${CUDA_GENCODE} +cd .. diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/functions.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/functions.py new file mode 100644 index 0000000..85b69e4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/functions.py @@ -0,0 +1,167 @@ +import torch +import torch.autograd as autograd +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd.function import once_differentiable + +from lib.extensions.cc_attention import _ext +# from . 
import _ext + + +# from libs import InPlaceABN, InPlaceABNSync +# BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') + + +def _check_contiguous(*args): + if not all([mod is None or mod.is_contiguous() for mod in args]): + raise ValueError("Non-contiguous input") + + +class CA_Weight(autograd.Function): + @staticmethod + def forward(ctx, t, f): + # Save context + n, c, h, w = t.size() + size = (n, h+w-1, h, w) + weight = torch.zeros(size, dtype=t.dtype, layout=t.layout, device=t.device) + + _ext.ca_forward_cuda(t, f, weight) + + # Output + ctx.save_for_backward(t, f) + + return weight + + @staticmethod + @once_differentiable + def backward(ctx, dw): + t, f = ctx.saved_tensors + + dt = torch.zeros_like(t) + df = torch.zeros_like(f) + + _ext.ca_backward_cuda(dw.contiguous(), t, f, dt, df) + + _check_contiguous(dt, df) + + return dt, df + +class CA_Map(autograd.Function): + @staticmethod + def forward(ctx, weight, g): + # Save context + out = torch.zeros_like(g) + _ext.ca_map_forward_cuda(weight, g, out) + + # Output + ctx.save_for_backward(weight, g) + + return out + + @staticmethod + @once_differentiable + def backward(ctx, dout): + weight, g = ctx.saved_tensors + + dw = torch.zeros_like(weight) + dg = torch.zeros_like(g) + + _ext.ca_map_backward_cuda(dout.contiguous(), weight, g, dw, dg) + + _check_contiguous(dw, dg) + + return dw, dg + +ca_weight = CA_Weight.apply +ca_map = CA_Map.apply + + +class CrossAttention(nn.Module): + def __init__(self, dim_in, dim_inner, dim_out): + super(CrossAttention, self).__init__() + + self.t_func = nn.Conv2d(in_channels=dim_in, out_channels=dim_inner, + kernel_size=1, stride=1, padding=0) + self.f_func = nn.Conv2d(in_channels=dim_in, out_channels=dim_inner, + kernel_size=1, stride=1, padding=0) + + self.g_func = nn.Conv2d(in_channels=dim_in, out_channels=dim_out, + kernel_size=1, stride=1, padding=0) + + self.inc = nn.Conv2d(in_channels=dim_out, out_channels=dim_in, + kernel_size=1, stride=1, padding=0) + + nn.init.constant_(self.inc.weight, 0) + nn.init.constant_(self.inc.bias, 0) + + def forward(self, x): + t = self.t_func(x) + f = self.f_func(x) + g = self.g_func(x) + + w = ca_weight(t, f) + w = F.softmax(w, 1) + out = ca_map(w, g) + x = x + self.inc(out) + + return x + +class CrissCrossAttention(nn.Module): + """ Pixel-wise attention module""" + def __init__(self,in_dim): + super(CrissCrossAttention,self).__init__() + self.chanel_in = in_dim + + self.query_conv = nn.Conv2d(in_channels = in_dim , out_channels = in_dim//8 , kernel_size= 1) + self.key_conv = nn.Conv2d(in_channels = in_dim , out_channels = in_dim//8 , kernel_size= 1) + self.value_conv = nn.Conv2d(in_channels = in_dim , out_channels = in_dim , kernel_size= 1) + self.gamma = nn.Parameter(torch.zeros(1)) + + def forward(self,x): + proj_query = self.query_conv(x) + proj_key = self.key_conv(x) + proj_value = self.value_conv(x) + + energy = ca_weight(proj_query, proj_key) + attention = F.softmax(energy, 1) + out = ca_map(attention, proj_value) + out = self.gamma*out + x + + return out + +class PAM_Module(nn.Module): + """ Position attention module""" + #Ref from SAGAN + def __init__(self, in_dim): + super(PAM_Module, self).__init__() + self.chanel_in = in_dim + + self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) + self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) + self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.gamma = nn.Parameter(torch.zeros(1)) + + def 
forward(self, x): + """ + inputs : + x : input feature maps( B X C X H X W) + returns : + out : attention value + input feature + attention: B X (HxW) X (HxW) + """ + m_batchsize, C, height, width = x.size() + proj_query = self.query_conv(x).view(m_batchsize, -1, width*height).permute(0, 2, 1) + proj_key = self.key_conv(x).view(m_batchsize, -1, width*height) + energy = torch.bmm(proj_query, proj_key) + attention = F.softmax(energy, 1) + proj_value = self.value_conv(x).view(m_batchsize, -1, width*height) + + out = torch.bmm(proj_value, attention.permute(0, 2, 1)) + out = out.view(m_batchsize, C, height, width) + + out = self.gamma*out + x + return out + + + +__all__ = ["PAM_Module", "CrissCrossAttention", "CrossAttention", "ca_weight", "ca_map"] diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/ca.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/ca.cu new file mode 100644 index 0000000..4dcbdb4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/ca.cu @@ -0,0 +1,260 @@ +#include +#include +#include + +#include "common.h" +#include "ca.h" + + +__global__ void ca_forward_kernel(const float *t, const float *f, float *weight, int num, int chn, int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int z = blockIdx.z; + + if (x < width && y < height && z < height+width-1) { + for (int batch = 0; batch < num; ++batch) { + for (int plane = 0; plane < chn; ++plane) { + float _t = t[(batch * chn + plane) * sp + y*width + x]; + + if (z < width) { + int i = z; + float _f = f[(batch * chn + plane) * sp + y*width + i]; + weight[(batch * len + i) * sp + y*width + x] += _t*_f; + } else { + int i = z - width; + int j = iy ? y : y-1; + + float _dw = dw[(batch * len + width + j) * sp + i*width + x]; + float _t = t[(batch * chn + plane) * sp + i*width + x]; + df[(batch * chn + plane) * sp + y*width + x] += _dw * _t; + } + } + + } +} + + +__global__ void ca_map_forward_kernel(const float *weight, const float *g, float *out, int num, int chn, int height, int width) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int sp = height * width; + int len = height + width - 1; + int plane = blockIdx.z; + + if (x < width && y < height && plane < chn) { + for (int batch = 0; batch < num; ++batch) { + + for (int i = 0; i < width; ++i) { + float _g = g[(batch * chn + plane) * sp + y*width + i]; + float _w = weight[(batch * len + i) * sp + y*width + x]; + out[(batch * chn + plane) * sp + y*width + x] += _g * _w; + } + for (int i = 0; i < height; ++i) { + if (i == y) continue; + + int j = iy ? 
y : y-1; + + float _dout = dout[(batch * chn + plane) * sp + i*width + x]; + float _w = weight[(batch * len + width + j) * sp + i*width + x]; + dg[(batch * chn + plane) * sp + y*width + x] += _dout * _w; + } + } + } +} + +/* + * Implementations + */ +extern "C" int _ca_forward_cuda(int N, int C, int H, int W, const float *t, + const float *f, float *weight, cudaStream_t stream) { + // Run kernel + dim3 threads(32, 32); + int d1 = (W+threads.x-1)/threads.x; + int d2 = (H+threads.y-1)/threads.y; + int d3 = H+W; + dim3 blocks(d1, d2, d3); + ca_forward_kernel<<>>(t, f, weight, N, C, H, W); + + // Check for errors + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + return 0; + else + return 1; +} + + +extern "C" int _ca_backward_cuda(int N, int C, int H, int W, const float *dw, const float *t, const float *f, float *dt, float *df, cudaStream_t stream) { + // Run kernel + dim3 threads(32, 32); + int d1 = (W+threads.x-1)/threads.x; + int d2 = (H+threads.y-1)/threads.y; + int d3 = C; + dim3 blocks(d1, d2, d3); + // printf("%f\n", dw[0]); + ca_backward_kernel_t<<>>(dw, t, f, dt, N, C, H, W); + ca_backward_kernel_f<<>>(dw, t, f, df, N, C, H, W); + + // Check for errors + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + return 0; + else + return 1; +} + + +extern "C" int _ca_map_forward_cuda(int N, int C, int H, int W, const float *weight, const float *g, float *out, cudaStream_t stream) { + // Run kernel + dim3 threads(32, 32); + dim3 blocks((W+threads.x-1)/threads.x, (H+threads.y-1)/threads.y, C); + ca_map_forward_kernel<<>>(weight, g, out, N, C, H, W); + + // Check for errors + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + return 0; + else + return 1; +} + +extern "C" int _ca_map_backward_cuda(int N, int C, int H, int W, const float *dout, const float *weight, const float *g, float *dw, float *dg, cudaStream_t stream) { + // Run kernel + dim3 threads(32, 32); + int d1 = (W+threads.x-1)/threads.x; + int d2 = (H+threads.y-1)/threads.y; + int d3 = H+W; + dim3 blocks(d1, d2, d3); + ca_map_backward_kernel_w<<>>(dout, weight, g, dw, N, C, H, W); + + d3 = C; + blocks = dim3(d1, d2, d3); + ca_map_backward_kernel_g<<>>(dout, weight, g, dg, N, C, H, W); + + // Check for errors + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + return 0; + else + return 1; +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/ca.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/ca.h new file mode 100644 index 0000000..6adf3ea --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/ca.h @@ -0,0 +1,12 @@ +#ifndef __CA__ +#define __CA__ + +/* + * Exported functions + */ +extern "C" int _ca_forward_cuda(int N, int C, int H, int W, const float *t, const float *f, float *weight, cudaStream_t stream); +extern "C" int _ca_backward_cuda(int N, int C, int H, int W, const float *dw, const float *t, const float *f, float *dt, float *df, cudaStream_t stream); +extern "C" int _ca_map_forward_cuda(int N, int C, int H, int W, const float *weight, const float *g, float *out, cudaStream_t stream); +extern "C" int _ca_map_backward_cuda(int N, int C, int H, int W, const float *dout, const float *weight, const float *g, float *dw, float *dg, cudaStream_t stream); + +#endif diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/common.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/common.h new file mode 100644 index 
0000000..7a84801 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/common.h @@ -0,0 +1,37 @@ +#ifndef __COMMON__ +#define __COMMON__ +#include + +/* + * General settings + */ +const int WARP_SIZE = 32; +const int MAX_BLOCK_SIZE = 512; + +/* + * Utility functions + */ +template +__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, + unsigned int mask = 0xffffffff) { +#if CUDART_VERSION >= 9000 + return __shfl_xor_sync(mask, value, laneMask, width); +#else + return __shfl_xor(value, laneMask, width); +#endif +} + +__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } + +static int getNumThreads(int nElem) { + int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; + for (int i = 0; i != 5; ++i) { + if (nElem <= threadSizes[i]) { + return threadSizes[i]; + } + } + return MAX_BLOCK_SIZE; +} + + +#endif \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/lib_cffi.cpp b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/lib_cffi.cpp new file mode 100644 index 0000000..47d39d2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/lib_cffi.cpp @@ -0,0 +1,86 @@ +// All functions assume that input and output tensors are already initialized +// and have the correct dimensions +#include + +// Forward definition of implementation functions +extern "C" { +int _ca_forward_cuda(int N, int C, int H, int W, const float *t, const float *f, float *weight, cudaStream_t); +int _ca_backward_cuda(int N, int C, int H, int W, const float *dw, const float *t, const float *f, float *dt, float *df, cudaStream_t); + +int _ca_map_forward_cuda(int N, int C, int H, int W, const float *weight, const float *g, float *out, cudaStream_t); +int _ca_map_backward_cuda(int N, int C, int H, int W, const float *dout, const float *weight, const float *g, float *dw, float *dg, cudaStream_t); +} + +extern THCState *state; + +void get_sizes(const THCudaTensor *t, int *N, int *C, int *H, int *W){ + // Get sizes + *N = THCudaTensor_size(state, t, 0); + *C = THCudaTensor_size(state, t, 1); + *H = THCudaTensor_size(state, t, 2); + *W = THCudaTensor_size(state, t, 3); +} + +extern "C" int ca_forward_cuda(const THCudaTensor *t, const THCudaTensor *f, THCudaTensor *weight) { + cudaStream_t stream = THCState_getCurrentStream(state); + + int N, C, H, W; + get_sizes(t, &N, &C, &H, &W); + + // Get pointers + const float *t_data = THCudaTensor_data(state, t); + const float *f_data = THCudaTensor_data(state, f); + float *weight_data = THCudaTensor_data(state, weight); + + + return _ca_forward_cuda(N, C, H, W, t_data, f_data, weight_data, stream); +} + +extern "C" int ca_backward_cuda(const THCudaTensor *dw, const THCudaTensor *t, const THCudaTensor *f, THCudaTensor *dt, THCudaTensor *df) { + cudaStream_t stream = THCState_getCurrentStream(state); + + int N, C, H, W; + get_sizes(t, &N, &C, &H, &W); + + // Get pointers + const float *dw_data = THCudaTensor_data(state, dw); + const float *t_data = THCudaTensor_data(state, t); + const float *f_data = THCudaTensor_data(state, f); + float *dt_data = THCudaTensor_data(state, dt); + float *df_data = THCudaTensor_data(state, df); + + + return _ca_backward_cuda(N, C, H, W, dw_data, t_data, f_data, dt_data, df_data, stream); +} + + +extern "C" int ca_map_forward_cuda(const THCudaTensor *weight, const THCudaTensor *g, THCudaTensor *out) { + cudaStream_t stream = THCState_getCurrentStream(state); + + 
int N, C, H, W; + get_sizes(g, &N, &C, &H, &W); + + const float *weight_data = THCudaTensor_data(state, weight); + const float *g_data = THCudaTensor_data(state, g); + float *out_data = THCudaTensor_data(state, out); + + return _ca_map_forward_cuda(N, C, H, W, weight_data, g_data, out_data, stream); +} + + +extern "C" int ca_map_backward_cuda(const THCudaTensor *dout, const THCudaTensor *weight, const THCudaTensor *g, + THCudaTensor *dw, THCudaTensor *dg) { + cudaStream_t stream = THCState_getCurrentStream(state); + + int N, C, H, W; + get_sizes(dout, &N, &C, &H, &W); + + const float *dout_data = THCudaTensor_data(state, dout); + const float *weight_data = THCudaTensor_data(state, weight); + const float *g_data = THCudaTensor_data(state, g); + float *dw_data = THCudaTensor_data(state, dw); + float *dg_data = THCudaTensor_data(state, dg); + + return _ca_map_backward_cuda(N, C, H, W, dout_data, weight_data, g_data, dw_data, dg_data, stream); +} + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/lib_cffi.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/lib_cffi.h new file mode 100644 index 0000000..3f5af4e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/cc_attention/src/lib_cffi.h @@ -0,0 +1,7 @@ +int ca_forward_cuda(const THCudaTensor *t, const THCudaTensor *f, THCudaTensor *weight); + +int ca_backward_cuda(const THCudaTensor *dw, const THCudaTensor *t, const THCudaTensor *f, THCudaTensor *dt, THCudaTensor *df); + +int ca_map_forward_cuda(const THCudaTensor *weight, const THCudaTensor *g, THCudaTensor *out); +int ca_map_backward_cuda(const THCudaTensor *dout, const THCudaTensor *weight, const THCudaTensor *g, + THCudaTensor *dw, THCudaTensor *dg); diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/crf/dense_crf.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/crf/dense_crf.py new file mode 100644 index 0000000..8a79953 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/crf/dense_crf.py @@ -0,0 +1,25 @@ +import numpy as np +import pydensecrf.densecrf as dcrf + +def dense_crf(img, output_probs): + h = output_probs.shape[0] + w = output_probs.shape[1] + + output_probs = np.expand_dims(output_probs, 0) + output_probs = np.append(1 - output_probs, output_probs, axis=0) + + d = dcrf.DenseCRF2D(w, h, 2) + U = -np.log(output_probs) + U = U.reshape((2, -1)) + U = np.ascontiguousarray(U) + img = np.ascontiguousarray(img) + + d.setUnaryEnergy(U) + + d.addPairwiseGaussian(sxy=20, compat=3) + d.addPairwiseBilateral(sxy=30, srgb=20, rgbim=img, compat=10) + + Q = d.inference(5) + Q = np.argmax(np.array(Q), axis=0).reshape((h, w)) + + return Q diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/README.md b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/README.md new file mode 100644 index 0000000..314d312 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/README.md @@ -0,0 +1,42 @@ +# Deformable-ConvNets-V2 in PyTorch + +This repo is an implementation of [Deformable Convolution V2](https://arxiv.org/abs/1811.11168). +Ported from the original [MXNet implementation](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). + +Refer to [mmdetection branch](https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/mmdetection) in this repo for a complete framework. 
Results of DCNv2 based on the mmdetection code base can be found at [model zoo](https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/MODEL_ZOO.md#deformable-conv-v2). Many thanks to [mmdetection](https://github.com/open-mmlab/mmdetection) for their strong and clean framework. + +Operators in the master branch are compatible with PyTorch v0.4.1. For operators on PyTorch v1.0.0 (implemented by [Jiarui Xu](https://github.com/xvjiarui)), please refer to the [pytorch_1.0.0 branch](https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0). + +Thanks to [Kai Chen](https://github.com/hellock) and other contributors from mmlab, DCNv2 is now included in the official mmdetection repo based on the master branch of this one. It is now written with the new C++ extension APIs and supports both PyTorch 0.4.1 and 1.0, with some minor speed and memory optimization. Results and models can be found at https://github.com/open-mmlab/mmdetection/blob/master/MODEL_ZOO.md#deformable-convolution-v2. + +## Build + +``` +sh make.sh +``` + +See `test.py` and `test_modulated.py` for example usage. + +## Notice + +This repo provides the deformable conv layer which can reproduce the results in the Deformable ConvNets v2 paper. The major changes are as follows: + +* Better handling of sampling locations that fall outside of the image boundary. + + In the previous operator, if the sampling location is outside of the feature map boundary, its sampled value would be zero. Thus, the gradient with respect to the learnable offset would be zero. We found that such a scheme may deteriorate the performance in ImageNet classification (perhaps because the feature maps are of low resolution). For object detection on COCO, both the previous and the updated operators deliver the same results. + + In the new operator, if the sampling location is within one pixel outside of the feature map boundary, bilinear sampling would also be applied. The gradient with respect to the learnable offset can then be non-zero for such locations. This is implemented by padding zeros (by one row/column) outside of the boundaries of the feature maps, and performing bilinear sampling on the padded feature maps. + + +* The efficiency of processing multiple images in a mini-batch is considerably improved. + + Both the previous and the updated operators follow the computation pipeline below (illustrated by a 3x3 deformable convolution with input data of NxCxHxW and output data of NxC'xHxW): + + for i in range(N/S): + step 1 (slicing): slicing the input data at the batch dimension from i*S to (i+1)*S, input (NxCxHxW) -> sliced input (SxCxHxW) + step 2 (deformable im2col): sliced input (SxCxHxW)+sliced offset (Sx18xHxW) -> column (Cx9xSxHxW) + step 3 (MatMul&reshape): weight matrix (C'x 9C) * column (9CxSHW) -> temp sliced output (C'xSxHxW) -> sliced output (SxC'xHxW) + step 4 (Merge): merge sliced output to form the whole output data (NxC'xHxW) + end + + In the previous operator, S is fixed at 1. In the updated operator, S can be set by the *im2col_step* parameter, whose default value is min(N, 64). The updated operator is significantly faster than the existing one when the image batch size is large.
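+
+As a rough illustration of the slicing scheme above (it is *not* the CUDA operator shipped in this repo), the following PyTorch sketch runs the same slice / im2col / matmul / merge steps for a plain zero-offset convolution, with `F.unfold` standing in for the deformable im2col step; the helper name `sliced_conv_reference` is made up for this example.
+
+```python
+import torch
+import torch.nn.functional as F
+
+def sliced_conv_reference(x, weight, im2col_step=64, padding=1):
+    # Zero-offset stand-in for the deformable conv pipeline (stride 1, no dilation).
+    N, C, H, W = x.shape
+    C_out, _, kh, kw = weight.shape
+    H_out = H + 2 * padding - kh + 1
+    W_out = W + 2 * padding - kw + 1
+    S = min(im2col_step, N)
+    assert N % S == 0, 'im2col_step must divide the batch size'
+    outs = []
+    for i in range(N // S):
+        chunk = x[i * S:(i + 1) * S]                      # step 1: slice (S, C, H, W)
+        col = F.unfold(chunk, (kh, kw), padding=padding)  # step 2: im2col -> (S, C*kh*kw, H_out*W_out)
+        out = weight.view(C_out, -1) @ col                # step 3: matmul -> (S, C_out, H_out*W_out)
+        outs.append(out.view(S, C_out, H_out, W_out))     # ...and reshape
+    return torch.cat(outs, dim=0)                         # step 4: merge -> (N, C_out, H_out, W_out)
+
+# With zero offsets this reduces to an ordinary convolution.
+x = torch.randn(8, 16, 32, 32)
+w = torch.randn(32, 16, 3, 3)
+assert torch.allclose(sliced_conv_reference(x, w, im2col_step=4), F.conv2d(x, w, padding=1), atol=1e-4)
+```
+
+In the real operator, step 2 is the deformable im2col CUDA kernel, which additionally consumes the learned per-location offsets (and, in the modulated DCNv2 variant, the mask).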
diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/__init__.py new file mode 100644 index 0000000..20929f4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/__init__.py @@ -0,0 +1,4 @@ +from .modules.deform_conv import DeformConv +from .modules.modulated_dcn import DeformRoIPooling, ModulatedDeformRoIPoolingPack, ModulatedDeformConv, ModulatedDeformConvPack + +__all__ = ['DeformConv', 'DeformRoIPooling', 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 'ModulatedDeformConvPack'] \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/deform_conv/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/deform_conv/__init__.py new file mode 100644 index 0000000..4536b1a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/deform_conv/__init__.py @@ -0,0 +1,15 @@ + +from torch.utils.ffi import _wrap_function +from ._deform_conv import lib as _lib, ffi as _ffi + +__all__ = [] +def _import_symbols(locals): + for symbol in dir(_lib): + fn = getattr(_lib, symbol) + if callable(fn): + locals[symbol] = _wrap_function(fn, _ffi) + else: + locals[symbol] = fn + __all__.append(symbol) + +_import_symbols(locals()) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/modulated_dcn/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/modulated_dcn/__init__.py new file mode 100644 index 0000000..cab6e19 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/_ext/modulated_dcn/__init__.py @@ -0,0 +1,15 @@ + +from torch.utils.ffi import _wrap_function +from ._modulated_dcn import lib as _lib, ffi as _ffi + +__all__ = [] +def _import_symbols(locals): + for symbol in dir(_lib): + fn = getattr(_lib, symbol) + if callable(fn): + locals[symbol] = _wrap_function(fn, _ffi) + else: + locals[symbol] = fn + __all__.append(symbol) + +_import_symbols(locals()) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/build.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/build.py new file mode 100644 index 0000000..dcec2ff --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/build.py @@ -0,0 +1,37 @@ +import os +import torch +from torch.utils.ffi import create_extension + +this_file = os.path.dirname(__file__) + +sources = ['src/deform_conv.c'] +headers = ['src/deform_conv.h'] +defines = [] +with_cuda = False + +if torch.cuda.is_available(): + print('Including CUDA code.') + sources += ['src/deform_conv_cuda.c'] + headers += ['src/deform_conv_cuda.h'] + defines += [('WITH_CUDA', None)] + with_cuda = True + +this_file = os.path.dirname(os.path.realpath(__file__)) +print(this_file) +extra_objects = ['src/deform_conv_cuda_kernel.cu.so'] +extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] + +ffi = create_extension( + '_ext.deform_conv', + headers=headers, + sources=sources, + define_macros=defines, + relative_to=__file__, + with_cuda=with_cuda, + extra_objects=extra_objects, + extra_compile_args=['-std=c++11'] +) + +assert torch.cuda.is_available(), 'Please install CUDA for GPU support.' 
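+
+# build.py expects src/deform_conv_cuda_kernel.cu.so to exist already (it is
+# produced by the nvcc step in make.sh); ffi.build() then links it into the
+# '_ext.deform_conv' package imported by functions/deform_conv.py.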
+ffi.build() + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/build_modulated.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/build_modulated.py new file mode 100644 index 0000000..0549f09 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/build_modulated.py @@ -0,0 +1,43 @@ +import os +import torch +from torch.utils.ffi import create_extension + + +sources = ['src/modulated_dcn.c'] +headers = ['src/modulated_dcn.h'] +defines = [] +with_cuda = False + +extra_objects = [] +if torch.cuda.is_available(): + print('Including CUDA code.') + sources += ['src/modulated_dcn_cuda.c'] + headers += ['src/modulated_dcn_cuda.h'] + defines += [('WITH_CUDA', None)] + extra_objects += ['src/cuda/modulated_deform_im2col_cuda.cu.so'] + extra_objects += ['src/cuda/deform_psroi_pooling_cuda.cu.so'] + with_cuda = True +else: + raise ValueError('CUDA is not available') + +extra_compile_args = ['-fopenmp', '-std=c99'] + +this_file = os.path.dirname(os.path.realpath(__file__)) +print(this_file) +sources = [os.path.join(this_file, fname) for fname in sources] +headers = [os.path.join(this_file, fname) for fname in headers] +extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] + +ffi = create_extension( + '_ext.modulated_dcn', + headers=headers, + sources=sources, + define_macros=defines, + relative_to=__file__, + with_cuda=with_cuda, + extra_objects=extra_objects, + extra_compile_args=extra_compile_args +) + +if __name__ == '__main__': + ffi.build() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/__init__.py new file mode 100644 index 0000000..15139d9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/__init__.py @@ -0,0 +1,2 @@ +from .deform_conv import DeformConvFunction, deform_conv_function +from .modulated_dcn_func import DeformRoIPoolingFunction, ModulatedDeformConvFunction \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/deform_conv.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/deform_conv.py new file mode 100644 index 0000000..666adc3 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/deform_conv.py @@ -0,0 +1,116 @@ +import torch +from torch.autograd import Function +from torch.nn.modules.utils import _pair + +from lib.extensions.dcn._ext import deform_conv + + +def deform_conv_function(input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + deform_groups=1, + im2col_step=64): + + if input is not None and input.dim() != 4: + raise ValueError( + "Expected 4D tensor as input, got {}D tensor instead.".format( + input.dim())) + + f = DeformConvFunction( + _pair(stride), _pair(padding), _pair(dilation), deform_groups, im2col_step) + return f(input, offset, weight) + + +class DeformConvFunction(Function): + def __init__(self, stride, padding, dilation, deformable_groups=1, im2col_step=64): + super(DeformConvFunction, self).__init__() + self.stride = stride + self.padding = padding + self.dilation = dilation + self.deformable_groups = deformable_groups + self.im2col_step = im2col_step + + def forward(self, input, offset, weight): + self.save_for_backward(input, offset, weight) + + output = input.new(*self._output_size(input, weight)) + + self.bufs_ = [input.new(), input.new()] # columns, ones + + if not input.is_cuda: + raise 
NotImplementedError + else: + if isinstance(input, torch.autograd.Variable): + if not isinstance(input.data, torch.cuda.FloatTensor): + raise NotImplementedError + else: + if not isinstance(input, torch.cuda.FloatTensor): + raise NotImplementedError + + cur_im2col_step = min(self.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + deform_conv.deform_conv_forward_cuda( + input, weight, offset, output, self.bufs_[0], self.bufs_[1], + weight.size(3), weight.size(2), self.stride[1], self.stride[0], + self.padding[1], self.padding[0], self.dilation[1], + self.dilation[0], self.deformable_groups, cur_im2col_step) + return output + + def backward(self, grad_output): + input, offset, weight = self.saved_tensors + + grad_input = grad_offset = grad_weight = None + + if not grad_output.is_cuda: + raise NotImplementedError + else: + if isinstance(grad_output, torch.autograd.Variable): + if not isinstance(grad_output.data, torch.cuda.FloatTensor): + raise NotImplementedError + else: + if not isinstance(grad_output, torch.cuda.FloatTensor): + raise NotImplementedError + + cur_im2col_step = min(self.im2col_step, input.shape[0]) + assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' + + if self.needs_input_grad[0] or self.needs_input_grad[1]: + grad_input = input.new(*input.size()).zero_() + grad_offset = offset.new(*offset.size()).zero_() + deform_conv.deform_conv_backward_input_cuda( + input, offset, grad_output, grad_input, + grad_offset, weight, self.bufs_[0], weight.size(3), + weight.size(2), self.stride[1], self.stride[0], + self.padding[1], self.padding[0], self.dilation[1], + self.dilation[0], self.deformable_groups, cur_im2col_step) + + + if self.needs_input_grad[2]: + grad_weight = weight.new(*weight.size()).zero_() + deform_conv.deform_conv_backward_parameters_cuda( + input, offset, grad_output, + grad_weight, self.bufs_[0], self.bufs_[1], weight.size(3), + weight.size(2), self.stride[1], self.stride[0], + self.padding[1], self.padding[0], self.dilation[1], + self.dilation[0], self.deformable_groups, 1, cur_im2col_step) + + return grad_input, grad_offset, grad_weight + + def _output_size(self, input, weight): + channels = weight.size(0) + + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = self.padding[d] + kernel = self.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride = self.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + "convolution input is too small (output would be {})".format( + 'x'.join(map(str, output_size)))) + return output_size diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/modulated_dcn_func.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/modulated_dcn_func.py new file mode 100644 index 0000000..caa7afc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/functions/modulated_dcn_func.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import pdb +import torch +from torch.autograd import Function + +from lib.extensions.dcn._ext import modulated_dcn as _backend + + +class ModulatedDeformConvFunction(Function): + + def __init__(self, stride, padding, dilation=1, deformable_groups=1): + super(ModulatedDeformConvFunction, self).__init__() + 
self.stride = stride + self.padding = padding + self.dilation = dilation + self.deformable_groups = deformable_groups + + # if isinstance(self.padding, tuple): + # self.padding = self.padding[0] + # if isinstance(self.dilation, tuple): + # self.dilation = self.dilation[0] + + def forward(self, input, offset, mask, weight, bias): + if not input.is_cuda: + raise NotImplementedError + if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: + self.save_for_backward(input, offset, mask, weight, bias) + output = input.new(*self._infer_shape(input, weight)) + self._bufs = [input.new(), input.new()] + _backend.modulated_deform_conv_cuda_forward(input, weight, + bias, self._bufs[0], + offset, mask, + output, self._bufs[1], + weight.shape[2], weight.shape[3], + self.stride, self.stride, + self.padding, self.padding, + self.dilation, self.dilation, + self.deformable_groups) + return output + + def backward(self, grad_output): + if not grad_output.is_cuda: + raise NotImplementedError + input, offset, mask, weight, bias = self.saved_tensors + grad_input = input.new(*input.size()).zero_() + grad_offset = offset.new(*offset.size()).zero_() + grad_mask = mask.new(*mask.size()).zero_() + grad_weight = weight.new(*weight.size()).zero_() + grad_bias = bias.new(*bias.size()).zero_() + _backend.modulated_deform_conv_cuda_backward(input, weight, + bias, self._bufs[0], + offset, mask, + self._bufs[1], + grad_input, grad_weight, + grad_bias, grad_offset, + grad_mask, grad_output, + weight.shape[2], weight.shape[3], + self.stride, self.stride, + self.padding, self.padding, + self.dilation, self.dilation, + self.deformable_groups) + + return grad_input, grad_offset, grad_mask, grad_weight, grad_bias + + def _infer_shape(self, input, weight): + n = input.size(0) + channels_out = weight.size(0) + height, width = input.shape[2:4] + kernel_h, kernel_w = weight.shape[2:4] + + height_out = (height + 2 * self.padding - (self.dilation * (kernel_h - 1) + 1)) // self.stride + 1 + width_out = (width + 2 * self.padding - (self.dilation * (kernel_w - 1) + 1)) // self.stride + 1 + return (n, channels_out, height_out, width_out) + + +class DeformRoIPoolingFunction(Function): + + def __init__(self, + spatial_scale, + pooled_size, + output_dim, + no_trans, + group_size=1, + part_size=None, + sample_per_part=4, + trans_std=.0): + super(DeformRoIPoolingFunction, self).__init__() + self.spatial_scale = spatial_scale + self.pooled_size = pooled_size + self.output_dim = output_dim + self.no_trans = no_trans + self.group_size = group_size + self.part_size = pooled_size if part_size is None else part_size + self.sample_per_part = sample_per_part + self.trans_std = trans_std + + assert self.trans_std >= 0.0 and self.trans_std <= 1.0 + + def forward(self, data, rois, offset): + if not data.is_cuda: + raise NotImplementedError + + output = data.new(*self._infer_shape(data, rois)) + output_count = data.new(*self._infer_shape(data, rois)) + _backend.deform_psroi_pooling_cuda_forward(data, rois, offset, + output, output_count, + self.no_trans, self.spatial_scale, + self.output_dim, self.group_size, + self.pooled_size, self.part_size, + self.sample_per_part, self.trans_std) + + # if data.requires_grad or rois.requires_grad or offset.requires_grad: + # self.save_for_backward(data, rois, offset, output_count) + self.data = data + self.rois = rois + self.offset = offset + self.output_count = output_count + + return output + + def backward(self, grad_output): + if not grad_output.is_cuda: + raise 
NotImplementedError + + # data, rois, offset, output_count = self.saved_tensors + data = self.data + rois = self.rois + offset = self.offset + output_count = self.output_count + grad_input = data.new(*data.size()).zero_() + grad_offset = offset.new(*offset.size()).zero_() + + _backend.deform_psroi_pooling_cuda_backward(grad_output, + data, + rois, + offset, + output_count, + grad_input, + grad_offset, + self.no_trans, + self.spatial_scale, + self.output_dim, + self.group_size, + self.pooled_size, + self.part_size, + self.sample_per_part, + self.trans_std) + return grad_input, torch.zeros(rois.shape).cuda(), grad_offset + + def _infer_shape(self, data, rois): + # _, c, h, w = data.shape[:4] + c = data.shape[1] + n = rois.shape[0] + return (n, self.output_dim, self.pooled_size, self.pooled_size) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/make.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/make.sh new file mode 100644 index 0000000..bed4619 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/make.sh @@ -0,0 +1,16 @@ +PYTHON="/root/miniconda3/bin/python" + +cd src +/usr/local/cuda-8.0/bin/nvcc -c -o deform_conv_cuda_kernel.cu.so deform_conv_cuda_kernel.cu -x cu -Xcompiler -fPIC -std=c++11 + +cd cuda + +# compile modulated deform conv +/usr/local/cuda-8.0/bin/nvcc -c -o modulated_deform_im2col_cuda.cu.so modulated_deform_im2col_cuda.cu -x cu -Xcompiler -fPIC + +# compile deform-psroi-pooling +/usr/local/cuda-8.0/bin/nvcc -c -o deform_psroi_pooling_cuda.cu.so deform_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC + +cd ../.. +CC=g++ ${PYTHON} build.py +${PYTHON} build_modulated.py diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/make_p100.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/make_p100.sh new file mode 100644 index 0000000..2750bf9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/make_p100.sh @@ -0,0 +1,16 @@ +PYTHON="/data/anaconda/envs/py35/bin/python" + +cd src +/usr/bin/nvcc -c -o deform_conv_cuda_kernel.cu.so deform_conv_cuda_kernel.cu -x cu -Xcompiler -fPIC -std=c++11 + +cd cuda + +# compile modulated deform conv +/usr/bin/nvcc -c -o modulated_deform_im2col_cuda.cu.so modulated_deform_im2col_cuda.cu -x cu -Xcompiler -fPIC + +# compile deform-psroi-pooling +/usr/bin/nvcc -c -o deform_psroi_pooling_cuda.cu.so deform_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC + +cd ../.. 
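+
+# The nvcc calls above only emit relocatable .cu.so objects; the two scripts
+# below wrap them into the cffi extensions '_ext.deform_conv' and
+# '_ext.modulated_dcn' used by the Python modules.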
+CC=g++ ${PYTHON} build.py +${PYTHON} build_modulated.py diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/__init__.py new file mode 100644 index 0000000..94ca8c4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/__init__.py @@ -0,0 +1,2 @@ +from .deform_conv import DeformConv +from .modulated_dcn import DeformRoIPooling, ModulatedDeformConv, ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/deform_conv.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/deform_conv.py new file mode 100644 index 0000000..efbc6e9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/deform_conv.py @@ -0,0 +1,43 @@ +import math + +import torch +import torch.nn as nn +from torch.nn.modules.module import Module +from torch.nn.modules.utils import _pair +from lib.extensions.dcn.functions import deform_conv_function + + +class DeformConv(Module): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + num_deformable_groups=1): + super(DeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.num_deformable_groups = num_deformable_groups + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels, *self.kernel_size)) + + self.reset_parameters() + + def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. 
/ math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + + def forward(self, input, offset): + return deform_conv_function(input, offset, self.weight, self.stride, + self.padding, self.dilation, + self.num_deformable_groups) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/modulated_dcn.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/modulated_dcn.py new file mode 100644 index 0000000..d8a106b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/modules/modulated_dcn.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import torch +import math +from torch import nn +from torch.nn.modules.utils import _pair + +from lib.extensions.dcn.functions.modulated_dcn_func import ModulatedDeformConvFunction +from lib.extensions.dcn.functions.modulated_dcn_func import DeformRoIPoolingFunction + +class ModulatedDeformConv(nn.Module): + + def __init__(self, in_channels, out_channels, + kernel_size, stride, padding, dilation=1, deformable_groups=1, no_bias=True): + super(ModulatedDeformConv, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = stride + self.padding = padding + self.dilation = dilation + self.deformable_groups = deformable_groups + self.no_bias = no_bias + + self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size)) + self.bias = nn.Parameter(torch.zeros(out_channels)) + self.reset_parameters() + if self.no_bias: + self.bias.requires_grad = False + + def reset_parameters(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + self.bias.data.zero_() + + def forward(self, input, offset, mask): + func = ModulatedDeformConvFunction(self.stride, self.padding, self.dilation, self.deformable_groups) + return func(input, offset, mask, self.weight, self.bias) + + +class ModulatedDeformConvPack(ModulatedDeformConv): + + def __init__(self, in_channels, out_channels, + kernel_size, stride, padding, + dilation=1, deformable_groups=1, no_bias=False): + super(ModulatedDeformConvPack, self).__init__(in_channels, out_channels, + kernel_size, stride, padding, dilation, deformable_groups, no_bias) + + self.conv_offset_mask = nn.Conv2d(self.in_channels, + self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=(self.stride, self.stride), + padding=(self.padding, self.padding), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset_mask.weight.data.zero_() + self.conv_offset_mask.bias.data.zero_() + + def forward(self, input): + out = self.conv_offset_mask(input) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + func = ModulatedDeformConvFunction(self.stride, self.padding, self.dilation, self.deformable_groups) + return func(input, offset, mask, self.weight, self.bias) + + +class DeformRoIPooling(nn.Module): + + def __init__(self, + spatial_scale, + pooled_size, + output_dim, + no_trans, + group_size=1, + part_size=None, + sample_per_part=4, + trans_std=.0): + super(DeformRoIPooling, self).__init__() + self.spatial_scale = spatial_scale + self.pooled_size = pooled_size + self.output_dim = output_dim + self.no_trans = no_trans + self.group_size = group_size + self.part_size = pooled_size if 
part_size is None else part_size + self.sample_per_part = sample_per_part + self.trans_std = trans_std + self.func = DeformRoIPoolingFunction(self.spatial_scale, + self.pooled_size, + self.output_dim, + self.no_trans, + self.group_size, + self.part_size, + self.sample_per_part, + self.trans_std) + + def forward(self, data, rois, offset): + + if self.no_trans: + offset = data.new() + return self.func(data, rois, offset) + +class ModulatedDeformRoIPoolingPack(DeformRoIPooling): + + def __init__(self, + spatial_scale, + pooled_size, + output_dim, + no_trans, + group_size=1, + part_size=None, + sample_per_part=4, + trans_std=.0, + deform_fc_dim=1024): + super(ModulatedDeformRoIPoolingPack, self).__init__(spatial_scale, + pooled_size, + output_dim, + no_trans, + group_size, + part_size, + sample_per_part, + trans_std) + + self.deform_fc_dim = deform_fc_dim + + if not no_trans: + self.func_offset = DeformRoIPoolingFunction(self.spatial_scale, + self.pooled_size, + self.output_dim, + True, + self.group_size, + self.part_size, + self.sample_per_part, + self.trans_std) + self.offset_fc = nn.Sequential( + nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_dim, self.deform_fc_dim), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 2) + ) + self.offset_fc[4].weight.data.zero_() + self.offset_fc[4].bias.data.zero_() + self.mask_fc = nn.Sequential( + nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 1), + nn.Sigmoid() + ) + self.mask_fc[2].weight.data.zero_() + self.mask_fc[2].bias.data.zero_() + + def forward(self, data, rois): + if self.no_trans: + offset = data.new() + else: + n = rois.shape[0] + offset = data.new() + x = self.func_offset(data, rois, offset) + offset = self.offset_fc(x.view(n, -1)) + offset = offset.view(n, 2, self.pooled_size, self.pooled_size) + mask = self.mask_fc(x.view(n, -1)) + mask = mask.view(n, 1, self.pooled_size, self.pooled_size) + feat = self.func(data, rois, offset) * mask + return feat + return self.func(data, rois, offset) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/deform_psroi_pooling_cuda.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/deform_psroi_pooling_cuda.cu new file mode 100644 index 0000000..67996ba --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/deform_psroi_pooling_cuda.cu @@ -0,0 +1,353 @@ +/*! 
+ * Copyright (c) 2017 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file deformable_psroi_pooling.cu + * \brief + * \author Yi Li, Guodong Zhang, Jifeng Dai +*/ +/***************** Adapted by Charles Shang *********************/ +#include "deform_psroi_pooling_cuda.h" +#include +#include +#include + +#define CUDA_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ + i < (n); \ + i += blockDim.x * gridDim.x) + +const int CUDA_NUM_THREADS = 1024; +inline int GET_BLOCKS(const int N) +{ + return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; +} + +__device__ float bilinear_interp( + const float *data, + const float x, + const float y, + const int width, + const int height) +{ + int x1 = floor(x); + int x2 = ceil(x); + int y1 = floor(y); + int y2 = ceil(y); + float dist_x = (float)(x - x1); + float dist_y = (float)(y - y1); + float value11 = data[y1 * width + x1]; + float value12 = data[y2 * width + x1]; + float value21 = data[y1 * width + x2]; + float value22 = data[y2 * width + x2]; + float value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22; + return value; +} + +__global__ void DeformablePSROIPoolForwardKernel( + const int count, + const float *bottom_data, + const float spatial_scale, + const int channels, + const int height, const int width, + const int pooled_height, const int pooled_width, + const float *bottom_rois, const float *bottom_trans, + const int no_trans, + const float trans_std, + const int sample_per_part, + const int output_dim, + const int group_size, + const int part_size, + const int num_classes, + const int channels_each_class, + float *top_data, + float *top_count) +{ + CUDA_KERNEL_LOOP(index, count) + { + // The output is in order (n, ctop, ph, pw) + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int ctop = (index / pooled_width / pooled_height) % output_dim; + int n = index / pooled_width / pooled_height / output_dim; + + // [start, end) interval for spatial sampling + const float *offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + float roi_start_w = (float)(round(offset_bottom_rois[1])) * spatial_scale - 0.5; + float roi_start_h = (float)(round(offset_bottom_rois[2])) * spatial_scale - 0.5; + float roi_end_w = (float)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5; + float roi_end_h = (float)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5; + + // Force too small ROIs to be 1x1 + float roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0 + float roi_height = max(roi_end_h - roi_start_h, 0.1); + + // Compute w and h at bottom + float bin_size_h = roi_height / (float)(pooled_height); + float bin_size_w = roi_width / (float)(pooled_width); + + float sub_bin_size_h = bin_size_h / (float)(sample_per_part); + float sub_bin_size_w = bin_size_w / (float)(sample_per_part); + + int part_h = floor((float)(ph) / pooled_height * part_size); + int part_w = floor((float)(pw) / pooled_width * part_size); + int class_id = ctop / channels_each_class; + float trans_x = no_trans ? (float)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std; + float trans_y = no_trans ? 
(float)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std; + + float wstart = (float)(pw)*bin_size_w + roi_start_w; + wstart += trans_x * roi_width; + float hstart = (float)(ph)*bin_size_h + roi_start_h; + hstart += trans_y * roi_height; + + float sum = 0; + int count = 0; + int gw = floor((float)(pw)*group_size / pooled_width); + int gh = floor((float)(ph)*group_size / pooled_height); + gw = min(max(gw, 0), group_size - 1); + gh = min(max(gh, 0), group_size - 1); + + const float *offset_bottom_data = bottom_data + (roi_batch_ind * channels) * height * width; + for (int ih = 0; ih < sample_per_part; ih++) + { + for (int iw = 0; iw < sample_per_part; iw++) + { + float w = wstart + iw * sub_bin_size_w; + float h = hstart + ih * sub_bin_size_h; + // bilinear interpolation + if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5) + { + continue; + } + w = min(max(w, 0.), width - 1.); + h = min(max(h, 0.), height - 1.); + int c = (ctop * group_size + gh) * group_size + gw; + float val = bilinear_interp(offset_bottom_data + c * height * width, w, h, width, height); + sum += val; + count++; + } + } + top_data[index] = count == 0 ? (float)(0) : sum / count; + top_count[index] = count; + } +} + +__global__ void DeformablePSROIPoolBackwardAccKernel( + const int count, + const float *top_diff, + const float *top_count, + const int num_rois, + const float spatial_scale, + const int channels, + const int height, const int width, + const int pooled_height, const int pooled_width, + const int output_dim, + float *bottom_data_diff, float *bottom_trans_diff, + const float *bottom_data, + const float *bottom_rois, + const float *bottom_trans, + const int no_trans, + const float trans_std, + const int sample_per_part, + const int group_size, + const int part_size, + const int num_classes, + const int channels_each_class) +{ + CUDA_KERNEL_LOOP(index, count) + { + // The output is in order (n, ctop, ph, pw) + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int ctop = (index / pooled_width / pooled_height) % output_dim; + int n = index / pooled_width / pooled_height / output_dim; + + // [start, end) interval for spatial sampling + const float *offset_bottom_rois = bottom_rois + n * 5; + int roi_batch_ind = offset_bottom_rois[0]; + float roi_start_w = (float)(round(offset_bottom_rois[1])) * spatial_scale - 0.5; + float roi_start_h = (float)(round(offset_bottom_rois[2])) * spatial_scale - 0.5; + float roi_end_w = (float)(round(offset_bottom_rois[3]) + 1.) * spatial_scale - 0.5; + float roi_end_h = (float)(round(offset_bottom_rois[4]) + 1.) * spatial_scale - 0.5; + + // Force too small ROIs to be 1x1 + float roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0 + float roi_height = max(roi_end_h - roi_start_h, 0.1); + + // Compute w and h at bottom + float bin_size_h = roi_height / (float)(pooled_height); + float bin_size_w = roi_width / (float)(pooled_width); + + float sub_bin_size_h = bin_size_h / (float)(sample_per_part); + float sub_bin_size_w = bin_size_w / (float)(sample_per_part); + + int part_h = floor((float)(ph) / pooled_height * part_size); + int part_w = floor((float)(pw) / pooled_width * part_size); + int class_id = ctop / channels_each_class; + float trans_x = no_trans ? (float)(0) : bottom_trans[(((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w] * trans_std; + float trans_y = no_trans ? 
(float)(0) : bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w] * trans_std; + + float wstart = (float)(pw)*bin_size_w + roi_start_w; + wstart += trans_x * roi_width; + float hstart = (float)(ph)*bin_size_h + roi_start_h; + hstart += trans_y * roi_height; + + if (top_count[index] <= 0) + { + continue; + } + float diff_val = top_diff[index] / top_count[index]; + const float *offset_bottom_data = bottom_data + roi_batch_ind * channels * height * width; + float *offset_bottom_data_diff = bottom_data_diff + roi_batch_ind * channels * height * width; + int gw = floor((float)(pw)*group_size / pooled_width); + int gh = floor((float)(ph)*group_size / pooled_height); + gw = min(max(gw, 0), group_size - 1); + gh = min(max(gh, 0), group_size - 1); + + for (int ih = 0; ih < sample_per_part; ih++) + { + for (int iw = 0; iw < sample_per_part; iw++) + { + float w = wstart + iw * sub_bin_size_w; + float h = hstart + ih * sub_bin_size_h; + // bilinear interpolation + if (w < -0.5 || w > width - 0.5 || h < -0.5 || h > height - 0.5) + { + continue; + } + w = min(max(w, 0.), width - 1.); + h = min(max(h, 0.), height - 1.); + int c = (ctop * group_size + gh) * group_size + gw; + // backward on feature + int x0 = floor(w); + int x1 = ceil(w); + int y0 = floor(h); + int y1 = ceil(h); + float dist_x = w - x0, dist_y = h - y0; + float q00 = (1 - dist_x) * (1 - dist_y); + float q01 = (1 - dist_x) * dist_y; + float q10 = dist_x * (1 - dist_y); + float q11 = dist_x * dist_y; + int bottom_index_base = c * height * width; + atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x0, q00 * diff_val); + atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x0, q01 * diff_val); + atomicAdd(offset_bottom_data_diff + bottom_index_base + y0 * width + x1, q10 * diff_val); + atomicAdd(offset_bottom_data_diff + bottom_index_base + y1 * width + x1, q11 * diff_val); + + if (no_trans) + { + continue; + } + float U00 = offset_bottom_data[bottom_index_base + y0 * width + x0]; + float U01 = offset_bottom_data[bottom_index_base + y1 * width + x0]; + float U10 = offset_bottom_data[bottom_index_base + y0 * width + x1]; + float U11 = offset_bottom_data[bottom_index_base + y1 * width + x1]; + float diff_x = (U11 * dist_y + U10 * (1 - dist_y) - U01 * dist_y - U00 * (1 - dist_y)) * trans_std * diff_val; + diff_x *= roi_width; + float diff_y = (U11 * dist_x + U01 * (1 - dist_x) - U10 * dist_x - U00 * (1 - dist_x)) * trans_std * diff_val; + diff_y *= roi_height; + + atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2) * part_size + part_h) * part_size + part_w, diff_x); + atomicAdd(bottom_trans_diff + (((n * num_classes + class_id) * 2 + 1) * part_size + part_h) * part_size + part_w, diff_y); + } + } + } +} + +void DeformablePSROIPoolForward(cudaStream_t stream, + const float *data, + const float *bbox, + const float *trans, + float *out, + float *top_count, + const int batch, + const int channels, + const int height, + const int width, + const int num_bbox, + const int channels_trans, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std) +{ + + const float *bottom_data = data; + const float *bottom_rois = bbox; + const float *bottom_trans = no_trans ? 
NULL : trans; + float *top_data = out; + float *top_count_data = top_count; + + const int pooled_height = pooled_size; + const int pooled_width = pooled_size; + const int count = num_bbox * output_dim * pooled_height * pooled_width; + const int num_classes = no_trans ? 1 : channels_trans / 2; + const int channels_each_class = no_trans ? output_dim : output_dim / num_classes; + + DeformablePSROIPoolForwardKernel<<>>( + count, bottom_data, spatial_scale, channels, height, width, pooled_height, pooled_width, + bottom_rois, bottom_trans, no_trans, trans_std, sample_per_part, output_dim, + group_size, part_size, num_classes, channels_each_class, top_data, top_count_data); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err)); + } +} + +void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, + const float *out_grad, + const float *data, + const float *bbox, + const float *trans, + const float *top_count, + float *in_grad, + float *trans_grad, + const int batch, + const int channels, + const int height, + const int width, + const int num_bbox, + const int channels_trans, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std) +{ + // LOG(INFO) << "DeformablePSROIPoolBackward"; + const float *top_diff = out_grad; + const float *bottom_data = data; + const float *bottom_rois = bbox; + const float *bottom_trans = no_trans ? NULL : trans; + float *bottom_data_diff = in_grad; + float *bottom_trans_diff = no_trans ? NULL : trans_grad; + const float *top_count_data = top_count; + + const int num_rois = num_bbox; + const int pooled_height = pooled_size; + const int pooled_width = pooled_size; + const int count = num_bbox * output_dim * pooled_height * pooled_width; + const int num_classes = no_trans ? 1 : channels_trans / 2; + const int channels_each_class = no_trans ? output_dim : output_dim / num_classes; + + DeformablePSROIPoolBackwardAccKernel<<>>( + count, top_diff, top_count_data, num_rois, spatial_scale, channels, height, width, + pooled_height, pooled_width, output_dim, bottom_data_diff, bottom_trans_diff, + bottom_data, bottom_rois, bottom_trans, no_trans, trans_std, sample_per_part, + group_size, part_size, num_classes, channels_each_class); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err)); + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/deform_psroi_pooling_cuda.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/deform_psroi_pooling_cuda.h new file mode 100644 index 0000000..5fa2c6c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/deform_psroi_pooling_cuda.h @@ -0,0 +1,66 @@ +/*! 
+ * Copyright (c) 2017 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file deformable_psroi_pooling.cu + * \brief + * \author Yi Li, Guodong Zhang, Jifeng Dai +*/ +/***************** Adapted by Charles Shang *********************/ + +#ifndef DCN_V2_PSROI_POOLING_CUDA +#define DCN_V2_PSROI_POOLING_CUDA + +#ifdef __cplusplus +extern "C" +{ +#endif + + void DeformablePSROIPoolForward(cudaStream_t stream, + const float *data, + const float *bbox, + const float *trans, + float *out, + float *top_count, + const int batch, + const int channels, + const int height, + const int width, + const int num_bbox, + const int channels_trans, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std); + + void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, + const float *out_grad, + const float *data, + const float *bbox, + const float *trans, + const float *top_count, + float *in_grad, + float *trans_grad, + const int batch, + const int channels, + const int height, + const int width, + const int num_bbox, + const int channels_trans, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/modulated_deform_im2col_cuda.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/modulated_deform_im2col_cuda.cu new file mode 100644 index 0000000..1210bc5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/modulated_deform_im2col_cuda.cu @@ -0,0 +1,387 @@ +#include "modulated_deform_im2col_cuda.h" +#include +#include +#include + +#define CUDA_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ + i < (n); \ + i += blockDim.x * gridDim.x) + +const int CUDA_NUM_THREADS = 1024; +inline int GET_BLOCKS(const int N) +{ + return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; +} + + +__device__ float dmcn_im2col_bilinear(const float *bottom_data, const int data_width, + const int height, const int width, float h, float w) +{ + int h_low = floor(h); + int w_low = floor(w); + int h_high = h_low + 1; + int w_high = w_low + 1; + + float lh = h - h_low; + float lw = w - w_low; + float hh = 1 - lh, hw = 1 - lw; + + float v1 = 0; + if (h_low >= 0 && w_low >= 0) + v1 = bottom_data[h_low * data_width + w_low]; + float v2 = 0; + if (h_low >= 0 && w_high <= width - 1) + v2 = bottom_data[h_low * data_width + w_high]; + float v3 = 0; + if (h_high <= height - 1 && w_low >= 0) + v3 = bottom_data[h_high * data_width + w_low]; + float v4 = 0; + if (h_high <= height - 1 && w_high <= width - 1) + v4 = bottom_data[h_high * data_width + w_high]; + + float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; + + float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + return val; +} + +__device__ float dmcn_get_gradient_weight(float argmax_h, float argmax_w, + const int h, const int w, const int height, const int width) +{ + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) + { + //empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + float weight = 
0; + if (h == argmax_h_low && w == argmax_w_low) + weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); + if (h == argmax_h_low && w == argmax_w_high) + weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); + if (h == argmax_h_high && w == argmax_w_low) + weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); + if (h == argmax_h_high && w == argmax_w_high) + weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); + return weight; +} + +__device__ float dmcn_get_coordinate_weight(float argmax_h, float argmax_w, + const int height, const int width, const float *im_data, + const int data_width, const int bp_dir) +{ + if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) + { + //empty + return 0; + } + + int argmax_h_low = floor(argmax_h); + int argmax_w_low = floor(argmax_w); + int argmax_h_high = argmax_h_low + 1; + int argmax_w_high = argmax_w_low + 1; + + float weight = 0; + + if (bp_dir == 0) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + else if (bp_dir == 1) + { + if (argmax_h_low >= 0 && argmax_w_low >= 0) + weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low]; + if (argmax_h_low >= 0 && argmax_w_high <= width - 1) + weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high]; + if (argmax_h_high <= height - 1 && argmax_w_low >= 0) + weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low]; + if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) + weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high]; + } + + return weight; +} + +__global__ void modulated_deformable_im2col_gpu_kernel(const int n, + const float *data_im, const float *data_offset, const float *data_mask, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int num_channels, const int deformable_group, + const int height_col, const int width_col, + float *data_col) +{ + CUDA_KERNEL_LOOP(index, n) + { + // index index of output matrix + const int w_col = index % width_col; + const int h_col = (index / width_col) % height_col; + const int b_col = (index / width_col / height_col) % batch_size; + const int c_im = (index / width_col / height_col) / batch_size; + const int c_col = c_im * kernel_h * kernel_w; + + // compute deformable group index + const int deformable_group_index = c_im / channel_per_deformable_group; + + const int h_in = h_col * stride_h - pad_h; + const int w_in = w_col * stride_w - pad_w; + + float *data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; + //const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in; + const float *data_im_ptr = data_im + (b_col * num_channels + c_im) 
* height * width;
+    const float *data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+    const float *data_mask_ptr = data_mask + (b_col * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+
+    for (int i = 0; i < kernel_h; ++i)
+    {
+      for (int j = 0; j < kernel_w; ++j)
+      {
+        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+        const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
+        const float offset_h = data_offset_ptr[data_offset_h_ptr];
+        const float offset_w = data_offset_ptr[data_offset_w_ptr];
+        const float mask = data_mask_ptr[data_mask_hw_ptr];
+        float val = static_cast<float>(0);
+        const float h_im = h_in + i * dilation_h + offset_h;
+        const float w_im = w_in + j * dilation_w + offset_w;
+        //if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
+        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+        {
+          //const float map_h = i * dilation_h + offset_h;
+          //const float map_w = j * dilation_w + offset_w;
+          //const int cur_height = height - h_in;
+          //const int cur_width = width - w_in;
+          //val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+          val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+        }
+        *data_col_ptr = val * mask;
+        data_col_ptr += batch_size * height_col * width_col;
+        //data_col_ptr += height_col * width_col;
+      }
+    }
+  }
+}
+
+__global__ void modulated_deformable_col2im_gpu_kernel(const int n,
+    const float *data_col, const float *data_offset, const float *data_mask,
+    const int channels, const int height, const int width,
+    const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int channel_per_deformable_group,
+    const int batch_size, const int deformable_group,
+    const int height_col, const int width_col,
+    float *grad_im)
+{
+  CUDA_KERNEL_LOOP(index, n)
+  {
+    const int j = (index / width_col / height_col / batch_size) % kernel_w;
+    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+    // compute the start and end of the output
+
+    const int deformable_group_index = c / channel_per_deformable_group;
+
+    int w_out = index % width_col;
+    int h_out = (index / width_col) % height_col;
+    int b = (index / width_col / height_col) % batch_size;
+    int w_in = w_out * stride_w - pad_w;
+    int h_in = h_out * stride_h - pad_h;
+
+    const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+    const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col;
+    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+    const int data_mask_hw_ptr = ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
+    const float offset_h = data_offset_ptr[data_offset_h_ptr];
+    const float offset_w = data_offset_ptr[data_offset_w_ptr];
+    const float mask = data_mask_ptr[data_mask_hw_ptr];
+
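+    // What follows scatters this column entry's gradient back into the input image:
+    // the fractional sampling position (cur_inv_h_data, cur_inv_w_data) is recomputed
+    // from the learned offset, and the masked top gradient is distributed to the
+    // integer pixels less than one pixel away (the bilinear neighbours), each weighted
+    // by dmcn_get_gradient_weight and accumulated with atomicAdd because several
+    // sampling points can land on the same input pixel.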
const float cur_inv_h_data = h_in + i * dilation_h + offset_h; + const float cur_inv_w_data = w_in + j * dilation_w + offset_w; + + const float cur_top_grad = data_col[index] * mask; + const int cur_h = (int)cur_inv_h_data; + const int cur_w = (int)cur_inv_w_data; + for (int dy = -2; dy <= 2; dy++) + { + for (int dx = -2; dx <= 2; dx++) + { + if (cur_h + dy >= 0 && cur_h + dy < height && + cur_w + dx >= 0 && cur_w + dx < width && + abs(cur_inv_h_data - (cur_h + dy)) < 1 && + abs(cur_inv_w_data - (cur_w + dx)) < 1) + { + int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; + float weight = dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width); + atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); + } + } + } + } +} + +__global__ void modulated_deformable_col2im_coord_gpu_kernel(const int n, + const float *data_col, const float *data_im, + const float *data_offset, const float *data_mask, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int offset_channels, const int deformable_group, + const int height_col, const int width_col, + float *grad_offset, float *grad_mask) +{ + CUDA_KERNEL_LOOP(index, n) + { + float val = 0, mval = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const float *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * batch_size * width_col * height_col; + const float *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * channel_per_deformable_group / kernel_h / kernel_w * height * width; + const float *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col; + const float *data_mask_ptr = data_mask + (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) + { + const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); + const int data_mask_hw_ptr = (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); + const float offset_h = data_offset_ptr[data_offset_h_ptr]; + const float offset_w = data_offset_ptr[data_offset_w_ptr]; + const float mask = data_mask_ptr[data_mask_hw_ptr]; + float inv_h = h_in + i 
* dilation_h + offset_h;
+      float inv_w = w_in + j * dilation_w + offset_w;
+      if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+      {
+        inv_h = inv_w = -2;
+      }
+      else
+      {
+        mval += data_col_ptr[col_pos] * dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width, height, width, inv_h, inv_w);
+      }
+      const float weight = dmcn_get_coordinate_weight(
+          inv_h, inv_w,
+          height, width, data_im_ptr + cnt * height * width, width, bp_dir);
+      val += weight * data_col_ptr[col_pos] * mask;
+      cnt += 1;
+    }
+    // KERNEL_ASSIGN(grad_offset[index], offset_req, val);
+    grad_offset[index] = val;
+    if (offset_c % 2 == 0)
+      // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w], mask_req, mval);
+      grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * height_col + h) * width_col + w] = mval;
+  }
+}
+
+void modulated_deformable_im2col_cuda(cudaStream_t stream,
+  const float* data_im, const float* data_offset, const float* data_mask,
+  const int batch_size, const int channels, const int height_im, const int width_im,
+  const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+  const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+  const int dilation_h, const int dilation_w,
+  const int deformable_group, float* data_col) {
+  // num_axes should be smaller than block size
+  const int channel_per_deformable_group = channels / deformable_group;
+  const int num_kernels = channels * batch_size * height_col * width_col;
+  modulated_deformable_im2col_gpu_kernel
+      <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
+      num_kernels, data_im, data_offset, data_mask, height_im, width_im, kernel_h, kernel_w,
+      pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
+      batch_size, channels, deformable_group, height_col, width_col, data_col);
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in modulated_deformable_im2col_cuda: %s\n", cudaGetErrorString(err));
+  }
+
+}
+
+void modulated_deformable_col2im_cuda(cudaStream_t stream,
+  const float* data_col, const float* data_offset, const float* data_mask,
+  const int batch_size, const int channels, const int height_im, const int width_im,
+  const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+  const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+  const int dilation_h, const int dilation_w,
+  const int deformable_group, float* grad_im){
+
+  const int channel_per_deformable_group = channels / deformable_group;
+  const int num_kernels = channels * kernel_h * kernel_w * batch_size * height_col * width_col;
+  modulated_deformable_col2im_gpu_kernel
+      <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
+      num_kernels, data_col, data_offset, data_mask, channels, height_im, width_im,
+      kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+      dilation_h, dilation_w, channel_per_deformable_group,
+      batch_size, deformable_group, height_col, width_col, grad_im);
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in modulated_deformable_col2im_cuda: %s\n", cudaGetErrorString(err));
+  }
+
+}
+
+void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
+  const float* data_col, const float* data_im, const float* data_offset, const float* data_mask,
+  const int batch_size, const int channels, const int height_im, const int width_im,
+  const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+  const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+  const int dilation_h, const int dilation_w,
+  const int deformable_group,
+  float* grad_offset, float* grad_mask) {
+  const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * kernel_w * deformable_group;
+  const int channel_per_deformable_group = channels * kernel_h * kernel_w / deformable_group;
+  modulated_deformable_col2im_coord_gpu_kernel
+      <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
+      num_kernels, data_col, data_im, data_offset, data_mask, channels, height_im, width_im,
+      kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+      dilation_h, dilation_w, channel_per_deformable_group,
+      batch_size, 2 * kernel_h * kernel_w * deformable_group, deformable_group, height_col, width_col,
+      grad_offset, grad_mask);
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess)
+  {
+    printf("error in modulated_deformable_col2im_coord_cuda: %s\n", cudaGetErrorString(err));
+  }
+}
\ No newline at end of file
diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/modulated_deform_im2col_cuda.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/modulated_deform_im2col_cuda.h
new file mode 100644
index 0000000..3457e96
--- /dev/null
+++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/cuda/modulated_deform_im2col_cuda.h
@@ -0,0 +1,100 @@
+/*!
+ ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
+ *
+ * COPYRIGHT
+ *
+ * All contributions by the University of California:
+ * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
+ * All rights reserved.
+ *
+ * All other contributions:
+ * Copyright (c) 2014-2017, the respective contributors
+ * All rights reserved.
+ *
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ *
+ * LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * CONTRIBUTION AGREEMENT
+ *
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ ***************** END Caffe Copyright Notice and Disclaimer ********************
+ *
+ * Copyright (c) 2018 Microsoft
+ * Licensed under The MIT License [see LICENSE for details]
+ * \file modulated_deformable_im2col.h
+ * \brief Function definitions of converting an image to
+ * column matrix based on kernel, padding, dilation, and offset.
+ * These functions are mainly used in deformable convolution operators.
+ * \ref: https://arxiv.org/abs/1811.11168
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
+ */
+
+/***************** Adapted by Charles Shang *********************/
+
+#ifndef DCN_V2_IM2COL_CUDA
+#define DCN_V2_IM2COL_CUDA
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+  void modulated_deformable_im2col_cuda(cudaStream_t stream,
+    const float *data_im, const float *data_offset, const float *data_mask,
+    const int batch_size, const int channels, const int height_im, const int width_im,
+    const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int deformable_group, float *data_col);
+
+  void modulated_deformable_col2im_cuda(cudaStream_t stream,
+    const float *data_col, const float *data_offset, const float *data_mask,
+    const int batch_size, const int channels, const int height_im, const int width_im,
+    const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int deformable_group, float *grad_im);
+
+  void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
+    const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
+    const int batch_size, const int channels, const int height_im, const int width_im,
+    const int height_col, const int width_col, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    const int deformable_group,
+    float *grad_offset, float *grad_mask);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv.c b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv.c
new file mode 100644
index 0000000..bb67aa4
--- /dev/null
+++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv.c
@@ -0,0 +1,19 @@
+#include <TH/TH.h>
+
+int deform_conv_forward(THFloatTensor *input, THFloatTensor *offset,
+                        THFloatTensor *output)
+{
+  // if (!THFloatTensor_isSameSizeAs(input1, input2))
+  //   return 0;
+  // THFloatTensor_resizeAs(output, input);
+  // THFloatTensor_cadd(output, input1, 1.0, input2);
+  return 1;
+}
+
+int deform_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input,
+                         THFloatTensor *grad_offset)
+{
+  // THFloatTensor_resizeAs(grad_input, grad_output);
+  // THFloatTensor_fill(grad_input, 1);
+  return 1;
+}
diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv.h
new file mode 100644
index 0000000..079177b
--- /dev/null
+++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv.h
@@ -0,0 +1,4 @@
+int deform_conv_forward(THFloatTensor *input, THFloatTensor *offset,
+                        THFloatTensor *output);
+int deform_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input,
+                         THFloatTensor *grad_offset);
diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda.c b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda.c
new file mode 100644
index 0000000..0c34dcd
--- /dev/null
+++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda.c
@@ -0,0 +1,491 @@
+#include <THC/THC.h>
+
+#include "deform_conv_cuda_kernel.h"
+
+extern THCState *state;
+
+void shape_check(THCState *state, THCudaTensor *input, THCudaTensor *offset,
+                 THCudaTensor *gradOutput, THCudaTensor *weight, int kH, int kW,
+                 int dH, int dW, int padH, int padW, int dilationH,
+                 int dilationW, int deformable_group) {
+
+// THArgCheck(weight->nDimension == 4, 5,
+//            "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
+//            "but got: %s",
+//            weight->nDimension);
+  THArgCheck(THCudaTensor_nDimension(state, weight) == 4, 5,
+             "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
+             "but got: %s",
+             THCudaTensor_nDimension(state, weight));
+
+  THArgCheck(THCudaTensor_isContiguous(state, weight), 5,
+             "weight tensor has to be contiguous");
+
+  THArgCheck(kW > 0 && kH > 0, 9,
+             "kernel size should be greater than zero, but got kH: %d kW: %d",
+             kH, kW);
+
+// THArgCheck((weight->size[2] == kH && weight->size[3] == kW), 9,
+//            "kernel size should be consistent with weight, ",
+//            "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
+//            kW, weight->size[2], weight->size[3]);
+  THArgCheck((THCudaTensor_size(state, weight, 2) == kH &&
+              THCudaTensor_size(state, weight, 3) == kW), 9,
+             "kernel size should be consistent with weight, ",
+             "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
+             kW, THCudaTensor_size(state, weight, 2), THCudaTensor_size(state, weight, 3));
+
+
+  THArgCheck(dW > 0 && dH > 0, 11,
+             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
+
+  THArgCheck(dilationW > 0 && dilationH > 0, 14,
+             "dilation should be greater than 0, but got dilationH: %d dilationW: %d",
+             dilationH, dilationW);
+
+// int ndim = input->nDimension;
+  int ndim = THCudaTensor_nDimension(state, input);
+  int dimf = 0;
+  int dimh = 1;
+  int dimw = 2;
+
+  if (ndim == 4) {
+    dimf++;
+    dimh++;
+    dimw++;
+  }
+
+  THArgCheck(ndim == 3 || ndim == 4, 2,
+             "3D or 4D input tensor expected but got: %s", ndim);
+
+// long nInputPlane = weight->size[1];
+// long inputHeight = input->size[dimh];
+// long inputWidth = input->size[dimw];
+// long nOutputPlane = weight->size[0];
+  long nInputPlane = THCudaTensor_size(state, weight, 1);
+  long inputHeight = THCudaTensor_size(state, input, dimh);
+  long inputWidth = THCudaTensor_size(state, input, dimw);
+  long nOutputPlane = THCudaTensor_size(state, weight, 0);
+  long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+  long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+
+  THArgCheck(nInputPlane % deformable_group == 0, 2,
+             "input channels must divide deformable group size");
+
+  if (outputWidth < 1 || outputHeight < 1)
+    THError(
+        "Given input size: (%ld x %ld x %ld). "
+        "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", + nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, + outputWidth); + + THArgCheck(THCudaTensor_size(state, input, 1) == nInputPlane, 2, + "invalid number of input planes, expected: %d, but got: %d", + nInputPlane, THCudaTensor_size(state, input, 1)); + + THArgCheck((inputHeight >= kH && inputWidth >= kW), 2, + "input image is smaller than kernel"); + +// THArgCheck( +// (offset->size[2] == outputHeight && offset->size[3] == outputWidth), 3, +// "invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d", outputHeight, outputWidth, +// offset->size[2], offset->size[3]); + THArgCheck( + (THCudaTensor_size(state, offset, 2) == outputHeight && + THCudaTensor_size(state, offset, 3) == outputWidth), 3, + "invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d", + outputHeight, outputWidth, THCudaTensor_size(state, offset, 2), + THCudaTensor_size(state, offset, 3)); + + THArgCheck((THCudaTensor_size(state, offset, 1) == deformable_group * 2 * kH * kW), 3, + "invalid number of channels of offset"); + + if (gradOutput != NULL) { + THArgCheck(THCudaTensor_size(state, gradOutput, dimf) == nOutputPlane, 4, + "invalid number of gradOutput planes, expected: %d, but got: %d", + nOutputPlane, THCudaTensor_size(state, gradOutput, dimf)); + + THArgCheck((THCudaTensor_size(state, gradOutput, dimh) == outputHeight && + THCudaTensor_size(state, gradOutput, dimw) == outputWidth), + 4, "invalid size of gradOutput, expected height: %d width: %d , but got height: %d width: %d", + outputHeight, outputWidth, THCudaTensor_size(state, gradOutput, dimh), + THCudaTensor_size(state, gradOutput, dimw)); + } +} + +int deform_conv_forward_cuda(THCudaTensor *input, THCudaTensor *weight, + THCudaTensor *offset, THCudaTensor *output, + THCudaTensor *columns, THCudaTensor *ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, + int deformable_group, int im2col_step) { + + // todo: resize columns to include im2col: done + // todo: add im2col_step as input + // todo: add new output buffer and transpose it to output (or directly transpose output) + // todo: possibly change data indexing because of parallel_imgs + + THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, weight, offset, + output, columns, ones)); + + shape_check(state, input, offset, NULL, weight, kH, kW, dH, dW, padH, padW, + dilationH, dilationW, deformable_group); + + input = THCudaTensor_newContiguous(state, input); + offset = THCudaTensor_newContiguous(state, offset); + weight = THCudaTensor_newContiguous(state, weight); + + int batch = 1; + if (THCudaTensor_nDimension(state, input) == 3) { + // Force batch + batch = 0; + THCudaTensor_resize4d(state, input, 1, THCudaTensor_size(state, input, 0), THCudaTensor_size(state, input, 1), + THCudaTensor_size(state, input, 2)); + THCudaTensor_resize4d(state, offset, 1, THCudaTensor_size(state, offset, 0), THCudaTensor_size(state, offset, 1), + THCudaTensor_size(state, offset, 2)); + } + + // todo: assert batchsize dividable by im2col_step + + long batchSize = THCudaTensor_size(state, input, 0); + long nInputPlane = THCudaTensor_size(state, input, 1); + long inputHeight = THCudaTensor_size(state, input, 2); + long inputWidth = THCudaTensor_size(state, input, 3); + + long nOutputPlane = THCudaTensor_size(state, weight, 0); + + long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH 
- 1) + 1)) / dH + 1; + + THArgCheck((THCudaTensor_size(state, offset, 0) == batchSize), 3, "invalid batch size of offset"); + + // bias = bias ? THCudaTensor_newContiguous(state, bias) : bias; + + THCudaTensor_resize5d(state, output, batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth); + THCudaTensor_resize2d(state, columns, nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth); + + if (THCudaTensor_nDimension(state, ones) != 2 || THCudaTensor_size(state, ones, 0) * + THCudaTensor_size(state, ones, 1) < outputHeight * outputWidth) { + THCudaTensor_resize2d(state, ones, outputHeight, outputWidth); + THCudaTensor_fill(state, ones, 1); + } + + THCudaTensor *input_n = THCudaTensor_new(state); + THCudaTensor *offset_n = THCudaTensor_new(state); + THCudaTensor *output_n = THCudaTensor_new(state); + + THCudaTensor *output_buffer = THCudaTensor_new(state); + THCudaTensor_resize4d(state, output_buffer, batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth); + + THCudaTensor_resize5d(state, input, batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize5d(state, offset, batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + + THCudaTensor_select(state, input_n, input, 0, elt); + THCudaTensor_select(state, offset_n, offset, 0, elt); + THCudaTensor_select(state, output_n, output_buffer, 0, elt); + + // long m_ = nOutputPlane; + // long n_ = outputHeight * outputWidth; + // long k_ = 1; + + // TODO(BZ) add bias term + // if (bias) { + // THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f, + // THCudaTensor_data(state, ones), k_, + // THCudaTensor_data(state, bias), k_, 0.0f, + // THCudaTensor_data(state, output_n), n_); + // } else { + // THCudaTensor_zero(state, output_n); + // } + + THCudaTensor_zero(state, output_n); + + deformable_im2col( + THCState_getCurrentStream(state), THCudaTensor_data(state, input_n), + THCudaTensor_data(state, offset_n), nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, + im2col_step, deformable_group, THCudaTensor_data(state, columns)); + + long m = nOutputPlane; + long n = THCudaTensor_size(state, columns, 1); // todo: see if we need to change this + long k = nInputPlane * kH * kW; + + // cublas use column major indexing + THCudaBlas_Sgemm(state, 'n', 'n', n, m, k, 1.0f, + THCudaTensor_data(state, columns), n, + THCudaTensor_data(state, weight), k, 1.0f, + THCudaTensor_data(state, output_n), n); + } + + // the reason I use seemingly redundant output_buffer is that THCudaTensor API handles successive transpose and resize poorly + THCudaTensor_resize5d(state, output_buffer, batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth); + THCudaTensor_transpose(state, output_buffer, NULL, 1, 2); + THCudaTensor_copy(state, output, output_buffer); + THCudaTensor_resize4d(state, output, batchSize, nOutputPlane, outputHeight, outputWidth); + + THCudaTensor_resize4d(state, input, batchSize, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize4d(state, offset, batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth); + + THCudaTensor_free(state, input_n); + THCudaTensor_free(state, offset_n); + THCudaTensor_free(state, output_n); + THCudaTensor_free(state, output_buffer); + + if (batch == 0) { + THCudaTensor_resize3d(state, output, nOutputPlane, outputHeight, outputWidth); + 
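+    // The 3D (unbatched) case: a leading batch dimension of 1 was added at the top of
+    // this function ("Force batch"), so squeeze the tensors back to three dimensions
+    // before returning them to the caller.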
THCudaTensor_resize3d(state, input, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize3d(state, offset, THCudaTensor_size(state, offset, 1), + THCudaTensor_size(state, offset, 2), THCudaTensor_size(state, offset, 3)); + } + + THCudaTensor_free(state, input); + THCudaTensor_free(state, offset); + THCudaTensor_free(state, weight); + // if (bias) THCudaTensor_free(state, bias); + + return 1; +} + +int deform_conv_backward_input_cuda( + THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput, + THCudaTensor *gradInput, THCudaTensor *gradOffset, THCudaTensor *weight, + THCudaTensor *columns, int kW, int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int deformable_group, int im2col_step) { + + THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, gradOutput, weight, + offset, columns, gradInput)); + + shape_check(state, input, offset, gradOutput, weight, kH, kW, dH, dW, padH, + padW, dilationH, dilationW, deformable_group); + + input = THCudaTensor_newContiguous(state, input); + offset = THCudaTensor_newContiguous(state, offset); + gradOutput = THCudaTensor_newContiguous(state, gradOutput); + weight = THCudaTensor_newContiguous(state, weight); + + int batch = 1; + + if (THCudaTensor_nDimension(state, input) == 3) { + // Force batch + batch = 0; + THCudaTensor_resize4d(state, input, 1, THCudaTensor_size(state, input, 0), THCudaTensor_size(state, input, 1), + THCudaTensor_size(state, input, 2)); + THCudaTensor_resize4d(state, offset, 1, THCudaTensor_size(state, offset, 0), THCudaTensor_size(state, offset, 1), + THCudaTensor_size(state, offset, 2)); + THCudaTensor_resize4d(state, gradOutput, 1, THCudaTensor_size(state, gradOutput, 0), + THCudaTensor_size(state, gradOutput, 1), THCudaTensor_size(state, gradOutput, 2)); + } + + long batchSize = THCudaTensor_size(state, input, 0); + long nInputPlane = THCudaTensor_size(state, input, 1); + long inputHeight = THCudaTensor_size(state, input, 2); + long inputWidth = THCudaTensor_size(state, input, 3); + + long nOutputPlane = THCudaTensor_size(state, weight, 0); + + long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + THArgCheck((THCudaTensor_size(state, offset, 0) == batchSize), 3, "invalid batch size of offset"); + THCudaTensor_resize4d(state, gradInput, batchSize, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize2d(state, columns, nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth); + + + THCudaTensor *gradInput_n = THCudaTensor_new(state); + THCudaTensor *gradOffset_n = THCudaTensor_new(state); + THCudaTensor *input_n = THCudaTensor_new(state); + THCudaTensor *offset_n = THCudaTensor_new(state); + THCudaTensor *gradOutput_n = THCudaTensor_new(state); + + // change order of grad output + THCudaTensor_resize5d(state, gradOutput, batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth); + THCudaTensor_transpose(state, gradOutput, NULL, 1, 2); + + THCudaTensor *gradOutputBuffer = THCudaTensor_new(state); + THCudaTensor_resize5d(state, gradOutputBuffer, batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth); + THCudaTensor_copy(state, gradOutputBuffer, gradOutput); + THCudaTensor_resize4d(state, gradOutputBuffer, batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth); + + THCudaTensor_transpose(state, gradOutput, NULL, 1, 2); + THCudaTensor_resize4d(state, gradOutput, batchSize, nOutputPlane, 
outputHeight, outputWidth); + + THCudaTensor_resize5d(state, gradInput, batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize5d(state, input, batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize5d(state, gradOffset, batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth); + THCudaTensor_resize5d(state, offset, batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth); + + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + THCudaTensor_select(state, gradInput_n, gradInput, 0, elt); + THCudaTensor_select(state, gradOffset_n, gradOffset, 0, elt); + THCudaTensor_select(state, input_n, input, 0, elt); + THCudaTensor_select(state, offset_n, offset, 0, elt); + THCudaTensor_select(state, gradOutput_n, gradOutputBuffer, 0, elt); + + long m = nInputPlane * kW * kH; + long n = THCudaTensor_size(state, columns, 1); + long k = nOutputPlane; + + THCudaBlas_Sgemm(state, 'n', 't', n, m, k, 1.0f, + THCudaTensor_data(state, gradOutput_n), n, + THCudaTensor_data(state, weight), m, 0.0f, + THCudaTensor_data(state, columns), n); + + + deformable_col2im_coord( + THCState_getCurrentStream(state), THCudaTensor_data(state, columns), + THCudaTensor_data(state, input_n), THCudaTensor_data(state, offset_n), + nInputPlane, inputHeight, inputWidth, kH, kW, padH, padW, dH, dW, + dilationH, dilationW, im2col_step, deformable_group, + THCudaTensor_data(state, gradOffset_n)); + + deformable_col2im( + THCState_getCurrentStream(state), THCudaTensor_data(state, columns), + THCudaTensor_data(state, offset_n), nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, im2col_step, + deformable_group, THCudaTensor_data(state, gradInput_n)); + } + + THCudaTensor_resize4d(state, gradInput, batchSize, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize4d(state, input, batchSize, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize4d(state, gradOffset, batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth); + THCudaTensor_resize4d(state, offset, batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth); + + THCudaTensor_free(state, gradInput_n); + THCudaTensor_free(state, gradOffset_n); + THCudaTensor_free(state, input_n); + THCudaTensor_free(state, offset_n); + THCudaTensor_free(state, gradOutput_n); + THCudaTensor_free(state, gradOutputBuffer); + + if (batch == 0) { + THCudaTensor_resize3d(state, gradOutput, nOutputPlane, outputHeight, + outputWidth); + THCudaTensor_resize3d(state, input, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize3d(state, gradInput, nInputPlane, inputHeight, + inputWidth); + THCudaTensor_resize3d(state, offset, THCudaTensor_size(state, offset, 1), THCudaTensor_size(state, offset, 2), + THCudaTensor_size(state, offset, 3)); + THCudaTensor_resize3d(state, gradOffset, THCudaTensor_size(state, offset, 1), THCudaTensor_size(state, offset, 2), + THCudaTensor_size(state, offset, 3)); + } + + THCudaTensor_free(state, input); + THCudaTensor_free(state, offset); + THCudaTensor_free(state, gradOutput); + THCudaTensor_free(state, weight); + + return 1; +} + +int deform_conv_backward_parameters_cuda( + THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput, + THCudaTensor *gradWeight, /*THCudaTensor *gradBias, */ + THCudaTensor *columns, THCudaTensor *ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, 
int deformable_group, + float scale, int im2col_step) { + + // todo: transpose and reshape outGrad + // todo: reshape columns + // todo: add im2col_step as input + THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, offset, gradOutput, + gradWeight, columns)); + + shape_check(state, input, offset, gradOutput, gradWeight, kH, kW, dH, dW, + padH, padW, dilationH, dilationW, deformable_group); + + input = THCudaTensor_newContiguous(state, input); + offset = THCudaTensor_newContiguous(state, offset); + gradOutput = THCudaTensor_newContiguous(state, gradOutput); + + int batch = 1; + + if (THCudaTensor_nDimension(state, input) == 3) { + // Force batch + batch = 0; + THCudaTensor_resize4d(state, input, 1, THCudaTensor_size(state, input, 0), THCudaTensor_size(state, input, 1), + THCudaTensor_size(state, input, 2)); + THCudaTensor_resize4d(state, gradOutput, 1, THCudaTensor_size(state, gradOutput, 0), + THCudaTensor_size(state, gradOutput, 1), THCudaTensor_size(state, gradOutput, 2)); + } + + long batchSize = THCudaTensor_size(state, input, 0); + long nInputPlane = THCudaTensor_size(state, input, 1); + long inputHeight = THCudaTensor_size(state, input, 2); + long inputWidth = THCudaTensor_size(state, input, 3); + + long nOutputPlane = THCudaTensor_size(state, gradWeight, 0); + + long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; + long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; + + THArgCheck((THCudaTensor_size(state, offset, 0) == batchSize), 3, "invalid batch size of offset"); + + THCudaTensor_resize2d(state, columns, nInputPlane * kW * kH, + im2col_step * outputHeight * outputWidth); + + THCudaTensor *input_n = THCudaTensor_new(state); + THCudaTensor *offset_n = THCudaTensor_new(state); + THCudaTensor *gradOutput_n = THCudaTensor_new(state); + + THCudaTensor_resize5d(state, gradOutput, batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth); + THCudaTensor_transpose(state, gradOutput, NULL, 1, 2); + + THCudaTensor *gradOutputBuffer = THCudaTensor_new(state); + THCudaTensor_resize5d(state, gradOutputBuffer, batchSize / im2col_step, nOutputPlane, im2col_step, outputHeight, outputWidth); + THCudaTensor_copy(state, gradOutputBuffer, gradOutput); + THCudaTensor_resize4d(state, gradOutputBuffer, batchSize / im2col_step, nOutputPlane, im2col_step * outputHeight, outputWidth); + + THCudaTensor_transpose(state, gradOutput, NULL, 1, 2); + THCudaTensor_resize4d(state, gradOutput, batchSize, nOutputPlane, outputHeight, outputWidth); + + + THCudaTensor_resize5d(state, input, batchSize / im2col_step, im2col_step, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize5d(state, offset, batchSize / im2col_step, im2col_step, + deformable_group * 2 * kH * kW, outputHeight, outputWidth); + + for (int elt = 0; elt < batchSize / im2col_step; elt++) { + THCudaTensor_select(state, input_n, input, 0, elt); + THCudaTensor_select(state, offset_n, offset, 0, elt); + THCudaTensor_select(state, gradOutput_n, gradOutputBuffer, 0, elt); + + deformable_im2col( + THCState_getCurrentStream(state), THCudaTensor_data(state, input_n), + THCudaTensor_data(state, offset_n), nInputPlane, inputHeight, + inputWidth, kH, kW, padH, padW, dH, dW, dilationH, dilationW, + im2col_step, deformable_group, THCudaTensor_data(state, columns)); + + long m = nOutputPlane; + long n = nInputPlane * kW * kH; + long k = THCudaTensor_size(state, columns, 1); + + THCudaBlas_Sgemm(state, 't', 'n', n, m, k, scale, + THCudaTensor_data(state, columns), k, + 
THCudaTensor_data(state, gradOutput_n), k, 1.0f, + THCudaTensor_data(state, gradWeight), n); + } + + THCudaTensor_free(state, input_n); + THCudaTensor_free(state, offset_n); + THCudaTensor_free(state, gradOutput_n); + THCudaTensor_free(state, gradOutputBuffer); + + THCudaTensor_resize4d(state, input, batchSize, nInputPlane, inputHeight, inputWidth); + THCudaTensor_resize4d(state, offset, batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth); + + if (batch == 0) { + THCudaTensor_resize3d(state, gradOutput, nOutputPlane, outputHeight, + outputWidth); + THCudaTensor_resize3d(state, input, nInputPlane, inputHeight, inputWidth); + } + + THCudaTensor_free(state, input); + THCudaTensor_free(state, offset); + THCudaTensor_free(state, gradOutput); + return 1; +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda.h new file mode 100644 index 0000000..b9bc076 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda.h @@ -0,0 +1,20 @@ +int deform_conv_forward_cuda(THCudaTensor *input, + THCudaTensor *weight, /*THCudaTensor * bias, */ + THCudaTensor *offset, THCudaTensor *output, + THCudaTensor *columns, THCudaTensor *ones, int kW, + int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, + int deformable_group, int im2col_step); + +int deform_conv_backward_input_cuda( + THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput, + THCudaTensor *gradInput, THCudaTensor *gradOffset, THCudaTensor *weight, + THCudaTensor *columns, int kW, int kH, int dW, int dH, int padW, int padH, + int dilationW, int dilationH, int deformable_group, int im2col_step); + +int deform_conv_backward_parameters_cuda( + THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput, + THCudaTensor *gradWeight, /*THCudaTensor *gradBias, */ + THCudaTensor *columns, THCudaTensor *ones, int kW, int kH, int dW, int dH, + int padW, int padH, int dilationW, int dilationH, int deformable_group, + float scale, int im2col_step); diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda_kernel.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda_kernel.cu new file mode 100644 index 0000000..07fe8b3 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda_kernel.cu @@ -0,0 +1,568 @@ +/*! + ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** + * + * COPYRIGHT + * + * All contributions by the University of California: + * Copyright (c) 2014-2017 The Regents of the University of California (Regents) + * All rights reserved. + * + * All other contributions: + * Copyright (c) 2014-2017, the respective contributors + * All rights reserved. + * + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * + * LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * CONTRIBUTION AGREEMENT + * + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + ***************** END Caffe Copyright Notice and Disclaimer ******************** + * + * Copyright (c) 2018 Microsoft + * Licensed under The MIT License [see LICENSE for details] + * \file modulated_deformable_im2col.cuh + * \brief Function definitions of converting an image to + * column matrix based on kernel, padding, dilation, and offset. + * These functions are mainly used in deformable convolution operators. 
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+#include "deform_conv_cuda_kernel.h"
+#include <algorithm>
+#include <cstdio>
+
+#define CUDA_KERNEL_LOOP(i, n)                                 \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+       i += blockDim.x * gridDim.x)
+
+const int CUDA_NUM_THREADS = 1024;
+const int kMaxGridNum = 65535;
+inline int GET_BLOCKS(const int N) {
+  return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS);
+}
+
+
+template <typename DType>
+__device__ DType deformable_im2col_bilinear(const DType *bottom_data, const int data_width,
+                                            const int height, const int width, DType h, DType w) {
+
+  int h_low = floor(h);
+  int w_low = floor(w);
+  int h_high = h_low + 1;
+  int w_high = w_low + 1;
+
+  DType lh = h - h_low;
+  DType lw = w - w_low;
+  DType hh = 1 - lh, hw = 1 - lw;
+
+  DType v1 = 0;
+  if (h_low >= 0 && w_low >= 0)
+    v1 = bottom_data[h_low * data_width + w_low];
+  DType v2 = 0;
+  if (h_low >= 0 && w_high <= width - 1)
+    v2 = bottom_data[h_low * data_width + w_high];
+  DType v3 = 0;
+  if (h_high <= height - 1 && w_low >= 0)
+    v3 = bottom_data[h_high * data_width + w_low];
+  DType v4 = 0;
+  if (h_high <= height - 1 && w_high <= width - 1)
+    v4 = bottom_data[h_high * data_width + w_high];
+
+  DType w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+  DType val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+  return val;
+}
+
+
+template <typename DType>
+__device__ DType get_gradient_weight(DType argmax_h, DType argmax_w,
+                                     const int h, const int w, const int height, const int width) {
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  DType weight = 0;
+  if (h == argmax_h_low && w == argmax_w_low)
+    weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+  if (h == argmax_h_low && w == argmax_w_high)
+    weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+  if (h == argmax_h_high && w == argmax_w_low)
+    weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+  if (h == argmax_h_high && w == argmax_w_high)
+    weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+  return weight;
+}
+
+
+template <typename DType>
+__device__ DType get_coordinate_weight(DType argmax_h, DType argmax_w,
+                                       const int height, const int width, const DType *im_data,
+                                       const int data_width, const int bp_dir) {
+
+  if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) {
+    //empty
+    return 0;
+  }
+
+  int argmax_h_low = floor(argmax_h);
+  int argmax_w_low = floor(argmax_w);
+  int argmax_h_high = argmax_h_low + 1;
+  int argmax_w_high = argmax_w_low + 1;
+
+  DType weight = 0;
+
+  if (bp_dir == 0) {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+      weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+  } else if (bp_dir == 1) {
+    if (argmax_h_low >= 0 && argmax_w_low >= 0)
+      weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
+    if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+      weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
+    if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+      weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
+    if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+      weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
+  }
+
+  return weight;
+}
+
+
+/*!
+ * \brief deformable_im2col gpu kernel.
+ * DO NOT call this directly. Use wrapper function im2col() instead;
+ */
+template <typename DType>
+__global__ void deformable_im2col_gpu_kernel(const int n, const DType *data_im, const DType *data_offset,
+                                             const int height, const int width, const int kernel_h, const int kernel_w,
+                                             const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+                                             const int dilation_h, const int dilation_w, const int channel_per_deformable_group,
+                                             const int batch_size, const int num_channels, const int deformable_group,
+                                             const int height_col, const int width_col,
+                                             DType *data_col) {
+  CUDA_KERNEL_LOOP(index, n) {
+    // index index of output matrix
+    const int w_col = index % width_col;
+    const int h_col = (index / width_col) % height_col;
+    const int b_col = (index / width_col / height_col) % batch_size;
+    const int c_im = (index / width_col / height_col) / batch_size;
+    const int c_col = c_im * kernel_h * kernel_w;
+
+    // compute deformable group index
+    const int deformable_group_index = c_im / channel_per_deformable_group;
+
+    const int h_in = h_col * stride_h - pad_h;
+    const int w_in = w_col * stride_w - pad_w;
+    DType* data_col_ptr = data_col + ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+    //const DType* data_im_ptr = data_im + ((b_col * num_channels + c_im) * height + h_in) * width + w_in;
+    const DType* data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width;
+    const DType* data_offset_ptr = data_offset + (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * kernel_w * height_col * width_col;
+
+
+    for (int i = 0; i < kernel_h; ++i) {
+      for (int j = 0; j < kernel_w; ++j) {
+        const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+        const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + w_col;
+        const DType offset_h = data_offset_ptr[data_offset_h_ptr];
+        const DType offset_w = data_offset_ptr[data_offset_w_ptr];
+        DType val = static_cast<DType>(0);
+        const DType h_im = h_in + i * dilation_h + offset_h;
+        const DType w_im = w_in + j * dilation_w + offset_w;
+        if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) {
+          //const DType map_h = i * dilation_h + offset_h;
+          //const DType map_w = j * dilation_w + offset_w;
+          //const int cur_height = height - h_in;
+          //const int cur_width = width - w_in;
+          //val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, cur_width, map_h, map_w);
+          val = deformable_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
+        }
+        *data_col_ptr = val;
+        data_col_ptr += batch_size * height_col * width_col;
+      }
+    }
+  }
+}
+
+
+
+
+
+
+/*!\brief
+ * gpu function of deformable_im2col algorithm
+ * \param s device stream
+ * \param data_im pointer of images (N, C, H, W, ...) in the image batch
+ * \param data_offset pointer of offsets (N, deformable_group*kernel_h*kernel_w*2, H, W, ...) in the offset batch
+ * \param im_shape input image shape in dimensions (N, C, H, W,)
+ * \param col_shape column buffer shape (#channels, N, output_im_height, output_im_width, ...)
+ * \param kernel_shape kernel filter shape
+ * \param pad pad shape
+ * \param stride stride shape
+ * \param dilation dilation shape
+ * \param deformable_group #offset group that deformable convolution use
+ * \param data_col column buffer pointer
+ */
+template <typename DType>
+inline void deformable_im2col(cudaStream_t stream,
+  const DType *data_im, const DType *data_offset, const int channels,
+  const int height, const int width, const int ksize_h, const int ksize_w,
+  const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+  const int dilation_h, const int dilation_w, const int parallel_imgs,
+  const int deformable_group, DType *data_col) {
+  // num_axes should be smaller than block size
+  // todo: check parallel_imgs is correctly passed in
+  int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
+  int num_kernels = channels * height_col * width_col * parallel_imgs;
+  int channel_per_deformable_group = channels / deformable_group;
+
+  //index_t num_spatial_axes = kernel_shape.ndim();
+  //CHECK_LT(num_spatial_axes, mshadow::cuda::kBaseThreadNum);
+  //index_t channel_per_deformable_group = im_shape[1] / deformable_group;
+  //index_t num_kernels = im_shape[1] * col_shape.ProdShape(1, col_shape.ndim());
+  //using namespace mxnet_op;
+  //switch (num_spatial_axes) {
+  //case 2:
+  //  deformable_im2col_gpu_kernel // NOLINT_NEXT_LINE(whitespace/operators)
+  //      <<::GetStream(s)>>>(
+  //      num_kernels, data_im, data_offset, im_shape[2], im_shape[3], kernel_shape[0], kernel_shape[1],
+  //      pad[0], pad[1], stride[0], stride[1], dilation[0], dilation[1], channel_per_deformable_group,
+  //      col_shape[1], im_shape[1], deformable_group, col_shape[2], col_shape[3], data_col);
+  //  MSHADOW_CUDA_POST_KERNEL_CHECK(deformable_im2col_gpu_kernel);
+  //  break;
+  //default:
+  //  LOG(FATAL) << "im2col_nd_gpu does not support computation with "
+  //             << num_spatial_axes << " spatial axes";
+
+  deformable_im2col_gpu_kernel<DType><<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
+      num_kernels, data_im, data_offset, height, width, ksize_h, ksize_w,
+      pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, channel_per_deformable_group,
+      parallel_imgs, channels, deformable_group, height_col, width_col, data_col);
+
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    printf("error in deformable_im2col: %s\n", cudaGetErrorString(err));
+  }
+}
+
+template void deformable_im2col<float>(
+    cudaStream_t stream, const float *data_im, const float *data_offset,
+    const int channels, const int height, const int width, const int ksize_h,
+    const int ksize_w, const int pad_h, const int pad_w, const int stride_h,
+    const int stride_w, const int dilation_h, const int dilation_w,
+    const int parallel_imgs, const int deformable_group, float *data_col);
+
+/*!
+* \brief deformable_col2im gpu kernel.
+* \brief DO NOT call this directly. Use wrapper function deformable_col2im() instead;
+*/
+template <typename DType>
+__global__ void deformable_col2im_gpu_kernel(const int n, const DType *data_col, const DType *data_offset,
+                                             const int channels, const int height, const int width,
+                                             const int kernel_h, const int kernel_w,
+                                             const int pad_h, const int pad_w,
+                                             const int stride_h, const int stride_w,
+                                             const int dilation_h, const int dilation_w,
+                                             const int channel_per_deformable_group,
+                                             const int batch_size, const int deformable_group,
+                                             const int height_col, const int width_col,
+                                             DType *grad_im) {
+  CUDA_KERNEL_LOOP(index, n) {
+    const int j = (index / width_col / height_col / batch_size) % kernel_w;
+    const int i = (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+    const int c = index / width_col / height_col / batch_size / kernel_w / kernel_h;
+    // compute the start and end of the output
+
+    const int deformable_group_index = c / channel_per_deformable_group;
+
+    int w_out = index % width_col;
+    int h_out = (index / width_col) % height_col;
+    int b = (index / width_col / height_col) % batch_size;
+    int w_in = w_out * stride_w - pad_w;
+    int h_in = h_out * stride_h - pad_h;
+
+    const DType* data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) *
+                                   2 * kernel_h * kernel_w * height_col * width_col;
+    const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+    const int data_offset_w_ptr = ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+    const DType offset_h = data_offset_ptr[data_offset_h_ptr];
+    const DType offset_w = data_offset_ptr[data_offset_w_ptr];
+    const DType cur_inv_h_data = h_in + i * dilation_h + offset_h;
+    const DType cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+    const DType cur_top_grad = data_col[index];
+    const int cur_h = (int)cur_inv_h_data;
+    const int cur_w = (int)cur_inv_w_data;
+    for (int dy = -2; dy <= 2; dy++) {
+      for (int dx = -2; dx <= 2; dx++) {
+        if (cur_h + dy >= 0 && cur_h + dy < height &&
+            cur_w + dx >= 0 && cur_w + dx < width &&
+            abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+            abs(cur_inv_w_data - (cur_w + dx)) < 1
+            ) {
+          int cur_bottom_grad_pos = ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+          DType weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data, cur_h + dy, cur_w + dx, height, width);
+          atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+        }
+      }
+    }
+  }
+}
+
+
+/*!\brief
+ * gpu function of deformable_col2im algorithm
+ * \param s device stream
+ * \param data_col start pointer of the column buffer to be filled
+ * \param data_offset pointer of offsets (N, deformable_group*kernel_h*kernel_w*2, H, W, ...) in the offset batch
+ * \param im_shape input image shape in dimensions (N, C, H, W,)
+ * \param col_shape column buffer shape
+ * \param kernel_shape kernel filter shape
+ * \param pad pad shape
+ * \param stride stride shape
+ * \param dilation dilation shape
+ * \param deformable_group #offset group that deformable convolution use
+ * \param grad_im pointer of images (N, C, H, W,...) in the image batch
in the image batch + */ +template +inline void deformable_col2im(cudaStream_t stream, + const DType *data_col, const DType *data_offset, const int channels, + const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, + DType* grad_im) { + + + + // todo: make sure parallel_imgs is passed in correctly + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; + int channel_per_deformable_group = channels / deformable_group; + + // index_t num_spatial_axes = kernel_shape.ndim(); + // index_t im_size = im_shape.ProdShape(1, im_shape.ndim()); + // index_t channel_per_deformable_group = im_shape[1] / deformable_group; + // index_t num_kernels = col_shape.ProdShape(0, col_shape.ndim()); + // num_axes should be smaller than block size + // CHECK_LT(num_spatial_axes, mshadow::cuda::kBaseThreadNum); + // using namespace mxnet_op; + // switch (num_spatial_axes) { + // case 2: + // // To avoid involving atomic operations, we will launch one kernel per + // // bottom dimension, and then in the kernel add up the top dimensions. + // // NOLINT_NEXT_LINE(whitespace/operators) + // deformable_col2im_gpu_kernel<<::GetStream(s)>>>( + // num_kernels, data_col, data_offset, im_shape[1], im_shape[2], im_shape[3], + // kernel_shape[0], kernel_shape[1], pad[0], pad[1], stride[0], stride[1], + // dilation[0], dilation[1], channel_per_deformable_group, + // col_shape[1], deformable_group, col_shape[2], col_shape[3], grad_im, req); + // MSHADOW_CUDA_POST_KERNEL_CHECK(deformable_col2im_gpu_kernel); + // break; + // default: + // LOG(FATAL) << "col2im_nd_gpu does not support computation with " + // << num_spatial_axes << " spatial axes"; + + deformable_col2im_gpu_kernel<<>>( + num_kernels, data_col, data_offset, channels, height, width, ksize_h, + ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, + parallel_imgs, deformable_group, height_col, width_col, grad_im); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); + } +} + +template void deformable_col2im( + cudaStream_t stream, const float *data_col, const float *data_offset, + const int channels, const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, + const int parallel_imgs, const int deformable_group, float *grad_im); + +/*! + * \brief deformable_col2im_coord gpu kernel. + * \brief DO NOT call this directly. 
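+ * Each thread computes one grad_offset element, summing get_coordinate_weight(...) times the matching column-buffer gradient over the input channels of its deformable group.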
Use wrapper function deformable_col2im_coord() instead; + */ +template +__global__ void deformable_col2im_coord_gpu_kernel(const int n, const DType *data_col, + const DType *data_im, const DType *data_offset, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int channel_per_deformable_group, + const int batch_size, const int offset_channels, const int deformable_group, + const int height_col, const int width_col, DType *grad_offset) { + CUDA_KERNEL_LOOP(index, n) { + DType val = 0; + int w = index % width_col; + int h = (index / width_col) % height_col; + int c = (index / width_col / height_col) % offset_channels; + int b = (index / width_col / height_col) / offset_channels; + // compute the start and end of the output + + const int deformable_group_index = c / (2 * kernel_h * kernel_w); + const int col_step = kernel_h * kernel_w; + int cnt = 0; + const DType *data_col_ptr = data_col + deformable_group_index * channel_per_deformable_group * + batch_size * width_col * height_col; + const DType *data_im_ptr = data_im + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * height * width; + const DType *data_offset_ptr = data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; + + const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; + + for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; col_c += col_step) { + const int col_pos = (((col_c * batch_size + b) * height_col) + h) * width_col + w; + const int bp_dir = offset_c % 2; + + int j = (col_pos / width_col / height_col / batch_size) % kernel_w; + int i = (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; + int w_out = col_pos % width_col; + int h_out = (col_pos / width_col) % height_col; + int w_in = w_out * stride_w - pad_w; + int h_in = h_out * stride_h - pad_h; + const int data_offset_h_ptr = (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); + const int data_offset_w_ptr = (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out); + const DType offset_h = data_offset_ptr[data_offset_h_ptr]; + const DType offset_w = data_offset_ptr[data_offset_w_ptr]; + DType inv_h = h_in + i * dilation_h + offset_h; + DType inv_w = w_in + j * dilation_w + offset_w; + if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { + inv_h = inv_w = -2; + } + const DType weight = get_coordinate_weight( + inv_h, inv_w, + height, width, data_im_ptr + cnt * height * width, width, bp_dir); + val += weight * data_col_ptr[col_pos]; + cnt += 1; + } + + grad_offset[index] = val; + } +} + +/*!\brief + * gpu function of deformable_col2im_coord algorithm + * \param s device stream + * \param data_col start pointer of the column buffer to be filled + * \param data_im pointer of images (N, C, H, W, ...) in the image batch + * \param data_offset pointer of offsets (N, deformable_group*kernel_h*kernel_w*2, H, W, ...) 
in the offset batch + * \param im_shape input image shape in dimensions (N, C, H, W,) + * \param col_shape column buffer shape + * \param kernel_shape kernel filter shape + * \param pad pad shape + * \param stride stride shape + * \param dilation dilation shape + * \param deformable_group #offset group that deformable convolution use + * \param grad_offset pointer of the offsets (N, deformable_group*kernel_h*kernel_w*2, H, W,...) in the offset batch + */ +template +inline void deformable_col2im_coord(cudaStream_t stream, + const DType *data_col, const DType *data_im, const DType *data_offset, const int channels, + const int height, const int width, const int ksize_h, const int ksize_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int parallel_imgs, + const int deformable_group, DType *grad_offset) { + + int height_col = (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; + int width_col = (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; + int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * deformable_group * parallel_imgs; + int channel_per_deformable_group = channels * ksize_h * ksize_w / deformable_group; + + // index_t num_spatial_axes = kernel_shape.ndim(); + // index_t num_kernels = col_shape[1] * col_shape[2] * col_shape[3] * 2 * kernel_shape[0] * kernel_shape[1] * deformable_group; + // index_t channel_per_deformable_group = col_shape[0] / deformable_group; + // num_axes should be smaller than block size + // CHECK_LT(num_spatial_axes, mshadow::cuda::kBaseThreadNum); + // using namespace mxnet_op; + // switch (num_spatial_axes) { + // case 2: + // To avoid involving atomic operations, we will launch one kernel per + // bottom dimension, and then in the kernel add up the top dimensions. 
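+  // (The live launch below follows the same idea: each thread owns exactly one grad_offset element and writes it once, so no atomicAdd is needed here, unlike deformable_col2im above.)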
+ // NOLINT_NEXT_LINE(whitespace/operators) + + // deformable_col2im_coord_gpu_kernel << ::GetStream(s) >> >( + // num_kernels, data_col, data_im, data_offset, im_shape[1], im_shape[2], im_shape[3], + // kernel_shape[0], kernel_shape[1], pad[0], pad[1], stride[0], stride[1], + // dilation[0], dilation[1], channel_per_deformable_group, + // col_shape[1], 2 * kernel_shape[0] * kernel_shape[1] * deformable_group, deformable_group, col_shape[2], col_shape[3], grad_offset, req); + // MSHADOW_CUDA_POST_KERNEL_CHECK(deformable_col2im_coord_gpu_kernel); + // break; + // default: + // LOG(FATAL) << "col2im_nd_gpu does not support computation with " + // << num_spatial_axes << " spatial axes"; + + deformable_col2im_coord_gpu_kernel<<>>( + num_kernels, data_col, data_im, data_offset, channels, height, width, + ksize_h, ksize_w, pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, channel_per_deformable_group, + parallel_imgs, 2 * ksize_h * ksize_w * deformable_group, deformable_group, + height_col, width_col, grad_offset); + +} + +template void +deformable_col2im_coord(cudaStream_t stream, const float *data_col, + const float *data_im, const float *data_offset, + const int channels, const int height, const int width, + const int ksize_h, const int ksize_w, const int pad_h, + const int pad_w, const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, const int parallel_imgs, + const int deformable_group, float *grad_offset); diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda_kernel.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda_kernel.h new file mode 100644 index 0000000..9a795ed --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/deform_conv_cuda_kernel.h @@ -0,0 +1,30 @@ +template +void deformable_im2col(cudaStream_t stream, const DType *data_im, + const DType *data_offset, const int channels, + const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, + const int deformable_group, DType *data_col); + +template +void deformable_col2im(cudaStream_t stream, const DType *data_col, + const DType *data_offset, const int channels, + const int height, const int width, const int ksize_h, + const int ksize_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int parallel_imgs, + const int deformable_group, DType *grad_im); + +template +void deformable_col2im_coord(cudaStream_t stream, const DType *data_col, + const DType *data_im, const DType *data_offset, + const int channels, const int height, + const int width, const int ksize_h, + const int ksize_w, const int pad_h, + const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, + const int dilation_w, const int parallel_imgs, + const int deformable_group, DType *grad_offset); diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn.c b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn.c new file mode 100644 index 0000000..f1e0891 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn.c @@ -0,0 +1,30 @@ +#include +#include +#include + +void modulated_deform_conv_forward(THFloatTensor *input, THFloatTensor *weight, + THFloatTensor *bias, THFloatTensor *ones, + 
THFloatTensor *offset, THFloatTensor *mask, + THFloatTensor *output, THFloatTensor *columns, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int deformable_group) +{ + printf("only implemented in GPU"); +} + void modulated_deform_conv_backward(THFloatTensor *input, THFloatTensor *weight, + THFloatTensor *bias, THFloatTensor *ones, + THFloatTensor *offset, THFloatTensor *mask, + THFloatTensor *output, THFloatTensor *columns, + THFloatTensor *grad_input, THFloatTensor *grad_weight, + THFloatTensor *grad_bias, THFloatTensor *grad_offset, + THFloatTensor *grad_mask, THFloatTensor *grad_output, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int pad_h, int pad_w, + int dilation_h, int dilation_w, + int deformable_group) +{ + printf("only implemented in GPU"); +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn.h new file mode 100644 index 0000000..26df95c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn.h @@ -0,0 +1,20 @@ +void modulated_deform_conv_forward(THFloatTensor *input, THFloatTensor *weight, + THFloatTensor *bias, THFloatTensor *ones, + THFloatTensor *offset, THFloatTensor *mask, + THFloatTensor *output, THFloatTensor *columns, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int deformable_group); +void modulated_deform_conv_backward(THFloatTensor *input, THFloatTensor *weight, + THFloatTensor *bias, THFloatTensor *ones, + THFloatTensor *offset, THFloatTensor *mask, + THFloatTensor *output, THFloatTensor *columns, + THFloatTensor *grad_input, THFloatTensor *grad_weight, + THFloatTensor *grad_bias, THFloatTensor *grad_offset, + THFloatTensor *grad_mask, THFloatTensor *grad_output, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int pad_h, int pad_w, + int dilation_h, int dilation_w, + int deformable_group); \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn_cuda.c b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn_cuda.c new file mode 100644 index 0000000..e3d22ab --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn_cuda.c @@ -0,0 +1,335 @@ +#include +#include "cuda/modulated_deform_im2col_cuda.h" +#include "cuda/deform_psroi_pooling_cuda.h" + +extern THCState *state; + +// author: Charles Shang +// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu + +void modulated_deform_conv_cuda_forward(THCudaTensor *input, THCudaTensor *weight, + THCudaTensor *bias, THCudaTensor *ones, + THCudaTensor *offset, THCudaTensor *mask, + THCudaTensor *output, THCudaTensor *columns, + int kernel_h, int kernel_w, + const int stride_h, const int stride_w, + const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, + const int deformable_group) +{ + THCAssertSameGPU(THCudaTensor_checkGPU(state, 8, input, weight, bias, ones, offset, mask, output, columns)); + THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous"); + THArgCheck(THCudaTensor_isContiguous(state, weight), 2, "weight tensor has to be contiguous"); + + const int batch = THCudaTensor_size(state, input, 0); + const int channels = 
THCudaTensor_size(state, input, 1); + const int height = THCudaTensor_size(state, input, 2); + const int width = THCudaTensor_size(state, input, 3); + + const int channels_out = THCudaTensor_size(state, weight, 0); + const int channels_kernel = THCudaTensor_size(state, weight, 1); + const int kernel_h_ = THCudaTensor_size(state, weight, 2); + const int kernel_w_ = THCudaTensor_size(state, weight, 3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + THError("Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel) + THError("Input shape and kernel channels wont match: (%d vs %d).", + channels, channels_kernel); + + const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (THCudaTensor_nDimension(state, ones) != 2 || + THCudaTensor_size(state, ones, 0) * THCudaTensor_size(state, ones, 1) < height_out * width_out) + { + // Resize plane and fill with ones... + THCudaTensor_resize2d(state, ones, height_out, width_out); + THCudaTensor_fill(state, ones, 1); + } + + // resize output + THCudaTensor_resize4d(state, output, batch, channels_out, height_out, width_out); + // resize temporary columns + THCudaTensor_resize2d(state, columns, channels * kernel_h * kernel_w, 1 * height_out * width_out); + + THCudaTensor *input_n = THCudaTensor_new(state); + THCudaTensor *offset_n = THCudaTensor_new(state); + THCudaTensor *mask_n = THCudaTensor_new(state); + THCudaTensor *output_n = THCudaTensor_new(state); + + for (int b = 0; b < batch; b++) + { + THCudaTensor_select(state, input_n, input, 0, b); + THCudaTensor_select(state, offset_n, offset, 0, b); + THCudaTensor_select(state, mask_n, mask, 0, b); + THCudaTensor_select(state, output_n, output, 0, b); + + // Do Bias first: + // M,N,K are dims of matrix A and B + // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) + // (N x 1) (1 x M) + long m_ = channels_out; + long n_ = height_out * width_out; + long k_ = 1; + THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f, + THCudaTensor_data(state, ones), k_, + THCudaTensor_data(state, bias), k_, 0.0f, + THCudaTensor_data(state, output_n), n_); + + modulated_deformable_im2col_cuda(THCState_getCurrentStream(state), + THCudaTensor_data(state, input_n), THCudaTensor_data(state, offset_n), + THCudaTensor_data(state, mask_n), + 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + deformable_group, THCudaTensor_data(state, columns)); + + //(k * m) x (m * n) + // Y = WC + long m = channels_out; + long n = height_out * width_out; + long k = channels * kernel_h * kernel_w; + THCudaBlas_Sgemm(state, 'n', 'n', n, m, k, 1.0f, + THCudaTensor_data(state, columns), n, + THCudaTensor_data(state, weight), k, 1.0f, + THCudaTensor_data(state, output_n), n); + } + THCudaTensor_free(state, input_n); + THCudaTensor_free(state, offset_n); + THCudaTensor_free(state, mask_n); + THCudaTensor_free(state, output_n); +} + +void modulated_deform_conv_cuda_backward(THCudaTensor *input, THCudaTensor *weight, + THCudaTensor *bias, THCudaTensor *ones, + THCudaTensor *offset, THCudaTensor *mask, + THCudaTensor *columns, + THCudaTensor *grad_input, THCudaTensor *grad_weight, + THCudaTensor *grad_bias, THCudaTensor *grad_offset, + THCudaTensor *grad_mask, THCudaTensor *grad_output, + int kernel_h, int kernel_w, + int stride_h, 
int stride_w, + int pad_h, int pad_w, + int dilation_h, int dilation_w, + int deformable_group) +{ + THCAssertSameGPU(THCudaTensor_checkGPU(state, 13, input, weight, bias, ones, offset, mask, columns, + grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output)); + THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous"); + THArgCheck(THCudaTensor_isContiguous(state, weight), 2, "weight tensor has to be contiguous"); + + const int batch = THCudaTensor_size(state, input, 0); + const int channels = THCudaTensor_size(state, input, 1); + const int height = THCudaTensor_size(state, input, 2); + const int width = THCudaTensor_size(state, input, 3); + + const int channels_out = THCudaTensor_size(state, weight, 0); + const int channels_kernel = THCudaTensor_size(state, weight, 1); + const int kernel_h_ = THCudaTensor_size(state, weight, 2); + const int kernel_w_ = THCudaTensor_size(state, weight, 3); + if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) + THError("Input shape and kernel shape wont match: (%d x %d vs %d x %d).", + kernel_h_, kernel_w, kernel_h_, kernel_w_); + if (channels != channels_kernel) + THError("Input shape and kernel channels wont match: (%d vs %d).", + channels, channels_kernel); + + const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + + if (THCudaTensor_nDimension(state, ones) != 2 || + THCudaTensor_size(state, ones, 0) * THCudaTensor_size(state, ones, 1) < height_out * width_out) + { + // Resize plane and fill with ones... + THCudaTensor_resize2d(state, ones, height_out, width_out); + THCudaTensor_fill(state, ones, 1.0f); + } + + THCudaTensor_resize4d(state, grad_input, batch, channels, height, width); + THCudaTensor_resize2d(state, columns, channels * kernel_h * kernel_w, height_out * width_out); + + THCudaTensor *input_n = THCudaTensor_new(state); + THCudaTensor *offset_n = THCudaTensor_new(state); + THCudaTensor *mask_n = THCudaTensor_new(state); + + THCudaTensor *grad_output_n = THCudaTensor_new(state); + THCudaTensor *grad_input_n = THCudaTensor_new(state); + THCudaTensor *grad_offset_n = THCudaTensor_new(state); + THCudaTensor *grad_mask_n = THCudaTensor_new(state); + + for (int b = 0; b < batch; b++) + { + THCudaTensor_select(state, input_n, input, 0, b); + THCudaTensor_select(state, offset_n, offset, 0, b); + THCudaTensor_select(state, mask_n, mask, 0, b); + THCudaTensor_select(state, grad_output_n, grad_output, 0, b); + THCudaTensor_select(state, grad_input_n, grad_input, 0, b); + THCudaTensor_select(state, grad_offset_n, grad_offset, 0, b); + THCudaTensor_select(state, grad_mask_n, grad_mask, 0, b); + + long m = channels * kernel_h * kernel_w; + long n = height_out * width_out; + long k = channels_out; + + THCudaBlas_Sgemm(state, 'n', 't', n, m, k, 1.0f, + THCudaTensor_data(state, grad_output_n), n, + THCudaTensor_data(state, weight), m, 0.0f, + THCudaTensor_data(state, columns), n); + + // gradient w.r.t. 
input coordinate data + modulated_deformable_col2im_coord_cuda(THCState_getCurrentStream(state), + THCudaTensor_data(state, columns), + THCudaTensor_data(state, input_n), + THCudaTensor_data(state, offset_n), + THCudaTensor_data(state, mask_n), + 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, + THCudaTensor_data(state, grad_offset_n), + THCudaTensor_data(state, grad_mask_n)); + // gradient w.r.t. input data + modulated_deformable_col2im_cuda(THCState_getCurrentStream(state), + THCudaTensor_data(state, columns), + THCudaTensor_data(state, offset_n), + THCudaTensor_data(state, mask_n), + 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, + THCudaTensor_data(state, grad_input_n)); + + // gradient w.r.t. weight, dWeight should accumulate across the batch and group + modulated_deformable_im2col_cuda(THCState_getCurrentStream(state), + THCudaTensor_data(state, input_n), + THCudaTensor_data(state, offset_n), + THCudaTensor_data(state, mask_n), + 1, channels, height, width, + height_out, width_out, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, + dilation_h, dilation_w, deformable_group, + THCudaTensor_data(state, columns)); + long m_ = channels_out; + long n_ = channels * kernel_h * kernel_w; + long k_ = height_out * width_out; + + THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f, + THCudaTensor_data(state, columns), k_, + THCudaTensor_data(state, grad_output_n), k_, 1.0f, + THCudaTensor_data(state, grad_weight), n_); + + // gradient w.r.t. bias + // long m_ = channels_out; + // long k__ = height_out * width_out; + THCudaBlas_Sgemv(state, + 't', + k_, m_, 1.0f, + THCudaTensor_data(state, grad_output_n), k_, + THCudaTensor_data(state, ones), 1, 1.0f, + THCudaTensor_data(state, grad_bias), 1); + } + + THCudaTensor_free(state, input_n); + THCudaTensor_free(state, offset_n); + THCudaTensor_free(state, mask_n); + + THCudaTensor_free(state, grad_output_n); + THCudaTensor_free(state, grad_input_n); + THCudaTensor_free(state, grad_offset_n); + THCudaTensor_free(state, grad_mask_n); +} + +void deform_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox, + THCudaTensor * trans, + THCudaTensor * out, THCudaTensor * top_count, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std) +{ + THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous"); + THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, bbox, trans, out, top_count)); + + const int batch = THCudaTensor_size(state, input, 0); + const int channels = THCudaTensor_size(state, input, 1); + const int height = THCudaTensor_size(state, input, 2); + const int width = THCudaTensor_size(state, input, 3); + const int channels_trans = no_trans? 
2 : THCudaTensor_size(state, trans, 1); + + const int num_bbox = THCudaTensor_size(state, bbox, 0); + if (num_bbox != THCudaTensor_size(state, out, 0)) + THError("Output shape and bbox number wont match: (%d vs %d).", + THCudaTensor_size(state, out, 0), num_bbox); + + DeformablePSROIPoolForward(THCState_getCurrentStream(state), + THCudaTensor_data(state, input), + THCudaTensor_data(state, bbox), + THCudaTensor_data(state, trans), + THCudaTensor_data(state, out), + THCudaTensor_data(state, top_count), + batch, channels, height, width, + num_bbox, + channels_trans, + no_trans, + spatial_scale, + output_dim, + group_size, + pooled_size, + part_size, + sample_per_part, + trans_std); +} + +void deform_psroi_pooling_cuda_backward(THCudaTensor * out_grad, + THCudaTensor * input, THCudaTensor * bbox, + THCudaTensor * trans, THCudaTensor * top_count, + THCudaTensor * input_grad, THCudaTensor * trans_grad, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std) +{ + THArgCheck(THCudaTensor_isContiguous(state, out_grad), 0, "out_grad tensor has to be contiguous"); + THArgCheck(THCudaTensor_isContiguous(state, input), 1, "input tensor has to be contiguous"); + THCAssertSameGPU(THCudaTensor_checkGPU(state, 7, input, bbox, trans, out_grad, top_count, + input_grad, trans_grad)); + + const int batch = THCudaTensor_size(state, input, 0); + const int channels = THCudaTensor_size(state, input, 1); + const int height = THCudaTensor_size(state, input, 2); + const int width = THCudaTensor_size(state, input, 3); + const int channels_trans = no_trans? 2 : THCudaTensor_size(state, trans, 1); + + const int num_bbox = THCudaTensor_size(state, bbox, 0); + if (num_bbox != THCudaTensor_size(state, out_grad, 0)) + THError("Output shape and bbox number wont match: (%d vs %d).", + THCudaTensor_size(state, out_grad, 0), num_bbox); + + DeformablePSROIPoolBackwardAcc(THCState_getCurrentStream(state), + THCudaTensor_data(state, out_grad), + THCudaTensor_data(state, input), + THCudaTensor_data(state, bbox), + THCudaTensor_data(state, trans), + THCudaTensor_data(state, top_count), + THCudaTensor_data(state, input_grad), + THCudaTensor_data(state, trans_grad), + batch, channels, height, width, num_bbox, + channels_trans, + no_trans, + spatial_scale, + output_dim, + group_size, + pooled_size, + part_size, + sample_per_part, + trans_std); +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn_cuda.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn_cuda.h new file mode 100644 index 0000000..a9b873a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/src/modulated_dcn_cuda.h @@ -0,0 +1,54 @@ +// #ifndef DCN_V2_CUDA +// #define DCN_V2_CUDA + +// #ifdef __cplusplus +// extern "C" +// { +// #endif + +void modulated_deform_conv_cuda_forward(THCudaTensor *input, THCudaTensor *weight, + THCudaTensor *bias, THCudaTensor *ones, + THCudaTensor *offset, THCudaTensor *mask, + THCudaTensor *output, THCudaTensor *columns, + int kernel_h, int kernel_w, + const int stride_h, const int stride_w, + const int pad_h, const int pad_w, + const int dilation_h, const int dilation_w, + const int deformable_group); +void modulated_deform_conv_cuda_backward(THCudaTensor *input, THCudaTensor *weight, + THCudaTensor *bias, THCudaTensor *ones, + THCudaTensor *offset, THCudaTensor *mask, + THCudaTensor 
*columns, + THCudaTensor *grad_input, THCudaTensor *grad_weight, + THCudaTensor *grad_bias, THCudaTensor *grad_offset, + THCudaTensor *grad_mask, THCudaTensor *grad_output, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int pad_h, int pad_w, + int dilation_h, int dilation_w, + int deformable_group); + +void deform_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox, + THCudaTensor * trans, + THCudaTensor * out, THCudaTensor * top_count, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std); + +void deform_psroi_pooling_cuda_backward(THCudaTensor * out_grad, + THCudaTensor * input, THCudaTensor * bbox, + THCudaTensor * trans, THCudaTensor * top_count, + THCudaTensor * input_grad, THCudaTensor * trans_grad, + const int no_trans, + const float spatial_scale, + const int output_dim, + const int group_size, + const int pooled_size, + const int part_size, + const int sample_per_part, + const float trans_std); diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/test.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/test.py new file mode 100644 index 0000000..53b5eca --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/test.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable + +from modules import DeformConv + +num_deformable_groups = 2 + +N, inC, inH, inW = 2, 6, 512, 512 +outC, outH, outW = 4, 512, 512 +kH, kW = 3, 3 + +conv = nn.Conv2d( + inC, + num_deformable_groups * 2 * kH * kW, + kernel_size=(kH, kW), + stride=(1, 1), + padding=(1, 1), + bias=False).cuda() + +conv_offset2d = DeformConv( + inC, + outC, (kH, kW), + stride=1, + padding=1, + num_deformable_groups=num_deformable_groups).cuda() + +inputs = Variable(torch.randn(N, inC, inH, inW).cuda(), requires_grad=True) +offset = conv(inputs) +#offset = Variable(torch.randn(N, num_deformable_groups * 2 * kH * kW, inH, inW).cuda(), requires_grad=True) +output = conv_offset2d(inputs, offset) +output.backward(output.data) +print(output.size()) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/test_modulated.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/test_modulated.py new file mode 100644 index 0000000..da03fee --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/dcn/test_modulated.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import time +import torch +import torch.nn as nn +from torch.autograd import gradcheck + +from modules.modulated_dcn import ModulatedDeformConvPack +from modules.modulated_dcn import DeformRoIPooling +from modules.modulated_dcn import ModulatedDeformRoIPoolingPack + +deformable_groups = 1 +N, inC, inH, inW = 2, 2, 4, 4 +outC = 2 +kH, kW = 3, 3 + + +def example_dconv(): + from modules.modulated_dcn import ModulatedDeformConv + input = torch.randn(2, 64, 128, 128).cuda() + # wrap all things (offset and mask) in DCN + dcn = ModulatedDeformConvPack(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2, no_bias=True).cuda() + output = dcn(input) + targert = output.new(*output.size()) + targert.data.uniform_(-0.01, 0.01) + error = (targert - output).mean() + error.backward() + print(output.shape) + +def example_dpooling(): + from 
modules.modulated_dcn import ModulatedDeformRoIPoolingPack + input = torch.randn(2, 32, 64, 64).cuda() + batch_inds = torch.randint(2, (20, 1)).cuda().float() + x = torch.randint(256, (20, 1)).cuda().float() + y = torch.randint(256, (20, 1)).cuda().float() + w = torch.randint(64, (20, 1)).cuda().float() + h = torch.randint(64, (20, 1)).cuda().float() + rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) + offset = torch.randn(20, 2, 7, 7).cuda() + input.requires_grad = True + offset.requires_grad = True + + # normal roi_align + pooling = DeformRoIPooling(spatial_scale=1.0 / 4, + pooled_size=7, + output_dim=32, + no_trans=True, + group_size=1, + trans_std=0.1).cuda() + + # deformable pooling + dpooling = DeformRoIPooling(spatial_scale=1.0 / 4, + pooled_size=7, + output_dim=32, + no_trans=False, + group_size=1, + trans_std=0.1).cuda() + + out = pooling(input, rois, offset) + dout = dpooling(input, rois, offset) + print(out.shape) + print(dout.shape) + + target_out = out.new(*out.size()) + target_out.data.uniform_(-0.01, 0.01) + target_dout = dout.new(*dout.size()) + target_dout.data.uniform_(-0.01, 0.01) + e = (target_out - out).mean() + e.backward() + e = (target_dout - dout).mean() + e.backward() + +def example_mdpooling(): + from modules.modulated_dcn import ModulatedDeformRoIPoolingPack + input = torch.randn(2, 32, 64, 64).cuda() + input.requires_grad = True + batch_inds = torch.randint(2, (20, 1)).cuda().float() + x = torch.randint(256, (20, 1)).cuda().float() + y = torch.randint(256, (20, 1)).cuda().float() + w = torch.randint(64, (20, 1)).cuda().float() + h = torch.randint(64, (20, 1)).cuda().float() + rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) + + # mdformable pooling (V2) + dpooling = ModulatedDeformRoIPoolingPack(spatial_scale=1.0 / 4, + pooled_size=7, + output_dim=32, + no_trans=False, + group_size=1, + trans_std=0.1).cuda() + + for i in range(2): + dout = dpooling(input, rois) + target = dout.new(*dout.size()) + target.data.uniform_(-0.1, 0.1) + error = (target - dout).mean() + error.backward() + print(dout.shape) + +if __name__ == '__main__': + + example_dconv() + example_dpooling() + example_mdpooling() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/frn/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/frn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/frn/frn.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/frn/frn.py new file mode 100644 index 0000000..ee1b77e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/frn/frn.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn + +class FilterResponseNormalization(nn.Module): + def __init__(self, beta, gamma, tau, eps=1e-6): + """ + Input Variables: + ---------------- + beta, gamma, tau: Variables of shape [1, C, 1, 1]. + eps: A scalar constant or learnable variable. 
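+        Example (illustrative sketch only; assumes C=64 and that beta, gamma
+        and tau are created by the caller as nn.Parameter tensors):
+            beta  = torch.nn.Parameter(torch.zeros(1, 64, 1, 1))
+            gamma = torch.nn.Parameter(torch.ones(1, 64, 1, 1))
+            tau   = torch.nn.Parameter(torch.zeros(1, 64, 1, 1))
+            frn   = FilterResponseNormalization(beta, gamma, tau)
+            # then: y = frn(x) for an NCHW tensor x with 64 channels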
+ """ + + super(FilterResponseNormalization, self).__init__() + self.beta = beta + self.gamma = gamma + self.tau = tau + self.eps = torch.Tensor([eps]) + + def forward(self, x): + """ + Input Variables: + ---------------- + x: Input tensor of shape [NxCxHxW] + """ + + n, c, h, w = x.shape + assert (self.gamma.shape[1], self.beta.shape[1], self.tau.shape[1]) == (c, c, c) + + # Compute the mean norm of activations per channel + nu2 = torch.mean(x.pow(2), (2,3), keepdims=True) + # Perform FRN + x = x * torch.rsqrt(nu2 + torch.abs(self.eps)) + # Return after applying the Offset-ReLU non-linearity + return torch.max(self.gamma*x + self.beta, self.tau) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/bn.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/bn.py new file mode 100644 index 0000000..a4118ce --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/bn.py @@ -0,0 +1,174 @@ +import torch +import torch.nn as nn +import torch.nn.functional as functional + +try: + from queue import Queue +except ImportError: + from Queue import Queue + +from lib.extensions.inplace_abn.functions import * + + +class ABN(nn.Module): + """Activated Batch Normalization + + This gathers a `BatchNorm2d` and an activation function in a single module + """ + + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): + """Creates an Activated Batch Normalization module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics as. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + activation : str + Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. 
+ """ + super(ABN, self).__init__() + self.num_features = num_features + self.affine = affine + self.eps = eps + self.momentum = momentum + self.activation = activation + self.slope = slope + if self.affine: + self.weight = nn.Parameter(torch.ones(num_features)) + self.bias = nn.Parameter(torch.zeros(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.reset_parameters() + + def reset_parameters(self): + nn.init.constant_(self.running_mean, 0) + nn.init.constant_(self.running_var, 1) + if self.affine: + nn.init.constant_(self.weight, 1) + nn.init.constant_(self.bias, 0) + + def forward(self, x): + x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, + self.training, self.momentum, self.eps) + + if self.activation == ACT_RELU: + return functional.relu(x, inplace=True) + elif self.activation == ACT_LEAKY_RELU: + return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) + elif self.activation == ACT_ELU: + return functional.elu(x, inplace=True) + else: + return x + + def __repr__(self): + rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ + ' affine={affine}, activation={activation}' + if self.activation == "leaky_relu": + rep += ', slope={slope})' + else: + rep += ')' + return rep.format(name=self.__class__.__name__, **self.__dict__) + + +class InPlaceABN(ABN): + """InPlace Activated Batch Normalization""" + + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): + """Creates an InPlace Activated Batch Normalization module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics as. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + activation : str + Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. + """ + super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) + + def forward(self, x): + return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, + self.training, self.momentum, self.eps, self.activation, self.slope) + + +class InPlaceABNSync(ABN): + """InPlace Activated Batch Normalization with cross-GPU synchronization + + This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`. + """ + + def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", + slope=0.01): + """Creates a synchronized, InPlace Activated Batch Normalization module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + devices : list of int or None + IDs of the GPUs that will run the replicas of this module. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics as. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + activation : str + Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. 
+ """ + super(InPlaceABNSync, self).__init__(num_features, eps, momentum, affine, activation, slope) + self.devices = devices if devices else list(range(torch.cuda.device_count())) + + # Initialize queues + self.worker_ids = self.devices[1:] + self.master_queue = Queue(len(self.worker_ids)) + self.worker_queues = [Queue(1) for _ in self.worker_ids] + + def forward(self, x): + if x.get_device() == self.devices[0]: + # Master mode + extra = { + "is_master": True, + "master_queue": self.master_queue, + "worker_queues": self.worker_queues, + "worker_ids": self.worker_ids + } + else: + # Worker mode + extra = { + "is_master": False, + "master_queue": self.master_queue, + "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] + } + + return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, + extra, self.training, self.momentum, self.eps, self.activation, self.slope) + + def __repr__(self): + rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ + ' affine={affine}, devices={devices}, activation={activation}' + if self.activation == "leaky_relu": + rep += ', slope={slope})' + else: + rep += ')' + return rep.format(name=self.__class__.__name__, **self.__dict__) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/functions.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/functions.py new file mode 100644 index 0000000..17d1107 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/functions.py @@ -0,0 +1,256 @@ +from os import path + +import torch.autograd as autograd +import torch.cuda.comm as comm +from torch.autograd.function import once_differentiable +from torch.utils.cpp_extension import load + +_src_path = path.join(path.dirname(path.abspath(__file__)), "src") +_backend = load(name="inplace_abn", + extra_cflags=["-O3"], + sources=[path.join(_src_path, f) for f in [ + "inplace_abn.cpp", + "inplace_abn_cpu.cpp", + "inplace_abn_cuda.cu" + ]], + extra_cuda_cflags=["--expt-extended-lambda"]) + +# Activation names +ACT_RELU = "relu" +ACT_LEAKY_RELU = "leaky_relu" +ACT_ELU = "elu" +ACT_NONE = "none" + + +def _check(fn, *args, **kwargs): + success = fn(*args, **kwargs) + if not success: + raise RuntimeError("CUDA Error encountered in {}".format(fn)) + + +def _broadcast_shape(x): + out_size = [] + for i, s in enumerate(x.size()): + if i != 1: + out_size.append(1) + else: + out_size.append(s) + return out_size + + +def _reduce(x): + if len(x.size()) == 2: + return x.sum(dim=0) + else: + n, c = x.size()[0:2] + return x.contiguous().view((n, c, -1)).sum(2).sum(0) + + +def _count_samples(x): + count = 1 + for i, s in enumerate(x.size()): + if i != 1: + count *= s + return count + + +def _act_forward(ctx, x): + if ctx.activation == ACT_LEAKY_RELU: + _backend.leaky_relu_forward(x, ctx.slope) + elif ctx.activation == ACT_ELU: + _backend.elu_forward(x) + elif ctx.activation == ACT_NONE: + pass + + +def _act_backward(ctx, x, dx): + if ctx.activation == ACT_LEAKY_RELU: + _backend.leaky_relu_backward(x, dx, ctx.slope) + elif ctx.activation == ACT_ELU: + _backend.elu_backward(x, dx) + elif ctx.activation == ACT_NONE: + pass + + +class InPlaceABN(autograd.Function): + @staticmethod + def forward(ctx, x, weight, bias, running_mean, running_var, + training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): + # Save context + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + ctx.affine = 
weight is not None and bias is not None + + # Prepare inputs + count = _count_samples(x) + x = x.contiguous() + weight = weight.contiguous() if ctx.affine else x.new_empty(0) + bias = bias.contiguous() if ctx.affine else x.new_empty(0) + + if ctx.training: + mean, var = _backend.mean_var(x) + + # Update running stats + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) + + # Mark in-place modified tensors + ctx.mark_dirty(x, running_mean, running_var) + else: + mean, var = running_mean.contiguous(), running_var.contiguous() + ctx.mark_dirty(x) + + # BN forward + activation + _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) + _act_forward(ctx, x) + + # Output + ctx.var = var + ctx.save_for_backward(x, var, weight, bias) + return x + + @staticmethod + @once_differentiable + def backward(ctx, dz): + z, var, weight, bias = ctx.saved_tensors + dz = dz.contiguous() + + # Undo activation + _act_backward(ctx, z, dz) + + if ctx.training: + edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) + else: + # TODO: implement simplified CUDA backward for inference mode + edz = dz.new_zeros(dz.size(1)) + eydz = dz.new_zeros(dz.size(1)) + + dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) + dweight = dweight if ctx.affine else None + dbias = dbias if ctx.affine else None + + return dx, dweight, dbias, None, None, None, None, None, None, None + + +class InPlaceABNSync(autograd.Function): + @classmethod + def forward(cls, ctx, x, weight, bias, running_mean, running_var, + extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): + # Save context + cls._parse_extra(ctx, extra) + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + ctx.affine = weight is not None and bias is not None + + # Prepare inputs + count = _count_samples(x) * (ctx.master_queue.maxsize + 1) + x = x.contiguous() + weight = weight.contiguous() if ctx.affine else x.new_empty(0) + bias = bias.contiguous() if ctx.affine else x.new_empty(0) + + if ctx.training: + mean, var = _backend.mean_var(x) + + if ctx.is_master: + means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)] + for _ in range(ctx.master_queue.maxsize): + mean_w, var_w = ctx.master_queue.get() + ctx.master_queue.task_done() + means.append(mean_w.unsqueeze(0)) + vars.append(var_w.unsqueeze(0)) + + means = comm.gather(means) + vars = comm.gather(vars) + + mean = means.mean(0) + var = (vars + (mean - means) ** 2).mean(0) + + tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids) + for ts, queue in zip(tensors[1:], ctx.worker_queues): + queue.put(ts) + else: + ctx.master_queue.put((mean, var)) + mean, var = ctx.worker_queue.get() + ctx.worker_queue.task_done() + + # Update running stats + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) + + # Mark in-place modified tensors + ctx.mark_dirty(x, running_mean, running_var) + else: + mean, var = running_mean.contiguous(), running_var.contiguous() + ctx.mark_dirty(x) + + # BN forward + activation + _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) + _act_forward(ctx, x) + + # Output + ctx.var = var + ctx.save_for_backward(x, var, weight, bias) + return x + + @staticmethod + @once_differentiable + def backward(ctx, dz): + z, 
var, weight, bias = ctx.saved_tensors + dz = dz.contiguous() + + # Undo activation + _act_backward(ctx, z, dz) + + if ctx.training: + edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) + + if ctx.is_master: + edzs, eydzs = [edz], [eydz] + for _ in range(len(ctx.worker_queues)): + edz_w, eydz_w = ctx.master_queue.get() + ctx.master_queue.task_done() + edzs.append(edz_w) + eydzs.append(eydz_w) + + edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1) + eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1) + + tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids) + for ts, queue in zip(tensors[1:], ctx.worker_queues): + queue.put(ts) + else: + ctx.master_queue.put((edz, eydz)) + edz, eydz = ctx.worker_queue.get() + ctx.worker_queue.task_done() + else: + edz = dz.new_zeros(dz.size(1)) + eydz = dz.new_zeros(dz.size(1)) + + dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) + dweight = dweight if ctx.affine else None + dbias = dbias if ctx.affine else None + + return dx, dweight, dbias, None, None, None, None, None, None, None, None + + @staticmethod + def _parse_extra(ctx, extra): + ctx.is_master = extra["is_master"] + if ctx.is_master: + ctx.master_queue = extra["master_queue"] + ctx.worker_queues = extra["worker_queues"] + ctx.worker_ids = extra["worker_ids"] + else: + ctx.master_queue = extra["master_queue"] + ctx.worker_queue = extra["worker_queue"] + + +inplace_abn = InPlaceABN.apply +inplace_abn_sync = InPlaceABNSync.apply + +__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/common.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/common.h new file mode 100644 index 0000000..9533470 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/common.h @@ -0,0 +1,108 @@ +#pragma once + +#include + +/* + * General settings + */ +const int WARP_SIZE = 32; +const int MAX_BLOCK_SIZE = 512; + +template +struct Pair { + T v1, v2; + __device__ Pair() {} + __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} + __device__ Pair(T v) : v1(v), v2(v) {} + __device__ Pair(int v) : v1(v), v2(v) {} + __device__ Pair &operator+=(const Pair &a) { + v1 += a.v1; + v2 += a.v2; + return *this; + } +}; + +/* + * Utility functions + */ +template +__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, + unsigned int mask = 0xffffffff) { +#if CUDART_VERSION >= 9000 + return __shfl_xor_sync(mask, value, laneMask, width); +#else + return __shfl_xor(value, laneMask, width); +#endif +} + +__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } + +static int getNumThreads(int nElem) { + int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; + for (int i = 0; i != 5; ++i) { + if (nElem <= threadSizes[i]) { + return threadSizes[i]; + } + } + return MAX_BLOCK_SIZE; +} + +template +static __device__ __forceinline__ T warpSum(T val) { +#if __CUDA_ARCH__ >= 300 + for (int i = 0; i < getMSB(WARP_SIZE); ++i) { + val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); + } +#else + __shared__ T values[MAX_BLOCK_SIZE]; + values[threadIdx.x] = val; + __threadfence_block(); + const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; + for (int i = 1; i < WARP_SIZE; i++) { + val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; + } +#endif + return val; +} + +template +static __device__ 
__forceinline__ Pair warpSum(Pair value) { + value.v1 = warpSum(value.v1); + value.v2 = warpSum(value.v2); + return value; +} + +template +__device__ T reduce(Op op, int plane, int N, int C, int S) { + T sum = (T)0; + for (int batch = 0; batch < N; ++batch) { + for (int x = threadIdx.x; x < S; x += blockDim.x) { + sum += op(batch, plane, x); + } + } + + // sum over NumThreads within a warp + sum = warpSum(sum); + + // 'transpose', and reduce within warp again + __shared__ T shared[32]; + __syncthreads(); + if (threadIdx.x % WARP_SIZE == 0) { + shared[threadIdx.x / WARP_SIZE] = sum; + } + if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { + // zero out the other entries in shared + shared[threadIdx.x] = (T)0; + } + __syncthreads(); + if (threadIdx.x / WARP_SIZE == 0) { + sum = warpSum(shared[threadIdx.x]); + if (threadIdx.x == 0) { + shared[0] = sum; + } + } + __syncthreads(); + + // Everyone picks it up, should be broadcast into the whole gradInput + return shared[0]; +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn.cpp b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn.cpp new file mode 100644 index 0000000..e0c7d46 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn.cpp @@ -0,0 +1,75 @@ +#include + +#include + +#include "inplace_abn.h" + +std::vector mean_var(at::Tensor x) { + if (x.is_cuda()) { + return mean_var_cuda(x); + } else { + return mean_var_cpu(x); + } +} + +at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + if (x.is_cuda()) { + return forward_cuda(x, mean, var, weight, bias, affine, eps); + } else { + return forward_cpu(x, mean, var, weight, bias, affine, eps); + } +} + +std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + if (z.is_cuda()) { + return edz_eydz_cuda(z, dz, weight, bias, affine, eps); + } else { + return edz_eydz_cpu(z, dz, weight, bias, affine, eps); + } +} + +std::vector backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + if (z.is_cuda()) { + return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); + } else { + return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); + } +} + +void leaky_relu_forward(at::Tensor z, float slope) { + at::leaky_relu_(z, slope); +} + +void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { + if (z.is_cuda()) { + return leaky_relu_backward_cuda(z, dz, slope); + } else { + return leaky_relu_backward_cpu(z, dz, slope); + } +} + +void elu_forward(at::Tensor z) { + at::elu_(z); +} + +void elu_backward(at::Tensor z, at::Tensor dz) { + if (z.is_cuda()) { + return elu_backward_cuda(z, dz); + } else { + return elu_backward_cpu(z, dz); + } +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("mean_var", &mean_var, "Mean and variance computation"); + m.def("forward", &forward, "In-place forward computation"); + m.def("edz_eydz", &edz_eydz, "First part of backward computation"); + m.def("backward", &backward, "Second part of backward computation"); + m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); + m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); + m.def("elu_forward", &elu_forward, "Elu 
forward computation"); + m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn.h new file mode 100644 index 0000000..ee4019d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +#include + +std::vector mean_var_cpu(at::Tensor x); +std::vector mean_var_cuda(at::Tensor x); + +at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); + +std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps); + +std::vector backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps); +std::vector backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps); + +void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); +void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); + +void elu_backward_cpu(at::Tensor z, at::Tensor dz); +void elu_backward_cuda(at::Tensor z, at::Tensor dz); \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn_cpu.cpp b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn_cpu.cpp new file mode 100644 index 0000000..d00fc92 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn_cpu.cpp @@ -0,0 +1,120 @@ +#include + +#include + +#include "inplace_abn.h" + +at::Tensor reduce_sum(at::Tensor x) { + if (x.ndimension() == 2) { + return x.sum(0); + } else { + auto x_view = x.view({x.size(0), x.size(1), -1}); + return x_view.sum(-1).sum(0); + } +} + +at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { + if (x.ndimension() == 2) { + return v; + } else { + std::vector broadcast_size = {1, -1}; + for (int64_t i = 2; i < x.ndimension(); ++i) + broadcast_size.push_back(1); + + return v.view(broadcast_size); + } +} + +int64_t count(at::Tensor x) { + int64_t count = x.size(0); + for (int64_t i = 2; i < x.ndimension(); ++i) + count *= x.size(i); + + return count; +} + +at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { + if (affine) { + return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); + } else { + return z; + } +} + +std::vector mean_var_cpu(at::Tensor x) { + auto num = count(x); + auto mean = reduce_sum(x) / num; + auto diff = x - broadcast_to(mean, x); + auto var = reduce_sum(diff.pow(2)) / num; + + return {mean, var}; +} + +at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + auto gamma = affine ? 
at::abs(weight) + eps : at::ones_like(var); + auto mul = at::rsqrt(var + eps) * gamma; + + x.sub_(broadcast_to(mean, x)); + x.mul_(broadcast_to(mul, x)); + if (affine) x.add_(broadcast_to(bias, x)); + + return x; +} + +std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + auto edz = reduce_sum(dz); + auto y = invert_affine(z, weight, bias, affine, eps); + auto eydz = reduce_sum(y * dz); + + return {edz, eydz}; +} + +std::vector backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + auto y = invert_affine(z, weight, bias, affine, eps); + auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); + + auto num = count(z); + auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); + + auto dweight = at::empty(z.type(), {0}); + auto dbias = at::empty(z.type(), {0}); + if (affine) { + dweight = eydz * at::sign(weight); + dbias = edz; + } + + return {dx, dweight, dbias}; +} + +void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { + AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { + int64_t count = z.numel(); + auto *_z = z.data(); + auto *_dz = dz.data(); + + for (int64_t i = 0; i < count; ++i) { + if (_z[i] < 0) { + _z[i] *= 1 / slope; + _dz[i] *= slope; + } + } + })); +} + +void elu_backward_cpu(at::Tensor z, at::Tensor dz) { + AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { + int64_t count = z.numel(); + auto *_z = z.data(); + auto *_dz = dz.data(); + + for (int64_t i = 0; i < count; ++i) { + if (_z[i] < 0) { + _z[i] = log1p(_z[i]); + _dz[i] *= (_z[i] + 1.f); + } + } + })); +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn_cuda.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn_cuda.cu new file mode 100644 index 0000000..9d4719c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn/src/inplace_abn_cuda.cu @@ -0,0 +1,349 @@ +#include + +#include +#include + +#include + +#include "common.h" +#include "inplace_abn.h" + +// Checks +#ifndef AT_CHECK + #define AT_CHECK AT_ASSERT +#endif +#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous") +#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) + +// Utilities +void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { + num = x.size(0); + chn = x.size(1); + sp = 1; + for (int64_t i = 2; i < x.ndimension(); ++i) + sp *= x.size(i); +} + +// Operations for reduce +template +struct SumOp { + __device__ SumOp(const T *t, int c, int s) + : tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ T operator()(int batch, int plane, int n) { + return tensor[(batch * chn + plane) * sp + n]; + } + const T *tensor; + const int chn; + const int sp; +}; + +template +struct VarOp { + __device__ VarOp(T m, const T *t, int c, int s) + : mean(m), tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ T operator()(int batch, int plane, int n) { + T val = tensor[(batch * chn + plane) * sp + n]; + return (val - mean) * (val - mean); + } + const T mean; + const T *tensor; + const int chn; + const int sp; +}; + +template +struct GradOp { + __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, 
int c, int s) + : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} + __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { + T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; + T _dz = dz[(batch * chn + plane) * sp + n]; + return Pair(_dz, _y * _dz); + } + const T weight; + const T bias; + const T *z; + const T *dz; + const int chn; + const int sp; +}; + +/*********** + * mean_var + ***********/ + +template +__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) { + int plane = blockIdx.x; + T norm = T(1) / T(num * sp); + + T _mean = reduce>(SumOp(x, chn, sp), plane, num, chn, sp) * norm; + __syncthreads(); + T _var = reduce>(VarOp(_mean, x, chn, sp), plane, num, chn, sp) * norm; + + if (threadIdx.x == 0) { + mean[plane] = _mean; + var[plane] = _var; + } +} + +std::vector mean_var_cuda(at::Tensor x) { + CHECK_INPUT(x); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Prepare output tensors + auto mean = at::empty(x.type(), {chn}); + auto var = at::empty(x.type(), {chn}); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { + mean_var_kernel<<>>( + x.data(), + mean.data(), + var.data(), + num, chn, sp); + })); + + return {mean, var}; +} + +/********** + * forward + **********/ + +template +__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, + bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _mean = mean[plane]; + T _var = var[plane]; + T _weight = affine ? abs(weight[plane]) + eps : T(1); + T _bias = affine ? bias[plane] : T(0); + + T mul = rsqrt(_var + eps) * _weight; + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + T _x = x[(batch * chn + plane) * sp + n]; + T _y = (_x - _mean) * mul + _bias; + + x[(batch * chn + plane) * sp + n] = _y; + } + } +} + +at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_INPUT(x); + CHECK_INPUT(mean); + CHECK_INPUT(var); + CHECK_INPUT(weight); + CHECK_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { + forward_kernel<<>>( + x.data(), + mean.data(), + var.data(), + weight.data(), + bias.data(), + affine, eps, num, chn, sp); + })); + + return x; +} + +/*********** + * edz_eydz + ***********/ + +template +__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, + T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _weight = affine ? abs(weight[plane]) + eps : 1.f; + T _bias = affine ? 
bias[plane] : 0.f; + + Pair res = reduce, GradOp>(GradOp(_weight, _bias, z, dz, chn, sp), plane, num, chn, sp); + __syncthreads(); + + if (threadIdx.x == 0) { + edz[plane] = res.v1; + eydz[plane] = res.v2; + } +} + +std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_INPUT(z); + CHECK_INPUT(dz); + CHECK_INPUT(weight); + CHECK_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto edz = at::empty(z.type(), {chn}); + auto eydz = at::empty(z.type(), {chn}); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] { + edz_eydz_kernel<<>>( + z.data(), + dz.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + affine, eps, num, chn, sp); + })); + + return {edz, eydz}; +} + +/*********** + * backward + ***********/ + +template +__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz, + const T *eydz, T *dx, T *dweight, T *dbias, + bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _weight = affine ? abs(weight[plane]) + eps : 1.f; + T _bias = affine ? bias[plane] : 0.f; + T _var = var[plane]; + T _edz = edz[plane]; + T _eydz = eydz[plane]; + + T _mul = _weight * rsqrt(_var + eps); + T count = T(num * sp); + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + T _dz = dz[(batch * chn + plane) * sp + n]; + T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight; + + dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul; + } + } + + if (threadIdx.x == 0) { + if (affine) { + dweight[plane] = weight[plane] > 0 ? 
_eydz : -_eydz; + dbias[plane] = _edz; + } + } +} + +std::vector backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + CHECK_INPUT(z); + CHECK_INPUT(dz); + CHECK_INPUT(var); + CHECK_INPUT(weight); + CHECK_INPUT(bias); + CHECK_INPUT(edz); + CHECK_INPUT(eydz); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto dx = at::zeros_like(z); + auto dweight = at::zeros_like(weight); + auto dbias = at::zeros_like(bias); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] { + backward_kernel<<>>( + z.data(), + dz.data(), + var.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + dx.data(), + dweight.data(), + dbias.data(), + affine, eps, num, chn, sp); + })); + + return {dx, dweight, dbias}; +} + +/************** + * activations + **************/ + +template +inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { + // Create thrust pointers + thrust::device_ptr th_z = thrust::device_pointer_cast(z); + thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); + + thrust::transform_if(th_dz, th_dz + count, th_z, th_dz, + [slope] __device__ (const T& dz) { return dz * slope; }, + [] __device__ (const T& z) { return z < 0; }); + thrust::transform_if(th_z, th_z + count, th_z, + [slope] __device__ (const T& z) { return z / slope; }, + [] __device__ (const T& z) { return z < 0; }); +} + +void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) { + CHECK_INPUT(z); + CHECK_INPUT(dz); + + int64_t count = z.numel(); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] { + leaky_relu_backward_impl(z.data(), dz.data(), slope, count); + })); +} + +template +inline void elu_backward_impl(T *z, T *dz, int64_t count) { + // Create thrust pointers + thrust::device_ptr th_z = thrust::device_pointer_cast(z); + thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); + + thrust::transform_if(th_dz, th_dz + count, th_z, th_z, th_dz, + [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); }, + [] __device__ (const T& z) { return z < 0; }); + thrust::transform_if(th_z, th_z + count, th_z, + [] __device__ (const T& z) { return log1p(z); }, + [] __device__ (const T& z) { return z < 0; }); +} + +void elu_backward_cuda(at::Tensor z, at::Tensor dz) { + CHECK_INPUT(z); + CHECK_INPUT(dz); + + int64_t count = z.numel(); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] { + elu_backward_impl(z.data(), dz.data(), count); + })); +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/__init__.py new file mode 100644 index 0000000..692bcf4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/__init__.py @@ -0,0 +1,2 @@ +from .bn import ABN, InPlaceABN, InPlaceABNSync +from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/bn.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/bn.py new file mode 100644 index 0000000..6196e22 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/bn.py @@ -0,0 +1,129 @@ +import torch +import torch.nn as nn +import torch.nn.functional as functional + +try: + from queue import Queue 
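+    # Python 3 module name; the except branch below falls back to the Python 2 `Queue` module.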
+except ImportError: + from Queue import Queue + +from lib.extensions.inplace_abn_1.functions import * + +class ABN(nn.Module): + """Activated Batch Normalization + + This gathers a `BatchNorm2d` and an activation function in a single module + """ + + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): + """Creates an Activated Batch Normalization module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics as. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + activation : str + Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. + """ + super(ABN, self).__init__() + self.num_features = num_features + self.affine = affine + self.eps = eps + self.momentum = momentum + self.activation = activation + self.slope = slope + if self.affine: + self.weight = nn.Parameter(torch.ones(num_features)) + self.bias = nn.Parameter(torch.zeros(num_features)) + else: + self.register_parameter('weight', None) + self.register_parameter('bias', None) + self.register_buffer('running_mean', torch.zeros(num_features)) + self.register_buffer('running_var', torch.ones(num_features)) + self.reset_parameters() + + def reset_parameters(self): + nn.init.constant_(self.running_mean, 0) + nn.init.constant_(self.running_var, 1) + if self.affine: + nn.init.constant_(self.weight, 1) + nn.init.constant_(self.bias, 0) + + def forward(self, x): + x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, + self.training, self.momentum, self.eps) + + if self.activation == ACT_RELU: + return functional.relu(x, inplace=True) + elif self.activation == ACT_LEAKY_RELU: + return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) + elif self.activation == ACT_ELU: + return functional.elu(x, inplace=True) + else: + return x + + def __repr__(self): + rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ + ' affine={affine}, activation={activation}' + if self.activation == "leaky_relu": + rep += ', slope={slope})' + else: + rep += ')' + return rep.format(name=self.__class__.__name__, **self.__dict__) + + +class InPlaceABN(ABN): + """InPlace Activated Batch Normalization""" + + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): + """Creates an InPlace Activated Batch Normalization module + + Parameters + ---------- + num_features : int + Number of feature channels in the input and output. + eps : float + Small constant to prevent numerical issues. + momentum : float + Momentum factor applied to compute running statistics as. + affine : bool + If `True` apply learned scale and shift transformation after normalization. + activation : str + Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. 
+ """ + super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) + + def forward(self, x): + return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, + self.training, self.momentum, self.eps, self.activation, self.slope) + + +class InPlaceABNSync(ABN): + """InPlace Activated Batch Normalization with cross-GPU synchronization + This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`. + """ + + def forward(self, x): + return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, + self.training, self.momentum, self.eps, self.activation, self.slope) + + def __repr__(self): + rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ + ' affine={affine}, activation={activation}' + if self.activation == "leaky_relu": + rep += ', slope={slope})' + else: + rep += ')' + return rep.format(name=self.__class__.__name__, **self.__dict__) + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/functions.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/functions.py new file mode 100644 index 0000000..30df2c7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/functions.py @@ -0,0 +1,242 @@ +from os import path +import torch +import torch.distributed as dist +import torch.autograd as autograd +import torch.cuda.comm as comm +from torch.autograd.function import once_differentiable +from torch.utils.cpp_extension import load + +_src_path = path.join(path.dirname(path.abspath(__file__)), "src") +_backend = load(name="inplace_abn", + extra_cflags=["-O3"], + sources=[path.join(_src_path, f) for f in [ + "inplace_abn.cpp", + "inplace_abn_cpu.cpp", + "inplace_abn_cuda.cu", + "inplace_abn_cuda_half.cu" + ]], + extra_cuda_cflags=["--expt-extended-lambda"]) + +# Activation names +ACT_RELU = "relu" +ACT_LEAKY_RELU = "leaky_relu" +ACT_ELU = "elu" +ACT_NONE = "none" + + +def _check(fn, *args, **kwargs): + success = fn(*args, **kwargs) + if not success: + raise RuntimeError("CUDA Error encountered in {}".format(fn)) + + +def _broadcast_shape(x): + out_size = [] + for i, s in enumerate(x.size()): + if i != 1: + out_size.append(1) + else: + out_size.append(s) + return out_size + + +def _reduce(x): + if len(x.size()) == 2: + return x.sum(dim=0) + else: + n, c = x.size()[0:2] + return x.contiguous().view((n, c, -1)).sum(2).sum(0) + + +def _count_samples(x): + count = 1 + for i, s in enumerate(x.size()): + if i != 1: + count *= s + return count + + +def _act_forward(ctx, x): + if ctx.activation == ACT_LEAKY_RELU: + _backend.leaky_relu_forward(x, ctx.slope) + elif ctx.activation == ACT_ELU: + _backend.elu_forward(x) + elif ctx.activation == ACT_NONE: + pass + + +def _act_backward(ctx, x, dx): + if ctx.activation == ACT_LEAKY_RELU: + _backend.leaky_relu_backward(x, dx, ctx.slope) + elif ctx.activation == ACT_ELU: + _backend.elu_backward(x, dx) + elif ctx.activation == ACT_NONE: + pass + + +class InPlaceABN(autograd.Function): + @staticmethod + def forward(ctx, x, weight, bias, running_mean, running_var, + training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): + # Save context + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + ctx.affine = weight is not None and bias is not None + + # Prepare inputs + count = _count_samples(x) + x = x.contiguous() + weight = weight.contiguous() if ctx.affine else 
x.new_empty(0, dtype=torch.float32) + bias = bias.contiguous() if ctx.affine else x.new_empty(0, dtype=torch.float32) + + if ctx.training: + mean, var = _backend.mean_var(x) + + # Update running stats + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) + + # Mark in-place modified tensors + ctx.mark_dirty(x, running_mean, running_var) + else: + mean, var = running_mean.contiguous(), running_var.contiguous() + ctx.mark_dirty(x) + + # BN forward + activation + _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) + _act_forward(ctx, x) + + # Output + ctx.var = var + ctx.save_for_backward(x, var, weight, bias) + return x + + @staticmethod + @once_differentiable + def backward(ctx, dz): + z, var, weight, bias = ctx.saved_tensors + dz = dz.contiguous() + + # Undo activation + _act_backward(ctx, z, dz) + + if ctx.training: + edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) + else: + # TODO: implement simplified CUDA backward for inference mode + edz = dz.new_zeros(dz.size(1)) + eydz = dz.new_zeros(dz.size(1)) + + dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) + # dweight = eydz * weight.sign() if ctx.affine else None + dweight = eydz if ctx.affine else None + if dweight is not None: + dweight[weight < 0] *= -1 + dbias = edz if ctx.affine else None + + return dx, dweight, dbias, None, None, None, None, None, None, None + + +class InPlaceABNSync(autograd.Function): + @classmethod + def forward(cls, ctx, x, weight, bias, running_mean, running_var, + training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True): + # Save context + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + ctx.affine = weight is not None and bias is not None + + # Prepare inputs + ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1 + + # count = _count_samples(x) + batch_size = x.new_tensor([x.shape[0]], dtype=torch.long) + + x = x.contiguous() + weight = weight.contiguous() if ctx.affine else x.new_empty(0, dtype=torch.float32) + bias = bias.contiguous() if ctx.affine else x.new_empty(0, dtype=torch.float32) + + if ctx.training: + mean, var = _backend.mean_var(x) + if ctx.world_size > 1: + # get global batch size + if equal_batches: + batch_size *= ctx.world_size + else: + dist.all_reduce(batch_size, dist.ReduceOp.SUM) + + ctx.factor = x.shape[0] / float(batch_size.item()) + + mean_all = mean.clone() * ctx.factor + dist.all_reduce(mean_all, dist.ReduceOp.SUM) + + var_all = (var + (mean - mean_all) ** 2) * ctx.factor + dist.all_reduce(var_all, dist.ReduceOp.SUM) + + mean = mean_all + var = var_all + + # Update running stats + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) + count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1] + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1))) + + # Mark in-place modified tensors + ctx.mark_dirty(x, running_mean, running_var) + else: + mean, var = running_mean.contiguous(), running_var.contiguous() + ctx.mark_dirty(x) + + # BN forward + activation + _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) + _act_forward(ctx, x) + + # Output + ctx.var = var + ctx.save_for_backward(x, var, weight, bias) + return x + + @staticmethod + @once_differentiable + def backward(ctx, dz): + z, var, weight, bias = 
ctx.saved_tensors + dz = dz.contiguous() + + # Undo activation + _act_backward(ctx, z, dz) + + if ctx.training: + edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) + edz_local = edz.clone() + eydz_local = eydz.clone() + + if ctx.world_size > 1: + edz *= ctx.factor + dist.all_reduce(edz, dist.ReduceOp.SUM) + + eydz *= ctx.factor + dist.all_reduce(eydz, dist.ReduceOp.SUM) + else: + edz_local = edz = dz.new_zeros(dz.size(1)) + eydz_local = eydz = dz.new_zeros(dz.size(1)) + + dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) + # dweight = eydz_local * weight.sign() if ctx.affine else None + dweight = eydz_local if ctx.affine else None + if dweight is not None: + dweight[weight < 0] *= -1 + dbias = edz_local if ctx.affine else None + + return dx, dweight, dbias, None, None, None, None, None, None, None + + +inplace_abn = InPlaceABN.apply +inplace_abn_sync = InPlaceABNSync.apply + +__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/misc.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/misc.py new file mode 100644 index 0000000..3c50b69 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/misc.py @@ -0,0 +1,21 @@ +import torch.nn as nn +import torch +import torch.distributed as dist + +class GlobalAvgPool2d(nn.Module): + def __init__(self): + """Global average pooling over the input's spatial dimensions""" + super(GlobalAvgPool2d, self).__init__() + + def forward(self, inputs): + in_size = inputs.size() + return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) + +class SingleGPU(nn.Module): + def __init__(self, module): + super(SingleGPU, self).__init__() + self.module=module + + def forward(self, input): + return self.module(input.cuda(non_blocking=True)) + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/checks.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/checks.h new file mode 100644 index 0000000..e761a6f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/checks.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT +#ifndef AT_CHECK +#define AT_CHECK AT_ASSERT +#endif + +#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") +#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") + +#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) +#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn.cpp b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn.cpp new file mode 100644 index 0000000..0a6b112 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn.cpp @@ -0,0 +1,95 @@ +#include + +#include + +#include "inplace_abn.h" + +std::vector mean_var(at::Tensor x) { + if (x.is_cuda()) { + if (x.type().scalarType() == at::ScalarType::Half) { + return mean_var_cuda_h(x); + } else { + return mean_var_cuda(x); + } + } else { + return mean_var_cpu(x); + } +} + +at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, 
at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + if (x.is_cuda()) { + if (x.type().scalarType() == at::ScalarType::Half) { + return forward_cuda_h(x, mean, var, weight, bias, affine, eps); + } else { + return forward_cuda(x, mean, var, weight, bias, affine, eps); + } + } else { + return forward_cpu(x, mean, var, weight, bias, affine, eps); + } +} + +std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + if (z.is_cuda()) { + if (z.type().scalarType() == at::ScalarType::Half) { + return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); + } else { + return edz_eydz_cuda(z, dz, weight, bias, affine, eps); + } + } else { + return edz_eydz_cpu(z, dz, weight, bias, affine, eps); + } +} + +at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + if (z.is_cuda()) { + if (z.type().scalarType() == at::ScalarType::Half) { + return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); + } else { + return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); + } + } else { + return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); + } +} + +void leaky_relu_forward(at::Tensor z, float slope) { + at::leaky_relu_(z, slope); +} + +void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { + if (z.is_cuda()) { + if (z.type().scalarType() == at::ScalarType::Half) { + return leaky_relu_backward_cuda_h(z, dz, slope); + } else { + return leaky_relu_backward_cuda(z, dz, slope); + } + } else { + return leaky_relu_backward_cpu(z, dz, slope); + } +} + +void elu_forward(at::Tensor z) { + at::elu_(z); +} + +void elu_backward(at::Tensor z, at::Tensor dz) { + if (z.is_cuda()) { + return elu_backward_cuda(z, dz); + } else { + return elu_backward_cpu(z, dz); + } +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("mean_var", &mean_var, "Mean and variance computation"); + m.def("forward", &forward, "In-place forward computation"); + m.def("edz_eydz", &edz_eydz, "First part of backward computation"); + m.def("backward", &backward, "Second part of backward computation"); + m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); + m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); + m.def("elu_forward", &elu_forward, "Elu forward computation"); + m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn.h new file mode 100644 index 0000000..17afd11 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn.h @@ -0,0 +1,88 @@ +#pragma once + +#include + +#include + +std::vector mean_var_cpu(at::Tensor x); +std::vector mean_var_cuda(at::Tensor x); +std::vector mean_var_cuda_h(at::Tensor x); + +at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps); + +std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, 
at::Tensor bias, + bool affine, float eps); +std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps); +std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps); + +at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps); +at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps); +at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps); + +void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); +void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); +void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); + +void elu_backward_cpu(at::Tensor z, at::Tensor dz); +void elu_backward_cuda(at::Tensor z, at::Tensor dz); + +static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { + num = x.size(0); + chn = x.size(1); + sp = 1; + for (int64_t i = 2; i < x.ndimension(); ++i) + sp *= x.size(i); +} + +/* + * Specialized CUDA reduction functions for BN + */ +#ifdef __CUDACC__ + +#include "utils/cuda.cuh" + +template +__device__ T reduce(Op op, int plane, int N, int S) { + T sum = (T)0; + for (int batch = 0; batch < N; ++batch) { + for (int x = threadIdx.x; x < S; x += blockDim.x) { + sum += op(batch, plane, x); + } + } + + // sum over NumThreads within a warp + sum = warpSum(sum); + + // 'transpose', and reduce within warp again + __shared__ T shared[32]; + __syncthreads(); + if (threadIdx.x % WARP_SIZE == 0) { + shared[threadIdx.x / WARP_SIZE] = sum; + } + if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { + // zero out the other entries in shared + shared[threadIdx.x] = (T)0; + } + __syncthreads(); + if (threadIdx.x / WARP_SIZE == 0) { + sum = warpSum(shared[threadIdx.x]); + if (threadIdx.x == 0) { + shared[0] = sum; + } + } + __syncthreads(); + + // Everyone picks it up, should be broadcast into the whole gradInput + return shared[0]; +} +#endif diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cpu.cpp b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cpu.cpp new file mode 100644 index 0000000..ffc6d38 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cpu.cpp @@ -0,0 +1,119 @@ +#include + +#include + +#include "utils/checks.h" +#include "inplace_abn.h" + +at::Tensor reduce_sum(at::Tensor x) { + if (x.ndimension() == 2) { + return x.sum(0); + } else { + auto x_view = x.view({x.size(0), x.size(1), -1}); + return x_view.sum(-1).sum(0); + } +} + +at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { + if (x.ndimension() == 2) { + return v; + } else { + std::vector broadcast_size = {1, -1}; + for (int64_t i = 2; i < x.ndimension(); ++i) + broadcast_size.push_back(1); + + return v.view(broadcast_size); + } +} + +int64_t count(at::Tensor x) { + int64_t count = x.size(0); + for (int64_t i = 2; i < x.ndimension(); ++i) + count *= x.size(i); + + return count; +} + +at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { + if (affine) { + return (z - broadcast_to(bias, z)) / 
broadcast_to(at::abs(weight) + eps, z); + } else { + return z; + } +} + +std::vector mean_var_cpu(at::Tensor x) { + auto num = count(x); + auto mean = reduce_sum(x) / num; + auto diff = x - broadcast_to(mean, x); + auto var = reduce_sum(diff.pow(2)) / num; + + return {mean, var}; +} + +at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var); + auto mul = at::rsqrt(var + eps) * gamma; + + x.sub_(broadcast_to(mean, x)); + x.mul_(broadcast_to(mul, x)); + if (affine) x.add_(broadcast_to(bias, x)); + + return x; +} + +std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + auto edz = reduce_sum(dz); + auto y = invert_affine(z, weight, bias, affine, eps); + auto eydz = reduce_sum(y * dz); + + return {edz, eydz}; +} + +at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + auto y = invert_affine(z, weight, bias, affine, eps); + auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); + + auto num = count(z); + auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); + return dx; +} + +void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { + CHECK_CPU_INPUT(z); + CHECK_CPU_INPUT(dz); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { + int64_t count = z.numel(); + auto *_z = z.data(); + auto *_dz = dz.data(); + + for (int64_t i = 0; i < count; ++i) { + if (_z[i] < 0) { + _z[i] *= 1 / slope; + _dz[i] *= slope; + } + } + })); +} + +void elu_backward_cpu(at::Tensor z, at::Tensor dz) { + CHECK_CPU_INPUT(z); + CHECK_CPU_INPUT(dz); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { + int64_t count = z.numel(); + auto *_z = z.data(); + auto *_dz = dz.data(); + + for (int64_t i = 0; i < count; ++i) { + if (_z[i] < 0) { + _z[i] = log1p(_z[i]); + _dz[i] *= (_z[i] + 1.f); + } + } + })); +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cuda.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cuda.cu new file mode 100644 index 0000000..b157b06 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cuda.cu @@ -0,0 +1,333 @@ +#include + +#include +#include + +#include + +#include "utils/checks.h" +#include "utils/cuda.cuh" +#include "inplace_abn.h" + +#include + +// Operations for reduce +template +struct SumOp { + __device__ SumOp(const T *t, int c, int s) + : tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ T operator()(int batch, int plane, int n) { + return tensor[(batch * chn + plane) * sp + n]; + } + const T *tensor; + const int chn; + const int sp; +}; + +template +struct VarOp { + __device__ VarOp(T m, const T *t, int c, int s) + : mean(m), tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ T operator()(int batch, int plane, int n) { + T val = tensor[(batch * chn + plane) * sp + n]; + return (val - mean) * (val - mean); + } + const T mean; + const T *tensor; + const int chn; + const int sp; +}; + +template +struct GradOp { + __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s) + : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} + __device__ __forceinline__ Pair 
operator()(int batch, int plane, int n) { + T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; + T _dz = dz[(batch * chn + plane) * sp + n]; + return Pair(_dz, _y * _dz); + } + const T weight; + const T bias; + const T *z; + const T *dz; + const int chn; + const int sp; +}; + +/*********** + * mean_var + ***********/ + +template +__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) { + int plane = blockIdx.x; + T norm = T(1) / T(num * sp); + + T _mean = reduce>(SumOp(x, chn, sp), plane, num, sp) * norm; + __syncthreads(); + T _var = reduce>(VarOp(_mean, x, chn, sp), plane, num, sp) * norm; + + if (threadIdx.x == 0) { + mean[plane] = _mean; + var[plane] = _var; + } +} + +std::vector mean_var_cuda(at::Tensor x) { + CHECK_CUDA_INPUT(x); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Prepare output tensors + auto mean = at::empty({chn}, x.options()); + auto var = at::empty({chn}, x.options()); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { + mean_var_kernel<<>>( + x.data(), + mean.data(), + var.data(), + num, chn, sp); + })); + + return {mean, var}; +} + +/********** + * forward + **********/ + +template +__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, + bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _mean = mean[plane]; + T _var = var[plane]; + T _weight = affine ? abs(weight[plane]) + eps : T(1); + T _bias = affine ? bias[plane] : T(0); + + T mul = rsqrt(_var + eps) * _weight; + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + T _x = x[(batch * chn + plane) * sp + n]; + T _y = (_x - _mean) * mul + _bias; + + x[(batch * chn + plane) * sp + n] = _y; + } + } +} + +at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(x); + CHECK_CUDA_INPUT(mean); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { + forward_kernel<<>>( + x.data(), + mean.data(), + var.data(), + weight.data(), + bias.data(), + affine, eps, num, chn, sp); + })); + + return x; +} + +/*********** + * edz_eydz + ***********/ + +template +__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, + T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _weight = affine ? abs(weight[plane]) + eps : 1.f; + T _bias = affine ? 
bias[plane] : 0.f; + + Pair res = reduce, GradOp>(GradOp(_weight, _bias, z, dz, chn, sp), plane, num, sp); + __syncthreads(); + + if (threadIdx.x == 0) { + edz[plane] = res.v1; + eydz[plane] = res.v2; + } +} + +std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto edz = at::empty({chn}, z.options()); + auto eydz = at::empty({chn}, z.options()); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] { + edz_eydz_kernel<<>>( + z.data(), + dz.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + affine, eps, num, chn, sp); + })); + + return {edz, eydz}; +} + +/*********** + * backward + ***********/ + +template +__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz, + const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + T _weight = affine ? abs(weight[plane]) + eps : 1.f; + T _bias = affine ? bias[plane] : 0.f; + T _var = var[plane]; + T _edz = edz[plane]; + T _eydz = eydz[plane]; + + T _mul = _weight * rsqrt(_var + eps); + T count = T(num * sp); + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + T _dz = dz[(batch * chn + plane) * sp + n]; + T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight; + + dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul; + } + } +} + +at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + CHECK_CUDA_INPUT(edz); + CHECK_CUDA_INPUT(eydz); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto dx = at::zeros_like(z); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] { + backward_kernel<<>>( + z.data(), + dz.data(), + var.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + dx.data(), + affine, eps, num, chn, sp); + })); + + return dx; +} + +/************** + * activations + **************/ + +template +inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { + // Create thrust pointers + thrust::device_ptr th_z = thrust::device_pointer_cast(z); + thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); + + auto stream = at::cuda::getCurrentCUDAStream(); + thrust::transform_if(thrust::cuda::par.on(stream), + th_dz, th_dz + count, th_z, th_dz, + [slope] __device__ (const T& dz) { return dz * slope; }, + [] __device__ (const T& z) { return z < 0; }); + thrust::transform_if(thrust::cuda::par.on(stream), + th_z, th_z + count, th_z, + [slope] __device__ (const T& z) { return z / slope; }, + [] __device__ (const T& z) { return z < 0; }); +} + +void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + + int64_t count = z.numel(); + + AT_DISPATCH_FLOATING_TYPES(z.type(), 
"leaky_relu_backward_cuda", ([&] { + leaky_relu_backward_impl(z.data(), dz.data(), slope, count); + })); +} + +template +inline void elu_backward_impl(T *z, T *dz, int64_t count) { + // Create thrust pointers + thrust::device_ptr th_z = thrust::device_pointer_cast(z); + thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); + + auto stream = at::cuda::getCurrentCUDAStream(); + thrust::transform_if(thrust::cuda::par.on(stream), + th_dz, th_dz + count, th_z, th_z, th_dz, + [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); }, + [] __device__ (const T& z) { return z < 0; }); + thrust::transform_if(thrust::cuda::par.on(stream), + th_z, th_z + count, th_z, + [] __device__ (const T& z) { return log1p(z); }, + [] __device__ (const T& z) { return z < 0; }); +} + +void elu_backward_cuda(at::Tensor z, at::Tensor dz) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + + int64_t count = z.numel(); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] { + elu_backward_impl(z.data(), dz.data(), count); + })); +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cuda_half.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cuda_half.cu new file mode 100644 index 0000000..bb63e73 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/inplace_abn_cuda_half.cu @@ -0,0 +1,275 @@ +#include + +#include + +#include + +#include "utils/checks.h" +#include "utils/cuda.cuh" +#include "inplace_abn.h" + +#include + +// Operations for reduce +struct SumOpH { + __device__ SumOpH(const half *t, int c, int s) + : tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ float operator()(int batch, int plane, int n) { + return __half2float(tensor[(batch * chn + plane) * sp + n]); + } + const half *tensor; + const int chn; + const int sp; +}; + +struct VarOpH { + __device__ VarOpH(float m, const half *t, int c, int s) + : mean(m), tensor(t), chn(c), sp(s) {} + __device__ __forceinline__ float operator()(int batch, int plane, int n) { + const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); + return (t - mean) * (t - mean); + } + const float mean; + const half *tensor; + const int chn; + const int sp; +}; + +struct GradOpH { + __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) + : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} + __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { + float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; + float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); + return Pair(_dz, _y * _dz); + } + const float weight; + const float bias; + const half *z; + const half *dz; + const int chn; + const int sp; +}; + +/*********** + * mean_var + ***********/ + +__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { + int plane = blockIdx.x; + float norm = 1.f / static_cast(num * sp); + + float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; + __syncthreads(); + float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; + + if (threadIdx.x == 0) { + mean[plane] = _mean; + var[plane] = _var; + } +} + +std::vector mean_var_cuda_h(at::Tensor x) { + CHECK_CUDA_INPUT(x); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Prepare output tensors + auto mean = at::empty({chn},x.options().dtype(at::kFloat)); + auto var = 
at::empty({chn},x.options().dtype(at::kFloat)); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + mean_var_kernel_h<<>>( + reinterpret_cast(x.data()), + mean.data(), + var.data(), + num, chn, sp); + + return {mean, var}; +} + +/********** + * forward + **********/ + +__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, + bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + const float _mean = mean[plane]; + const float _var = var[plane]; + const float _weight = affine ? abs(weight[plane]) + eps : 1.f; + const float _bias = affine ? bias[plane] : 0.f; + + const float mul = rsqrt(_var + eps) * _weight; + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + half *x_ptr = x + (batch * chn + plane) * sp + n; + float _x = __half2float(*x_ptr); + float _y = (_x - _mean) * mul + _bias; + + *x_ptr = __float2half(_y); + } + } +} + +at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(x); + CHECK_CUDA_INPUT(mean); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(x, num, chn, sp); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + forward_kernel_h<<>>( + reinterpret_cast(x.data()), + mean.data(), + var.data(), + weight.data(), + bias.data(), + affine, eps, num, chn, sp); + + return x; +} + +__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, + float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + float _weight = affine ? abs(weight[plane]) + eps : 1.f; + float _bias = affine ? bias[plane] : 0.f; + + Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); + __syncthreads(); + + if (threadIdx.x == 0) { + edz[plane] = res.v1; + eydz[plane] = res.v2; + } +} + +std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, + bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto edz = at::empty({chn},z.options().dtype(at::kFloat)); + auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + edz_eydz_kernel_h<<>>( + reinterpret_cast(z.data()), + reinterpret_cast(dz.data()), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + affine, eps, num, chn, sp); + + return {edz, eydz}; +} + +__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, + const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { + int plane = blockIdx.x; + + float _weight = affine ? abs(weight[plane]) + eps : 1.f; + float _bias = affine ? 
bias[plane] : 0.f; + float _var = var[plane]; + float _edz = edz[plane]; + float _eydz = eydz[plane]; + + float _mul = _weight * rsqrt(_var + eps); + float count = float(num * sp); + + for (int batch = 0; batch < num; ++batch) { + for (int n = threadIdx.x; n < sp; n += blockDim.x) { + float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); + float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; + + dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); + } + } +} + +at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, + at::Tensor edz, at::Tensor eydz, bool affine, float eps) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + CHECK_CUDA_INPUT(var); + CHECK_CUDA_INPUT(weight); + CHECK_CUDA_INPUT(bias); + CHECK_CUDA_INPUT(edz); + CHECK_CUDA_INPUT(eydz); + + // Extract dimensions + int64_t num, chn, sp; + get_dims(z, num, chn, sp); + + auto dx = at::zeros_like(z); + + // Run kernel + dim3 blocks(chn); + dim3 threads(getNumThreads(sp)); + auto stream = at::cuda::getCurrentCUDAStream(); + backward_kernel_h<<>>( + reinterpret_cast(z.data()), + reinterpret_cast(dz.data()), + var.data(), + weight.data(), + bias.data(), + edz.data(), + eydz.data(), + reinterpret_cast(dx.data()), + affine, eps, num, chn, sp); + + return dx; +} + +__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ + float _z = __half2float(z[i]); + if (_z < 0) { + dz[i] = __float2half(__half2float(dz[i]) * slope); + z[i] = __float2half(_z / slope); + } + } +} + +void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { + CHECK_CUDA_INPUT(z); + CHECK_CUDA_INPUT(dz); + + int64_t count = z.numel(); + dim3 threads(getNumThreads(count)); + dim3 blocks = (count + threads.x - 1) / threads.x; + auto stream = at::cuda::getCurrentCUDAStream(); + leaky_relu_backward_impl_h<<>>( + reinterpret_cast(z.data()), + reinterpret_cast(dz.data()), + slope, count); +} + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/checks.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/checks.h new file mode 100644 index 0000000..e761a6f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/checks.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT +#ifndef AT_CHECK +#define AT_CHECK AT_ASSERT +#endif + +#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") +#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") +#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") + +#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) +#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/common.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/common.h new file mode 100644 index 0000000..e8403ee --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/common.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +/* + * Functions to share code between CPU and GPU + */ + +#ifdef __CUDACC__ +// CUDA versions + +#define 
HOST_DEVICE __host__ __device__ +#define INLINE_HOST_DEVICE __host__ __device__ inline +#define FLOOR(x) floor(x) + +#if __CUDA_ARCH__ >= 600 +// Recent compute capabilities have block-level atomicAdd for all data types, so we use that +#define ACCUM(x,y) atomicAdd_block(&(x),(y)) +#else +// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float +// and use the known atomicCAS-based implementation for double +template +__device__ inline data_t atomic_add(data_t *address, data_t val) { + return atomicAdd(address, val); +} + +template<> +__device__ inline double atomic_add(double *address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} + +#define ACCUM(x,y) atomic_add(&(x),(y)) +#endif // #if __CUDA_ARCH__ >= 600 + +#else +// CPU versions + +#define HOST_DEVICE +#define INLINE_HOST_DEVICE inline +#define FLOOR(x) std::floor(x) +#define ACCUM(x,y) (x) += (y) + +#endif // #ifdef __CUDACC__ \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/cuda.cuh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/cuda.cuh new file mode 100644 index 0000000..60c0023 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/inplace_abn_1/src/utils/cuda.cuh @@ -0,0 +1,71 @@ +#pragma once + +/* + * General settings and functions + */ +const int WARP_SIZE = 32; +const int MAX_BLOCK_SIZE = 1024; + +static int getNumThreads(int nElem) { + int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; + for (int i = 0; i < 6; ++i) { + if (nElem <= threadSizes[i]) { + return threadSizes[i]; + } + } + return MAX_BLOCK_SIZE; +} + +/* + * Reduction utilities + */ +template +__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, + unsigned int mask = 0xffffffff) { +#if CUDART_VERSION >= 9000 + return __shfl_xor_sync(mask, value, laneMask, width); +#else + return __shfl_xor(value, laneMask, width); +#endif +} + +__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } + +template +struct Pair { + T v1, v2; + __device__ Pair() {} + __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} + __device__ Pair(T v) : v1(v), v2(v) {} + __device__ Pair(int v) : v1(v), v2(v) {} + __device__ Pair &operator+=(const Pair &a) { + v1 += a.v1; + v2 += a.v2; + return *this; + } +}; + +template +static __device__ __forceinline__ T warpSum(T val) { +#if __CUDA_ARCH__ >= 300 + for (int i = 0; i < getMSB(WARP_SIZE); ++i) { + val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); + } +#else + __shared__ T values[MAX_BLOCK_SIZE]; + values[threadIdx.x] = val; + __threadfence_block(); + const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; + for (int i = 1; i < WARP_SIZE; i++) { + val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; + } +#endif + return val; +} + +template +static __device__ __forceinline__ Pair warpSum(Pair value) { + value.v1 = warpSum(value.v1); + value.v2 = warpSum(value.v2); + return value; +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/__init__.py new file mode 100644 index 0000000..e69de29 
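The inplace_abn_1 extension above pairs the JIT-compiled C++/CUDA backend (functions.py builds it on first import via torch.utils.cpp_extension.load) with the ABN, InPlaceABN and InPlaceABNSync modules defined in bn.py. Because the backward pass reconstructs the normalized activations from the in-place output, only invertible activations (leaky_relu, elu, none) are supported, so InPlaceABN is meant to replace the whole BatchNorm2d + activation pair rather than be stacked with a separate ReLU. Below is a minimal usage sketch, not part of the patch: it assumes a CUDA-capable environment where the extension compiles, that the repo root is on PYTHONPATH so the package imports as lib.extensions.inplace_abn_1 (the import root used inside functions.py), and the ConvABN wrapper and tensor shapes are purely illustrative.

import torch
import torch.nn as nn

# Assumption: importing the package triggers the JIT build of the CUDA extension.
from lib.extensions.inplace_abn_1 import InPlaceABN


class ConvABN(nn.Module):
    """Illustrative Conv2d + InPlaceABN block (stands in for Conv2d + BatchNorm2d + LeakyReLU)."""

    def __init__(self, in_ch, out_ch):
        super(ConvABN, self).__init__()
        # bias=False: the ABN shift term makes a convolution bias redundant
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False)
        # only invertible activations (leaky_relu, elu, none) work with the in-place backward
        self.abn = InPlaceABN(out_ch, activation="leaky_relu", slope=0.01)

    def forward(self, x):
        # the conv output is normalized and activated in place, saving one activation buffer
        return self.abn(self.conv(x))


if __name__ == "__main__" and torch.cuda.is_available():
    block = ConvABN(3, 16).cuda()
    y = block(torch.randn(2, 3, 64, 64, device="cuda"))
    print(y.shape)  # expected: torch.Size([2, 16, 64, 64])
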
diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/pac.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/pac.py new file mode 100644 index 0000000..e622f03 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/pac.py @@ -0,0 +1,844 @@ +""" +Copyright (C) 2019 NVIDIA Corporation. All rights reserved. +Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +__all__ = ['PacConv2d', 'PacConvTranspose2d', 'PacPool2d', + 'pacconv2d', 'pacconv_transpose2d', 'pacpool2d', 'packernel2d', 'nd2col'] + +import math +from numbers import Number +from itertools import repeat + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd.function import Function, once_differentiable +from torch.nn.parameter import Parameter +from torch.nn.modules.utils import _pair +from torch._thnn import type2backend + +try: + import pyinn as P + + has_pyinn = True +except ImportError: + P = None + has_pyinn = False + pass + + +def _neg_idx(idx): + return None if idx == 0 else -idx + + +def np_gaussian_2d(width, sigma=-1): + '''Truncated 2D Gaussian filter''' + assert width % 2 == 1 + if sigma <= 0: + sigma = float(width) / 4 + + r = np.arange(-(width // 2), (width // 2) + 1, dtype=np.float32) + gaussian_1d = np.exp(-0.5 * r * r / (sigma * sigma)) + gaussian_2d = gaussian_1d.reshape(-1, 1) * gaussian_1d + gaussian_2d /= gaussian_2d.sum() + + return gaussian_2d + + +def nd2col(input_nd, kernel_size, stride=1, padding=0, output_padding=0, dilation=1, transposed=False, + use_pyinn_if_possible=False): + """ + Shape: + - Input: :math:`(N, C, L_{in})` + - Output: :math:`(N, C, *kernel_size, *L_{out})` where + :math:`L_{out} = floor((L_{in} + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)` for non-transposed + :math:`L_{out} = (L_{in} - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1 + output_padding` for transposed + """ + n_dims = len(input_nd.shape[2:]) + kernel_size = (kernel_size,) * n_dims if isinstance(kernel_size, Number) else kernel_size + stride = (stride,) * n_dims if isinstance(stride, Number) else stride + padding = (padding,) * n_dims if isinstance(padding, Number) else padding + output_padding = (output_padding,) * n_dims if isinstance(output_padding, Number) else output_padding + dilation = (dilation,) * n_dims if isinstance(dilation, Number) else dilation + + if transposed: + assert n_dims == 2, 'Only 2D is supported for fractional strides.' 
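+        # Clarifying note: emulate the fractional stride of a transposed convolution by
+        # zero-upsampling the input with a 1x1 all-ones conv_transpose2d, padding out to the
+        # transposed receptive field (plus output_padding), and then falling through to the
+        # ordinary unit-stride unfold below.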
+ w_one = input_nd.new_ones(1, 1, 1, 1) + pad = [(k - 1) * d - p for (k, d, p) in zip(kernel_size, dilation, padding)] + input_nd = F.conv_transpose2d(input_nd, w_one, stride=stride) + input_nd = F.pad(input_nd, (pad[1], pad[1] + output_padding[1], pad[0], pad[0] + output_padding[0])) + stride = _pair(1) + padding = _pair(0) + + (bs, nch), in_sz = input_nd.shape[:2], input_nd.shape[2:] + out_sz = tuple([((i + 2 * p - d * (k - 1) - 1) // s + 1) + for (i, k, d, p, s) in zip(in_sz, kernel_size, dilation, padding, stride)]) + # Use PyINN if possible (about 15% faster) TODO confirm the speed-up + if n_dims == 2 and dilation == 1 and has_pyinn and torch.cuda.is_available() and use_pyinn_if_possible: + output = P.im2col(input_nd, kernel_size, stride, padding) + else: + output = F.unfold(input_nd, kernel_size, dilation, padding, stride) + out_shape = (bs, nch) + tuple(kernel_size) + out_sz + output = output.view(*out_shape).contiguous() + return output + + +class GaussKernel2dFn(Function): + @staticmethod + def forward(ctx, input, kernel_size, stride, padding, dilation, channel_wise): + ctx.kernel_size = _pair(kernel_size) + ctx.dilation = _pair(dilation) + ctx.padding = _pair(padding) + ctx.stride = _pair(stride) + bs, ch, in_h, in_w = input.shape + out_h = (in_h + 2 * ctx.padding[0] - ctx.dilation[0] * (ctx.kernel_size[0] - 1) - 1) // ctx.stride[0] + 1 + out_w = (in_w + 2 * ctx.padding[1] - ctx.dilation[1] * (ctx.kernel_size[1] - 1) - 1) // ctx.stride[1] + 1 + cols = F.unfold(input, ctx.kernel_size, ctx.dilation, ctx.padding, ctx.stride) + cols = cols.view(bs, ch, ctx.kernel_size[0], ctx.kernel_size[1], out_h, out_w) + center_y, center_x = ctx.kernel_size[0] // 2, ctx.kernel_size[1] // 2 + feat_0 = cols.contiguous()[:, :, center_y:center_y + 1, center_x:center_x + 1, :, :] + diff_sq = (cols - feat_0).pow(2) + if not channel_wise: + diff_sq = diff_sq.sum(dim=1, keepdim=True) + output = torch.exp(-0.5 * diff_sq) + ctx._backend = type2backend[input.type()] + ctx.save_for_backward(input, output) + + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, output = ctx.saved_tensors + bs, ch, in_h, in_w = input.shape + out_h, out_w = output.shape[-2:] + cols = F.unfold(input, ctx.kernel_size, ctx.dilation, ctx.padding, ctx.stride) + cols = cols.view(bs, ch, ctx.kernel_size[0], ctx.kernel_size[1], out_h, out_w) + center_y, center_x = ctx.kernel_size[0] // 2, ctx.kernel_size[1] // 2 + feat_0 = cols.contiguous()[:, :, center_y:center_y + 1, center_x:center_x + 1, :, :] + diff = cols - feat_0 + grad = -0.5 * grad_output * output + grad_diff = grad.expand_as(cols) * (2 * diff) + grad_diff[:, :, center_y:center_y + 1, center_x:center_x + 1, :, :] -= \ + grad_diff.sum(dim=2, keepdim=True).sum(dim=3, keepdim=True) + grad_input = grad_output.new() + ctx._backend.Im2Col_updateGradInput(ctx._backend.library_state, + grad_diff.view(bs, ch * ctx.kernel_size[0] * ctx.kernel_size[1], -1), + grad_input, + in_h, in_w, + ctx.kernel_size[0], ctx.kernel_size[1], + ctx.dilation[0], ctx.dilation[1], + ctx.padding[0], ctx.padding[1], + ctx.stride[0], ctx.stride[1]) + + return grad_input, None, None, None, None, None + + +class PacConv2dFn(Function): + @staticmethod + def forward(ctx, input, kernel, weight, bias=None, stride=1, padding=0, dilation=1, shared_filters=False): + (bs, ch), in_sz = input.shape[:2], input.shape[2:] + if kernel.size(1) > 1: + raise ValueError('Non-singleton channel is not allowed for kernel.') + ctx.input_size = in_sz + ctx.in_ch = ch + ctx.kernel_size = 
tuple(weight.shape[-2:]) + ctx.dilation = _pair(dilation) + ctx.padding = _pair(padding) + ctx.stride = _pair(stride) + ctx.shared_filters = shared_filters + ctx.save_for_backward(input if (ctx.needs_input_grad[1] or ctx.needs_input_grad[2]) else None, + kernel if (ctx.needs_input_grad[0] or ctx.needs_input_grad[2]) else None, + weight if (ctx.needs_input_grad[0] or ctx.needs_input_grad[1]) else None) + ctx._backend = type2backend[input.type()] + + cols = F.unfold(input, ctx.kernel_size, ctx.dilation, ctx.padding, ctx.stride) + + in_mul_k = cols.view(bs, ch, *kernel.shape[2:]) * kernel + + # matrix multiplication, written as an einsum to avoid repeated view() and permute() + if shared_filters: + output = torch.einsum('ijklmn,zykl->ijmn', (in_mul_k, weight)) + else: + output = torch.einsum('ijklmn,ojkl->iomn', (in_mul_k, weight)) + + if bias is not None: + output += bias.view(1, -1, 1, 1) + + return output.clone() # TODO understand why a .clone() is needed here + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + grad_input = grad_kernel = grad_weight = grad_bias = None + (bs, out_ch), out_sz = grad_output.shape[:2], grad_output.shape[2:] + in_ch = ctx.in_ch + + input, kernel, weight = ctx.saved_tensors + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + if ctx.shared_filters: + grad_in_mul_k = grad_output.view(bs, out_ch, 1, 1, out_sz[0], out_sz[1]) \ + * weight.view(ctx.kernel_size[0], ctx.kernel_size[1], 1, 1) + else: + grad_in_mul_k = torch.einsum('iomn,ojkl->ijklmn', (grad_output, weight)) + if ctx.needs_input_grad[1] or ctx.needs_input_grad[2]: + in_cols = F.unfold(input, ctx.kernel_size, ctx.dilation, ctx.padding, ctx.stride) + in_cols = in_cols.view(bs, in_ch, ctx.kernel_size[0], ctx.kernel_size[1], out_sz[0], out_sz[1]) + if ctx.needs_input_grad[0]: + grad_input = grad_output.new() + grad_im2col_output = grad_in_mul_k * kernel + grad_im2col_output = grad_im2col_output.view(bs, -1, out_sz[0] * out_sz[1]) + ctx._backend.Im2Col_updateGradInput(ctx._backend.library_state, + grad_im2col_output, + grad_input, + ctx.input_size[0], ctx.input_size[1], + ctx.kernel_size[0], ctx.kernel_size[1], + ctx.dilation[0], ctx.dilation[1], + ctx.padding[0], ctx.padding[1], + ctx.stride[0], ctx.stride[1]) + if ctx.needs_input_grad[1]: + grad_kernel = in_cols * grad_in_mul_k + grad_kernel = grad_kernel.sum(dim=1, keepdim=True) + if ctx.needs_input_grad[2]: + in_mul_k = in_cols * kernel + if ctx.shared_filters: + grad_weight = torch.einsum('ijmn,ijklmn->kl', (grad_output, in_mul_k)) + grad_weight = grad_weight.view(1, 1, ctx.kernel_size[0], ctx.kernel_size[1]).contiguous() + else: + grad_weight = torch.einsum('iomn,ijklmn->ojkl', (grad_output, in_mul_k)) + if ctx.needs_input_grad[3]: + grad_bias = torch.einsum('iomn->o', (grad_output,)) + + return grad_input, grad_kernel, grad_weight, grad_bias, None, None, None, None + + +class PacConvTranspose2dFn(Function): + @staticmethod + def forward(ctx, input, kernel, weight, bias=None, stride=1, padding=0, output_padding=0, dilation=1, + shared_filters=False): + (bs, ch), in_sz = input.shape[:2], input.shape[2:] + if kernel.size(1) > 1: + raise ValueError('Non-singleton channel is not allowed for kernel.') + ctx.in_ch = ch + ctx.kernel_size = tuple(weight.shape[-2:]) + ctx.dilation = _pair(dilation) + ctx.padding = _pair(padding) + ctx.output_padding = _pair(output_padding) + ctx.stride = _pair(stride) + ctx.shared_filters = shared_filters + ctx.save_for_backward(input if (ctx.needs_input_grad[1] or ctx.needs_input_grad[2]) else 
None, + kernel if (ctx.needs_input_grad[0] or ctx.needs_input_grad[2]) else None, + weight if (ctx.needs_input_grad[0] or ctx.needs_input_grad[1]) else None) + ctx._backend = type2backend[input.type()] + + w = input.new_ones((ch, 1, 1, 1)) + x = F.conv_transpose2d(input, w, stride=stride, groups=ch) + pad = [(k - 1) * d - p for (k, d, p) in zip(ctx.kernel_size, ctx.dilation, ctx.padding)] + x = F.pad(x, (pad[1], pad[1] + ctx.output_padding[1], pad[0], pad[0] + ctx.output_padding[0])) + + cols = F.unfold(x, ctx.kernel_size, ctx.dilation, _pair(0), _pair(1)) + + in_mul_k = cols.view(bs, ch, *kernel.shape[2:]) * kernel + + # matrix multiplication, written as an einsum to avoid repeated view() and permute() + if shared_filters: + output = torch.einsum('ijklmn,jokl->iomn', (in_mul_k, weight)) + else: + output = torch.einsum('ijklmn,jokl->iomn', (in_mul_k, weight)) + + if bias is not None: + output += bias.view(1, -1, 1, 1) + + return output.clone() # TODO understand why a .clone() is needed here + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + grad_input = grad_kernel = grad_weight = grad_bias = None + (bs, out_ch), out_sz = grad_output.shape[:2], grad_output.shape[2:] + in_ch = ctx.in_ch + pad = [(k - 1) * d - p for (k, d, p) in zip(ctx.kernel_size, ctx.dilation, ctx.padding)] + pad = [(p, p + op) for (p, op) in zip(pad, ctx.output_padding)] + + input, kernel, weight = ctx.saved_tensors + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + if ctx.shared_filters: + grad_in_mul_k = grad_output.view(bs, out_ch, 1, 1, out_sz[0], out_sz[1]) \ + * weight.view(ctx.kernel_size[0], ctx.kernel_size[1], 1, 1) + else: + grad_in_mul_k = torch.einsum('iomn,jokl->ijklmn', (grad_output, weight)) + if ctx.needs_input_grad[1] or ctx.needs_input_grad[2]: + w = input.new_ones((in_ch, 1, 1, 1)) + x = F.conv_transpose2d(input, w, stride=ctx.stride, groups=in_ch) + x = F.pad(x, (pad[1][0], pad[1][1], pad[0][0], pad[0][1])) + in_cols = F.unfold(x, ctx.kernel_size, ctx.dilation, _pair(0), _pair(1)) + in_cols = in_cols.view(bs, in_ch, ctx.kernel_size[0], ctx.kernel_size[1], out_sz[0], out_sz[1]) + if ctx.needs_input_grad[0]: + grad_input = grad_output.new() + grad_im2col_output = grad_in_mul_k * kernel + grad_im2col_output = grad_im2col_output.view(bs, -1, out_sz[0] * out_sz[1]) + im2col_input_sz = [o + (k - 1) * d for (o, k, d) in zip(out_sz, ctx.kernel_size, ctx.dilation)] + ctx._backend.Im2Col_updateGradInput(ctx._backend.library_state, + grad_im2col_output, + grad_input, + im2col_input_sz[0], im2col_input_sz[1], + ctx.kernel_size[0], ctx.kernel_size[1], + ctx.dilation[0], ctx.dilation[1], + 0, 0, + 1, 1) + grad_input = grad_input[:, :, pad[0][0]:-pad[0][1]:ctx.stride[0], pad[1][0]:-pad[1][1]:ctx.stride[1]] + if ctx.needs_input_grad[1]: + grad_kernel = in_cols * grad_in_mul_k + grad_kernel = grad_kernel.sum(dim=1, keepdim=True) + if ctx.needs_input_grad[2]: + in_mul_k = in_cols * kernel + if ctx.shared_filters: + grad_weight = torch.einsum('ijmn,ijklmn->kl', (grad_output, in_mul_k)) + grad_weight = grad_weight.view(1, 1, ctx.kernel_size[0], ctx.kernel_size[1]).contiguous() + else: + grad_weight = torch.einsum('iomn,ijklmn->jokl', (grad_output, in_mul_k)) + if ctx.needs_input_grad[3]: + grad_bias = torch.einsum('iomn->o', (grad_output,)) + return grad_input, grad_kernel, grad_weight, grad_bias, None, None, None, None, None + + +class PacPool2dFn(Function): + @staticmethod + def forward(ctx, input, kernel, kernel_size, stride=1, padding=0, dilation=1): + (bs, ch), in_sz = 
input.shape[:2], input.shape[2:] + if kernel.size(1) > 1 and kernel.size(1) != ch: + raise ValueError('Incompatible input and kernel sizes.') + ctx.input_size = in_sz + ctx.kernel_size = _pair(kernel_size) + ctx.kernel_ch = kernel.size(1) + ctx.dilation = _pair(dilation) + ctx.padding = _pair(padding) + ctx.stride = _pair(stride) + ctx.save_for_backward(input if ctx.needs_input_grad[1] else None, + kernel if ctx.needs_input_grad[0] else None) + ctx._backend = type2backend[input.type()] + + cols = F.unfold(input, ctx.kernel_size, ctx.dilation, ctx.padding, ctx.stride) + + output = cols.view(bs, ch, *kernel.shape[2:]) * kernel + output = torch.einsum('ijklmn->ijmn', (output,)) + + return output.clone() # TODO check whether a .clone() is needed here + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, kernel = ctx.saved_tensors + grad_input = grad_kernel = None + (bs, ch), out_sz = grad_output.shape[:2], grad_output.shape[2:] + if ctx.needs_input_grad[0]: + grad_input = grad_output.new() + grad_im2col_output = torch.einsum('ijmn,izklmn->ijklmn', (grad_output, kernel)) + grad_im2col_output = grad_im2col_output.view(bs, -1, out_sz[0] * out_sz[1]) + ctx._backend.Im2Col_updateGradInput(ctx._backend.library_state, + grad_im2col_output, + grad_input, + ctx.input_size[0], ctx.input_size[1], + ctx.kernel_size[0], ctx.kernel_size[1], + ctx.dilation[0], ctx.dilation[1], + ctx.padding[0], ctx.padding[1], + ctx.stride[0], ctx.stride[1]) + if ctx.needs_input_grad[1]: + cols = F.unfold(input, ctx.kernel_size, ctx.dilation, ctx.padding, ctx.stride) + cols = cols.view(bs, ch, ctx.kernel_size[0], ctx.kernel_size[1], out_sz[0], out_sz[1]) + grad_kernel = torch.einsum('ijmn,ijklmn->ijklmn', (grad_output, cols)) + if ctx.kernel_ch == 1: + grad_kernel = grad_kernel.sum(dim=1, keepdim=True) + + return grad_input, grad_kernel, None, None, None, None + + +def packernel2d(input, mask=None, kernel_size=0, stride=1, padding=0, output_padding=0, dilation=1, + kernel_type='gaussian', smooth_kernel_type='none', smooth_kernel=None, inv_alpha=None, inv_lambda=None, + channel_wise=False, normalize_kernel=False, transposed=False, native_impl=False): + kernel_size = _pair(kernel_size) + dilation = _pair(dilation) + padding = _pair(padding) + output_padding = _pair(output_padding) + stride = _pair(stride) + output_mask = False if mask is None else True + norm = None + + if mask is not None and mask.dtype != input.dtype: + mask = torch.tensor(mask, dtype=input.dtype, device=input.device) + + if transposed: + in_sz = tuple(int((o - op - 1 - (k - 1) * d + 2 * p) // s) + 1 for (o, k, s, p, op, d) in + zip(input.shape[-2:], kernel_size, stride, padding, output_padding, dilation)) + else: + in_sz = input.shape[-2:] + + if mask is not None or normalize_kernel: + mask_pattern = input.new_ones(1, 1, *in_sz) + mask_pattern = nd2col(mask_pattern, kernel_size, stride=stride, padding=padding, output_padding=output_padding, + dilation=dilation, transposed=transposed) + if mask is not None: + mask = nd2col(mask, kernel_size, stride=stride, padding=padding, output_padding=output_padding, + dilation=dilation, transposed=transposed) + if not normalize_kernel: + norm = mask.sum(dim=2, keepdim=True).sum(dim=3, keepdim=True) \ + / mask_pattern.sum(dim=2, keepdim=True).sum(dim=3, keepdim=True) + else: + mask = mask_pattern + + if transposed: + stride = _pair(1) + padding = tuple((k - 1) * d // 2 for (k, d) in zip(kernel_size, dilation)) + + if native_impl: + bs, k_ch, in_h, in_w = input.shape + + x = nd2col(input, 
kernel_size, stride=stride, padding=padding, dilation=dilation) + x = x.view(bs, k_ch, -1, *x.shape[-2:]).contiguous() + + if smooth_kernel_type == 'none': + self_idx = kernel_size[0] * kernel_size[1] // 2 + feat_0 = x[:, :, self_idx:self_idx + 1, :, :] + else: + smooth_kernel_size = smooth_kernel.shape[2:] + smooth_padding = (int(padding[0] - (kernel_size[0] - smooth_kernel_size[0]) / 2), + int(padding[1] - (kernel_size[1] - smooth_kernel_size[1]) / 2)) + crop = tuple(-1 * np.minimum(0, smooth_padding)) + input_for_kernel_crop = input.view(-1, 1, in_h, in_w)[:, :, + crop[0]:_neg_idx(crop[0]), crop[1]:_neg_idx(crop[1])] + smoothed = F.conv2d(input_for_kernel_crop, smooth_kernel, + stride=stride, padding=tuple(np.maximum(0, smooth_padding))) + feat_0 = smoothed.view(bs, k_ch, 1, *x.shape[-2:]) + x = x - feat_0 + if kernel_type.find('_asym') >= 0: + x = F.relu(x, inplace=True) + # x.pow_(2) # this causes an autograd issue in pytorch>0.4 + x = x * x + if not channel_wise: + x = torch.sum(x, dim=1, keepdim=True) + if kernel_type == 'gaussian': + x = torch.exp_(x.mul_(-0.5)) # TODO profiling for identifying the culprit of 5x slow down + # x = torch.exp(-0.5 * x) + elif kernel_type.startswith('inv_'): + epsilon = 1e-4 + x = inv_alpha.view(1, -1, 1, 1, 1) \ + + torch.pow(x + epsilon, 0.5 * inv_lambda.view(1, -1, 1, 1, 1)) + else: + raise ValueError() + output = x.view(*(x.shape[:2] + tuple(kernel_size) + x.shape[-2:])).contiguous() + else: + assert (smooth_kernel_type == 'none' and + kernel_type == 'gaussian') + output = GaussKernel2dFn.apply(input, kernel_size, stride, padding, dilation, channel_wise) + + if mask is not None: + output = output * mask # avoid numerical issue on masked positions + + if normalize_kernel: + norm = output.sum(dim=2, keepdim=True).sum(dim=3, keepdim=True) + + if norm is not None: + empty_mask = (norm == 0) + output = output / (norm + torch.tensor(empty_mask, dtype=input.dtype, device=input.device)) + output_mask = (1 - empty_mask) if output_mask else None + else: + output_mask = None + + return output, output_mask + + +def pacconv2d(input, kernel, weight, bias=None, stride=1, padding=0, dilation=1, shared_filters=False, + native_impl=False): + kernel_size = tuple(weight.shape[-2:]) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + + if native_impl: + # im2col on input + im_cols = nd2col(input, kernel_size, stride=stride, padding=padding, dilation=dilation) + + # main computation + if shared_filters: + output = torch.einsum('ijklmn,zykl->ijmn', (im_cols * kernel, weight)) + else: + output = torch.einsum('ijklmn,ojkl->iomn', (im_cols * kernel, weight)) + + if bias is not None: + output += bias.view(1, -1, 1, 1) + else: + output = PacConv2dFn.apply(input, kernel, weight, bias, stride, padding, dilation, shared_filters) + + return output + + +def pacconv_transpose2d(input, kernel, weight, bias=None, stride=1, padding=0, output_padding=0, dilation=1, + shared_filters=False, native_impl=False): + kernel_size = tuple(weight.shape[-2:]) + stride = _pair(stride) + padding = _pair(padding) + output_padding = _pair(output_padding) + dilation = _pair(dilation) + + if native_impl: + ch = input.shape[1] + w = input.new_ones((ch, 1, 1, 1)) + x = F.conv_transpose2d(input, w, stride=stride, groups=ch) + pad = [(kernel_size[i] - 1) * dilation[i] - padding[i] for i in range(2)] + x = F.pad(x, (pad[1], pad[1] + output_padding[1], pad[0], pad[0] + output_padding[0])) + output = pacconv2d(x, kernel, weight.permute(1, 0, 2, 3), bias, dilation=dilation, + 
shared_filters=shared_filters, native_impl=True) + else: + output = PacConvTranspose2dFn.apply(input, kernel, weight, bias, stride, padding, output_padding, dilation, + shared_filters) + + return output + + +def pacpool2d(input, kernel, kernel_size, stride=1, padding=0, dilation=1, native_impl=False): + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + + if native_impl: + bs, in_ch, in_h, in_w = input.shape + out_h = (in_h + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) // stride[0] + 1 + out_w = (in_w + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) // stride[1] + 1 + + # im2col on input + im_cols = nd2col(input, kernel_size, stride=stride, padding=padding, dilation=dilation) + + # main computation + im_cols *= kernel + output = im_cols.view(bs, in_ch, -1, out_h, out_w).sum(dim=2, keepdim=False) + else: + output = PacPool2dFn.apply(input, kernel, kernel_size, stride, padding, dilation) + + return output + + +class _PacConvNd(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride, + padding, dilation, transposed, output_padding, bias, + pool_only, kernel_type, smooth_kernel_type, + channel_wise, normalize_kernel, shared_filters, filler): + super(_PacConvNd, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.transposed = transposed + self.output_padding = output_padding + self.pool_only = pool_only + self.kernel_type = kernel_type + self.smooth_kernel_type = smooth_kernel_type + self.channel_wise = channel_wise + self.normalize_kernel = normalize_kernel + self.shared_filters = shared_filters + self.filler = filler + if any([k % 2 != 1 for k in kernel_size]): + raise ValueError('kernel_size only accept odd numbers') + if smooth_kernel_type.find('_') >= 0 and int(smooth_kernel_type[smooth_kernel_type.rfind('_') + 1:]) % 2 != 1: + raise ValueError('smooth_kernel_type only accept kernels of odd widths') + if shared_filters: + assert in_channels == out_channels, 'when specifying shared_filters, number of channels should not change' + if any([p > d * (k - 1) / 2 for (p, d, k) in zip(padding, dilation, kernel_size)]): + # raise ValueError('padding ({}) too large'.format(padding)) + pass # TODO verify that this indeed won't cause issues + if not pool_only: + if self.filler in {'pool', 'crf_pool'}: + assert shared_filters + self.register_buffer('weight', torch.ones(1, 1, *kernel_size)) + if self.filler == 'crf_pool': + self.weight[(0, 0) + tuple(k // 2 for k in kernel_size)] = 0 # Eq.5, DenseCRF + elif shared_filters: + self.weight = Parameter(torch.Tensor(1, 1, *kernel_size)) + elif transposed: + self.weight = Parameter(torch.Tensor(in_channels, out_channels, *kernel_size)) + else: + self.weight = Parameter(torch.Tensor(out_channels, in_channels, *kernel_size)) + if bias: + self.bias = Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + if kernel_type.startswith('inv_'): + self.inv_alpha_init = float(kernel_type.split('_')[1]) + self.inv_lambda_init = float(kernel_type.split('_')[2]) + if self.channel_wise and kernel_type.find('_fixed') < 0: + if out_channels <= 0: + raise ValueError('out_channels needed for channel_wise {}'.format(kernel_type)) + inv_alpha = self.inv_alpha_init * torch.ones(out_channels) + inv_lambda = self.inv_lambda_init * torch.ones(out_channels) + else: + inv_alpha = 
torch.tensor(float(self.inv_alpha_init)) + inv_lambda = torch.tensor(float(self.inv_lambda_init)) + if kernel_type.find('_fixed') < 0: + self.register_parameter('inv_alpha', Parameter(inv_alpha)) + self.register_parameter('inv_lambda', Parameter(inv_lambda)) + else: + self.register_buffer('inv_alpha', inv_alpha) + self.register_buffer('inv_lambda', inv_lambda) + elif kernel_type != 'gaussian': + raise ValueError('kernel_type set to invalid value ({})'.format(kernel_type)) + if smooth_kernel_type.startswith('full_'): + smooth_kernel_size = int(smooth_kernel_type.split('_')[-1]) + self.smooth_kernel = Parameter(torch.Tensor(1, 1, *repeat(smooth_kernel_size, len(kernel_size)))) + elif smooth_kernel_type == 'gaussian': + smooth_1d = torch.tensor([.25, .5, .25]) + smooth_kernel = smooth_1d + for d in range(1, len(kernel_size)): + smooth_kernel = smooth_kernel * smooth_1d.view(-1, *repeat(1, d)) + self.register_buffer('smooth_kernel', smooth_kernel.unsqueeze(0).unsqueeze(0)) + elif smooth_kernel_type.startswith('average_'): + smooth_kernel_size = int(smooth_kernel_type.split('_')[-1]) + smooth_1d = torch.tensor((1.0 / smooth_kernel_size,) * smooth_kernel_size) + smooth_kernel = smooth_1d + for d in range(1, len(kernel_size)): + smooth_kernel = smooth_kernel * smooth_1d.view(-1, *repeat(1, d)) + self.register_buffer('smooth_kernel', smooth_kernel.unsqueeze(0).unsqueeze(0)) + elif smooth_kernel_type != 'none': + raise ValueError('smooth_kernel_type set to invalid value ({})'.format(smooth_kernel_type)) + + self.reset_parameters() + + def reset_parameters(self): + if not (self.pool_only or self.filler in {'pool', 'crf_pool'}): + if self.filler == 'uniform': + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. / math.sqrt(n) + if self.shared_filters: + stdv *= self.in_channels + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.uniform_(-stdv, stdv) + elif self.filler == 'linear': + effective_kernel_size = tuple(2 * s - 1 for s in self.stride) + pad = tuple(int((k - ek) // 2) for k, ek in zip(self.kernel_size, effective_kernel_size)) + assert self.transposed and self.in_channels == self.out_channels + assert all(k >= ek for k, ek in zip(self.kernel_size, effective_kernel_size)) + w = 1.0 + for i, (p, s, k) in enumerate(zip(pad, self.stride, self.kernel_size)): + d = len(pad) - i - 1 + w = w * (np.array((0.0,) * p + tuple(range(1, s)) + tuple(range(s, 0, -1)) + (0,) * p) / s).reshape( + (-1,) + (1,) * d) + if self.normalize_kernel: + w = w * np.array(tuple(((k - j - 1) // s) + (j // s) + 1.0 for j in range(k))).reshape( + (-1,) + (1,) * d) + self.weight.data.fill_(0.0) + for c in range(1 if self.shared_filters else self.in_channels): + self.weight.data[c, c, :] = torch.tensor(w) + if self.bias is not None: + self.bias.data.fill_(0.0) + elif self.filler in {'crf', 'crf_perturbed'}: + assert len(self.kernel_size) == 2 and self.kernel_size[0] == self.kernel_size[1] \ + and self.in_channels == self.out_channels + perturb_range = 0.001 + n_classes = self.in_channels + gauss = np_gaussian_2d(self.kernel_size[0]) * self.kernel_size[0] * self.kernel_size[0] + gauss[self.kernel_size[0] // 2, self.kernel_size[1] // 2] = 0 + if self.shared_filters: + self.weight.data[0, 0, :] = torch.tensor(gauss) + else: + compat = 1.0 - np.eye(n_classes, dtype=np.float32) + self.weight.data[:] = torch.tensor(compat.reshape(n_classes, n_classes, 1, 1) * gauss) + if self.filler == 'crf_perturbed': + self.weight.data.add_((torch.rand_like(self.weight.data) - 0.5) * 
perturb_range) + if self.bias is not None: + self.bias.data.fill_(0.0) + else: + raise ValueError('Initialization method ({}) not supported.'.format(self.filler)) + if hasattr(self, 'inv_alpha') and isinstance(self.inv_alpha, Parameter): + self.inv_alpha.data.fill_(self.inv_alpha_init) + self.inv_lambda.data.fill_(self.inv_lambda_init) + if hasattr(self, 'smooth_kernel') and isinstance(self.smooth_kernel, Parameter): + self.smooth_kernel.data.fill_(1.0 / np.multiply.reduce(self.smooth_kernel.shape)) + + def extra_repr(self): + s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}' + ', kernel_type={kernel_type}') + if self.stride != (1,) * len(self.stride): + s += ', stride={stride}' + if self.padding != (0,) * len(self.padding): + s += ', padding={padding}' + if self.dilation != (1,) * len(self.dilation): + s += ', dilation={dilation}' + if self.output_padding != (0,) * len(self.output_padding): + s += ', output_padding={output_padding}' + if self.bias is None: + s += ', bias=False' + if self.smooth_kernel_type != 'none': + s += ', smooth_kernel_type={smooth_kernel_type}' + if self.channel_wise: + s += ', channel_wise=True' + if self.normalize_kernel: + s += ', normalize_kernel=True' + if self.shared_filters: + s += ', shared_filters=True' + return s.format(**self.__dict__) + + +class PacConv2d(_PacConvNd): + r""" + Args (in addition to those of Conv2d): + kernel_type (str): 'gaussian' | 'inv_{alpha}_{lambda}[_asym][_fixed]'. Default: 'gaussian' + smooth_kernel_type (str): 'none' | 'gaussian' | 'average_{sz}' | 'full_{sz}'. Default: 'none' + normalize_kernel (bool): Default: False + shared_filters (bool): Default: False + filler (str): 'uniform'. Default: 'uniform' + + Note: + - kernel_size only accepts odd numbers + - padding should not be larger than :math:`dilation * (kernel_size - 1) / 2` + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, bias=True, + kernel_type='gaussian', smooth_kernel_type='none', normalize_kernel=False, shared_filters=False, + filler='uniform', native_impl=False): + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + super(PacConv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, + padding, dilation, False, _pair(0), bias, + False, kernel_type, smooth_kernel_type, False, normalize_kernel, shared_filters, filler) + + self.native_impl = native_impl + + def compute_kernel(self, input_for_kernel, input_mask=None): + return packernel2d(input_for_kernel, input_mask, + kernel_size=self.kernel_size, stride=self.stride, padding=self.padding, + dilation=self.dilation, kernel_type=self.kernel_type, + smooth_kernel_type=self.smooth_kernel_type, + smooth_kernel=self.smooth_kernel if hasattr(self, 'smooth_kernel') else None, + inv_alpha=self.inv_alpha if hasattr(self, 'inv_alpha') else None, + inv_lambda=self.inv_lambda if hasattr(self, 'inv_lambda') else None, + channel_wise=False, normalize_kernel=self.normalize_kernel, transposed=False, + native_impl=self.native_impl) + + def forward(self, input_2d, input_for_kernel, kernel=None, mask=None): + output_mask = None + if kernel is None: + kernel, output_mask = self.compute_kernel(input_for_kernel, mask) + + output = pacconv2d(input_2d, kernel, self.weight, self.bias, self.stride, self.padding, self.dilation, + self.shared_filters, self.native_impl) + + return output if output_mask is None else (output, output_mask) + + +class PacConvTranspose2d(_PacConvNd): + r""" + Args (in addition to 
those of ConvTranspose2d): + kernel_type (str): 'gaussian' | 'inv_{alpha}_{lambda}[_asym][_fixed]'. Default: 'gaussian' + smooth_kernel_type (str): 'none' | 'gaussian' | 'average_{sz}' | 'full_{sz}'. Default: 'none' + normalize_kernel (bool): Default: False + shared_filters (bool): Default: False + filler (str): 'uniform' | 'linear'. Default: 'uniform' + + Note: + - kernel_size only accepts odd numbers + - padding should not be larger than :math:`dilation * (kernel_size - 1) / 2` + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, dilation=1, + bias=True, kernel_type='gaussian', smooth_kernel_type='none', normalize_kernel=False, + shared_filters=False, filler='uniform', native_impl=False): + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + output_padding = _pair(output_padding) + dilation = _pair(dilation) + super(PacConvTranspose2d, self).__init__( + in_channels, out_channels, kernel_size, stride, + padding, dilation, True, output_padding, bias, + False, kernel_type, smooth_kernel_type, False, normalize_kernel, shared_filters, filler) + + self.native_impl = native_impl + + def compute_kernel(self, input_for_kernel, input_mask=None): + return packernel2d(input_for_kernel, input_mask, + kernel_size=self.kernel_size, stride=self.stride, padding=self.padding, + output_padding=self.output_padding, dilation=self.dilation, kernel_type=self.kernel_type, + smooth_kernel_type=self.smooth_kernel_type, + smooth_kernel=self.smooth_kernel if hasattr(self, 'smooth_kernel') else None, + inv_alpha=self.inv_alpha if hasattr(self, 'inv_alpha') else None, + inv_lambda=self.inv_lambda if hasattr(self, 'inv_lambda') else None, + channel_wise=False, normalize_kernel=self.normalize_kernel, transposed=True, + native_impl=self.native_impl) + + def forward(self, input_2d, input_for_kernel, kernel=None, mask=None): + output_mask = None + if kernel is None: + kernel, output_mask = self.compute_kernel(input_for_kernel, mask) + + output = pacconv_transpose2d(input_2d, kernel, self.weight, self.bias, self.stride, self.padding, + self.output_padding, self.dilation, self.shared_filters, self.native_impl) + + return output if output_mask is None else (output, output_mask) + + +class PacPool2d(_PacConvNd): + r""" + Args: + kernel_size, stride, padding, dilation + kernel_type (str): 'gaussian' | 'inv_{alpha}_{lambda}[_asym][_fixed]'. Default: 'gaussian' + smooth_kernel_type (str): 'none' | 'gaussian' | 'average_{sz}' | 'full_{sz}'. Default: 'none' + channel_wise (bool): Default: False + normalize_kernel (bool): Default: False + out_channels (int): needs to be specified for channel_wise 'inv_*' (non-fixed) kernels. 
Default: -1 + + Note: + - kernel_size only accepts odd numbers + - padding should not be larger than :math:`dilation * (kernel_size - 1) / 2` + """ + + def __init__(self, kernel_size, stride=1, padding=0, dilation=1, + kernel_type='gaussian', smooth_kernel_type='none', + channel_wise=False, normalize_kernel=False, out_channels=-1, native_impl=False): + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + super(PacPool2d, self).__init__( + -1, out_channels, kernel_size, stride, + padding, dilation, False, _pair(0), False, + True, kernel_type, smooth_kernel_type, channel_wise, normalize_kernel, False, None) + + self.native_impl = native_impl + + def compute_kernel(self, input_for_kernel, input_mask=None): + return packernel2d(input_for_kernel, input_mask, + kernel_size=self.kernel_size, stride=self.stride, padding=self.padding, + dilation=self.dilation, kernel_type=self.kernel_type, + smooth_kernel_type=self.smooth_kernel_type, + smooth_kernel=self.smooth_kernel if hasattr(self, 'smooth_kernel') else None, + inv_alpha=self.inv_alpha if hasattr(self, 'inv_alpha') else None, + inv_lambda=self.inv_lambda if hasattr(self, 'inv_lambda') else None, + channel_wise=self.channel_wise, normalize_kernel=self.normalize_kernel, transposed=False, + native_impl=self.native_impl) + + def forward(self, input_2d, input_for_kernel, kernel=None, mask=None): + output_mask = None + if kernel is None: + kernel, output_mask = self.compute_kernel(input_for_kernel, mask) + + bs, in_ch, in_h, in_w = input_2d.shape + if self.channel_wise and (kernel.shape[1] != in_ch): + raise ValueError('input and kernel must have the same number of channels when channel_wise=True') + assert self.out_channels <= 0 or self.out_channels == in_ch + + output = pacpool2d(input_2d, kernel, self.kernel_size, self.stride, self.padding, self.dilation, + self.native_impl) + + return output if output_mask is None else (output, output_mask) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/paccrf.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/paccrf.py new file mode 100644 index 0000000..2c24330 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/paccrf.py @@ -0,0 +1,301 @@ +""" +Copyright (C) 2019 NVIDIA Corporation. All rights reserved. +Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
+""" +import numpy as np +import torch as th +from torch import nn +import torch.nn.functional as F + +from lib.extensions.pacnet import pac + + +def create_position_feats(shape, scales=None, bs=1, device=None): + cord_range = [range(s) for s in shape] + mesh = np.array(np.meshgrid(*cord_range, indexing='ij'), dtype=np.float32) + mesh = th.from_numpy(mesh) + if device is not None: + mesh = mesh.to(device) + if scales is not None: + if not isinstance(scales, th.Tensor): + scales = th.tensor(scales, dtype=th.float32, device=device) + mesh = mesh * (1.0 / scales.view(-1, 1, 1)) + return th.stack(bs * [mesh]) + + +def create_YXRGB(img, yx_scale=None, rgb_scale=None, scales=None): + img = img.view(-1, *img.shape[-3:]) + if scales is not None: + assert yx_scale == None and rgb_scale == None + yx_scale = scales[:2] + rgb_scale = scales[2:] + mesh = create_position_feats(img.shape[-2:], yx_scale, img.shape[0], img.device) + if rgb_scale is not None: + if not isinstance(rgb_scale, th.Tensor): + rgb_scale = th.tensor(rgb_scale, dtype=th.float32, device=img.device) + img = img * (1.0 / rgb_scale.view(-1, 1, 1)) + feats = th.cat([mesh, img], dim=1) + return feats + + +def _ceil_pad_factor(sizes, factor): + offs = tuple((factor - sz % factor) % factor for sz in sizes) + pad = tuple((off + 1) // 2 for off in offs) + return pad + + +class PacCRF(nn.Module): + r""" + Args: + channels (int): number of categories. + num_steps (int): number of mean-field update steps. + final_output (str): 'log_softmax' | 'softmax' | 'log_Q'. Default: 'log_Q' + perturbed_init (bool): whether to perturb initialization. Default: True + native_impl (bool): Default: False + fixed_weighting (bool): whether to use fixed weighting for unary/pairwise terms. Default: False + unary_weight (float): Default: 1.0 + pairwise_kernels (dict or list): pairwise kernels, see add_pairwise_kernel() for details. Default: None + """ + def __init__(self, channels, num_steps, final_output='log_Q', perturbed_init=True, native_impl=False, + fixed_weighting=False, unary_weight=1.0, pairwise_kernels=None): + super(PacCRF, self).__init__() + self.channels = channels + self.num_steps = num_steps + self.final_output = final_output # 'log_softmax', 'softmax', 'log_Q' + self.perturbed_init = perturbed_init + self.native_impl = native_impl + self.fixed_weighting = fixed_weighting + self.init_unary_weight = unary_weight + + self.messengers = nn.ModuleList() + self.compat = nn.ModuleList() + self.init_pairwise_weights = [] + self.pairwise_weights = nn.ParameterList() + self._use_pairwise_weights = [] + self.unary_weight = unary_weight if self.fixed_weighting else nn.Parameter(th.tensor(float(unary_weight))) + self.blur = [] + self.pairwise_repr = [] + + if pairwise_kernels is not None: + if type(pairwise_kernels) == dict: + self.add_pairwise_kernel(**pairwise_kernels) + else: + for k in pairwise_kernels: + self.add_pairwise_kernel(**k) + + def reset_parameters(self, pairwise_idx=None): + if pairwise_idx is None: + idxs = range(len(self.messengers)) + if not self.fixed_weighting: + self.unary_weight.data.fill_(self.init_unary_weight) + else: + idxs = [pairwise_idx] + + for i in idxs: + self.messengers[i].reset_parameters() + if isinstance(self.messengers[i], nn.Conv2d): + # TODO: gaussian initialization for XY kernels? 
+ pass + if self.compat[i] is not None: + self.compat[i].weight.data[:, :, 0, 0] = 1.0 - th.eye(self.channels, dtype=th.float32) + if self.perturbed_init: + perturb_range = 0.001 + self.compat[i].weight.data.add_((th.rand_like(self.compat[i].weight.data) - 0.5) * perturb_range) + self.pairwise_weights[i].data = th.ones_like(self.pairwise_weights[i]) * self.init_pairwise_weights[i] + + def extra_repr(self): + s = ('categories={channels}' + ', num_steps={num_steps}' + ', final_output={final_output}') + if self.perturbed_init: + s += ', perturbed_init=True' + if self.fixed_weighting: + s += ', fixed_weighting=True' + if self.pairwise_repr: + s += ', pairwise_kernels=({})'.format(', '.join(self.pairwise_repr)) + return s.format(**self.__dict__) + + def add_pairwise_kernel(self, kernel_size=3, dilation=1, blur=1, compat_type='4d', spatial_filter=True, + pairwise_weight=1.0): + assert kernel_size % 2 == 1 + self.pairwise_repr.append('{}{}_{}_{}_{}'.format('0d' if compat_type == 'potts' else compat_type, + 's' if spatial_filter else '', + kernel_size, dilation, blur)) + + if compat_type == 'potts': + pairwise_weight *= -1.0 + + if compat_type == 'potts' and (not spatial_filter) and (not self.fixed_weighting): + self._use_pairwise_weights.append(True) + else: + self._use_pairwise_weights.append(False) + self.pairwise_weights.append(nn.Parameter(th.tensor(pairwise_weight, dtype=th.float32))) + self.init_pairwise_weights.append(pairwise_weight) + self.blur.append(blur) + self.compat.append(nn.Conv2d(self.channels, self.channels, kernel_size=1, bias=False) if compat_type == '2d' + else None) + + pad = int(kernel_size // 2) * dilation + + if compat_type == 'na': + messenger = nn.Conv2d(self.channels, self.channels, kernel_size, padding=pad, dilation=dilation, bias=False) + elif compat_type == '4d': + messenger = pac.PacConv2d(self.channels, self.channels, kernel_size, padding=pad, dilation=dilation, + bias=False, shared_filters=False, native_impl=self.native_impl, + filler=('crf_perturbed' if self.perturbed_init else 'crf')) + elif spatial_filter: + messenger = pac.PacConv2d(self.channels, self.channels, kernel_size, padding=pad, dilation=dilation, + bias=False, shared_filters=True, native_impl=self.native_impl, + filler=('crf_perturbed' if self.perturbed_init else 'crf')) + else: + messenger = pac.PacConv2d(self.channels, self.channels, kernel_size, padding=pad, dilation=dilation, + bias=False, shared_filters=True, native_impl=self.native_impl, + filler='crf_pool') + + self.messengers.append(messenger) + self.reset_parameters(-1) + + def num_pairwise_kernels(self): + return len(self.messengers) + + def forward(self, unary, edge_feat, edge_kernel=None, logQ=None): + n_kernels = len(self.messengers) + edge_kernel = [edge_kernel] * n_kernels if isinstance(edge_kernel, th.Tensor) else edge_kernel + + if edge_kernel is None: + edge_kernel = [None] * n_kernels + _shared = isinstance(edge_feat, th.Tensor) + if _shared: + edge_feat = {1 : edge_feat} + for i in range(n_kernels): + if isinstance(self.messengers[i], nn.Conv2d): + continue + if _shared and self.blur[i] in edge_feat: + feat = edge_feat[self.blur[i]] + elif self.blur[i] == 1: + feat = edge_feat[i] + else: + feat = edge_feat[1] if _shared else edge_feat[i] + pad = _ceil_pad_factor(feat.shape[2:], self.blur[i]) + feat = F.avg_pool2d(feat, + kernel_size=self.blur[i], + padding=pad, + count_include_pad=False) + if _shared: + edge_feat[self.blur[i]] = feat + edge_kernel[i], _ = self.messengers[i].compute_kernel(feat) + del feat + del edge_feat + + if 
logQ is None: + logQ = unary + for step in range(self.num_steps): + Q = F.softmax(logQ, dim=1) + Q_blur = {1 : Q} + logQ = unary * self.unary_weight + for i in range(n_kernels): + pad = _ceil_pad_factor(Q.shape[2:], self.blur[i]) + if self.blur[i] not in Q_blur: + Q_blur[self.blur[i]] = F.avg_pool2d(Q, + kernel_size=self.blur[i], + padding=pad, + count_include_pad=False) + if isinstance(self.messengers[i], nn.Conv2d): + msg = self.messengers[i](Q_blur[self.blur[i]]) + else: + msg = self.messengers[i](Q_blur[self.blur[i]], None, edge_kernel[i]) + if self.compat[i] is not None: + msg = self.compat[i](msg) + if self.blur[i] > 1: + msg = F.interpolate(msg, scale_factor=self.blur[i], mode='bilinear', align_corners=False) + msg = msg[:, :, pad[0]:pad[0] + unary.shape[2], pad[1]:pad[1] + unary.shape[3]].contiguous() + pw = self.pairwise_weights[i] if self._use_pairwise_weights[i] else self.init_pairwise_weights[i] + logQ = logQ - msg * pw + + if self.final_output == 'softmax': + out = F.softmax(logQ, dim=1) + elif self.final_output == 'log_softmax': + out = F.log_softmax(logQ, dim=1) + elif self.final_output == 'log_Q': + out = logQ + else: + raise ValueError('Unknown value for final_output: {}'.format(self.final_output)) + + return out + + +class PacCRFLoose(nn.Module): + def __init__(self, channels, num_steps, final_output='log_Q', perturbed_init=True, native_impl=False, + fixed_weighting=False, unary_weight=1.0, pairwise_kernels=None): + super(PacCRFLoose, self).__init__() + self.channels = channels + self.num_steps = num_steps + self.final_output = final_output # 'log_softmax', 'softmax', 'log_Q' + + self.steps = nn.ModuleList() + for i in range(num_steps): + self.steps.append(PacCRF(channels, 1, 'log_Q', perturbed_init, native_impl, fixed_weighting, unary_weight, + pairwise_kernels)) + + self.reset_parameters() + + def reset_parameters(self): + for i in range(self.num_steps): + self.steps[i].reset_parameters() + + def extra_repr(self): + s = ('categories={channels}' + ', num_steps={num_steps}' + ', final_output={final_output}') + return s.format(**self.__dict__) + + def add_pairwise_kernel(self, kernel_size=3, dilation=1, blur=1, compat_type='4d', spatial_filter=True, + pairwise_weight=1.0): + for i in range(self.num_steps): + self.steps[i].add_pairwise_kernel(kernel_size, dilation, blur, compat_type, spatial_filter, pairwise_weight) + + def num_pairwise_kernels(self): + return self.steps[0].num_pairwise_kernels() + + def forward(self, unary, edge_feat, edge_kernel=None): + n_kernels = self.num_pairwise_kernels() + edge_kernel = [edge_kernel] * n_kernels if isinstance(edge_kernel, th.Tensor) else edge_kernel + blurs = self.steps[0].blur + + if edge_kernel is None: + edge_kernel = [None] * n_kernels + _shared = isinstance(edge_feat, th.Tensor) + if _shared: + edge_feat = {1 : edge_feat} + for i in range(n_kernels): + if _shared and blurs[i] in edge_feat: + feat = edge_feat[blurs[i]] + elif blurs[i] == 1: + feat = edge_feat[i] + else: + feat = edge_feat[1] if _shared else edge_feat[i] + pad = _ceil_pad_factor(feat.shape[2:], blurs[i]) + feat = F.avg_pool2d(feat, + kernel_size=blurs[i], + padding=pad, + count_include_pad=False) + if _shared: + edge_feat[blurs[i]] = feat + edge_kernel[i], _ = self.steps[0].messengers[i].compute_kernel(feat) + del feat + del edge_feat + + logQ = unary + for step in self.steps: + logQ = step(unary, None, edge_kernel, logQ) + + if self.final_output == 'softmax': + out = F.softmax(logQ, dim=1) + elif self.final_output == 'log_softmax': + out = F.log_softmax(logQ, 
dim=1) + elif self.final_output == 'log_Q': + out = logQ + else: + raise ValueError('Unknown value for final_output: {}'.format(self.final_output)) + + return out diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/test_pac.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/test_pac.py new file mode 100644 index 0000000..5c86180 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/pacnet/test_pac.py @@ -0,0 +1,423 @@ +""" +Copyright (C) 2019 NVIDIA Corporation. All rights reserved. +Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). +""" +import unittest +from functools import wraps + +import numpy as np +import torch as th +from torch import nn +from torch.autograd import gradcheck + +import pac + + +def _allclose(x1, x2, rtol=1e-5, atol=1e-10): + return np.allclose(x1.cpu(), x2.cpu(), rtol=rtol, atol=atol) + + +def _gradcheck(f, x0, rtol=1e-3, atol=1e-8): + return gradcheck(f, x0, rtol=rtol, atol=atol) + + +# test both native autograd version and Function version +def repeat_impl_types(f): + @wraps(f) + def call_wrapped(self, *args): + f(self, *args, native_impl=True) + f(self, *args, native_impl=False) + + return call_wrapped + + +# some features are not yet implemented using custom Function +def use_only_native_impl(f): + @wraps(f) + def call_wrapped(self, *args): + f(self, *args, native_impl=True) + + return call_wrapped + + +# test only the version with custom Function +def use_only_custom_impl(f): + @wraps(f) + def call_wrapped(self, *args): + f(self, *args, native_impl=False) + + return call_wrapped + + +class PacConvTest(unittest.TestCase): + def setUp(self): + self.device = th.device('cuda:0') + th.cuda.set_device(0) + + @repeat_impl_types + def test_conv_forward_const_kernel(self, native_impl): + bs, sz, k_ch = 2, 111, 5 + args = dict(in_channels=4, out_channels=3, kernel_size=5, stride=2, padding=4, dilation=2) + im = th.rand(bs, args['in_channels'], sz, sz).to(self.device) + im_th = im.clone() + im_k = th.ones(bs, k_ch, sz, sz).to(self.device) + conv_w = th.rand(args['out_channels'], args['in_channels'], + args['kernel_size'], args['kernel_size']).to(self.device) + conv_b = th.rand(args['out_channels']).to(self.device) + conv = pac.PacConv2d(native_impl=native_impl, **args).to(self.device) + conv_th = nn.Conv2d(**args).to(self.device) + conv.weight.data[:] = conv_th.weight.data[:] = conv_w + conv.bias.data[:] = conv_th.bias.data[:] = conv_b + + _allclose(conv(im, im_k).detach(), conv_th(im_th).detach()) + + @repeat_impl_types + def test_conv_transpose_forward_const_kernel(self, native_impl): + bs, sz, k_ch = 4, 128, 5 + args = dict(in_channels=4, out_channels=3, kernel_size=5, stride=2, padding=2, output_padding=1, dilation=1) + k_with_d = (args['kernel_size'] - 1) * args['dilation'] + 1 + sz_out = (sz - 1) * args['stride'] - 2 * args['padding'] + k_with_d + args['output_padding'] + im = th.rand(bs, args['in_channels'], sz, sz).to(self.device) + im_th = im.clone() + im_k = th.ones(bs, k_ch, sz_out, sz_out).to(self.device) + conv_w = th.rand(args['in_channels'], args['out_channels'], + args['kernel_size'], args['kernel_size']).to(self.device) + conv_b = th.rand(args['out_channels']).to(self.device) + conv = pac.PacConvTranspose2d(native_impl=native_impl, **args).to(self.device) + conv_th = nn.ConvTranspose2d(**args).to(self.device) + conv.weight.data[:] = conv_th.weight.data[:] = conv_w + conv.bias.data[:] = conv_th.bias.data[:] = conv_b + + _allclose(conv(im, 
im_k).detach(), conv_th(im_th).detach()) + + @repeat_impl_types + def test_pool_forward_const_kernel(self, native_impl): + bs, sz, in_ch, k_ch = 2, 9, 4, 5 + dilation = 1 + args = dict(kernel_size=5, stride=2, padding=2) + im = th.rand(bs, in_ch, sz, sz).to(self.device) + im_th = im.clone() + im_k = th.ones(bs, k_ch, sz, sz).to(self.device) + pool = pac.PacPool2d(dilation=dilation, native_impl=native_impl, **args).to(self.device) + pool_th = nn.AvgPool2d(**args).to(self.device) + + _allclose(pool(im, im_k).detach(), pool_th(im_th).detach()) + + @repeat_impl_types + def test_conv_input_grad(self, native_impl): + bs, sz, k_ch = 2, 8, 3 + args = dict(in_channels=4, out_channels=2, kernel_size=3, stride=2, padding=1, dilation=1) + im = th.rand(bs, args['in_channels'], sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + conv = pac.PacConv2d(native_impl=native_impl, **args).double().to(self.device) + self.assertTrue(_gradcheck(conv, (im, im_k))) + + @use_only_native_impl + def test_conv_inv_kernel_input_grad(self, native_impl): + bs, sz, k_ch = 2, 8, 3 + args = dict(in_channels=4, out_channels=2, kernel_size=3, stride=2, padding=1, dilation=1, + kernel_type='inv_0.2_0.2_asym', smooth_kernel_type='average_5', normalize_kernel=True) + im = th.rand(bs, args['in_channels'], sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + conv = pac.PacConv2d(native_impl=native_impl, **args).double().to(self.device) + self.assertTrue(_gradcheck(conv, (im, im_k))) + + @repeat_impl_types + def test_conv_all_grad(self, native_impl): + bs, sz, k_ch, f_sz, in_ch, out_ch = 2, 10, 3, 5, 2, 4 + conv_args = dict(stride=1, padding=2, dilation=2) + kernel_args = dict(kernel_size=f_sz, smooth_kernel=None, inv_alpha=None, inv_lambda=None, + kernel_type='gaussian', smooth_kernel_type='none', + channel_wise=False, normalize_kernel=False, transposed=False, + **conv_args) + im = th.rand(bs, in_ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + + conv_w = th.rand(out_ch, in_ch, f_sz, f_sz).double().to(self.device) + conv_b = th.rand(out_ch).double().to(self.device) + self.assertTrue(_gradcheck( + lambda in0, in1, w, b: pac.pacconv2d(in0, + pac.packernel2d(in1, **kernel_args)[0], + w, b, native_impl=native_impl, **conv_args), + (im, im_k, conv_w, conv_b))) + + @repeat_impl_types + def test_conv_transpose_input_grad(self, native_impl): + bs, sz, k_ch = 1, 4, 2 + args = dict(in_channels=2, out_channels=3, kernel_size=3, stride=2, padding=1, output_padding=1, dilation=1) + k_with_d = (args['kernel_size'] - 1) * args['dilation'] + 1 + sz_out = (sz - 1) * args['stride'] - 2 * args['padding'] + k_with_d + args['output_padding'] + im = th.rand(bs, args['in_channels'], sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz_out, sz_out).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + conv = pac.PacConvTranspose2d(native_impl=native_impl, **args).double().to(self.device) + self.assertTrue(_gradcheck(conv, (im, im_k))) + + @repeat_impl_types + def test_conv_transpose_all_grad(self, native_impl): + bs, sz, k_ch, f_sz, in_ch, out_ch = 2, 3, 3, 3, 2, 3 + conv_args = dict(stride=2, padding=1, output_padding=1, dilation=1) + kernel_args = dict(kernel_size=f_sz, smooth_kernel=None, inv_alpha=None, inv_lambda=None, + kernel_type='gaussian', 
smooth_kernel_type='none', + channel_wise=False, normalize_kernel=False, transposed=True, + **conv_args) + k_with_d = (f_sz - 1) * conv_args['dilation'] + 1 + sz_out = (sz - 1) * conv_args['stride'] - 2 * conv_args['padding'] + k_with_d + conv_args['output_padding'] + im = th.rand(bs, in_ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz_out, sz_out).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + + conv_w = th.rand(in_ch, out_ch, f_sz, f_sz).double().to(self.device) + conv_b = th.rand(out_ch).double().to(self.device) + self.assertTrue(_gradcheck( + lambda in0, in1, w, b: pac.pacconv_transpose2d(in0, + pac.packernel2d(in1, **kernel_args)[0], + w, b, native_impl=native_impl, **conv_args), + (im, im_k, conv_w, conv_b))) + + @repeat_impl_types + def test_pool_grad(self, native_impl): + bs, sz, ch, k_ch = 2, 8, 2, 3 + args = dict(kernel_size=5, stride=2, padding=4, dilation=2) + im = th.rand(bs, ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + pool = pac.PacPool2d(native_impl=native_impl, **args).double().to(self.device) + self.assertTrue(_gradcheck(pool, (im, im_k))) + + def test_conv_two_impl_match(self): + bs, sz, k_ch = 24, 128, 3 + args = dict(in_channels=4, out_channels=2, kernel_size=3, stride=2, padding=2, dilation=2) + im = th.rand(bs, args['in_channels'], sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im0 = im.clone() + im0_k = im_k.clone() + im.requires_grad = im_k.requires_grad = True + im0.requires_grad = im0_k.requires_grad = True + conv = pac.PacConv2d(native_impl=False, **args).double().to(self.device) + conv0 = pac.PacConv2d(native_impl=True, **args).double().to(self.device) + + conv_w = th.rand(args['out_channels'], args['in_channels'], + args['kernel_size'], args['kernel_size']).double().to(self.device) + conv_b = th.rand(args['out_channels']).double().to(self.device) + conv.weight.data[:] = conv0.weight.data[:] = conv_w + conv.bias.data[:] = conv0.bias.data[:] = conv_b + + out = conv(im, im_k) + out0 = conv0(im0, im0_k) + out.sum().backward() + out0.sum().backward() + self.assertTrue(_allclose(out.detach(), out0.detach())) + self.assertTrue(_allclose(im.grad, im0.grad)) + self.assertTrue(_allclose(im_k.grad, im0_k.grad)) + self.assertTrue(_allclose(conv.weight.grad, conv0.weight.grad)) + self.assertTrue(_allclose(conv.bias.grad, conv0.bias.grad)) + + def test_conv_with_kernel_input_two_impl_match(self): + bs, sz, k_ch = 24, 128, 3 + args = dict(in_channels=4, out_channels=2, kernel_size=3, stride=2, padding=2, dilation=2) + im = th.rand(bs, args['in_channels'], sz, sz).double().to(self.device) + out_sz = int(np.floor( + (sz + 2 * args['padding'] - (args['kernel_size'] - 1) * args['dilation'] - 1) / args['stride'])) + 1 + im_k = th.rand(bs, 1, args['kernel_size'], args['kernel_size'], out_sz, out_sz).double().to(self.device) + im0 = im.clone() + im0_k = im_k.clone() + im.requires_grad = im_k.requires_grad = True + im0.requires_grad = im0_k.requires_grad = True + conv = pac.PacConv2d(native_impl=False, **args).double().to(self.device) + conv0 = pac.PacConv2d(native_impl=True, **args).double().to(self.device) + + conv_w = th.rand(args['out_channels'], args['in_channels'], + args['kernel_size'], args['kernel_size']).double().to(self.device) + conv_b = th.rand(args['out_channels']).double().to(self.device) + conv.weight.data[:] = conv0.weight.data[:] = conv_w + conv.bias.data[:] = 
conv0.bias.data[:] = conv_b + + out = conv(im, None, im_k) + out0 = conv0(im0, None, im0_k) + out.sum().backward() + out0.sum().backward() + self.assertTrue(_allclose(out.detach(), out0.detach())) + self.assertTrue(_allclose(im.grad, im0.grad)) + self.assertTrue(_allclose(im_k.grad, im0_k.grad)) + self.assertTrue(_allclose(conv.weight.grad, conv0.weight.grad)) + self.assertTrue(_allclose(conv.bias.grad, conv0.bias.grad)) + + def test_conv_transpose_two_impl_match(self): + bs, sz, k_ch = 3, 128, 3 + args = dict(in_channels=2, out_channels=3, kernel_size=3, stride=2, padding=1, output_padding=1, dilation=1) + k_with_d = (args['kernel_size'] - 1) * args['dilation'] + 1 + sz_out = (sz - 1) * args['stride'] - 2 * args['padding'] + k_with_d + args['output_padding'] + im = th.rand(bs, args['in_channels'], sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz_out, sz_out).double().to(self.device) + im0 = im.clone() + im0_k = im_k.clone() + im.requires_grad = im_k.requires_grad = True + im0.requires_grad = im0_k.requires_grad = True + conv = pac.PacConvTranspose2d(native_impl=False, **args).double().to(self.device) + conv0 = pac.PacConvTranspose2d(native_impl=True, **args).double().to(self.device) + + conv_w = th.rand(args['in_channels'], args['out_channels'], + args['kernel_size'], args['kernel_size']).double().to(self.device) + conv_b = th.rand(args['out_channels']).double().to(self.device) + conv.weight.data[:] = conv0.weight.data[:] = conv_w + conv.bias.data[:] = conv0.bias.data[:] = conv_b + + out = conv(im, im_k) + out0 = conv0(im0, im0_k) + out.sum().backward() + out0.sum().backward() + self.assertTrue(_allclose(out.detach(), out0.detach())) + self.assertTrue(_allclose(im.grad, im0.grad)) + self.assertTrue(_allclose(im_k.grad, im0_k.grad)) + self.assertTrue(_allclose(conv.weight.grad, conv0.weight.grad)) + self.assertTrue(_allclose(conv.bias.grad, conv0.bias.grad)) + + def test_pool_two_impl_match(self): + bs, sz, ch, k_ch = 2, 128, 4, 3 + args = dict(kernel_size=3, stride=2, padding=2, dilation=2) + im = th.rand(bs, ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im0 = im.clone() + im0_k = im_k.clone() + im.requires_grad = im_k.requires_grad = True + im0.requires_grad = im0_k.requires_grad = True + pool = pac.PacPool2d(native_impl=False, **args).to(self.device) + p00l0 = pac.PacPool2d(native_impl=True, **args).to(self.device) + + out = pool(im, im_k) + out0 = p00l0(im0, im0_k) + out.sum().backward() + out0.sum().backward() + self.assertTrue(_allclose(out.detach(), out0.detach())) + self.assertTrue(_allclose(im.grad, im0.grad)) + self.assertTrue(_allclose(im_k.grad, im0_k.grad)) + + def test_kernel_two_impl_match(self): + bs, sz, ch = 16, 256, 8 + args = dict(kernel_size=3, stride=1, padding=1, dilation=1) + im = th.rand(bs, ch, sz, sz).double().to(self.device) + im0 = im.clone() + im.requires_grad = im0.requires_grad = True + + out = pac.packernel2d(im, native_impl=False, **args)[0] + out0 = pac.packernel2d(im0, native_impl=True, **args)[0] + + out.sum().backward() + out0.sum().backward() + self.assertTrue(_allclose(out.detach(), out0.detach())) + self.assertTrue(_allclose(im.grad, im0.grad)) + + # Tests below pass on small input sizes, but may fail on larger ones + + @repeat_impl_types + def test_conv_sum_all_grad(self, native_impl): + bs, sz, k_ch, f_sz, in_ch, out_ch = 2, 10, 3, 5, 2, 4 + conv_args = dict(stride=1, padding=2, dilation=2) + kernel_args = dict(kernel_size=f_sz, smooth_kernel=None, inv_alpha=None, inv_lambda=None, 
+ kernel_type='gaussian', smooth_kernel_type='none', + channel_wise=False, normalize_kernel=False, transposed=False, + **conv_args) + im = th.rand(bs, in_ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + + conv_w = th.rand(out_ch, in_ch, f_sz, f_sz).double().to(self.device) + conv_b = th.rand(out_ch).double().to(self.device) + self.assertTrue(_gradcheck( + lambda in0, in1, w, b: pac.pacconv2d(in0, + pac.packernel2d(in1, **kernel_args)[0], + w, b, native_impl=native_impl, **conv_args).sum(), + (im, im_k, conv_w, conv_b), rtol=0.01)) + + @repeat_impl_types + def test_conv_transpose_sum_all_grad(self, native_impl): + bs, sz, k_ch, f_sz, in_ch, out_ch = 2, 3, 3, 3, 2, 3 + conv_args = dict(stride=2, padding=1, output_padding=1, dilation=1) + kernel_args = dict(kernel_size=f_sz, smooth_kernel=None, inv_alpha=None, inv_lambda=None, + kernel_type='gaussian', smooth_kernel_type='none', + channel_wise=False, normalize_kernel=False, transposed=True, + **conv_args) + k_with_d = (f_sz - 1) * conv_args['dilation'] + 1 + sz_out = (sz - 1) * conv_args['stride'] - 2 * conv_args['padding'] + k_with_d + conv_args['output_padding'] + im = th.rand(bs, in_ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz_out, sz_out).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + + conv_w = th.rand(in_ch, out_ch, f_sz, f_sz).double().to(self.device) + conv_b = th.rand(out_ch).double().to(self.device) + self.assertTrue(_gradcheck( + lambda in0, in1, w, b: pac.pacconv_transpose2d(in0, + pac.packernel2d(in1, **kernel_args)[0], + w, b, native_impl=native_impl, **conv_args).sum(), + (im, im_k, conv_w, conv_b), rtol=0.01)) + + @repeat_impl_types + def test_pool_sum_grad(self, native_impl): + bs, sz, ch, k_ch = 2, 8, 2, 3 + args = dict(kernel_size=5, stride=2, padding=4, dilation=2) + im = th.rand(bs, ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, k_ch, sz, sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + pool = pac.PacPool2d(native_impl=native_impl, **args).double().to(self.device) + self.assertTrue(_gradcheck(lambda x, y: pool(x, y).sum(), (im, im_k), rtol=0.01)) + + @repeat_impl_types + def test_kernel_sum_grad(self, native_impl): + bs, sz, ch = 2, 4, 4 + args = dict(kernel_size=3, stride=2, padding=1, dilation=1) + im = th.rand(bs, ch, sz, sz).double().to(self.device) + im.requires_grad = True + self.assertTrue(_gradcheck(lambda x: pac.packernel2d(x, native_impl=native_impl, **args)[0].sum(), + (im,), rtol=0.01)) + + @repeat_impl_types + def test_conv_with_kernel_input_sum_all_grad(self, native_impl): + bs, sz, k_ch, f_sz, in_ch, out_ch = 2, 10, 3, 5, 2, 4 + args = dict(stride=1, padding=2, dilation=2) + out_sz = int(np.floor((sz + 2 * args['padding'] - (f_sz - 1) * args['dilation'] - 1) / args['stride'])) + 1 + im = th.rand(bs, in_ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, 1, f_sz, f_sz, out_sz, out_sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + + conv_w = th.rand(out_ch, in_ch, f_sz, f_sz).double().to(self.device) + conv_b = th.rand(out_ch).double().to(self.device) + self.assertTrue(_gradcheck( + lambda in0, in1, w, b: pac.pacconv2d(in0, in1, w, b, native_impl=native_impl, **args).sum(), + (im, im_k, conv_w, conv_b), rtol=0.01)) + + @repeat_impl_types + def test_conv_transpose_with_kernel_input_sum_all_grad(self, native_impl): + bs, sz, k_ch, f_sz, in_ch, out_ch = 2, 3, 3, 3, 2, 3 + args = dict(stride=2, 
padding=1, output_padding=1, dilation=1) + k_with_d = (f_sz - 1) * args['dilation'] + 1 + sz_out = (sz - 1) * args['stride'] - 2 * args['padding'] + k_with_d + args['output_padding'] + im = th.rand(bs, in_ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, 1, f_sz, f_sz, sz_out, sz_out).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + + conv_w = th.rand(in_ch, out_ch, f_sz, f_sz).double().to(self.device) + conv_b = th.rand(out_ch).double().to(self.device) + self.assertTrue(_gradcheck( + lambda in0, in1, w, b: pac.pacconv_transpose2d(in0, in1, w, b, native_impl=native_impl, **args).sum(), + (im, im_k, conv_w, conv_b), rtol=0.01)) + + @repeat_impl_types + def test_pool_with_kernel_input_sum_grad(self, native_impl): + bs, sz, ch = 2, 8, 2 + args = dict(kernel_size=3, stride=2, padding=2, dilation=2) + out_sz = int(np.floor( + (sz + 2 * args['padding'] - (args['kernel_size'] - 1) * args['dilation'] - 1) / args['stride'])) + 1 + im = th.rand(bs, ch, sz, sz).double().to(self.device) + im_k = th.rand(bs, 1, args['kernel_size'], args['kernel_size'], out_sz, out_sz).double().to(self.device) + im.requires_grad = im_k.requires_grad = True + pool = pac.PacPool2d(native_impl=native_impl, **args).double().to(self.device) + self.assertTrue(_gradcheck(lambda x, y: pool(x, None, y).sum(), + (im, im_k), rtol=0.01)) + + +if __name__ == '__main__': + unittest.main() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/__init__.py new file mode 100644 index 0000000..da8efae --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/__init__.py @@ -0,0 +1,8 @@ +from .data_container import DataContainer +from .distributed import MMDistributedDataParallel +from .scatter_gather import scatter, scatter_kwargs + +__all__ = [ + 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', + 'scatter', 'scatter_kwargs' +] diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/_functions.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/_functions.py new file mode 100644 index 0000000..75bb954 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/_functions.py @@ -0,0 +1,74 @@ +import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs. 
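The tests above follow one pattern throughout: run the same op through both implementations, compare outputs and gradients with _allclose, and verify analytic gradients via the _gradcheck helper (presumably a thin wrapper over torch.autograd.gradcheck) on double-precision inputs. A standalone, hedged sketch of that gradcheck pattern, applied to a plain conv2d rather than the PAC ops, just to show the ingredients gradcheck needs:

import torch as th

# Double precision plus requires_grad is what makes finite-difference checking reliable;
# the callable returns a scalar so analytic and numeric gradients can be compared directly.
x = th.rand(2, 3, 8, 8, dtype=th.float64, requires_grad=True)
w = th.rand(4, 3, 3, 3, dtype=th.float64, requires_grad=True)

ok = th.autograd.gradcheck(
    lambda a, b: th.nn.functional.conv2d(a, b, padding=1).sum(),
    (x, w), raise_exception=False)
print(ok)  # True when analytic and numeric gradients agree within tolerance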
+ """ + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + return output + else: + raise Exception('Unknown type {}.'.format(type(input))) + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception('Unknown type {}.'.format(type(output))) + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception('Unknown type {}.'.format(type(input))) + + +class Scatter(object): + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/data_container.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/data_container.py new file mode 100644 index 0000000..078cabf --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/data_container.py @@ -0,0 +1,76 @@ +import functools + +import torch + + +def assert_tensor_type(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError('{} has no attribute {} for type {}'.format( + args[0].__class__.__name__, func.__name__, args[0].datatype)) + return func(*args, **kwargs) + + return wrapper + + +class DataContainer(object): + """A container for any type of objects. + + Typically tensors will be stacked in the collate function and sliced along + some dimension in the scatter function. This behavior has some limitations. + 1. All tensors have to be the same size. + 2. Types are limited (numpy array or Tensor). + + We design `DataContainer` and `MMDataParallel` to overcome these + limitations. The behavior can be either of the following. 
+ + - copy to GPU, pad all tensors to the same size and stack them + - copy to GPU without stacking + - leave the objects as is and pass it to the model + """ + + def __init__(self, data, stack=False, padding_value=0, cpu_only=False): + self._data = data + self._cpu_only = cpu_only + self._stack = stack + self._padding_value = padding_value + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self.data)) + + @property + def data(self): + return self._data + + @property + def datatype(self): + if isinstance(self.data, torch.Tensor): + return self.data.type() + else: + return type(self.data) + + @property + def cpu_only(self): + return self._cpu_only + + @property + def stack(self): + return self._stack + + @property + def padding_value(self): + return self._padding_value + + @assert_tensor_type + def size(self, *args, **kwargs): + return self.data.size(*args, **kwargs) + + @assert_tensor_type + def dim(self): + return self.data.dim() + + @assert_tensor_type + def numel(self): + return self.data.numel() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/data_parallel.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/data_parallel.py new file mode 100644 index 0000000..8f00af1 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/data_parallel.py @@ -0,0 +1,243 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## ECE Department, Rutgers University +## Email: zhang.hang@rutgers.edu +## Copyright (c) 2017 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +"""Encoding Data Parallel""" +import functools +import threading + +import torch +import torch.cuda.comm as comm +from torch.autograd import Function +from torch.nn.parallel._functions import Broadcast +from torch.nn.parallel.data_parallel import DataParallel +from torch.nn.parallel.parallel_apply import get_a_var +from torch.nn.parallel.scatter_gather import gather + +from models.protoseg_core.lib.extensions.parallel.scatter_gather import scatter_kwargs + +torch_ver = torch.__version__[:3] + + +class Reduce(Function): + @staticmethod + def forward(ctx, *inputs): + ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] + inputs = sorted(inputs, key=lambda i: i.get_device()) + return comm.reduce_add(inputs) + + @staticmethod + def backward(ctx, gradOutput): + return Broadcast.apply(ctx.target_gpus, gradOutput) + + +class DataParallelModel(DataParallel): + """Implements data parallelism at the module level. + + This container parallelizes the application of the given module by + splitting the input across the specified devices by chunking in the + batch dimension. + In the forward pass, the module is replicated on each device, + and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. + Note that the outputs are not gathered, please use compatible + :class:`encoding.parallel.DataParallelCriterion`. + + The batch size should be larger than the number of GPUs used. It should + also be an integer multiple of the number of GPUs so that each chunk is + the same size (so that each GPU processes the same number of samples). 
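A short, hedged usage sketch for the DataContainer defined above; the payloads and the file name are invented, and it assumes the models.protoseg_core package root is on the import path:

import torch
from models.protoseg_core.lib.extensions.parallel.data_container import DataContainer

# A tensor payload: collate may pad and stack it, scatter will copy it onto the GPUs.
img = DataContainer(torch.zeros(3, 4, 4), stack=True, padding_value=0)
# A metadata payload: flagged cpu_only, so scatter passes it through untouched.
meta = DataContainer({'filename': 'frame_000.png'}, cpu_only=True)

print(img.stack, img.padding_value, img.datatype)   # True 0 torch.FloatTensor
print(img.size(), img.dim(), img.numel())           # torch.Size([3, 4, 4]) 3 48
print(meta.cpu_only, meta.datatype)                  # True <class 'dict'>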
+ + Args: + module: module to be parallelized + device_ids: CUDA devices (default: all devices) + + Reference: + Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, + Amit Agrawal. "Context Encoding for Semantic Segmentation. + *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* + + Example:: + + >>> net = DataParallelModel(model, device_ids=[0, 1, 2]) + >>> y = net(x) + """ + def __init__(self, module, device_ids=None, output_device=None, dim=0, gather_=True): + super(DataParallelModel, self).__init__(module, device_ids, output_device, dim) + self.gather_ = gather_ + + def gather(self, outputs, output_device): + if self.gather_: + return gather(outputs, output_device, dim=self.dim) + + return outputs + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def replicate(self, module, device_ids): + modules = super(DataParallelModel, self).replicate(module, device_ids) + execute_replication_callbacks(modules) + return modules + + +class DataParallelCriterion(DataParallel): + """ + Calculate loss in multiple-GPUs, which balance the memory usage for + Semantic Segmentation. + The targets are splitted across the specified devices by chunking in + the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`. + Reference: + Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, + Amit Agrawal. “Context Encoding for Semantic Segmentation. + *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* + Example:: + >>> net = DataParallelModel(model, device_ids=[0, 1, 2]) + >>> criterion = DataParallelCriterion(criterion, device_ids=[0, 1, 2]) + >>> y = net(x) + >>> loss = criterion(y, target) + """ + def __init__(self, module, device_ids=None, output_device=None, dim=0): + super(DataParallelCriterion, self).__init__(module, device_ids, output_device, dim) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, inputs, *targets, gathered=True, **kwargs): + # input should be already scatterd + # scattering the targets instead + if gathered: + if isinstance(inputs, (list, tuple)): + inputs, _ = self.scatter(inputs, kwargs, self.device_ids) + else: + inputs, _ = self.scatter([inputs], kwargs, self.device_ids) + # inputs = tuple(inputs_per_gpu[0] for inputs_per_gpu in inputs) + + if not self.device_ids: + return self.module(inputs, *targets, **kwargs) + + targets, kwargs = self.scatter(targets, kwargs, self.device_ids) + if len(self.device_ids) == 1: + return self.module(inputs[0], *targets[0], **kwargs[0]) + + replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) + # targets = tuple(targets_per_gpu[0] for targets_per_gpu in targets) + outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) + return Reduce.apply(*outputs) / len(outputs) + + +def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): + assert len(modules) == len(inputs) + assert len(targets) == len(inputs) + if kwargs_tup: + assert len(modules) == len(kwargs_tup) + else: + kwargs_tup = ({},) * len(modules) + if devices is not None: + assert len(modules) == len(devices) + else: + devices = [None] * len(modules) + + lock = threading.Lock() + results = {} + if torch_ver != "0.3": + grad_enabled = torch.is_grad_enabled() + + def _worker(i, module, input, target, kwargs, device=None): + if torch_ver != 
"0.3": + torch.set_grad_enabled(grad_enabled) + if device is None: + device = get_a_var(input).get_device() + try: + with torch.cuda.device(device): + output = module(input, *target, **kwargs) + with lock: + results[i] = output + except Exception as e: + with lock: + results[i] = e + + if len(modules) > 1: + threads = [threading.Thread(target=_worker, + args=(i, module, input, target, + kwargs, device),) + for i, (module, input, target, kwargs, device) in + enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] + + for thread in threads: + thread.start() + for thread in threads: + thread.join() + else: + _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0]) + + outputs = [] + for i in range(len(inputs)): + output = results[i] + if isinstance(output, Exception): + raise output + outputs.append(output) + return outputs + + +########################################################################### +# Adapted from Synchronized-BatchNorm-PyTorch. +# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch +# +class CallbackContext(object): + pass + + +def execute_replication_callbacks(modules): + """ + Execute an replication callback `__data_parallel_replicate__` on each module created + by original replication. + + The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` + + Note that, as all modules are isomorphism, we assign each sub-module with a context + (shared among multiple copies of this module on different devices). + Through this context, different copies can share some information. + + We guarantee that the callback on the master copy (the first copy) will be called ahead + of calling the callback of any slave copies. + """ + master_copy = modules[0] + nr_modules = len(list(master_copy.modules())) + ctxs = [CallbackContext() for _ in range(nr_modules)] + + for i, module in enumerate(modules): + for j, m in enumerate(module.modules()): + if hasattr(m, '__data_parallel_replicate__'): + m.__data_parallel_replicate__(ctxs[j], i) + + +def patch_replication_callback(data_parallel): + """ + Monkey-patch an existing `DataParallel` object. Add the replication callback. + Useful when you have customized `DataParallel` implementation. 
+ + Examples: + > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) + > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) + > patch_replication_callback(sync_bn) + # this is equivalent to + > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) + > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) + """ + + assert isinstance(data_parallel, DataParallel) + + old_replicate = data_parallel.replicate + + @functools.wraps(old_replicate) + def new_replicate(module, device_ids): + modules = old_replicate(module, device_ids) + execute_replication_callbacks(modules) + return modules + + data_parallel.replicate = new_replicate diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/distributed.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/distributed.py new file mode 100644 index 0000000..1fec225 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/distributed.py @@ -0,0 +1,47 @@ +import torch +import torch.distributed as dist +import torch.nn as nn +from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, + _take_tensors) + +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(nn.Module): + + def __init__(self, module, dim=0, broadcast_buffers=True, + bucket_cap_mb=25): + super(MMDistributedDataParallel, self).__init__() + self.module = module + self.dim = dim + self.broadcast_buffers = broadcast_buffers + + self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 + self._sync_params() + + def _dist_broadcast_coalesced(self, tensors, buffer_size): + for tensors in _take_tensors(tensors, buffer_size): + flat_tensors = _flatten_dense_tensors(tensors) + dist.broadcast(flat_tensors, 0) + for tensor, synced in zip( + tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + tensor.copy_(synced) + + def _sync_params(self): + module_states = list(self.module.state_dict().values()) + if len(module_states) > 0: + self._dist_broadcast_coalesced(module_states, + self.broadcast_bucket_size) + if self.broadcast_buffers: + buffers = [b.data for b in self.module._all_buffers()] + if len(buffers) > 0: + self._dist_broadcast_coalesced(buffers, + self.broadcast_bucket_size) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + return self.module(*inputs[0], **kwargs[0]) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/scatter_gather.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/scatter_gather.py new file mode 100644 index 0000000..a59284f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/parallel/scatter_gather.py @@ -0,0 +1,55 @@ +import torch +from torch.nn.parallel._functions import Scatter as OrigScatter + +from models.protoseg_core.lib.extensions.parallel.data_container import DataContainer +from models.protoseg_core.lib.extensions.parallel._functions import Scatter + + +def scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. + + The only difference from original :func:`scatter` is to add support for + :type:`~mmcv.parallel.DataContainer`. 
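The _dist_broadcast_coalesced step in MMDistributedDataParallel above buckets tensors, flattens each bucket into one contiguous tensor, broadcasts it from rank 0, and copies the synced values back in place. A hedged single-process sketch of that flatten/unflatten round trip, with the actual dist.broadcast call left out so it runs without initializing a process group (the bucket size is made artificially small to force more than one bucket):

import torch
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors, _take_tensors

params = [torch.randn(1000), torch.randn(2000), torch.randn(500)]
bucket_size_bytes = 8 * 1024   # ~8 KB buckets; float32 parameters are 4 bytes per element

for bucket in _take_tensors(params, bucket_size_bytes):
    flat = _flatten_dense_tensors(bucket)             # one contiguous tensor per bucket
    # dist.broadcast(flat, 0) happens here in the real implementation
    for t, synced in zip(bucket, _unflatten_dense_tensors(flat, bucket)):
        t.copy_(synced)                               # write the (broadcast) values back in place
    print(len(bucket), flat.numel())                  # bucket membership and flattened size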
+ """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + return OrigScatter.apply(target_gpus, None, dim, obj) + if isinstance(obj, DataContainer): + if obj.cpu_only: + return obj.data + else: + return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + out = list(map(list, zip(*map(scatter_map, obj)))) + return out + if isinstance(obj, dict) and len(obj) > 0: + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return out + + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """Scatter with support for kwargs dictionary""" + inputs = scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/switchablenorms/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/switchablenorms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/switchablenorms/switchable_norm.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/switchablenorms/switchable_norm.py new file mode 100644 index 0000000..21e5bd5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/switchablenorms/switchable_norm.py @@ -0,0 +1,219 @@ +import torch +import torch.nn as nn + + +class SwitchNorm1d(nn.Module): + def __init__(self, num_features, eps=1e-5, momentum=0.997, using_moving_average=True): + super(SwitchNorm1d, self).__init__() + self.eps = eps + self.momentum = momentum + self.using_moving_average = using_moving_average + self.weight = nn.Parameter(torch.ones(1, num_features)) + self.bias = nn.Parameter(torch.zeros(1, num_features)) + self.mean_weight = nn.Parameter(torch.ones(2)) + self.var_weight = nn.Parameter(torch.ones(2)) + self.register_buffer('running_mean', torch.zeros(1, num_features)) + self.register_buffer('running_var', torch.zeros(1, num_features)) + self.reset_parameters() + + def reset_parameters(self): + self.running_mean.zero_() + self.running_var.zero_() + self.weight.data.fill_(1) + self.bias.data.zero_() + + def _check_input_dim(self, input): + if input.dim() != 2: + raise ValueError('expected 2D input (got {}D input)' + .format(input.dim())) + + def forward(self, x): + self._check_input_dim(x) + mean_ln = x.mean(1, keepdim=True) + var_ln = x.var(1, keepdim=True) + + if self.training: + mean_bn = x.mean(0, keepdim=True) + var_bn = x.var(0, keepdim=True) + if self.using_moving_average: + self.running_mean.mul_(self.momentum) + self.running_mean.add_((1 - self.momentum) * mean_bn.data) + self.running_var.mul_(self.momentum) + self.running_var.add_((1 - self.momentum) * var_bn.data) + else: + 
self.running_mean.add_(mean_bn.data) + self.running_var.add_(mean_bn.data ** 2 + var_bn.data) + else: + mean_bn = torch.autograd.Variable(self.running_mean) + var_bn = torch.autograd.Variable(self.running_var) + + softmax = nn.Softmax(0) + mean_weight = softmax(self.mean_weight) + var_weight = softmax(self.var_weight) + + mean = mean_weight[0] * mean_ln + mean_weight[1] * mean_bn + var = var_weight[0] * var_ln + var_weight[1] * var_bn + + x = (x - mean) / (var + self.eps).sqrt() + return x * self.weight + self.bias + +class SwitchNorm2d(nn.Module): + def __init__(self, num_features, eps=1e-5, momentum=0.997, using_moving_average=True, using_bn=True, + last_gamma=False): + super(SwitchNorm2d, self).__init__() + self.eps = eps + self.momentum = momentum + self.using_moving_average = using_moving_average + self.using_bn = using_bn + self.last_gamma = last_gamma + self.weight = nn.Parameter(torch.ones(1, num_features, 1, 1)) + self.bias = nn.Parameter(torch.zeros(1, num_features, 1, 1)) + if self.using_bn: + self.mean_weight = nn.Parameter(torch.ones(3)) + self.var_weight = nn.Parameter(torch.ones(3)) + else: + self.mean_weight = nn.Parameter(torch.ones(2)) + self.var_weight = nn.Parameter(torch.ones(2)) + if self.using_bn: + self.register_buffer('running_mean', torch.zeros(1, num_features, 1)) + self.register_buffer('running_var', torch.zeros(1, num_features, 1)) + + self.reset_parameters() + + def reset_parameters(self): + if self.using_bn: + self.running_mean.zero_() + self.running_var.zero_() + if self.last_gamma: + self.weight.data.fill_(0) + else: + self.weight.data.fill_(1) + self.bias.data.zero_() + + def _check_input_dim(self, input): + if input.dim() != 4: + raise ValueError('expected 4D input (got {}D input)' + .format(input.dim())) + + def forward(self, x): + self._check_input_dim(x) + N, C, H, W = x.size() + x = x.view(N, C, -1) + mean_in = x.mean(-1, keepdim=True) + var_in = x.var(-1, keepdim=True) + + mean_ln = mean_in.mean(1, keepdim=True) + temp = var_in + mean_in ** 2 + var_ln = temp.mean(1, keepdim=True) - mean_ln ** 2 + + if self.using_bn: + if self.training: + mean_bn = mean_in.mean(0, keepdim=True) + var_bn = temp.mean(0, keepdim=True) - mean_bn ** 2 + if self.using_moving_average: + self.running_mean.mul_(self.momentum) + self.running_mean.add_((1 - self.momentum) * mean_bn.data) + self.running_var.mul_(self.momentum) + self.running_var.add_((1 - self.momentum) * var_bn.data) + else: + self.running_mean.add_(mean_bn.data) + self.running_var.add_(mean_bn.data ** 2 + var_bn.data) + else: + mean_bn = torch.autograd.Variable(self.running_mean) + var_bn = torch.autograd.Variable(self.running_var) + + softmax = nn.Softmax(0) + mean_weight = softmax(self.mean_weight) + var_weight = softmax(self.var_weight) + + if self.using_bn: + mean = mean_weight[0] * mean_in + mean_weight[1] * mean_ln + mean_weight[2] * mean_bn + var = var_weight[0] * var_in + var_weight[1] * var_ln + var_weight[2] * var_bn + else: + mean = mean_weight[0] * mean_in + mean_weight[1] * mean_ln + var = var_weight[0] * var_in + var_weight[1] * var_ln + + x = (x-mean) / (var+self.eps).sqrt() + x = x.view(N, C, H, W) + return x * self.weight + self.bias + + +class SwitchNorm3d(nn.Module): + def __init__(self, num_features, eps=1e-5, momentum=0.997, using_moving_average=True, using_bn=True, + last_gamma=False): + super(SwitchNorm3d, self).__init__() + self.eps = eps + self.momentum = momentum + self.using_moving_average = using_moving_average + self.using_bn = using_bn + self.last_gamma = last_gamma + self.weight = 
nn.Parameter(torch.ones(1, num_features, 1, 1, 1)) + self.bias = nn.Parameter(torch.zeros(1, num_features, 1, 1, 1)) + if self.using_bn: + self.mean_weight = nn.Parameter(torch.ones(3)) + self.var_weight = nn.Parameter(torch.ones(3)) + else: + self.mean_weight = nn.Parameter(torch.ones(2)) + self.var_weight = nn.Parameter(torch.ones(2)) + if self.using_bn: + self.register_buffer('running_mean', torch.zeros(1, num_features, 1)) + self.register_buffer('running_var', torch.zeros(1, num_features, 1)) + + self.reset_parameters() + + def reset_parameters(self): + if self.using_bn: + self.running_mean.zero_() + self.running_var.zero_() + if self.last_gamma: + self.weight.data.fill_(0) + else: + self.weight.data.fill_(1) + self.bias.data.zero_() + + def _check_input_dim(self, input): + if input.dim() != 5: + raise ValueError('expected 5D input (got {}D input)' + .format(input.dim())) + + def forward(self, x): + self._check_input_dim(x) + N, C, D, H, W = x.size() + x = x.view(N, C, -1) + mean_in = x.mean(-1, keepdim=True) + var_in = x.var(-1, keepdim=True) + + mean_ln = mean_in.mean(1, keepdim=True) + temp = var_in + mean_in ** 2 + var_ln = temp.mean(1, keepdim=True) - mean_ln ** 2 + + if self.using_bn: + if self.training: + mean_bn = mean_in.mean(0, keepdim=True) + var_bn = temp.mean(0, keepdim=True) - mean_bn ** 2 + if self.using_moving_average: + self.running_mean.mul_(self.momentum) + self.running_mean.add_((1 - self.momentum) * mean_bn.data) + self.running_var.mul_(self.momentum) + self.running_var.add_((1 - self.momentum) * var_bn.data) + else: + self.running_mean.add_(mean_bn.data) + self.running_var.add_(mean_bn.data ** 2 + var_bn.data) + else: + mean_bn = torch.autograd.Variable(self.running_mean) + var_bn = torch.autograd.Variable(self.running_var) + + softmax = nn.Softmax(0) + mean_weight = softmax(self.mean_weight) + var_weight = softmax(self.var_weight) + + if self.using_bn: + mean = mean_weight[0] * mean_in + mean_weight[1] * mean_ln + mean_weight[2] * mean_bn + var = var_weight[0] * var_in + var_weight[1] * var_ln + var_weight[2] * var_bn + else: + mean = mean_weight[0] * mean_in + mean_weight[1] * mean_ln + var = var_weight[0] * var_in + var_weight[1] * var_ln + + x = (x - mean) / (var + self.eps).sqrt() + x = x.view(N, C, D, H, W) + return x * self.weight + self.bias \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/allreduce.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/allreduce.py new file mode 100644 index 0000000..abfc3bd --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/allreduce.py @@ -0,0 +1,47 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## ECE Department, Rutgers University +## Email: zhang.hang@rutgers.edu +## Copyright (c) 2017 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +"""Encoding Data Parallel""" +import torch +from torch.autograd import Variable, Function +import torch.cuda.comm as comm + + +torch_ver = torch.__version__[:3] + + +def allreduce(*inputs): + """Cross GPU all reduce autograd operation for calculate mean and + variance 
in SyncBN. + """ + return AllReduce.apply(*inputs) + + +class AllReduce(Function): + @staticmethod + def forward(ctx, num_inputs, *inputs): + ctx.num_inputs = num_inputs + ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] + inputs = [inputs[i:i + num_inputs] + for i in range(0, len(inputs), num_inputs)] + # sort before reduce sum + inputs = sorted(inputs, key=lambda i: i[0].get_device()) + results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) + outputs = comm.broadcast_coalesced(results, ctx.target_gpus) + return tuple([t for tensors in outputs for t in tensors]) + + @staticmethod + def backward(ctx, *inputs): + inputs = [i.data for i in inputs] + inputs = [inputs[i:i + ctx.num_inputs] + for i in range(0, len(inputs), ctx.num_inputs)] + results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) + outputs = comm.broadcast_coalesced(results, ctx.target_gpus) + return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/comm.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/comm.py new file mode 100644 index 0000000..b64bf6b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/comm.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +# File : comm.py +# Author : Jiayuan Mao +# Email : maojiayuan@gmail.com +# Date : 27/01/2018 +# +# This file is part of Synchronized-BatchNorm-PyTorch. +# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch +# Distributed under MIT License. + +import queue +import collections +import threading + +__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] + + +class FutureResult(object): + """A thread-safe future implementation. Used only as one-to-one pipe.""" + + def __init__(self): + self._result = None + self._lock = threading.Lock() + self._cond = threading.Condition(self._lock) + + def put(self, result): + with self._lock: + assert self._result is None, 'Previous result has\'t been fetched.' + self._result = result + self._cond.notify() + + def get(self): + with self._lock: + if self._result is None: + self._cond.wait() + + res = self._result + self._result = None + return res + + +_MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) +_SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) + + +class SlavePipe(_SlavePipeBase): + """Pipe for master-slave communication.""" + + def run_slave(self, msg): + self.queue.put((self.identifier, msg)) + ret = self.result.get() + self.queue.put(True) + return ret + + +class SyncMaster(object): + """An abstract `SyncMaster` object. + + - During the replication, as the data parallel will trigger an callback of each module, all slave devices should + call `register(id)` and obtain an `SlavePipe` to communicate with the master. + - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected, + and passed to a registered callback. + - After receiving the messages, the master device should gather the information and determine to message passed + back to each slave devices. + """ + + def __init__(self, master_callback): + """ + + Args: + master_callback: a callback to be invoked after having collected messages from slave devices. 
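A hedged, GPU-free sketch of the master/slave handshake described above: two plain Python threads stand in for the slave replicas, and the summing master_callback is invented for illustration. It assumes the models.protoseg_core package root is importable:

import threading
from models.protoseg_core.lib.extensions.syncbn.comm import SyncMaster

def master_callback(intermediates):
    # intermediates is [(copy_id, msg), ...] with the master's own message first;
    # reply to every copy with the same total.
    total = sum(msg for _, msg in intermediates)
    return [(copy_id, total) for copy_id, _ in intermediates]

master = SyncMaster(master_callback)
pipes = {i: master.register_slave(i) for i in (1, 2)}   # slave copies register up front
results = {}

def slave(copy_id):
    results[copy_id] = pipes[copy_id].run_slave(10 * copy_id)  # blocks until the master replies

threads = [threading.Thread(target=slave, args=(i,)) for i in (1, 2)]
for t in threads:
    t.start()
results[0] = master.run_master(5)   # collects 5 (its own), 10 and 20, then answers everyone
for t in threads:
    t.join()
print(results)                      # {0: 35, 1: 35, 2: 35} (dict ordering may vary)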
+ """ + self._master_callback = master_callback + self._queue = queue.Queue() + self._registry = collections.OrderedDict() + self._activated = False + + def register_slave(self, identifier): + """ + Register an slave device. + + Args: + identifier: an identifier, usually is the device id. + + Returns: a `SlavePipe` object which can be used to communicate with the master device. + + """ + if self._activated: + assert self._queue.empty(), 'Queue is not clean before next initialization.' + self._activated = False + self._registry.clear() + future = FutureResult() + self._registry[identifier] = _MasterRegistry(future) + return SlavePipe(identifier, self._queue, future) + + def run_master(self, master_msg): + """ + Main entry for the master device in each forward pass. + The messages were first collected from each devices (including the master device), and then + an callback will be invoked to compute the message to be sent back to each devices + (including the master device). + + Args: + master_msg: the message that the master want to send to itself. This will be placed as the first + message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. + + Returns: the message to be sent back to the master device. + + """ + self._activated = True + + intermediates = [(0, master_msg)] + for i in range(self.nr_slaves): + intermediates.append(self._queue.get()) + + results = self._master_callback(intermediates) + assert results[0][0] == 0, 'The first result should belongs to the master.' + + for i, res in results: + if i == 0: + continue + self._registry[i].result.put(res) + + for i in range(self.nr_slaves): + assert self._queue.get() is True + + return results[0][1] + + @property + def nr_slaves(self): + return len(self._registry) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/module.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/module.py new file mode 100644 index 0000000..f4bdb1c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/module.py @@ -0,0 +1,317 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## ECE Department, Rutgers University +## Email: zhang.hang@rutgers.edu +## Copyright (c) 2017 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +"""Synchronized Cross-GPU Batch Normalization Module""" +import collections +import os +import threading + +import torch +from lib.extensions.syncbn.allreduce import allreduce +from torch.autograd import Function +from torch.nn.functional import batch_norm +from torch.nn.modules.batchnorm import _BatchNorm +from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast +from torch.utils.cpp_extension import load + +from lib.extensions.syncbn.comm import SyncMaster + +torch_ver = torch.__version__[:3] + +print('compiling/loading syncbn') +build_path = '/tmp/bulid/syncbn' +if not os.path.exists(build_path): + os.makedirs(build_path) + +syncbn = load(name='syncbn', sources=['lib/extensions/syncbn/src/operator.cpp', + 'lib/extensions/syncbn/src/syncbn_kernel.cu'], + build_directory=build_path, verbose=True) + + +def sum_square(input): + r"""Calculate sum of elements and sum of squares for Batch Normalization""" + return _sum_square.apply(input) + + +class _sum_square(Function): + @staticmethod + def forward(ctx, 
input): + ctx.save_for_backward(input) + if input.is_cuda: + xsum, xsqusum = syncbn.sumsquare_forward(input) + else: + raise NotImplemented + return xsum, xsqusum + + @staticmethod + def backward(ctx, gradSum, gradSquare): + input, = ctx.saved_variables + if input.is_cuda: + gradInput = syncbn.sumsquare_backward(input, gradSum, gradSquare) + else: + raise NotImplemented + return gradInput + + +class _batchnormtrain(Function): + @staticmethod + def forward(ctx, input, mean, std, gamma, beta): + ctx.save_for_backward(input, mean, std, gamma, beta) + if input.is_cuda: + output = syncbn.batchnorm_forward(input, mean, std, gamma, beta) + else: + raise NotImplemented + return output + + @staticmethod + def backward(ctx, gradOutput): + input, mean, std, gamma, beta = ctx.saved_variables + if gradOutput.is_cuda: + gradInput, gradMean, gradStd, gradGamma, gradBeta = \ + syncbn.batchnorm_backward(gradOutput, input, mean, + std, gamma, beta, True) + else: + raise NotImplemented + return gradInput, gradMean, gradStd, gradGamma, gradBeta + + +def batchnormtrain(input, mean, std, gamma, beta): + r"""Applies Batch Normalization over a 3d input that is seen as a + mini-batch. + + .. _encoding.batchnormtrain: + + .. math:: + + y = \frac{x - \mu[x]}{ \sqrt{var[x] + \epsilon}} * \gamma + \beta + + Shape: + - Input: :math:`(N, C)` or :math:`(N, C, L)` + - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) + + """ + return _batchnormtrain.apply(input, mean, std, gamma, beta) + + +class _SyncBatchNorm(_BatchNorm): + def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): + super(_SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine) + + self._sync_master = SyncMaster(self._data_parallel_master) + self._parallel_id = None + self._slave_pipe = None + + def forward(self, input): + if not self.training: + return batch_norm( + input, self.running_mean, self.running_var, self.weight, self.bias, + self.training, self.momentum, self.eps) + + # Resize the input to (B, C, -1). + input_shape = input.size() + input = input.view(input_shape[0], self.num_features, -1) + + # sum(x) and sum(x^2) + N = input.size(0) * input.size(2) + xsum, xsqsum = sum_square(input) + + # all-reduce for global sum(x) and sum(x^2) + if self._parallel_id == 0: + mean, inv_std = self._sync_master.run_master(_ChildMessage(xsum, xsqsum, N)) + else: + mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(xsum, xsqsum, N)) + # forward + return batchnormtrain(input, mean, 1.0/inv_std, self.weight, self.bias).view(input_shape) + + def __data_parallel_replicate__(self, ctx, copy_id): + self._parallel_id = copy_id + + # parallel_id == 0 means master device. + if self._parallel_id == 0: + ctx.sync_master = self._sync_master + else: + self._slave_pipe = ctx.sync_master.register_slave(copy_id) + + def _data_parallel_master(self, intermediates): + """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" + + # Always using same "device order" makes the ReduceAdd operation faster. 
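The messages assembled in forward() above carry only per-channel sum(x), sum(x^2) and the element count; the master turns those into a mean and an inverse standard deviation. A hedged CPU sketch of that arithmetic (mirroring _compute_mean_std defined further below), checked against PyTorch's own reductions:

import torch

x = torch.randn(4, 3, 10)            # (B, C, L), the shape forward() reshapes inputs to
eps = 1e-5
n = x.size(0) * x.size(2)            # elements contributing to each channel's statistics

xsum = x.sum(dim=(0, 2))             # per-channel sum(x), as produced by sum_square()
xsqsum = (x * x).sum(dim=(0, 2))     # per-channel sum(x^2)

mean = xsum / n
sumvar = xsqsum - xsum * mean        # sum of squared deviations: sum(x^2) - n * mean^2
bias_var = sumvar / n                # biased variance used for normalization
inv_std = (bias_var + eps) ** -0.5

print(torch.allclose(mean, x.mean(dim=(0, 2)), atol=1e-6))                      # True
print(torch.allclose(bias_var, x.var(dim=(0, 2), unbiased=False), atol=1e-6))   # True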
+ # Thanks to:: Tete Xiao (http://tetexiao.com/) + intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) + + to_reduce = [i[1][:2] for i in intermediates] + to_reduce = [j for i in to_reduce for j in i] # flatten + target_gpus = [i[1].sum.get_device() for i in intermediates] + + sum_size = sum([i[1].sum_size for i in intermediates]) + sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) + mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) + + broadcasted = Broadcast.apply(target_gpus, mean, inv_std) + + outputs = [] + for i, rec in enumerate(intermediates): + outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) + + return outputs + + def _compute_mean_std(self, sum_, ssum, size): + """Compute the mean and standard-deviation with sum and square-sum. This method + also maintains the moving average on the master device.""" + assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' + mean = sum_ / size + sumvar = ssum - sum_ * mean + unbias_var = sumvar / (size - 1) + bias_var = sumvar / size + + self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data + self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data + + return mean, (bias_var + self.eps) ** -0.5 + + +# API adapted from https://github.com/vacancy/Synchronized-BatchNorm-PyTorch +_ChildMessage = collections.namedtuple('Message', ['sum', 'ssum', 'sum_size']) +_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) + + +class BatchNorm1d(_SyncBatchNorm): + r"""Please see the docs in :class:`encoding.nn.BatchNorm2d`""" + def _check_input_dim(self, input): + if input.dim() != 2 and input.dim() != 3: + raise ValueError('expected 2D or 3D input (got {}D input)' + .format(input.dim())) + super(BatchNorm1d, self)._check_input_dim(input) + + +class BatchNorm2d(_SyncBatchNorm): + r"""Cross-GPU Synchronized Batch normalization (SyncBN) + + Standard BN [1]_ implementation only normalize the data within each device (GPU). + SyncBN normalizes the input within the whole mini-batch. + We follow the sync-onece implmentation described in the paper [2]_ . + Please see the design idea in the `notes <./notes/syncbn.html>`_. + + .. note:: + We adapt the awesome python API from another `PyTorch SyncBN Implementation + `_ and provide + efficient CUDA backend. + + .. math:: + + y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta + + The mean and standard-deviation are calculated per-channel over + the mini-batches and gamma and beta are learnable parameter vectors + of size C (where C is the input size). + + During training, this layer keeps a running estimate of its computed mean + and variance. The running sum is kept with a default momentum of 0.1. + + During evaluation, this running mean/variance is used for normalization. + + Because the BatchNorm is done over the `C` dimension, computing statistics + on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm + + Args: + num_features: num_features from an expected input of + size batch_size x num_features x height x width + eps: a value added to the denominator for numerical stability. + Default: 1e-5 + momentum: the value used for the running_mean and running_var + computation. Default: 0.1 + affine: a boolean value that when set to ``True``, gives the layer learnable + affine parameters. 
Default: ``True`` + + Shape: + - Input: :math:`(N, C, H, W)` + - Output: :math:`(N, C, H, W)` (same shape as input) + + Reference: + .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network training by reducing internal covariate shift." *ICML 2015* + .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." *CVPR 2018* + + Examples: + >>> m = BatchNorm2d(100) + >>> net = torch.nn.DataParallel(m) + >>> syncbn.parallel.patch_replication_callback(net) + >>> output = net(input) + """ + def _check_input_dim(self, input): + if input.dim() != 4: + raise ValueError('expected 4D input (got {}D input)' + .format(input.dim())) + super(BatchNorm2d, self)._check_input_dim(input) + + +class BatchNorm3d(_SyncBatchNorm): + r"""Please see the docs in :class:`encoding.nn.BatchNorm2d`""" + def _check_input_dim(self, input): + if input.dim() != 5: + raise ValueError('expected 5D input (got {}D input)' + .format(input.dim())) + super(BatchNorm3d, self)._check_input_dim(input) + + +class SharedTensor(object): + """Shared Tensor for cross GPU all reduce operation""" + def __init__(self, nGPUs): + self.mutex = threading.Lock() + self.all_tasks_done = threading.Condition(self.mutex) + self.nGPUs = nGPUs + self._clear() + + def _clear(self): + self.N = 0 + self.dict = {} + self.push_tasks = self.nGPUs + self.reduce_tasks = self.nGPUs + + def push(self, *inputs): + # push from device + with self.mutex: + if self.push_tasks == 0: + self._clear() + self.N += inputs[0] + igpu = inputs[1] + self.dict[igpu] = inputs[2:] + #idx = self.nGPUs - self.push_tasks + self.push_tasks -= 1 + with self.all_tasks_done: + if self.push_tasks == 0: + self.all_tasks_done.notify_all() + while self.push_tasks: + self.all_tasks_done.wait() + + def pull(self, igpu): + # pull from device + with self.mutex: + if igpu == 0: + assert(len(self.dict) == self.nGPUs) + # flatten the tensors + self.list = [t for i in range(len(self.dict)) for t in self.dict[i]] + self.outlist = allreduce(2, *self.list) + self.reduce_tasks -= 1 + else: + self.reduce_tasks -= 1 + with self.all_tasks_done: + if self.reduce_tasks == 0: + self.all_tasks_done.notify_all() + while self.reduce_tasks: + self.all_tasks_done.wait() + # all reduce done + return self.N, self.outlist[2*igpu], self.outlist[2*igpu+1] + + def __len__(self): + return self.nGPUs + + def __repr__(self): + return ('SharedTensor') + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/common.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/common.h new file mode 100644 index 0000000..59e9f85 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/common.h @@ -0,0 +1,79 @@ +#include +#include + +static const unsigned WARP_SIZE = 32; + +// The maximum number of threads in a block +static const unsigned MAX_BLOCK_SIZE = 512U; + +template +struct ScalarConvert { + static __host__ __device__ __forceinline__ Out to(const In v) { return (Out) v; } +}; + +// Number of threads in a block given an input size up to MAX_BLOCK_SIZE +static int getNumThreads(int nElem) { + int threadSizes[5] = { 32, 64, 128, 256, MAX_BLOCK_SIZE }; + for (int i = 0; i != 5; ++i) { + if (nElem <= threadSizes[i]) { + return threadSizes[i]; + } + } + return MAX_BLOCK_SIZE; +} + +// Returns the index of the most significant 1 bit in `val`. 
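The warpSum helper defined just below reduces a value across all 32 lanes of a warp with an XOR butterfly: at step i every lane adds the value held by lane (lane ^ (1 << i)), for getMSB(WARP_SIZE) steps. A pure-Python emulation of that access pattern (illustration only, no CUDA involved) showing that every lane ends up holding the full warp sum:

WARP_SIZE = 32
vals = list(range(WARP_SIZE))     # pretend lane i starts with the value i

step = 1
while step < WARP_SIZE:           # 5 steps for a 32-lane warp, matching getMSB(WARP_SIZE)
    vals = [vals[lane] + vals[lane ^ step] for lane in range(WARP_SIZE)]
    step <<= 1

print(set(vals))                  # {496}: every lane holds sum(range(32))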
+__device__ __forceinline__ int getMSB(int val) { + return 31 - __clz(val); +} + +template +__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if CUDA_VERSION >= 9000 + return __shfl_xor_sync(mask, value, laneMask, width); +#else + return __shfl_xor(value, laneMask, width); +#endif +} + +// Sum across all threads within a warp +template +static __device__ __forceinline__ T warpSum(T val) { +#if __CUDA_ARCH__ >= 300 + for (int i = 0; i < getMSB(WARP_SIZE); ++i) { + val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); + } +#else + __shared__ T values[MAX_BLOCK_SIZE]; + values[threadIdx.x] = val; + __threadfence_block(); + const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; + for (int i = 1; i < WARP_SIZE; i++) { + val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; + } +#endif + return val; +} + +template +struct Float2 { + Acctype v1, v2; + __device__ Float2() {} + __device__ Float2(DType v1, DType v2) : v1(ScalarConvert::to(v1)), v2(ScalarConvert::to(v2)) {} + __device__ Float2(DType v) : v1(ScalarConvert::to(v)), v2(ScalarConvert::to(v)) {} + __device__ Float2(int v) : v1(ScalarConvert::to(v)), v2(ScalarConvert::to(v)) {} + __device__ Float2& operator+=(const Float2& a) { + v1 += a.v1; + v2 += a.v2; + return *this; + } +}; + +template +static __device__ __forceinline__ Float2 warpSum(Float2 value) { + value.v1 = warpSum(value.v1); + value.v2 = warpSum(value.v2); + return value; +} + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/device_tensor.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/device_tensor.h new file mode 100644 index 0000000..8a04b32 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/device_tensor.h @@ -0,0 +1,110 @@ +#include + +template +struct DeviceTensor { + public: + inline __device__ __host__ DeviceTensor(DType *p, const int *size) + : dptr_(p) { + for (int i = 0; i < Dim; ++i) { + size_[i] = size ? size[i] : 0; + } + } + + inline __device__ __host__ unsigned getSize(const int i) const { + assert(i < Dim); + return size_[i]; + } + + inline __device__ __host__ int numElements() const { + int n = 1; + for (int i = 0; i < Dim; ++i) { + n *= size_[i]; + } + return n; + } + + inline __device__ __host__ DeviceTensor select(const size_t x) const { + assert(Dim > 1); + int offset = x; + for (int i = 1; i < Dim; ++i) { + offset *= size_[i]; + } + DeviceTensor tensor(dptr_ + offset, nullptr); + for (int i = 0; i < Dim - 1; ++i) { + tensor.size_[i] = this->size_[i+1]; + } + return tensor; + } + + inline __device__ __host__ DeviceTensor operator[](const size_t x) const { + assert(Dim > 1); + int offset = x; + for (int i = 1; i < Dim; ++i) { + offset *= size_[i]; + } + DeviceTensor tensor(dptr_ + offset, nullptr); + for (int i = 0; i < Dim - 1; ++i) { + tensor.size_[i] = this->size_[i+1]; + } + return tensor; + } + + inline __device__ __host__ size_t InnerSize() const { + assert(Dim >= 3); + size_t sz = 1; + for (size_t i = 2; i < Dim; ++i) { + sz *= size_[i]; + } + return sz; + } + + inline __device__ __host__ size_t ChannelCount() const { + assert(Dim >= 3); + return size_[1]; + } + + inline __device__ __host__ DType* data_ptr() const { + return dptr_; + } + + DType *dptr_; + int size_[Dim]; +}; + +template +struct DeviceTensor { + inline __device__ __host__ DeviceTensor(DType *p, const int *size) + : dptr_(p) { + size_[0] = size ? 
size[0] : 0; + } + + inline __device__ __host__ unsigned getSize(const int i) const { + assert(i == 0); + return size_[0]; + } + + inline __device__ __host__ int numElements() const { + return size_[0]; + } + + inline __device__ __host__ DType &operator[](const size_t x) const { + return *(dptr_ + x); + } + + inline __device__ __host__ DType* data_ptr() const { + return dptr_; + } + + DType *dptr_; + int size_[1]; +}; + +template +static DeviceTensor devicetensor(const at::Tensor &blob) { + DType *data = blob.data(); + DeviceTensor tensor(data, nullptr); + for (int i = 0; i < Dim; ++i) { + tensor.size_[i] = blob.size(i); + } + return tensor; +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/operator.cpp b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/operator.cpp new file mode 100644 index 0000000..4b59359 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/operator.cpp @@ -0,0 +1,8 @@ +#include "operator.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)"); + m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward (CUDA)"); + m.def("sumsquare_forward", &Sum_Square_Forward_CUDA, "SumSqu forward (CUDA)"); + m.def("sumsquare_backward", &Sum_Square_Backward_CUDA, "SumSqu backward (CUDA)"); +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/operator.h b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/operator.h new file mode 100644 index 0000000..59ac564 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/operator.h @@ -0,0 +1,27 @@ +#include +#include + + +at::Tensor BatchNorm_Forward_CUDA( + const at::Tensor input_, + const at::Tensor mean_, + const at::Tensor std_, + const at::Tensor gamma_, + const at::Tensor beta_); + +std::vector BatchNorm_Backward_CUDA( + const at::Tensor gradoutput_, + const at::Tensor input_, + const at::Tensor mean_, + const at::Tensor std_, + const at::Tensor gamma_, + const at::Tensor beta_, + bool train); + +std::vector Sum_Square_Forward_CUDA( + const at::Tensor input_); + +at::Tensor Sum_Square_Backward_CUDA( + const at::Tensor input_, + const at::Tensor gradSum_, + const at::Tensor gradSquare_); diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/syncbn_cpu.cpp b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/syncbn_cpu.cpp new file mode 100644 index 0000000..66269b5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/syncbn_cpu.cpp @@ -0,0 +1,58 @@ +#include +#include + +at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { + if (x.ndimension() == 2) { + return v; + } else { + std::vector broadcast_size = {1, -1}; + for (int64_t i = 2; i < x.ndimension(); ++i) + broadcast_size.push_back(1); + return v.view(broadcast_size); + } +} +at::Tensor BatchNorm_Forward_CPU( + const at::Tensor input, + const at::Tensor mean, + const at::Tensor std, + const at::Tensor gamma, + const at::Tensor beta) { + auto output = (input - broadcast_to(mean, input)) / broadcast_to(std, input); + output = output * broadcast_to(gamma, input) + broadcast_to(beta, input); + return output; +} + +// Not implementing CPU backward for now +std::vector BatchNorm_Backward_CPU( + const at::Tensor gradoutput, + const at::Tensor input, + const at::Tensor mean, + const at::Tensor std, + const at::Tensor gamma, + const at::Tensor beta, + bool train) { + /* outputs*/ + 
at::Tensor gradinput = at::zeros_like(input); + at::Tensor gradgamma = at::zeros_like(gamma); + at::Tensor gradbeta = at::zeros_like(beta); + at::Tensor gradMean = at::zeros_like(mean); + at::Tensor gradStd = at::zeros_like(std); + return {gradinput, gradMean, gradStd, gradgamma, gradbeta}; +} + +std::vector Sum_Square_Forward_CPU( + const at::Tensor input) { + /* outputs */ + at::Tensor sum = input.type().tensor({input.size(1)}).zero_(); + at::Tensor square = input.type().tensor({input.size(1)}).zero_(); + return {sum, square}; +} + +at::Tensor Sum_Square_Backward_CPU( + const at::Tensor input, + const at::Tensor gradSum, + const at::Tensor gradSquare) { + /* outputs */ + at::Tensor gradInput = at::zeros_like(input); + return gradInput; +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/syncbn_kernel.cu b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/syncbn_kernel.cu new file mode 100644 index 0000000..60860e4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/extensions/syncbn/src/syncbn_kernel.cu @@ -0,0 +1,289 @@ +#include +#include + +#include "common.h" +#include "device_tensor.h" + +namespace { + +template +struct GradOp { + __device__ GradOp(Acctype m, const DeviceTensor3 i, const DeviceTensor3 g) + : mean(m), input(i), gradOutput(g) {} + __device__ __forceinline__ Float2 operator()(int batch, int plane, int n) { + DType g = gradOutput[batch][plane][n]; + DType c = ScalarConvert::to(input[batch][plane][n] - mean); + return Float2(g, g * c); + } + const Acctype mean; + const DeviceTensor3 input; + const DeviceTensor3 gradOutput; +}; + +template +struct SumOp { + __device__ SumOp(DeviceTensor i) : input(i){} + __device__ __forceinline__ Float2 operator()(int batch, int plane, int n) { + DType g = input[batch][plane][n]; + return Float2(g, g * g); + } + DType mean; + DeviceTensor input; +}; + +// Sum across (batch, x/y/z) applying Op() pointwise +template +__device__ T reduce(Op op, DeviceTensor3 tensor, int plane) { + T sum = (T)0; + for (int batch = 0; batch < tensor.getSize(0); ++batch) { + for (int x = threadIdx.x; x < tensor.getSize(2); x += blockDim.x) { + sum += op(batch, plane, x); + } + } + + // sum over NumThreads within a warp + sum = warpSum(sum); + + // 'transpose', and reduce within warp again + __shared__ T shared[32]; + __syncthreads(); + if (threadIdx.x % WARP_SIZE == 0) { + shared[threadIdx.x / WARP_SIZE] = sum; + } + if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { + // zero out the other entries in shared + shared[threadIdx.x] = (T)0; + } + __syncthreads(); + if (threadIdx.x / WARP_SIZE == 0) { + sum = warpSum(shared[threadIdx.x]); + if (threadIdx.x == 0) { + shared[0] = sum; + } + } + __syncthreads(); + + // Everyone picks it up, should be broadcast into the whole gradInput + return shared[0]; +} + +template +__global__ void BatchNorm_Forward_kernel ( + DeviceTensor output, + DeviceTensor input, + DeviceTensor mean, + DeviceTensor std, + DeviceTensor gamma, + DeviceTensor beta) { + int c = blockIdx.x; + /* main operation */ + for (int b = 0; b < input.getSize(0); ++b) { + for (int x = threadIdx.x; x < input.getSize(2); x += blockDim.x) { + DType inp = input[b][c][x]; + output[b][c][x] = gamma[c] * (inp - mean[c]) / + std[c] + beta[c]; + } + } +} + +template +__global__ void BatchNorm_Backward_kernel ( + DeviceTensor gradoutput, + DeviceTensor input, + DeviceTensor gradinput, + DeviceTensor gradgamma, + DeviceTensor gradbeta, + DeviceTensor mean, + 
DeviceTensor std, + DeviceTensor gamma, + DeviceTensor beta, + DeviceTensor gradMean, + DeviceTensor gradStd, + bool train) { + /* declarations of the variables */ + /* Get the index and channels */ + int c = blockIdx.x; + /* main operation */ + GradOp> g(mean[c], input, gradoutput); + Float2 res = reduce, + GradOp>, + DeviceTensor>(g, gradoutput, c); + DType gradOutputSum = res.v1; + DType dotP = res.v2; + DType invstd = DType(1.0) / std[c]; + DType gradScale = invstd * gamma[c]; + if (train && threadIdx.x == 0) { + gradMean[c] = - gradOutputSum * gamma[c] * invstd; + gradStd[c] = - dotP * gamma[c] * invstd * invstd; + } + if (gradinput.numElements() > 0) { + for (int batch = 0; batch < gradoutput.getSize(0); ++batch) { + for (int x = threadIdx.x; x < gradoutput.getSize(2); x += blockDim.x) { + gradinput[batch][c][x] = gradoutput[batch][c][x] * gradScale; + } + } + } + if (gradgamma.numElements() > 0) { + if (threadIdx.x == 0) { + gradgamma[c] += dotP * invstd; + } + } + if (gradbeta.numElements() > 0) { + if (threadIdx.x == 0) { + gradbeta[c] += gradOutputSum; + } + } +} + + +template +__global__ void Sum_Square_Forward_kernel ( + DeviceTensor input, + DeviceTensor sum, + DeviceTensor square) { + int c = blockIdx.x; + /* main operation */ + SumOp g(input); + Float2 res = reduce, + SumOp, DeviceTensor>(g, input, c); + DType xsum = res.v1; + DType xsquare = res.v2; + if (threadIdx.x == 0) { + sum[c] = xsum; + square[c] = xsquare; + } +} + +template +__global__ void Sum_Square_Backward_kernel ( + DeviceTensor gradInput, + DeviceTensor input, + DeviceTensor gradSum, + DeviceTensor gradSquare) { + int c = blockIdx.x; + /* main operation */ + for (int batch = 0; batch < gradInput.getSize(0); ++batch) { + for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) + { + gradInput[batch][c][x] = gradSum[c] + 2 * gradSquare[c] * + input[batch][c][x]; + } + } +} + +} // namespcae + +at::Tensor BatchNorm_Forward_CUDA( + const at::Tensor input_, + const at::Tensor mean_, + const at::Tensor std_, + const at::Tensor gamma_, + const at::Tensor beta_) { + auto output_ = at::zeros_like(input_); + cudaStream_t stream = at::globalContext().getCurrentCUDAStream(); + dim3 blocks(input_.size(1)); + dim3 threads(getNumThreads(input_.size(2))); + AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Forward_CUDA", ([&] { + /* Device tensors */ + DeviceTensor output = devicetensor(output_); + DeviceTensor input = devicetensor(input_); + DeviceTensor mean = devicetensor(mean_); + DeviceTensor std = devicetensor(std_); + DeviceTensor gamma = devicetensor(gamma_); + DeviceTensor beta = devicetensor(beta_); + /* kernel function */ + BatchNorm_Forward_kernel<<>>( + output, input, mean, std, gamma, beta); + })); + AT_ASSERT(cudaGetLastError() == cudaSuccess); + return output_; +} + +std::vector BatchNorm_Backward_CUDA( + const at::Tensor gradoutput_, + const at::Tensor input_, + const at::Tensor mean_, + const at::Tensor std_, + const at::Tensor gamma_, + const at::Tensor beta_, + bool train) { + /* outputs*/ + at::Tensor gradinput_ = at::zeros_like(input_); + at::Tensor gradgamma_ = at::zeros_like(gamma_); + at::Tensor gradbeta_ = at::zeros_like(beta_); + at::Tensor gradMean_ = at::zeros_like(mean_); + at::Tensor gradStd_ = at::zeros_like(std_); + /* cuda utils*/ + cudaStream_t stream = at::globalContext().getCurrentCUDAStream(); + dim3 blocks(input_.size(1)); + dim3 threads(getNumThreads(input_.size(2))); + AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Backward_CUDA", ([&] { + /* Device tensors */ + 
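+    /* Each ATen tensor is wrapped in a DeviceTensor view (raw pointer + sizes) so the
+       kernel can index it as [batch][channel][position]; one CUDA block is launched per
+       channel and its threads stride over the flattened spatial dimension. */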
DeviceTensor gradoutput = devicetensor(gradoutput_); + DeviceTensor input = devicetensor(input_); + DeviceTensor gradinput = devicetensor(gradinput_); + DeviceTensor gradgamma = devicetensor(gradgamma_); + DeviceTensor gradbeta = devicetensor(gradbeta_); + DeviceTensor mean = devicetensor(mean_); + DeviceTensor std = devicetensor(std_); + DeviceTensor gamma = devicetensor(gamma_); + DeviceTensor beta = devicetensor(beta_); + DeviceTensor gradMean = devicetensor(gradMean_); + DeviceTensor gradStd = devicetensor(gradStd_); + /* kernel function */ + BatchNorm_Backward_kernel + <<>>( + gradoutput, input, gradinput, gradgamma, gradbeta, mean, std, + gamma, beta, gradMean, gradStd, train); + })); + AT_ASSERT(cudaGetLastError() == cudaSuccess); + return {gradinput_, gradMean_, gradStd_, gradgamma_, gradbeta_}; +} + +std::vector Sum_Square_Forward_CUDA( + const at::Tensor input_) { + /* outputs */ + at::Tensor sum_ = input_.type().tensor({input_.size(1)}).zero_(); + at::Tensor square_ = input_.type().tensor({input_.size(1)}).zero_(); + // at::Tensor sum_ = torch::zeros({input_.size(1)}, input_.options()); + // at::Tensor square_ = torch::zeros({input_.size(1)}, input_.options()); + /* cuda utils*/ + cudaStream_t stream = at::globalContext().getCurrentCUDAStream(); + dim3 blocks(input_.size(1)); + dim3 threads(getNumThreads(input_.size(2))); + AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Backward_CUDA", ([&] { + /* Device tensors */ + DeviceTensor input = devicetensor(input_); + DeviceTensor sum = devicetensor(sum_); + DeviceTensor square = devicetensor(square_); + /* kernel function */ + Sum_Square_Forward_kernel + <<>>(input, sum, square); + })); + AT_ASSERT(cudaGetLastError() == cudaSuccess); + return {sum_, square_}; +} + +at::Tensor Sum_Square_Backward_CUDA( + const at::Tensor input_, + const at::Tensor gradSum_, + const at::Tensor gradSquare_) { + /* outputs */ + at::Tensor gradInput_ = at::zeros_like(input_); + /* cuda utils*/ + cudaStream_t stream = at::globalContext().getCurrentCUDAStream(); + dim3 blocks(input_.size(1)); + dim3 threads(getNumThreads(input_.size(2))); + AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Backward_CUDA", ([&] { + /* Device tensors */ + DeviceTensor gradInput = devicetensor(gradInput_); + DeviceTensor input = devicetensor(input_); + DeviceTensor gradSum = devicetensor(gradSum_); + DeviceTensor gradSquare =devicetensor(gradSquare_); + /* kernel function */ + Sum_Square_Backward_kernel + <<>>(gradInput, input, gradSum, gradSquare); + })); + AT_ASSERT(cudaGetLastError() == cudaSuccess); + return gradInput_; +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/layers.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/layers.py new file mode 100644 index 0000000..c3c9e5e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/layers.py @@ -0,0 +1,198 @@ +import torch +import torch.nn.functional as F +import numpy as np + + +def eightway_activation(x): + """Retrieves neighboring pixels/features on the eight corners from + a 3x3 patch. 
+ + Args: + x: A tensor of size [batch_size, height_in, width_in, channels] + + Returns: + A tensor of size [batch_size, height_in, width_in, channels, 8] + """ + # Get the number of channels in the input. + shape_x = list(x.shape) + if len(shape_x) != 4: + raise ValueError('Only support for 4-D tensors!') + + # Pad at the margin. + x = F.pad(x, + pad=(0, 0, 1, 1, 1, 1, 0, 0), + mode='reflect') + # Get eight neighboring pixels/features. + x_groups = [ + x[:, 1:-1, :-2, :].clone(), # left + x[:, 1:-1, 2:, :].clone(), # right + x[:, :-2, 1:-1, :].clone(), # up + x[:, 2:, 1:-1, :].clone(), # down + x[:, :-2, :-2, :].clone(), # left-up + x[:, 2:, :-2, :].clone(), # left-down + x[:, :-2, 2:, :].clone(), # right-up + x[:, 2:, 2:, :].clone() # right-down + ] + output = [ + torch.unsqueeze(c, dim=-1) for c in x_groups + ] + output = torch.cat(output, dim=-1) + + return output + + +def eightcorner_activation(x, size): + """Retrieves neighboring pixels one the eight corners from a + (2*size+1)x(2*size+1) patch. + + Args: + x: A tensor of size [batch_size, height_in, width_in, channels] + size: A number indicating the half size of a patch. + + Returns: + A tensor of size [batch_size, height_in, width_in, channels, 8] + """ + # Get the number of channels in the input. + shape_x = list(x.shape) + if len(shape_x) != 4: + raise ValueError('Only support for 4-D tensors!') + n, c, h, w = shape_x + + # Pad at the margin. + p = size + x_pad = F.pad(x, + pad=(p, p, p, p, 0, 0, 0, 0), + mode='constant', + value=0) + + # Get eight corner pixels/features in the patch. + x_groups = [] + for st_y in range(0, 2 * size + 1, size): + for st_x in range(0, 2 * size + 1, size): + if st_y == size and st_x == size: + # Ignore the center pixel/feature. + continue + + x_neighbor = x_pad[:, :, st_y:st_y + h, st_x:st_x + w].clone() + x_groups.append(x_neighbor) + + output = [torch.unsqueeze(c, dim=-1) for c in x_groups] + output = torch.cat(output, dim=-1) + + return output + + +def ignores_from_label(labels, num_classes, size, ignore_index): + """Retrieves ignorable pixels from the ground-truth labels. + + This function returns a binary map in which 1 denotes ignored pixels + and 0 means not ignored ones. For those ignored pixels, they are not + only the pixels with label value >= num_classes, but also the + corresponding neighboring pixels, which are on the the eight cornerls + from a (2*size+1)x(2*size+1) patch. + + Args: + labels: A tensor of size [batch_size, height_in, width_in], indicating + semantic segmentation ground-truth labels. + num_classes: A number indicating the total number of valid classes. The + labels ranges from 0 to (num_classes-1), and any value >= num_classes + would be ignored. + size: A number indicating the half size of a patch. + + Return: + A tensor of size [batch_size, height_in, width_in, 8] + """ + # Get the number of channels in the input. + shape_lab = list(labels.shape) + if len(shape_lab) != 3: + raise ValueError('Only support for 3-D label tensors!') + n, h, w = shape_lab + + # Retrieve ignored pixels with label value >= num_classes. + # ignore = labels>num_classes-1 # NxHxW + ignore = (labels == ignore_index) + + # Pad at the margin. + p = size + ignore_pad = F.pad(ignore, + pad=(p, p, p, p, 0, 0), + mode='constant', + value=1) + + # Retrieve eight corner pixels from the center, where the center + # is ignored. Note that it should be bi-directional. For example, + # when computing AAF loss with top-left pixels, the ignored pixels + # might be the center or the top-left ones. 
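+    # The first (reversed) scan collects, for every offset, pixels whose shifted
+    # neighbour or centre is ignored; the second (forward) scan ORs in the opposite
+    # offset, so a pair is dropped whenever either of its endpoints is ignored.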
+ ignore_groups = [] + for st_y in range(2 * size, -1, -size): + for st_x in range(2 * size, -1, -size): + if st_y == size and st_x == size: + continue + ignore_neighbor = ignore_pad[:, st_y:st_y + h, st_x:st_x + w].clone() + mask = ignore_neighbor | ignore + ignore_groups.append(mask) + + ig = 0 + for st_y in range(0, 2 * size + 1, size): + for st_x in range(0, 2 * size + 1, size): + if st_y == size and st_x == size: + continue + ignore_neighbor = ignore_pad[:, st_y:st_y + h, st_x:st_x + w].clone() + mask = ignore_neighbor | ignore_groups[ig] + ignore_groups[ig] = mask + ig += 1 + + ignore_groups = [ + torch.unsqueeze(c, dim=-1) for c in ignore_groups + ] # NxHxWx1 + ignore = torch.cat(ignore_groups, dim=-1) # NxHxWx8 + + return ignore + + +def edges_from_label(labels, size, ignore_class=255): + """Retrieves edge positions from the ground-truth labels. + + This function computes the edge map by considering if the pixel values + are equal between the center and the neighboring pixels on the eight + corners from a (2*size+1)*(2*size+1) patch. Ignore edges where the any + of the paired pixels with label value >= num_classes. + + Args: + labels: A tensor of size [batch_size, height_in, width_in], indicating + semantic segmentation ground-truth labels. + size: A number indicating the half size of a patch. + ignore_class: A number indicating the label value to ignore. + + Return: + A tensor of size [batch_size, height_in, width_in, 1, 8] + """ + # Get the number of channels in the input. + shape_lab = list(labels.shape) + if len(shape_lab) != 4: + raise ValueError('Only support for 4-D label tensors!') + n, h, w, c = shape_lab + + # Pad at the margin. + p = size + labels_pad = F.pad( + labels, pad=(0, 0, p, p, p, p, 0, 0), + mode='constant', + value=ignore_class) + + # Get the edge by comparing label value of the center and it paired pixels. + edge_groups = [] + for st_y in range(0, 2 * size + 1, size): + for st_x in range(0, 2 * size + 1, size): + if st_y == size and st_x == size: + continue + labels_neighbor = labels_pad[:, st_y:st_y + h, st_x:st_x + w] + edge = labels_neighbor != labels + edge_groups.append(edge) + + edge_groups = [ + torch.unsqueeze(c, dim=-1) for c in edge_groups + ] # NxHxWx1x1 + edge = torch.cat(edge_groups, dim=-1) # NxHxWx1x8 + + return edge diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/losses.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/losses.py new file mode 100644 index 0000000..58ea104 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/aaf/losses.py @@ -0,0 +1,192 @@ +import torch +import torch.nn.functional as F +from models.protoseg_core.lib.loss.aaf import layers as nnx +import numpy as np + + +def affinity_loss(labels, + probs, + num_classes, + kld_margin): + """Affinity Field (AFF) loss. + + This function computes AFF loss. There are several components in the + function: + 1) extracts edges from the ground-truth labels. + 2) extracts ignored pixels and their paired pixels (the neighboring + pixels on the eight corners). + 3) extracts neighboring pixels on the eight corners from a 3x3 patch. + 4) computes KL-Divergence between center pixels and their neighboring + pixels from the eight corners. + + Args: + labels: A tensor of size [batch_size, height_in, width_in], indicating + semantic segmentation ground-truth labels. + probs: A tensor of size [batch_size, height_in, width_in, num_classes], + indicating segmentation predictions. + num_classes: A number indicating the total number of valid classes. 
+ kld_margin: A number indicating the margin for KL-Divergence at edge. + + Returns: + Two 1-D tensors value indicating the loss at edge and non-edge. + """ + # Compute ignore map (e.g, label of 255 and their paired pixels). + + labels = torch.squeeze(labels, dim=1) # NxHxW + ignore = nnx.ignores_from_label(labels, num_classes, 1) # NxHxWx8 + not_ignore = np.logical_not(ignore) + not_ignore = torch.unsqueeze(not_ignore, dim=3) # NxHxWx1x8 + + # Compute edge map. + one_hot_lab = F.one_hot(labels, depth=num_classes) + edge = nnx.edges_from_label(one_hot_lab, 1, 255) # NxHxWxCx8 + + # Remove ignored pixels from the edge/non-edge. + edge = np.logical_and(edge, not_ignore) + not_edge = np.logical_and(np.logical_not(edge), not_ignore) + + edge_indices = torch.nonzero(torch.reshape(edge, (-1,))) + not_edge_indices = torch.nonzero(torch.reshape(not_edge, (-1,))) + + # Extract eight corner from the center in a patch as paired pixels. + probs_paired = nnx.eightcorner_activation(probs, 1) # NxHxWxCx8 + probs = torch.unsqueeze(probs, dim=-1) # NxHxWxCx1 + bot_epsilon = 1e-4 + top_epsilon = 1.0 + + neg_probs = np.clip( + 1 - probs, bot_epsilon, top_epsilon) + neg_probs_paired = np.clip( + 1 - probs_paired, bot_epsilon, top_epsilon) + probs = np.clip( + probs, bot_epsilon, top_epsilon) + probs_paired = np.clip( + probs_paired, bot_epsilon, top_epsilon) + + # Compute KL-Divergence. + kldiv = probs_paired * torch.log(probs_paired / probs) + kldiv += neg_probs_paired * torch.log(neg_probs_paired / neg_probs) + edge_loss = torch.max(0.0, kld_margin - kldiv) + not_edge_loss = kldiv + + not_edge_loss = torch.reshape(not_edge_loss, (-1,)) + not_edge_loss = torch.gather(not_edge_loss, 0, not_edge_indices) + edge_loss = torch.reshape(edge_loss, (-1,)) + edge_loss = torch.gather(edge_loss, 0, edge_indices) + + return edge_loss, not_edge_loss + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +def adaptive_affinity_loss(labels, + one_hot_lab, + probs, + size, + num_classes, + kld_margin, + w_edge, + w_not_edge, + ignore_index=-1): + """Adaptive affinity field (AAF) loss. + + This function computes AAF loss. There are three components in the function: + 1) extracts edges from the ground-truth labels. + 2) extracts ignored pixels and their paired pixels (usually the eight corner + pixels). + 3) extracts eight corner pixels/predictions from the center in a + (2*size+1)x(2*size+1) patch + 4) computes KL-Divergence between center pixels and their paired pixels (the + eight corner). + 5) imposes adaptive weightings on the loss. + + Args: + labels: A tensor of size [batch_size, height_in, width_in], indicating + semantic segmentation ground-truth labels. + one_hot_lab: A tensor of size [batch_size, num_classes, height_in, width_in] + which is the ground-truth labels in the form of one-hot vector. + probs: A tensor of size [batch_size, num_classes, height_in, width_in], + indicating segmentation predictions. + size: A number indicating the half size of a patch. + num_classes: A number indicating the total number of valid classes. The + kld_margin: A number indicating the margin for KL-Divergence at edge. + w_edge: A number indicating the weighting for KL-Divergence at edge. + w_not_edge: A number indicating the weighting for KL-Divergence at non-edge. + ignore_index: ignore index + + Returns: + Two 1-D tensors value indicating the loss at edge and non-edge. + """ + # Compute ignore map (e.g, label of 255 and their paired pixels). 
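+    # (Note) The KL term computed later in this function is the per-class binary
+    # KL divergence  KL = p_pair*log(p_pair/p) + (1-p_pair)*log((1-p_pair)/(1-p));
+    # edge pairs are penalized with the hinge max(0, kld_margin - KL), while
+    # non-edge pairs are penalized with KL itself.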
+ labels = torch.squeeze(labels, dim=1) # NxHxW + ignore = nnx.ignores_from_label(labels, num_classes, size, ignore_index) # NxHxWx8 + not_ignore = ~ignore + not_ignore = torch.unsqueeze(not_ignore, dim=3) # NxHxWx1x8 + + # Compute edge map. + edge = nnx.edges_from_label(one_hot_lab, size, ignore_index) # NxHxWxCx8 + + # Log.info('{} {}'.format(edge.shape, not_ignore.shape)) + + # Remove ignored pixels from the edge/non-edge. + edge = edge & not_ignore + + + not_edge = ~edge & not_ignore + + edge_indices = torch.nonzero(torch.reshape(edge, (-1,))) + # print(edge_indices.size()) + if edge_indices.size()[0] == 0: + edge_loss = torch.tensor(0.0, requires_grad=False).cuda() + not_edge_loss = torch.tensor(0.0, requires_grad=False).cuda() + return edge_loss, not_edge_loss + + not_edge_indices = torch.nonzero(torch.reshape(not_edge, (-1,))) + + # Extract eight corner from the center in a patch as paired pixels. + probs_paired = nnx.eightcorner_activation(probs, size) # NxHxWxCx8 + probs = torch.unsqueeze(probs, dim=-1) # NxHxWxCx1 + bot_epsilon = torch.tensor(1e-4, requires_grad=False).cuda() + top_epsilon = torch.tensor(1.0, requires_grad=False).cuda() + + neg_probs = torch.where(1 - probs < bot_epsilon, bot_epsilon, 1 - probs) + neg_probs = torch.where(neg_probs > top_epsilon, top_epsilon, neg_probs) + + neg_probs_paired = torch.where(1 - probs_paired < bot_epsilon, bot_epsilon, 1 - probs_paired) + neg_probs_paired = torch.where(neg_probs_paired > top_epsilon, top_epsilon, neg_probs_paired) + + probs = torch.where(probs < bot_epsilon, bot_epsilon, probs) + probs = torch.where(probs > top_epsilon, top_epsilon, probs) + + probs_paired = torch.where(probs_paired < bot_epsilon, bot_epsilon, probs_paired) + probs_paired = torch.where(probs_paired > top_epsilon, top_epsilon, probs_paired) + + # neg_probs = np.clip( + # 1-probs, bot_epsilon, top_epsilon) + # neg_probs_paired = np.clip( + # 1-probs_paired, bot_epsilon, top_epsilon) + # probs = np.clip( + # probs, bot_epsilon, top_epsilon) + # probs_paired = np.clip( + # probs_paired, bot_epsilon, top_epsilon) + + # Compute KL-Divergence. + kldiv = probs_paired * torch.log(probs_paired / probs) + kldiv += neg_probs_paired * torch.log(neg_probs_paired / neg_probs) + edge_loss = torch.max(torch.tensor(0.0, requires_grad=False).cuda(), kld_margin - kldiv) + not_edge_loss = kldiv + + # Impose weights on edge/non-edge losses. 
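+    # The per-class weightings w_edge / w_not_edge are picked out per pixel through the
+    # ground-truth one-hot map and then broadcast onto the edge / non-edge KL losses.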
+ one_hot_lab = torch.unsqueeze(one_hot_lab, dim=-1) + + w_edge = torch.sum(w_edge * one_hot_lab.float(), dim=3, keepdim=True) # NxHxWx1x1 + w_not_edge = torch.sum(w_not_edge * one_hot_lab.float(), dim=3, keepdim=True) # NxHxWx1x1 + + edge_loss *= w_edge.permute(0, 3, 1, 2, 4) + not_edge_loss *= w_not_edge.permute(0, 3, 1, 2, 4) + + not_edge_loss = torch.reshape(not_edge_loss, (-1, 1)) + not_edge_loss = torch.gather(not_edge_loss, 0, not_edge_indices) + edge_loss = torch.reshape(edge_loss, (-1, 1)) + edge_loss = torch.gather(edge_loss, 0, edge_indices) + + return edge_loss, not_edge_loss diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_contrast.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_contrast.py new file mode 100644 index 0000000..c368f16 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_contrast.py @@ -0,0 +1,234 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from abc import ABC + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from models.protoseg_core.lib.loss.loss_helper import FSAuxCELoss, FSAuxRMILoss, FSCELoss +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class PixelContrastLoss(nn.Module, ABC): + def __init__(self, configer): + super(PixelContrastLoss, self).__init__() + + self.configer = configer + self.temperature = self.configer.get('contrast', 'temperature') + self.base_temperature = self.configer.get('contrast', 'base_temperature') + + self.ignore_label = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + self.ignore_label = self.configer.get('loss', 'params')['ce_ignore_index'] + + self.max_samples = self.configer.get('contrast', 'max_samples') + self.max_views = self.configer.get('contrast', 'max_views') + + def _hard_anchor_sampling(self, X, y_hat, y): + batch_size, feat_dim = X.shape[0], X.shape[-1] + + classes = [] + total_classes = 0 + for ii in range(batch_size): + this_y = y_hat[ii] + this_classes = torch.unique(this_y) + this_classes = [x for x in this_classes if x != self.ignore_label] + this_classes = [x for x in this_classes if (this_y == x).nonzero().shape[0] > self.max_views] + + classes.append(this_classes) + total_classes += len(this_classes) + + if total_classes == 0: + return None, None + + n_view = self.max_samples // total_classes + n_view = min(n_view, self.max_views) + + X_ = torch.zeros((total_classes, n_view, feat_dim), dtype=torch.float).cuda() + y_ = torch.zeros(total_classes, dtype=torch.float).cuda() + + X_ptr = 0 + for ii in range(batch_size): + this_y_hat = y_hat[ii] + this_y = y[ii] + this_classes = classes[ii] + + for cls_id in this_classes: + hard_indices = ((this_y_hat == cls_id) & (this_y != cls_id)).nonzero() + easy_indices = ((this_y_hat == cls_id) & (this_y == cls_id)).nonzero() + + num_hard = hard_indices.shape[0] + num_easy = easy_indices.shape[0] + + if num_hard >= n_view / 2 and num_easy >= n_view / 2: + num_hard_keep = n_view // 2 + num_easy_keep = n_view - num_hard_keep + elif num_hard >= n_view / 2: + num_easy_keep = num_easy + num_hard_keep = n_view - num_easy_keep + elif num_easy >= n_view / 2: + num_hard_keep = num_hard + num_easy_keep = n_view - num_hard_keep + else: + Log.info('this shoud be never touched! 
{} {} {}'.format(num_hard, num_easy, n_view)) + raise Exception + + perm = torch.randperm(num_hard) + hard_indices = hard_indices[perm[:num_hard_keep]] + perm = torch.randperm(num_easy) + easy_indices = easy_indices[perm[:num_easy_keep]] + indices = torch.cat((hard_indices, easy_indices), dim=0) + + X_[X_ptr, :, :] = X[ii, indices, :].squeeze(1) + y_[X_ptr] = cls_id + X_ptr += 1 + + return X_, y_ + + def _contrastive(self, feats_, labels_): + anchor_num, n_view = feats_.shape[0], feats_.shape[1] + + labels_ = labels_.contiguous().view(-1, 1) + mask = torch.eq(labels_, torch.transpose(labels_, 0, 1)).float().cuda() + + contrast_count = n_view + contrast_feature = torch.cat(torch.unbind(feats_, dim=1), dim=0) + + anchor_feature = contrast_feature + anchor_count = contrast_count + + anchor_dot_contrast = torch.div(torch.matmul(anchor_feature, torch.transpose(contrast_feature, 0, 1)), + self.temperature) + logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True) + logits = anchor_dot_contrast - logits_max.detach() + + mask = mask.repeat(anchor_count, contrast_count) + neg_mask = 1 - mask + + logits_mask = torch.ones_like(mask).scatter_(1, + torch.arange(anchor_num * anchor_count).view(-1, 1).cuda(), + 0) + mask = mask * logits_mask + + neg_logits = torch.exp(logits) * neg_mask + neg_logits = neg_logits.sum(1, keepdim=True) + + exp_logits = torch.exp(logits) + + log_prob = logits - torch.log(exp_logits + neg_logits) + + mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1) + + loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos + loss = loss.mean() + + return loss + + def forward(self, feats, labels=None, predict=None): + labels = labels.unsqueeze(1).float().clone() + labels = torch.nn.functional.interpolate(labels, + (feats.shape[2], feats.shape[3]), mode='nearest') + labels = labels.squeeze(1).long() + assert labels.shape[-1] == feats.shape[-1], '{} {}'.format(labels.shape, feats.shape) + + batch_size = feats.shape[0] + + labels = labels.contiguous().view(batch_size, -1) + predict = predict.contiguous().view(batch_size, -1) + feats = feats.permute(0, 2, 3, 1) + feats = feats.contiguous().view(feats.shape[0], -1, feats.shape[-1]) + + feats_, labels_ = self._hard_anchor_sampling(feats, labels, predict) + + loss = self._contrastive(feats_, labels_) + return loss + + +class ContrastCELoss(nn.Module, ABC): + def __init__(self, configer=None): + super(ContrastCELoss, self).__init__() + + self.configer = configer + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + Log.info('ignore_index: {}'.format(ignore_index)) + + self.loss_weight = self.configer.get('contrast', 'loss_weight') + self.use_rmi = self.configer.get('contrast', 'use_rmi') + + if self.use_rmi: + self.seg_criterion = FSAuxRMILoss(configer=configer) + else: + self.seg_criterion = FSCELoss(configer=configer) + + self.contrast_criterion = PixelContrastLoss(configer=configer) + + def forward(self, preds, target, with_embed=False): + h, w = target.size(1), target.size(2) + + assert "seg" in preds + assert "embed" in preds + + seg = preds['seg'] + embedding = preds['embed'] + + pred = F.interpolate(input=seg, size=(h, w), mode='bilinear', align_corners=True) + loss = self.seg_criterion(pred, target) + + _, predict = torch.max(seg, 1) + loss_contrast = self.contrast_criterion(embedding, target, predict) + + if with_embed is True: + return loss + 
self.loss_weight * loss_contrast + + return loss + 0 * loss_contrast # just a trick to avoid errors in distributed training + + +class ContrastAuxCELoss(nn.Module, ABC): + def __init__(self, configer=None): + super(ContrastAuxCELoss, self).__init__() + + self.configer = configer + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + Log.info('ignore_index: {}'.format(ignore_index)) + + self.loss_weight = self.configer.get('contrast', 'loss_weight') + self.use_rmi = self.configer.get('contrast', 'use_rmi') + + if self.use_rmi: + self.seg_criterion = FSAuxRMILoss(configer=configer) + else: + self.seg_criterion = FSAuxCELoss(configer=configer) + + self.contrast_criterion = PixelContrastLoss(configer=configer) + + def forward(self, preds, target, with_embed=False): + h, w = target.size(1), target.size(2) + + assert "seg" in preds + assert "seg_aux" in preds + assert "embed" in preds + + seg = preds['seg'] + seg_aux = preds['seg_aux'] + embedding = preds['embed'] + + pred = F.interpolate(input=seg, size=(h, w), mode='bilinear', align_corners=True) + pred_aux = F.interpolate(input=seg_aux, size=(h, w), mode='bilinear', align_corners=True) + loss = self.seg_criterion([pred_aux, pred], target) + + _, predict = torch.max(seg, 1) + loss_contrast = self.contrast_criterion(embedding, target, predict) + + if with_embed is True: + return loss + self.loss_weight * loss_contrast + + return loss + 0 * loss_contrast # just a trick to avoid errors in distributed training diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_contrast_mem.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_contrast_mem.py new file mode 100644 index 0000000..ff91402 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_contrast_mem.py @@ -0,0 +1,276 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from abc import ABC + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from models.protoseg_core.lib.loss.loss_helper import FSAuxCELoss, FSRMILoss, FSCELoss, FSCELOVASZLoss +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class PixelContrastLoss(nn.Module, ABC): + def __init__(self, configer): + super(PixelContrastLoss, self).__init__() + + self.configer = configer + self.temperature = self.configer.get('contrast', 'temperature') + self.base_temperature = self.configer.get('contrast', 'base_temperature') + + self.ignore_label = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + self.ignore_label = self.configer.get('loss', 'params')['ce_ignore_index'] + + self.max_samples = self.configer.get('contrast', 'max_samples') + self.max_views = self.configer.get('contrast', 'max_views') + + def _hard_anchor_sampling(self, X, y_hat, y): + batch_size, feat_dim = X.shape[0], X.shape[-1] + + classes = [] + total_classes = 0 + for ii in range(batch_size): + this_y = y_hat[ii] + this_classes = torch.unique(this_y) + this_classes = [x for x in this_classes if x != self.ignore_label] + this_classes = [x for x in this_classes if (this_y == x).nonzero().shape[0] > self.max_views] + + classes.append(this_classes) + total_classes += len(this_classes) + + if total_classes == 0: + return None, None + + n_view = self.max_samples // total_classes + n_view = min(n_view, self.max_views) + + X_ = 
torch.zeros((total_classes, n_view, feat_dim), dtype=torch.float).cuda() + y_ = torch.zeros(total_classes, dtype=torch.float).cuda() + + X_ptr = 0 + for ii in range(batch_size): + this_y_hat = y_hat[ii] + this_y = y[ii] + this_classes = classes[ii] + + for cls_id in this_classes: + hard_indices = ((this_y_hat == cls_id) & (this_y != cls_id)).nonzero() + easy_indices = ((this_y_hat == cls_id) & (this_y == cls_id)).nonzero() + + num_hard = hard_indices.shape[0] + num_easy = easy_indices.shape[0] + + if num_hard >= n_view / 2 and num_easy >= n_view / 2: + num_hard_keep = n_view // 2 + num_easy_keep = n_view - num_hard_keep + elif num_hard >= n_view / 2: + num_easy_keep = num_easy + num_hard_keep = n_view - num_easy_keep + elif num_easy >= n_view / 2: + num_hard_keep = num_hard + num_easy_keep = n_view - num_hard_keep + else: + Log.info('this shoud be never touched! {} {} {}'.format(num_hard, num_easy, n_view)) + raise Exception + + perm = torch.randperm(num_hard) + hard_indices = hard_indices[perm[:num_hard_keep]] + perm = torch.randperm(num_easy) + easy_indices = easy_indices[perm[:num_easy_keep]] + indices = torch.cat((hard_indices, easy_indices), dim=0) + + X_[X_ptr, :, :] = X[ii, indices, :].squeeze(1) + y_[X_ptr] = cls_id + X_ptr += 1 + + return X_, y_ + + def _sample_negative(self, Q): + class_num, cache_size, feat_size = Q.shape + + X_ = torch.zeros((class_num * cache_size, feat_size)).float().cuda() + y_ = torch.zeros((class_num * cache_size, 1)).float().cuda() + sample_ptr = 0 + for ii in range(class_num): + if ii == 0: continue + this_q = Q[ii, :cache_size, :] + + X_[sample_ptr:sample_ptr + cache_size, ...] = this_q + y_[sample_ptr:sample_ptr + cache_size, ...] = ii + sample_ptr += cache_size + + return X_, y_ + + def _contrastive(self, X_anchor, y_anchor, queue=None): + anchor_num, n_view = X_anchor.shape[0], X_anchor.shape[1] + + y_anchor = y_anchor.contiguous().view(-1, 1) + anchor_count = n_view + anchor_feature = torch.cat(torch.unbind(X_anchor, dim=1), dim=0) + + if queue is not None: + X_contrast, y_contrast = self._sample_negative(queue) + y_contrast = y_contrast.contiguous().view(-1, 1) + contrast_count = 1 + contrast_feature = X_contrast + else: + y_contrast = y_anchor + contrast_count = n_view + contrast_feature = torch.cat(torch.unbind(X_anchor, dim=1), dim=0) + + mask = torch.eq(y_anchor, y_contrast.T).float().cuda() + + anchor_dot_contrast = torch.div(torch.matmul(anchor_feature, contrast_feature.T), + self.temperature) + logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True) + logits = anchor_dot_contrast - logits_max.detach() + + mask = mask.repeat(anchor_count, contrast_count) + neg_mask = 1 - mask + + logits_mask = torch.ones_like(mask).scatter_(1, + torch.arange(anchor_num * anchor_count).view(-1, 1).cuda(), + 0) + + mask = mask * logits_mask + + neg_logits = torch.exp(logits) * neg_mask + neg_logits = neg_logits.sum(1, keepdim=True) + + exp_logits = torch.exp(logits) + + log_prob = logits - torch.log(exp_logits + neg_logits) + + mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1) + + loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos + loss = loss.mean() + + return loss + + def forward(self, feats, labels=None, predict=None, queue=None): + labels = labels.unsqueeze(1).float().clone() + labels = torch.nn.functional.interpolate(labels, + (feats.shape[2], feats.shape[3]), mode='nearest') + labels = labels.squeeze(1).long() + assert labels.shape[-1] == feats.shape[-1], '{} {}'.format(labels.shape, feats.shape) + + batch_size = 
feats.shape[0] + + labels = labels.contiguous().view(batch_size, -1) + predict = predict.contiguous().view(batch_size, -1) + feats = feats.permute(0, 2, 3, 1) + feats = feats.contiguous().view(feats.shape[0], -1, feats.shape[-1]) + + feats_, labels_ = self._hard_anchor_sampling(feats, labels, predict) + + loss = self._contrastive(feats_, labels_, queue=queue) + return loss + + +class ContrastCELoss(nn.Module, ABC): + def __init__(self, configer=None): + super(ContrastCELoss, self).__init__() + + self.configer = configer + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + Log.info('ignore_index: {}'.format(ignore_index)) + + self.loss_weight = self.configer.get('contrast', 'loss_weight') + self.use_rmi = self.configer.get('contrast', 'use_rmi') + self.use_lovasz = self.configer.get('contrast', 'use_lovasz') + + if self.use_rmi: + self.seg_criterion = FSRMILoss(configer=configer) + elif self.use_lovasz: + self.seg_criterion = FSCELOVASZLoss(configer=configer) + else: + self.seg_criterion = FSCELoss(configer=configer) + + self.contrast_criterion = PixelContrastLoss(configer=configer) + + def forward(self, preds, target, with_embed=False): + h, w = target.size(1), target.size(2) + + assert "seg" in preds + assert "embed" in preds + + seg = preds['seg'] + embedding = preds['embed'] + + if "segment_queue" in preds: + segment_queue = preds['segment_queue'] + else: + segment_queue = None + + if "pixel_queue" in preds: + pixel_queue = preds['pixel_queue'] + else: + pixel_queue = None + + pred = F.interpolate(input=seg, size=(h, w), mode='bilinear', align_corners=True) + loss = self.seg_criterion(pred, target) + + if segment_queue is not None and pixel_queue is not None: + queue = torch.cat((segment_queue, pixel_queue), dim=1) + + _, predict = torch.max(seg, 1) + loss_contrast = self.contrast_criterion(embedding, target, predict, queue) + else: + loss_contrast = 0 + + if with_embed is True: + return loss + self.loss_weight * loss_contrast + + return loss + 0 * loss_contrast # just a trick to avoid errors in distributed training + + +class ContrastAuxCELoss(nn.Module, ABC): + def __init__(self, configer=None): + super(ContrastAuxCELoss, self).__init__() + + self.configer = configer + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + Log.info('ignore_index: {}'.format(ignore_index)) + + self.loss_weight = self.configer.get('contrast', 'loss_weight') + self.use_rmi = self.configer.get('contrast', 'use_rmi') + + if self.use_rmi: + self.seg_criterion = FSAuxRMILoss(configer=configer) + else: + self.seg_criterion = FSAuxCELoss(configer=configer) + + self.contrast_criterion = PixelContrastLoss(configer=configer) + + def forward(self, preds, target): + h, w = target.size(1), target.size(2) + + assert "seg" in preds + assert "seg_aux" in preds + + seg = preds['seg'] + seg_aux = preds['seg_aux'] + + embedding = preds['embedding'] if 'embedding' in preds else None + + pred = F.interpolate(input=seg, size=(h, w), mode='bilinear', align_corners=True) + pred_aux = F.interpolate(input=seg_aux, size=(h, w), mode='bilinear', align_corners=True) + loss = self.seg_criterion([pred_aux, pred], target) + + if embedding is not None: + _, predict = torch.max(seg, 1) + + loss_contrast = self.contrast_criterion(embedding, target, 
predict) + return loss + self.loss_weight * loss_contrast + + return loss diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_helper.py new file mode 100644 index 0000000..cb00356 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_helper.py @@ -0,0 +1,431 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Donny You, RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pdb +import torch +import torch.nn as nn +import numpy as np +import torch.nn.functional as F +from torch.autograd import Variable +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.loss.rmi_loss import RMILoss + +from models.protoseg_core.lib.loss.lovasz_loss import lovasz_softmax_flat, flatten_probas + + +class FSCERMILoss(nn.Module): + def __init__(self, configer=None): + super(FSCERMILoss, self).__init__() + self.configer = configer + weight = None + if self.configer.exists('loss', 'params') and 'ce_weight' in self.configer.get('loss', 'params'): + weight = self.configer.get('loss', 'params')['ce_weight'] + weight = torch.FloatTensor(weight).cuda() + + reduction = 'elementwise_mean' + if self.configer.exists('loss', 'params') and 'ce_reduction' in self.configer.get('loss', 'params'): + reduction = self.configer.get('loss', 'params')['ce_reduction'] + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + Log.info('ignore_index: {}'.format(ignore_index)) + + self.ignore_index = ignore_index + self.ce_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction=reduction) + self.rmi_loss = RMILoss(self.configer) + + def forward(self, inputs, *targets, weights=None, **kwargs): + if isinstance(inputs, dict) and 'seg' in inputs: + inputs = inputs['seg'] + loss = 0.0 + if isinstance(inputs, tuple) or isinstance(inputs, list): + if weights is None: + weights = [1.0] * len(inputs) + + for i in range(len(inputs)): + if len(targets) > 1: + target = self._scale_target(targets[i], (inputs[i].size(2), inputs[i].size(3))) + loss += weights[i] * self.ce_loss(inputs[i], target) + else: + target = self._scale_target(targets[0], (inputs[i].size(2), inputs[i].size(3))) + loss += weights[i] * self.ce_loss(inputs[i], target) + + else: + target = self._scale_target(targets[0], (inputs.size(2), inputs.size(3))) + loss = self.ce_loss(inputs, target) + + loss_rmi = self.rmi_loss(inputs, target) + + loss = loss + loss_rmi + + return loss + + +class FSCELOVASZLoss(nn.Module): + def __init__(self, configer=None): + super(FSCELOVASZLoss, self).__init__() + self.configer = configer + weight = None + if self.configer.exists('loss', 'params') and 'ce_weight' in self.configer.get('loss', 'params'): + weight = self.configer.get('loss', 'params')['ce_weight'] + weight = torch.FloatTensor(weight).cuda() + + reduction = 'elementwise_mean' + if self.configer.exists('loss', 'params') and 'ce_reduction' in self.configer.get('loss', 
'params'): + reduction = self.configer.get('loss', 'params')['ce_reduction'] + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + Log.info('ignore_index: {}'.format(ignore_index)) + + self.ignore_index = ignore_index + self.ce_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction=reduction) + + def forward(self, inputs, *targets, weights=None, **kwargs): + if isinstance(inputs, dict) and 'seg' in inputs: + inputs = inputs['seg'] + loss = 0.0 + if isinstance(inputs, tuple) or isinstance(inputs, list): + if weights is None: + weights = [1.0] * len(inputs) + + for i in range(len(inputs)): + if len(targets) > 1: + target = self._scale_target(targets[i], (inputs[i].size(2), inputs[i].size(3))) + loss += weights[i] * self.ce_loss(inputs[i], target) + else: + target = self._scale_target(targets[0], (inputs[i].size(2), inputs[i].size(3))) + loss += weights[i] * self.ce_loss(inputs[i], target) + + else: + target = self._scale_target(targets[0], (inputs.size(2), inputs.size(3))) + loss = self.ce_loss(inputs, target) + + pred = F.softmax(input=inputs, dim=1) + loss_lovasz = lovasz_softmax_flat(*flatten_probas(pred, target, self.ignore_index), + only_present=True) + + loss = loss + loss_lovasz + + return loss + + @staticmethod + def _scale_target(targets_, scaled_size): + targets = targets_.clone().unsqueeze(1).float() + targets = F.interpolate(targets, size=scaled_size, mode='nearest') + return targets.squeeze(1).long() + + +class WeightedFSOhemCELoss(nn.Module): + def __init__(self, configer): + super().__init__() + self.configer = configer + self.thresh = self.configer.get('loss', 'params')['ohem_thresh'] + self.reduction = 'elementwise_mean' + if self.configer.exists('loss', 'params') and 'ce_reduction' in self.configer.get('loss', 'params'): + self.reduction = self.configer.get('loss', 'params')['ce_reduction'] + + def forward(self, predict, target, min_kept=1, weight=None, ignore_index=-1, **kwargs): + """ + Args: + predict:(n, c, h, w) + target:(n, h, w) + """ + prob_out = F.softmax(predict, dim=1) + tmp_target = target.clone() + tmp_target[tmp_target == ignore_index] = 0 + prob = prob_out.gather(1, tmp_target.unsqueeze(1)) + mask = target.contiguous().view(-1, ) != ignore_index + sort_prob, sort_indices = prob.contiguous().view(-1, )[mask].contiguous().sort() + min_threshold = sort_prob[min(min_kept, sort_prob.numel() - 1)] + threshold = max(min_threshold, self.thresh) + loss_matrix = F.cross_entropy(predict, target, weight=weight, ignore_index=ignore_index, + reduction='none').contiguous().view(-1, ) + sort_loss_matrix = loss_matrix[mask][sort_indices] + select_loss_matrix = sort_loss_matrix[sort_prob < threshold] + if self.reduction == 'sum': + return select_loss_matrix.sum() + elif self.reduction == 'elementwise_mean': + return select_loss_matrix.mean() + else: + raise NotImplementedError('Reduction Error!') + + +# Cross-entropy Loss +class FSCELoss(nn.Module): + def __init__(self, configer=None): + super(FSCELoss, self).__init__() + self.configer = configer + weight = None + if self.configer.exists('loss', 'params') and 'ce_weight' in self.configer.get('loss', 'params'): + weight = self.configer.get('loss', 'params')['ce_weight'] + weight = torch.FloatTensor(weight).cuda() + + reduction = 'elementwise_mean' + if self.configer.exists('loss', 'params') and 'ce_reduction' in self.configer.get('loss', 'params'): + 
reduction = self.configer.get('loss', 'params')['ce_reduction'] + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + + self.ce_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction=reduction) + + def forward(self, inputs, *targets, weights=None, **kwargs): + loss = 0.0 + if isinstance(inputs, tuple) or isinstance(inputs, list): + if weights is None: + weights = [1.0] * len(inputs) + + for i in range(len(inputs)): + if len(targets) > 1: + target = self._scale_target(targets[i], (inputs[i].size(2), inputs[i].size(3))) + loss += weights[i] * self.ce_loss(inputs[i], target) + else: + target = self._scale_target(targets[0], (inputs[i].size(2), inputs[i].size(3))) + loss += weights[i] * self.ce_loss(inputs[i], target) + + else: + target = self._scale_target(targets[0], (inputs.size(2), inputs.size(3))) + loss = self.ce_loss(inputs, target) + + return loss + + @staticmethod + def _scale_target(targets_, scaled_size): + targets = targets_.clone().unsqueeze(1).float() + targets = F.interpolate(targets, size=scaled_size, mode='nearest') + return targets.squeeze(1).long() + + +class FSOhemCELoss(nn.Module): + def __init__(self, configer): + super(FSOhemCELoss, self).__init__() + self.configer = configer + self.thresh = self.configer.get('loss', 'params')['ohem_thresh'] + self.min_kept = max(1, self.configer.get('loss', 'params')['ohem_minkeep']) + weight = None + if self.configer.exists('loss', 'params') and 'ce_weight' in self.configer.get('loss', 'params'): + weight = self.configer.get('loss', 'params')['ce_weight'] + weight = torch.FloatTensor(weight).cuda() + + self.reduction = 'elementwise_mean' + if self.configer.exists('loss', 'params') and 'ce_reduction' in self.configer.get('loss', 'params'): + self.reduction = self.configer.get('loss', 'params')['ce_reduction'] + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + + self.ignore_label = ignore_index + self.ce_loss = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index, reduction='none') + + def forward(self, predict, target, **kwargs): + """ + Args: + predict:(n, c, h, w) + target:(n, h, w) + weight (Tensor, optional): a manual rescaling weight given to each class. 
+ If given, has to be a Tensor of size "nclasses" + """ + prob_out = F.softmax(predict, dim=1) + tmp_target = target.clone() + tmp_target[tmp_target == self.ignore_label] = 0 + prob = prob_out.gather(1, tmp_target.unsqueeze(1)) + mask = target.contiguous().view(-1, ) != self.ignore_label + sort_prob, sort_indices = prob.contiguous().view(-1, )[mask].contiguous().sort() + min_threshold = sort_prob[min(self.min_kept, sort_prob.numel() - 1)] + threshold = max(min_threshold, self.thresh) + loss_matirx = self.ce_loss(predict, target).contiguous().view(-1, ) + sort_loss_matirx = loss_matirx[mask][sort_indices] + select_loss_matrix = sort_loss_matirx[sort_prob < threshold] + if self.reduction == 'sum': + return select_loss_matrix.sum() + elif self.reduction == 'elementwise_mean': + return select_loss_matrix.mean() + else: + raise NotImplementedError('Reduction Error!') + + +class FSAuxOhemCELoss(nn.Module): + def __init__(self, configer=None): + super(FSAuxOhemCELoss, self).__init__() + self.configer = configer + self.ce_loss = FSCELoss(self.configer) + if self.configer.get('loss', 'loss_type') == 'fs_auxohemce_loss': + self.ohem_ce_loss = FSOhemCELoss(self.configer) + else: + assert self.configer.get('loss', 'loss_type') == 'fs_auxslowohemce_loss' + self.ohem_ce_loss = FSSlowOhemCELoss(self.configer) + + def forward(self, inputs, targets, **kwargs): + aux_out, seg_out = inputs + seg_loss = self.ohem_ce_loss(seg_out, targets) + aux_loss = self.ce_loss(aux_out, targets) + loss = self.configer.get('network', 'loss_weights')['seg_loss'] * seg_loss + loss = loss + self.configer.get('network', 'loss_weights')['aux_loss'] * aux_loss + return loss + + +class FSAuxCELossDSN(nn.Module): + def __init__(self, configer=None): + super(FSAuxCELossDSN, self).__init__() + self.configer = configer + self.ce_loss = FSCELoss(self.configer) + + def forward(self, inputs, targets, **kwargs): + aux1, aux2, aux3, seg_out = inputs + seg_loss = self.ce_loss(seg_out, targets) + aux1_loss = self.ce_loss(aux1, targets) + aux2_loss = self.ce_loss(aux2, targets) + aux3_loss = self.ce_loss(aux3, targets) + loss = self.configer.get('network', 'loss_weights')['seg_loss'] * seg_loss + loss = loss + self.configer.get('network', 'loss_weights')['aux_loss'] * (aux1_loss + aux2_loss + aux3_loss) / 3 + return loss + + +class FSAuxCELoss(nn.Module): + def __init__(self, configer=None): + super(FSAuxCELoss, self).__init__() + self.configer = configer + self.ce_loss = FSCELoss(self.configer) + + def forward(self, inputs, targets, **kwargs): + aux_out, seg_out = inputs + seg_loss = self.ce_loss(seg_out, targets) + aux_loss = self.ce_loss(aux_out, targets) + loss = self.configer.get('network', 'loss_weights')['seg_loss'] * seg_loss + loss = loss + self.configer.get('network', 'loss_weights')['aux_loss'] * aux_loss + return loss + + +class FSAuxRMILoss(nn.Module): + def __init__(self, configer=None): + super(FSAuxRMILoss, self).__init__() + self.configer = configer + self.ce_loss = FSCELoss(self.configer) + self.rmi_loss = RMILoss(self.configer) + + def forward(self, inputs, targets, **kwargs): + aux_out, seg_out = inputs + aux_loss = self.ce_loss(aux_out, targets) + seg_loss = self.rmi_loss(seg_out, targets) + loss = self.configer.get('network', 'loss_weights')['seg_loss'] * seg_loss + loss = loss + self.configer.get('network', 'loss_weights')['aux_loss'] * aux_loss + return loss + + +class MSFSAuxRMILoss(nn.Module): + def __init__(self, configer=None): + super(MSFSAuxRMILoss, self).__init__() + self.configer = configer + self.ce_loss = 
FSCELoss(self.configer) + self.rmi_loss = RMILoss(self.configer) + + def forward(self, inputs, targets, **kwargs): + aux_out = inputs['aux'] + seg_out = inputs['pred'] + pred_05x = inputs['pred_05x'] + pred_10x = inputs['pred_10x'] + + aux_loss = self.ce_loss(aux_out, targets) + seg_loss = self.rmi_loss(seg_out, targets) + loss = self.configer.get('network', 'loss_weights')['seg_loss'] * seg_loss + loss = loss + self.configer.get('network', 'loss_weights')['aux_loss'] * aux_loss + + scaled_pred_05x = torch.nn.functional.interpolate(pred_05x, size=(seg_out.size(2), seg_out.size(3)), + mode='bilinear', align_corners=False) + loss_lo = self.ce_loss(scaled_pred_05x, targets) + loss_hi = self.ce_loss(pred_10x, targets) + loss += 0.05 * loss_lo + loss += 0.05 * loss_hi + + return loss + + +class FSRMILoss(nn.Module): + def __init__(self, configer=None): + super(FSRMILoss, self).__init__() + self.configer = configer + self.rmi_loss = RMILoss(self.configer) + + def forward(self, inputs, targets, **kwargs): + seg_out = inputs + loss = self.rmi_loss(seg_out, targets) + return loss + + +class SegFixLoss(nn.Module): + """ + We predict a binary mask to categorize the boundary pixels as class 1 and otherwise as class 0 + Based on the pixels predicted as 1 within the binary mask, we further predict the direction for these + pixels. + """ + + def __init__(self, configer=None): + super().__init__() + self.configer = configer + self.ce_loss = FSCELoss(self.configer) + + def calc_weights(self, label_map, num_classes): + + weights = [] + for i in range(num_classes): + weights.append((label_map == i).sum().data) + weights = torch.FloatTensor(weights) + weights_sum = weights.sum() + return (1 - weights / weights_sum).cuda() + + def forward(self, inputs, targets, **kwargs): + + from lib.utils.helpers.offset_helper import DTOffsetHelper + + pred_mask, pred_direction = inputs + + seg_label_map, distance_map, angle_map = targets[0], targets[1], targets[2] + gt_mask = DTOffsetHelper.distance_to_mask_label(distance_map, seg_label_map, return_tensor=True) + + gt_size = gt_mask.shape[1:] + mask_weights = self.calc_weights(gt_mask, 2) + + pred_direction = F.interpolate(pred_direction, size=gt_size, mode="bilinear", align_corners=True) + pred_mask = F.interpolate(pred_mask, size=gt_size, mode="bilinear", align_corners=True) + mask_loss = F.cross_entropy(pred_mask, gt_mask, weight=mask_weights, ignore_index=-1) + + mask_threshold = float(os.environ.get('mask_threshold', 0.5)) + binary_pred_mask = torch.softmax(pred_mask, dim=1)[:, 1, :, :] > mask_threshold + + gt_direction = DTOffsetHelper.angle_to_direction_label( + angle_map, + seg_label_map=seg_label_map, + extra_ignore_mask=(binary_pred_mask == 0), + return_tensor=True + ) + + direction_loss_mask = gt_direction != -1 + direction_weights = self.calc_weights(gt_direction[direction_loss_mask], pred_direction.size(1)) + direction_loss = F.cross_entropy(pred_direction, gt_direction, weight=direction_weights, ignore_index=-1) + + if self.training \ + and self.configer.get('iters') % self.configer.get('solver', 'display_iter') == 0 \ + and torch.cuda.current_device() == 0: + Log.info('mask loss: {} direction loss: {}.'.format(mask_loss, direction_loss)) + + mask_weight = float(os.environ.get('mask_weight', 1)) + direction_weight = float(os.environ.get('direction_weight', 1)) + + return mask_weight * mask_loss + direction_weight * direction_loss diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_manager.py 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_manager.py new file mode 100644 index 0000000..c5d4f33 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_manager.py @@ -0,0 +1,72 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: DonnyYou, RainbowSecret, JingyiXie, JianyuanGuo +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from models.protoseg_core.lib.loss.loss_helper import FSAuxOhemCELoss, FSOhemCELoss, FSRMILoss +from models.protoseg_core.lib.loss.loss_helper import FSCELoss, FSAuxCELoss, FSAuxRMILoss, FSCELOVASZLoss, MSFSAuxRMILoss, FSAuxCELossDSN +from models.protoseg_core.lib.loss.loss_helper import SegFixLoss +from models.protoseg_core.lib.loss.rmi_loss import RMILoss +from models.protoseg_core.lib.loss.loss_contrast import ContrastAuxCELoss, ContrastCELoss +from models.protoseg_core.lib.loss.loss_contrast_mem import ContrastCELoss as MemContrastCELoss +from models.protoseg_core.lib.loss.loss_proto import PixelPrototypeCELoss + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.utils.distributed import is_distributed + + +SEG_LOSS_DICT = { + 'fs_ce_loss': FSCELoss, + 'fs_ohemce_loss': FSOhemCELoss, + 'fs_auxce_loss': FSAuxCELoss, + 'fs_aux_rmi_loss': FSAuxRMILoss, + 'fs_auxohemce_loss': FSAuxOhemCELoss, + 'segfix_loss': SegFixLoss, + 'rmi_loss': RMILoss, + 'fs_rmi_loss': FSRMILoss, + 'contrast_auxce_loss': ContrastAuxCELoss, + 'contrast_ce_loss': ContrastCELoss, + 'fs_ce_lovasz_loss': FSCELOVASZLoss, + 'ms_fs_aux_rmi_loss': MSFSAuxRMILoss, + 'fs_auxce_dsn_loss': FSAuxCELossDSN, + 'mem_contrast_ce_loss': MemContrastCELoss, + 'pixel_prototype_ce_loss': PixelPrototypeCELoss +} + + +class LossManager(object): + def __init__(self, configer): + self.configer = configer + + def _parallel(self, loss): + if is_distributed(): + Log.info('use distributed loss') + return loss + + if self.configer.get('network', 'loss_balance') and len(self.configer.get('gpu')) > 1: + Log.info('use DataParallelCriterion loss') + from lib.extensions.parallel.data_parallel import DataParallelCriterion + loss = DataParallelCriterion(loss) + + return loss + + def get_seg_loss(self, loss_type=None): + key = self.configer.get('loss', 'loss_type') if loss_type is None else loss_type + if key not in SEG_LOSS_DICT: + Log.error('Loss: {} not valid!'.format(key)) + exit(1) + Log.info('use loss: {}.'.format(key)) + loss = SEG_LOSS_DICT[key](self.configer) + return self._parallel(loss) + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_proto.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_proto.py new file mode 100644 index 0000000..3f9184d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/loss_proto.py @@ -0,0 +1,101 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from abc import ABC + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from models.protoseg_core.lib.loss.loss_helper import FSAuxCELoss, FSAuxRMILoss, FSCELoss +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class 
PPC(nn.Module, ABC): + def __init__(self, configer): + super(PPC, self).__init__() + + self.configer = configer + + self.ignore_label = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + self.ignore_label = self.configer.get('loss', 'params')['ce_ignore_index'] + + def forward(self, contrast_logits, contrast_target): + loss_ppc = F.cross_entropy(contrast_logits, contrast_target.long(), ignore_index=self.ignore_label) + + return loss_ppc + + +class PPD(nn.Module, ABC): + def __init__(self, configer): + super(PPD, self).__init__() + + self.configer = configer + + self.ignore_label = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + self.ignore_label = self.configer.get('loss', 'params')['ce_ignore_index'] + + def forward(self, contrast_logits, contrast_target): + contrast_logits = contrast_logits[contrast_target != self.ignore_label, :] + contrast_target = contrast_target[contrast_target != self.ignore_label] + + logits = torch.gather(contrast_logits, 1, contrast_target[:, None].long()) + loss_ppd = (1 - logits).pow(2).mean() + + return loss_ppd + + +class PixelPrototypeCELoss(nn.Module, ABC): + def __init__(self, configer=None): + super(PixelPrototypeCELoss, self).__init__() + + self.configer = configer + + ignore_index = -1 + if self.configer.exists('loss', 'params') and 'ce_ignore_index' in self.configer.get('loss', 'params'): + ignore_index = self.configer.get('loss', 'params')['ce_ignore_index'] + Log.info('ignore_index: {}'.format(ignore_index)) + + self.loss_ppc_weight = self.configer.get('protoseg', 'loss_ppc_weight') + self.loss_ppd_weight = self.configer.get('protoseg', 'loss_ppd_weight') + + self.use_rmi = self.configer.get('protoseg', 'use_rmi') + + if self.use_rmi: + self.seg_criterion = FSAuxRMILoss(configer=configer) + else: + self.seg_criterion = FSCELoss(configer=configer) + + self.ppc_criterion = PPC(configer=configer) + self.ppd_criterion = PPD(configer=configer) + + def forward(self, preds, target): + h, w = target.size(1), target.size(2) + + if isinstance(preds, dict): + assert "seg" in preds + assert "logits" in preds + assert "target" in preds + + seg = preds['seg'] + contrast_logits = preds['logits'] + contrast_target = preds['target'] + loss_ppc = self.ppc_criterion(contrast_logits, contrast_target) + loss_ppd = self.ppd_criterion(contrast_logits, contrast_target) + + pred = F.interpolate(input=seg, size=(h, w), mode='bilinear', align_corners=True) + loss = self.seg_criterion(pred, target) + return loss + self.loss_ppc_weight * loss_ppc + self.loss_ppd_weight * loss_ppd + + seg = preds + pred = F.interpolate(input=seg, size=(h, w), mode='bilinear', align_corners=True) + loss = self.seg_criterion(pred, target) + return loss + + + + + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/lovasz_loss.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/lovasz_loss.py new file mode 100644 index 0000000..4e22089 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/lovasz_loss.py @@ -0,0 +1,432 @@ +from itertools import filterfalse as ifilterfalse + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable + +from torch.nn import BCELoss +from models.protoseg_core.lib.loss.aaf import losses as lossx + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +# weights +# ATR training +# [0.85978634, 1.19630769, 1.02639146, 1.30664970, 0.97220603, 1.04885815, +# 
1.01745278, 1.01481690, 1.27155077, 1.12947663, 1.13016390, 1.06514227, +# 1.08384483, 1.08506841, 1.09560942, 1.09565198, 1.07504567, 1.20411509] + +# CCF +# [0.82073458, 1.23651165, 1.0366326, 0.97076566, 1.2802332, 0.98860602, +# 1.29035071, 1.03882453, 0.96725283, 1.05142434, 1.0075884, 0.98630539, +# 1.06208869, 1.0160915, 1.1613597, 1.17624919, 1.1701143, 1.24720215] + +# PPSS +# [0.89680465, 1.14352656, 1.20982646, 0.99269248, +# 1.17911144, 1.00641032, 1.47017195, 1.16447113] + +# Pascal +# [0.82877791, 0.95688253, 0.94921949, 1.00538108, 1.0201687, 1.01665831, 1.05470914] + +# Lip +# [0.7602572, 0.94236198, 0.85644457, 1.04346266, 1.10627293, 0.80980162, +# 0.95168713, 0.8403769, 1.05798412, 0.85746254, 1.01274366, 1.05854692, +# 1.03430773, 0.84867818, 0.88027721, 0.87580925, 0.98747462, 0.9876475, +# 1.00016535, 1.00108882] + +class ABRLovaszLoss(nn.Module): + """Lovasz loss for Alpha process""" + + def __init__(self, ignore_index=None, only_present=True): + super(ABRLovaszLoss, self).__init__() + self.ignore_index = ignore_index + self.only_present = only_present + # self.weight = torch.FloatTensor([0.80777327, 1.00125961, 0.90997236, 1.10867908, 1.17541499, + # 0.86041422, 1.01116758, 0.89290045, 1.12410812, 0.91105395, + # 1.07604013, 1.12470610, 1.09895196, 0.90172057, 0.93529453, + # 0.93054733, 1.04919178, 1.04937547, 1.06267568, 1.06365688]) + self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index) + + def forward(self, preds, targets): + h, w = targets[0].size(1), targets[0].size(2) + # seg loss + pred = F.interpolate(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) + pred = F.softmax(input=pred, dim=1) + loss = lovasz_softmax_flat(*flatten_probas(pred, targets[0], self.ignore_index), only_present=self.only_present) + + # dsn loss + pred_dsn = F.interpolate(input=preds[-1], size=(h, w), mode='bilinear', align_corners=True) + loss_dsn = self.criterion(pred_dsn, targets[0]) + return loss + 0.4 * loss_dsn + + +class SegmentationLoss(nn.Module): + """Lovasz loss for Alpha process""" + + def __init__(self, ignore_index=None, only_present=True): + super(SegmentationLoss, self).__init__() + self.ignore_index = ignore_index + self.only_present = only_present + + self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index) + + def forward(self, preds, targets): + h, w = targets.size(1), targets.size(2) + # seg loss + pred = F.interpolate(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) + loss_ce = self.criterion(pred, targets) + + # dsn loss + pred_dsn = F.interpolate(input=preds[-1], size=(h, w), mode='bilinear', align_corners=True) + loss_dsn = self.criterion(pred_dsn, targets) + total_loss = loss_ce + 0.4 * loss_dsn + + return total_loss + + +class ABRLovaszCELoss(nn.Module): + """Lovasz loss for Alpha process""" + + def __init__(self, ignore_index=None, only_present=True): + super(ABRLovaszCELoss, self).__init__() + self.ignore_index = ignore_index + self.only_present = only_present + + self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index) + + def forward(self, preds, targets): + h, w = targets.size(1), targets.size(2) + # seg loss + pred = F.interpolate(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) + loss_ce = self.criterion(pred, targets) + + pred = F.softmax(input=pred, dim=1) + loss = lovasz_softmax_flat(*flatten_probas(pred, targets, self.ignore_index), + only_present=self.only_present) + + # dsn loss + pred_dsn = F.interpolate(input=preds[-1], size=(h, w), mode='bilinear', 
align_corners=True) + loss_dsn = self.criterion(pred_dsn, targets) + total_loss = loss_ce + loss + 0.4 * loss_dsn + + return total_loss + + +class LovaszSoftmaxLoss(nn.Module): + """Lovasz loss for Deep Supervision""" + + def __init__(self, ignore_index=None, only_present=False, per_image=False): + super(LovaszSoftmaxLoss, self).__init__() + self.ignore_index = ignore_index + self.only_present = only_present + self.per_image = per_image + self.weight = torch.FloatTensor([0.80777327, 1.00125961, 0.90997236, 1.10867908, 1.17541499, + 0.86041422, 1.01116758, 0.89290045, 1.12410812, 0.91105395, + 1.07604013, 1.12470610, 1.09895196, 0.90172057, 0.93529453, + 0.93054733, 1.04919178, 1.04937547, 1.06267568, 1.06365688]) + self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index, weight=self.weight) + + def forward(self, preds, targets): + h, w = targets.size(1), targets.size(2) + # seg loss + pred = F.interpolate(input=preds[0], size=(h, w), mode='bilinear', align_corners=True) + pred = F.softmax(input=pred, dim=1) + if self.per_image: + loss = mean(lovasz_softmax_flat(*flatten_probas(pre.unsqueeze(0), tar.unsqueeze(0), self.ignore_index), + only_present=self.only_present) for pre, tar in zip(pred, targets)) + else: + loss = lovasz_softmax_flat(*flatten_probas(pred, targets, self.ignore_index), + only_present=self.only_present) + # dsn loss + pred_dsn = F.interpolate(input=preds[1], size=(h, w), mode='bilinear', align_corners=True) + loss_dsn = self.criterion(pred_dsn, targets) + return loss + 0.4 * loss_dsn + + +def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=None): + """ + Multi-class Lovasz-Softmax loss + probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). + Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + per_image: compute the loss per image instead of per batch + ignore: void class labels + """ + if per_image: + loss = mean( + lovasz_softmax_flat_ori(*flatten_probas_ori(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes) + for prob, lab in zip(probas, labels)) + else: + loss = lovasz_softmax_flat_ori(*flatten_probas_ori(probas, labels, ignore), classes=classes) + return loss + + +def lovasz_softmax_flat_ori(probas, labels, classes='present'): + """ + Multi-class Lovasz-Softmax loss + probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) + labels: [P] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0. 
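+    # What follows is the standard Lovasz-Softmax surrogate: for each class c,
+    # take the absolute errors |fg - p_c|, sort them in decreasing order, and
+    # dot them with the gradient of the Lovasz extension of the sorted
+    # ground-truth indicator (lovasz_grad below), giving a per-class IoU surrogate.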
+ C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes is 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (Variable(fg) - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) + return mean(losses) + + +def flatten_probas_ori(probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + B, C, H, W = probas.size() + probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = (labels != ignore) + vprobas = probas[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobas, vlabels + + +def lovasz_softmax_flat(preds, targets, only_present=False): + """ + Multi-class Lovasz-Softmax loss + :param preds: [P, C] Variable, class probabilities at each prediction (between 0 and 1) + :param targets: [P] Tensor, ground truth labels (between 0 and C - 1) + :param only_present: average only on classes present in ground truth + """ + if preds.numel() == 0: + # only void pixels, the gradients should be 0 + return preds * 0. + + C = preds.size(1) + losses = [] + for c in range(C): + fg = (targets == c).float() # foreground for class c + if only_present and fg.sum() == 0: + continue + errors = (Variable(fg) - preds[:, c]).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) + return mean(losses) + + +def lovasz_grad(gt_sorted): + """ + Computes gradient of the Lovasz extension w.r.t sorted errors + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. 
- intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def flatten_probas(preds, targets, ignore=None): + """ + Flattens predictions in the batch + """ + B, C, H, W = preds.size() + preds = preds.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C + targets = targets.view(-1) + if ignore is None: + return preds, targets + valid = (targets != ignore) + vprobas = preds[valid.nonzero().squeeze()] + vlabels = targets[valid] + return vprobas, vlabels + + +# --------------------------- BINARY LOSSES --------------------------- + + +def lovasz_hinge(logits, labels, per_image=True, ignore=None): + """ + Binary Lovasz hinge loss + logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + per_image: compute the loss per image instead of per batch + ignore: void class id + """ + if per_image: + loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) + for log, lab in zip(logits, labels)) + else: + loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) + return loss + + +def lovasz_hinge_flat(logits, labels): + """ + Binary Lovasz hinge loss + logits: [P] Variable, logits at each prediction (between -\infty and +\infty) + labels: [P] Tensor, binary ground truth labels (0 or 1) + ignore: label to ignore + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. - logits * Variable(signs)) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), Variable(grad)) + return loss + + +def flatten_binary_scores(scores, labels, ignore=None): + """ + Flattens predictions in the batch (binary case) + Remove labels equal to 'ignore' + """ + scores = scores.view(-1) + labels = labels.view(-1) + if ignore is None: + return scores, labels + valid = (labels != ignore) + vscores = scores[valid] + vlabels = labels[valid] + return vscores, vlabels + + +def mean(l, ignore_nan=True, empty=0): + """ + nan mean compatible with generators. 
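+    Skips NaN entries when ignore_nan is True and returns `empty` (0 by default)
+    for an empty input, unless empty == 'raise', in which case a ValueError is raised.
+    Illustrative example:
+        >>> mean([1.0, float('nan'), 3.0])
+        2.0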
+ """ + l = iter(l) + if ignore_nan: + l = ifilterfalse(isnan, l) + try: + n = 1 + acc = next(l) + except StopIteration: + if empty == 'raise': + raise ValueError('Empty mean') + return empty + for n, v in enumerate(l, 2): + acc += v + if n == 1: + return acc + return acc / n + + +def isnan(x): + return x != x + + +class AAF_Loss(nn.Module): + """ + Loss function for multiple outputs + """ + + def __init__(self, ignore_index=255, num_classes=7): + super(AAF_Loss, self).__init__() + self.ignore_index = ignore_index + self.num_classes = num_classes + self.kld_margin = 3.0 + self.kld_lambda_1 = 1.0 + self.kld_lambda_2 = 1.0 + # self.dec = 1e-3 + self.dec = 1e-2 + self.softmax = nn.Softmax(dim=1) + self.w_edge = torch.zeros(1, 1, 1, self.num_classes, 1, 3) + self.w_edge_softmax = nn.Softmax(dim=-1) + self.w_not_edge = torch.zeros(1, 1, 1, self.num_classes, 1, 3) + self.w_not_edge_softmax = nn.Softmax(dim=-1) + + def forward(self, preds, targets): + h, w = targets.size(1), targets.size(2) + # seg loss + pred = F.interpolate(input=preds, size=(h, w), mode='bilinear', align_corners=True) + pred = F.softmax(input=pred, dim=1) + + # aaf loss + labels = targets.unsqueeze(1) + one_label = labels.clone() + one_label[labels == self.ignore_index] = 0 + # one_hot_lab = F.one_hot(one_label, num_classes=self.num_classes) + + one_hot_lab = torch.zeros(one_label.size(0), self.num_classes, one_label.size(2), one_label.size(3)).cuda() + one_hot_lab = one_hot_lab.scatter_(1, one_label.data, 1) + + targets_p_node_list = list(torch.split(one_hot_lab, 1, dim=1)) + for i in range(self.num_classes): + # Log.info('{} {}'.format(targets_p_node_list[i].shape, labels.shape)) + targets_p_node_list[i] = targets_p_node_list[i].squeeze(-1) + targets_p_node_list[i][labels == self.ignore_index] = self.ignore_index + one_hot_lab = torch.cat(targets_p_node_list, dim=1).permute(0, 2, 3, 1) + + prob = pred + w_edge = self.w_edge_softmax(self.w_edge).cuda() + w_not_edge = self.w_not_edge_softmax(self.w_not_edge).cuda() + # Log.info('{} {} {} {}'.format(one_hot_lab.shape, labels.shape, w_edge.shape, w_not_edge.shape)) + + # w_edge_shape=list(w_edge.shape) + # Apply AAF on 3x3 patch. + eloss_1, neloss_1 = lossx.adaptive_affinity_loss(labels, + one_hot_lab, + prob, + 1, + self.num_classes, + self.kld_margin, + w_edge[..., 0], + w_not_edge[..., 0]) + # Apply AAF on 5x5 patch. + # eloss_2, neloss_2 = lossx.adaptive_affinity_loss(labels, + # one_hot_lab, + # prob, + # 2, + # self.num_classes, + # self.kld_margin, + # w_edge[..., 1], + # w_not_edge[..., 1]) + # # Apply AAF on 7x7 patch. 
+ # eloss_3, neloss_3 = lossx.adaptive_affinity_loss(labels, + # one_hot_lab, + # prob, + # 3, + # self.num_classes, + # self.kld_margin, + # w_edge[..., 2], + # w_not_edge[..., 2]) + dec = self.dec + aaf_loss = torch.mean(eloss_1) * self.kld_lambda_1 * dec + # aaf_loss += torch.mean(eloss_2) * self.kld_lambda_1*dec + # aaf_loss += torch.mean(eloss_3) * self.kld_lambda_1*dec + aaf_loss += torch.mean(neloss_1) * self.kld_lambda_2 * dec + # aaf_loss += torch.mean(neloss_2) * self.kld_lambda_2*dec + # aaf_loss += torch.mean(neloss_3) * self.kld_lambda_2*dec + + return aaf_loss \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/rmi_loss.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/rmi_loss.py new file mode 100644 index 0000000..e1a2f3d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/loss/rmi_loss.py @@ -0,0 +1,402 @@ +# coding=utf-8 + +""" +The implementation of the paper: +Region Mutual Information Loss for Semantic Segmentation. +""" + +# python 2.X, 3.X compatibility +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import + +import pdb +import torch +import torch.nn as nn +import torch.nn.functional as F + +__all__ = ['RMILoss'] + +TORCH_VERSION = torch.__version__[:3] + +_euler_num = 2.718281828 # euler number +_pi = 3.14159265 # pi +_ln_2_pi = 1.837877 # ln(2 * pi) +_CLIP_MIN = 1e-6 # min clip value after softmax or sigmoid operations +_CLIP_MAX = 1.0 # max clip value after softmax or sigmoid operations +_POS_ALPHA = 1e-3 # add this factor to ensure the AA^T is positive definite +_IS_SUM = 1 # sum the loss per channel + + +def map_get_pairs(labels_4D, probs_4D, radius=3, is_combine=True): + """get map pairs + Args: + labels_4D : labels, shape [N, C, H, W] + probs_4D : probabilities, shape [N, C, H, W] + radius : the square radius + Return: + tensor with shape [N, C, radius * radius, H - (radius - 1), W - (radius - 1)] + """ + # pad to ensure the following slice operation is valid + # pad_beg = int(radius // 2) + # pad_end = radius - pad_beg + + # the original height and width + label_shape = labels_4D.size() + h, w = label_shape[2], label_shape[3] + new_h, new_w = h - (radius - 1), w - (radius - 1) + # https://pytorch.org/docs/stable/nn.html?highlight=f%20pad#torch.nn.functional.pad + # padding = (pad_beg, pad_end, pad_beg, pad_end) + # labels_4D, probs_4D = F.pad(labels_4D, padding), F.pad(probs_4D, padding) + + # get the neighbors + la_ns = [] + pr_ns = [] + # for x in range(0, radius, 1): + for y in range(0, radius, 1): + for x in range(0, radius, 1): + la_now = labels_4D[:, :, y:y + new_h, x:x + new_w] + pr_now = probs_4D[:, :, y:y + new_h, x:x + new_w] + la_ns.append(la_now) + pr_ns.append(pr_now) + + if is_combine: + # for calculating RMI + pair_ns = la_ns + pr_ns + p_vectors = torch.stack(pair_ns, dim=2) + return p_vectors + else: + # for other purpose + la_vectors = torch.stack(la_ns, dim=2) + pr_vectors = torch.stack(pr_ns, dim=2) + return la_vectors, pr_vectors + + +def map_get_pairs_region(labels_4D, probs_4D, radius=3, is_combine=0, num_classeses=21): + """get map pairs + Args: + labels_4D : labels, shape [N, C, H, W]. + probs_4D : probabilities, shape [N, C, H, W]. + radius : The side length of the square region. 
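+        is_combine : if truthy, stack the label and probability neighbourhoods
+            into a single tensor (as in map_get_pairs); otherwise return them
+            as a (label, probability) pair.
+        num_classeses : number of classes; used as the group count of the
+            depthwise convolution that extracts each neighbour position.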
+ Return: + A tensor with shape [N, C, radiu * radius, H // radius, W // raidius] + """ + kernel = torch.zeros([num_classeses, 1, radius, radius]).type_as(probs_4D) + padding = radius // 2 + # get the neighbours + la_ns = [] + pr_ns = [] + for y in range(0, radius, 1): + for x in range(0, radius, 1): + kernel_now = kernel.clone() + kernel_now[:, :, y, x] = 1.0 + la_now = F.conv2d(labels_4D, kernel_now, stride=radius, padding=padding, groups=num_classeses) + pr_now = F.conv2d(probs_4D, kernel_now, stride=radius, padding=padding, groups=num_classeses) + la_ns.append(la_now) + pr_ns.append(pr_now) + + if is_combine: + # for calculating RMI + pair_ns = la_ns + pr_ns + p_vectors = torch.stack(pair_ns, dim=2) + return p_vectors + else: + # for other purpose + la_vectors = torch.stack(la_ns, dim=2) + pr_vectors = torch.stack(pr_ns, dim=2) + return la_vectors, pr_vectors + return + + +def log_det_by_cholesky(matrix): + """ + Args: + matrix: matrix must be a positive define matrix. + shape [N, C, D, D]. + Ref: + https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/ops/linalg/linalg_impl.py + """ + # This uses the property that the log det(A) = 2 * sum(log(real(diag(C)))) + # where C is the cholesky decomposition of A. + chol = torch.cholesky(matrix) + # return 2.0 * torch.sum(torch.log(torch.diagonal(chol, dim1=-2, dim2=-1) + 1e-6), dim=-1) + return 2.0 * torch.sum(torch.log(torch.diagonal(chol, dim1=-2, dim2=-1) + 1e-8), dim=-1) + + +def batch_cholesky_inverse(matrix): + """ + Args: matrix, 4-D tensor, [N, C, M, M]. + matrix must be a symmetric positive define matrix. + """ + chol_low = torch.cholesky(matrix, upper=False) + chol_low_inv = batch_low_tri_inv(chol_low) + return torch.matmul(chol_low_inv.transpose(-2, -1), chol_low_inv) + + +def batch_low_tri_inv(L): + """ + Batched inverse of lower triangular matrices + Args: + L : a lower triangular matrix + Ref: + https://www.pugetsystems.com/labs/hpc/PyTorch-for-Scientific-Computing + """ + n = L.shape[-1] + invL = torch.zeros_like(L) + for j in range(0, n): + invL[..., j, j] = 1.0 / L[..., j, j] + for i in range(j + 1, n): + S = 0.0 + for k in range(0, i + 1): + S = S - L[..., i, k] * invL[..., k, j].clone() + invL[..., i, j] = S / L[..., i, i] + return invL + + +def log_det_by_cholesky_test(): + """ + test for function log_det_by_cholesky() + """ + a = torch.randn(1, 4, 4) + a = torch.matmul(a, a.transpose(2, 1)) + print(a) + res_1 = torch.logdet(torch.squeeze(a)) + res_2 = log_det_by_cholesky(a) + print(res_1, res_2) + + +def batch_inv_test(): + """ + test for function batch_cholesky_inverse() + """ + a = torch.randn(1, 1, 4, 4) + a = torch.matmul(a, a.transpose(-2, -1)) + print(a) + res_1 = torch.inverse(a) + res_2 = batch_cholesky_inverse(a) + print(res_1, '\n', res_2) + + +def mean_var_test(): + x = torch.randn(3, 4) + y = torch.randn(3, 4) + + x_mean = x.mean(dim=1, keepdim=True) + x_sum = x.sum(dim=1, keepdim=True) / 2.0 + y_mean = y.mean(dim=1, keepdim=True) + y_sum = y.sum(dim=1, keepdim=True) / 2.0 + + x_var_1 = torch.matmul(x - x_mean, (x - x_mean).t()) + x_var_2 = torch.matmul(x, x.t()) - torch.matmul(x_sum, x_sum.t()) + xy_cov = torch.matmul(x - x_mean, (y - y_mean).t()) + xy_cov_1 = torch.matmul(x, y.t()) - x_sum.matmul(y_sum.t()) + + print(x_var_1) + print(x_var_2) + + print(xy_cov, '\n', xy_cov_1) + + +class RMILoss(nn.Module): + """ + region mutual information + I(A, B) = H(A) + H(B) - H(A, B) + This version need a lot of memory if do not dwonsample. 
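+    Downsampling is controlled by the 'rmi_pool_size' / 'rmi_pool_stride' /
+    'rmi_pool_way' entries of the configer 'loss'->'params' section read in
+    __init__, together with 'use_sigmoid', 'num_classes', 'rmi_radius',
+    'loss_weight_lambda', 'loss_weight' and 'lambda_way'.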
+ """ + + def __init__(self, + configer=None): + super(RMILoss, self).__init__() + self.configer = configer + self.use_sigmoid = self.configer.get('loss', 'params')['use_sigmoid'] + self.num_classes = self.configer.get('loss', 'params')['num_classes'] + # radius choices + self.rmi_radius = self.configer.get('loss', 'params')['rmi_radius'] + assert self.rmi_radius in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + self.rmi_pool_way = self.configer.get('loss', 'params')['rmi_pool_way'] + assert self.rmi_pool_way in [0, 1, 2, 3] + + # set the pool_size = rmi_pool_stride + self.rmi_pool_size = self.configer.get('loss', 'params')['rmi_pool_size'] + self.rmi_pool_stride = self.configer.get('loss', 'params')['rmi_pool_stride'] + assert self.rmi_pool_size == self.rmi_pool_stride + + self.weight_lambda = self.configer.get('loss', 'params')['loss_weight_lambda'] + self.loss_weight = self.configer.get('loss', 'params')['loss_weight'] + self.lambda_way = self.configer.get('loss', 'params')['lambda_way'] + + # dimension of the distribution + self.half_d = self.rmi_radius * self.rmi_radius + self.d = 2 * self.half_d + self.kernel_padding = self.rmi_pool_size // 2 + # ignore class + self.ignore_index = 255 + + def forward(self, + cls_score, + label, + weight=None, + **kwargs): + label[label < 0] = 255 + loss = self.loss_weight * self.forward_sigmoid(cls_score, label) + label[label == 255] = -1 + # loss = self.forward_softmax_sigmoid(cls_score, label) + return loss + + def forward_softmax_sigmoid(self, logits_4D, labels_4D): + """ + Using both softmax and sigmoid operations. + Args: + logits_4D : [N, C, H, W], dtype=float32 + labels_4D : [N, H, W], dtype=long + """ + # PART I -- get the normal cross entropy loss + print( + "max label: {} min label: {}".format(labels_4D[labels_4D != 255].max(), labels_4D[labels_4D != 255].min())) + normal_loss = F.cross_entropy(input=logits_4D, + target=labels_4D.long(), + ignore_index=self.ignore_index, + reduction='mean') + + # PART II -- get the lower bound of the region mutual information + # get the valid label and logits + # valid label, [N, C, H, W] + label_mask_3D = labels_4D < self.num_classes + valid_onehot_labels_4D = F.one_hot(labels_4D.long() * label_mask_3D.long(), + num_classes=self.num_classes).float() + label_mask_3D = label_mask_3D.float() + valid_onehot_labels_4D = valid_onehot_labels_4D * label_mask_3D.unsqueeze(dim=3) + valid_onehot_labels_4D = valid_onehot_labels_4D.permute(0, 3, 1, 2).requires_grad_(False) + # valid probs + probs_4D = F.sigmoid(logits_4D) * label_mask_3D.unsqueeze(dim=1) + probs_4D = probs_4D.clamp(min=_CLIP_MIN, max=_CLIP_MAX) + + # get region mutual information + rmi_loss = self.rmi_lower_bound(valid_onehot_labels_4D, probs_4D) + + # add together + final_loss = (self.weight_lambda * normal_loss + rmi_loss * (1 - self.weight_lambda) if self.lambda_way + else normal_loss + rmi_loss * self.weight_lambda) + + return final_loss + + def forward_sigmoid(self, logits_4D, labels_4D): + """ + Using the sigmiod operation both. 
+ Args: + logits_4D : [N, C, H, W], dtype=float32 + labels_4D : [N, H, W], dtype=long + """ + # label mask -- [N, H, W, 1] + label_mask_3D = labels_4D < self.num_classes + + # valid label + valid_onehot_labels_4D = F.one_hot(labels_4D.long() * label_mask_3D.long(), + num_classes=self.num_classes).float() + label_mask_3D = label_mask_3D.float() + label_mask_flat = label_mask_3D.view([-1, ]) + valid_onehot_labels_4D = valid_onehot_labels_4D * label_mask_3D.unsqueeze(dim=3) + valid_onehot_labels_4D.requires_grad_(False) + + # PART I -- calculate the sigmoid binary cross entropy loss + valid_onehot_label_flat = valid_onehot_labels_4D.view([-1, self.num_classes]).requires_grad_(False) + logits_flat = logits_4D.permute(0, 2, 3, 1).contiguous().view([-1, self.num_classes]) + + # binary loss, multiplied by the not_ignore_mask + valid_pixels = torch.sum(label_mask_flat) + binary_loss = F.binary_cross_entropy_with_logits(logits_flat, + target=valid_onehot_label_flat, + weight=label_mask_flat.unsqueeze(dim=1), + reduction='sum') + bce_loss = torch.div(binary_loss, valid_pixels + 1.0) + + # PART II -- get rmi loss + # onehot_labels_4D -- [N, C, H, W] + probs_4D = logits_4D.sigmoid() * label_mask_3D.unsqueeze(dim=1) + _CLIP_MIN + valid_onehot_labels_4D = valid_onehot_labels_4D.permute(0, 3, 1, 2).requires_grad_(False) + + # get region mutual information + rmi_loss = self.rmi_lower_bound(valid_onehot_labels_4D, probs_4D) + + # add together + final_loss = (self.weight_lambda * bce_loss + rmi_loss * (1 - self.weight_lambda) if self.lambda_way + else bce_loss + rmi_loss * self.weight_lambda) + + return final_loss + + def rmi_lower_bound(self, labels_4D, probs_4D): + """ + calculate the lower bound of the region mutual information. + Args: + labels_4D : [N, C, H, W], dtype=float32 + probs_4D : [N, C, H, W], dtype=float32 + """ + assert labels_4D.size() == probs_4D.size() + + p, s = self.rmi_pool_size, self.rmi_pool_stride + if self.rmi_pool_stride > 1: + if self.rmi_pool_way == 0: + labels_4D = F.max_pool2d(labels_4D, kernel_size=p, stride=s, padding=self.kernel_padding) + probs_4D = F.max_pool2d(probs_4D, kernel_size=p, stride=s, padding=self.kernel_padding) + elif self.rmi_pool_way == 1: + labels_4D = F.avg_pool2d(labels_4D, kernel_size=p, stride=s, padding=self.kernel_padding) + probs_4D = F.avg_pool2d(probs_4D, kernel_size=p, stride=s, padding=self.kernel_padding) + elif self.rmi_pool_way == 2: + # interpolation + shape = labels_4D.size() + new_h, new_w = shape[2] // s, shape[3] // s + labels_4D = F.interpolate(labels_4D, size=(new_h, new_w), mode='nearest') + probs_4D = F.interpolate(probs_4D, size=(new_h, new_w), mode='bilinear', align_corners=True) + else: + raise NotImplementedError("Pool way of RMI is not defined!") + # we do not need the gradient of label. + label_shape = labels_4D.size() + n, c = label_shape[0], label_shape[1] + + # combine the high dimension points from label and probability map. 
new shape [N, C, radius * radius, H, W] + la_vectors, pr_vectors = map_get_pairs(labels_4D, probs_4D, radius=self.rmi_radius, is_combine=0) + + la_vectors = la_vectors.view([n, c, self.half_d, -1]).type(torch.cuda.DoubleTensor).requires_grad_(False) + pr_vectors = pr_vectors.view([n, c, self.half_d, -1]).type(torch.cuda.DoubleTensor) + + # small diagonal matrix, shape = [1, 1, radius * radius, radius * radius] + diag_matrix = torch.eye(self.half_d).unsqueeze(dim=0).unsqueeze(dim=0) + + # the mean and covariance of these high dimension points + # Var(X) = E(X^2) - E(X) E(X), N * Var(X) = X^2 - X E(X) + la_vectors = la_vectors - la_vectors.mean(dim=3, keepdim=True) + la_cov = torch.matmul(la_vectors, la_vectors.transpose(2, 3)) + + pr_vectors = pr_vectors - pr_vectors.mean(dim=3, keepdim=True) + pr_cov = torch.matmul(pr_vectors, pr_vectors.transpose(2, 3)) + # https://github.com/pytorch/pytorch/issues/7500 + # waiting for batched torch.cholesky_inverse() + pr_cov_inv = torch.inverse(pr_cov + diag_matrix.type_as(pr_cov) * _POS_ALPHA) + # if the dimension of the point is less than 9, you can use the below function + # to acceleration computational speed. + # pr_cov_inv = utils.batch_cholesky_inverse(pr_cov + diag_matrix.type_as(pr_cov) * _POS_ALPHA) + + la_pr_cov = torch.matmul(la_vectors, pr_vectors.transpose(2, 3)) + # the approxiamation of the variance, det(c A) = c^n det(A), A is in n x n shape; + # then log det(c A) = n log(c) + log det(A). + # appro_var = appro_var / n_points, we do not divide the appro_var by number of points here, + # and the purpose is to avoid underflow issue. + # If A = A^T, A^-1 = (A^-1)^T. + appro_var = la_cov - torch.matmul(la_pr_cov.matmul(pr_cov_inv), la_pr_cov.transpose(-2, -1)) + # appro_var = la_cov - torch.chain_matmul(la_pr_cov, pr_cov_inv, la_pr_cov.transpose(-2, -1)) + # appro_var = torch.div(appro_var, n_points.type_as(appro_var)) + diag_matrix.type_as(appro_var) * 1e-6 + + # The lower bound. If A is nonsingular, ln( det(A) ) = Tr( ln(A) ). + rmi_now = 0.5 * log_det_by_cholesky(appro_var + diag_matrix.type_as(appro_var) * _POS_ALPHA) + # rmi_now = 0.5 * torch.logdet(appro_var + diag_matrix.type_as(appro_var) * _POS_ALPHA) + + # mean over N samples. sum over classes. 
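+        # rmi_now above holds 0.5 * log det of the (unnormalised) conditional
+        # covariance of the label vectors given the prediction vectors; it is
+        # averaged over the batch, normalised by the point dimension half_d,
+        # and then summed or averaged over classes depending on _IS_SUM.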
+ rmi_per_class = rmi_now.view([-1, self.num_classes]).mean(dim=0).float() + # is_half = False + # if is_half: + # rmi_per_class = torch.div(rmi_per_class, float(self.half_d / 2.0)) + # else: + rmi_per_class = torch.div(rmi_per_class, float(self.half_d)) + + rmi_loss = torch.sum(rmi_per_class) if _IS_SUM else torch.mean(rmi_per_class) + return rmi_loss diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/F1_running_score.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/F1_running_score.py new file mode 100644 index 0000000..ca1fea9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/F1_running_score.py @@ -0,0 +1,259 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: JingyiXie, RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## Code adapted from: +## https://github.com/nv-tlabs/GSCNN/blob/master/utils/f_boundary.py +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb +import numpy as np +import torch +from multiprocessing.pool import Pool + + +class F1RunningScore(object): + + def __init__(self, configer=None, num_classes=None, boundary_threshold=0.00088, num_proc=15): + + assert configer is not None or num_classes is not None + self.configer = configer + + if configer is not None: + self.n_classes = self.configer.get('data', 'num_classes') + else: + self.n_classes = num_classes + + self.ignore_index = -1 + self.boundary_threshold = boundary_threshold + self.pool = Pool(processes=num_proc) + self.num_proc = num_proc + + self._Fpc = 0 + self._Fc = 0 + self.seg_map_cache = [] + self.gt_map_cache = [] + + def _update_cache(self, seg_map, gt_map): + """ + Append inputs to `seg_map_cache` and `gt_map_cache`. + + Returns whether the length reached our pool size. 
+ """ + self.seg_map_cache.extend(seg_map) + self.gt_map_cache.extend(gt_map) + return len(self.gt_map_cache) >= self.num_proc + + def _get_from_cache(self): + + n = self.num_proc + seg_map, self.seg_map_cache = self.seg_map_cache[:n], self.seg_map_cache[n:] + gt_map, self.gt_map_cache = self.gt_map_cache[:n], self.gt_map_cache[n:] + + return seg_map, gt_map + + def update(self, seg_map, gt_map): + + if self._update_cache(seg_map, gt_map): + seg_map, gt_map = self._get_from_cache() + self._update_scores(seg_map, gt_map) + else: + return + + def _update_scores(self, seg_map, gt_map): + batch_size = len(seg_map) + if batch_size == 0: + return + + Fpc = np.zeros(self.n_classes) + Fc = np.zeros(self.n_classes) + + + for class_id in range(self.n_classes): + args = [] + for i in range(batch_size): + if seg_map[i].shape[0] == self.n_classes: + pred_i = seg_map[i][class_id] > 0.5 + pred_is_boundary = True + else: + pred_i = seg_map[i] == class_id + pred_is_boundary = False + + args.append([ + (pred_i).astype(np.uint8), + (gt_map[i] == class_id).astype(np.uint8), + (gt_map[i] == -1), + self.boundary_threshold, + class_id, + pred_is_boundary + ]) + results = self.pool.map(db_eval_boundary, args) + results = np.array(results) + Fs = results[:, 0] + _valid = ~np.isnan(Fs) + Fc[class_id] = np.sum(_valid) + Fs[np.isnan(Fs)] = 0 + Fpc[class_id] = sum(Fs) + + self._Fc = self._Fc + Fc + self._Fpc = self._Fpc + Fpc + + def get_scores(self): + + if self.seg_map_cache is None: + return 0, 0 + + self._update_scores(self.seg_map_cache, self.gt_map_cache) + + F_score = np.sum(self._Fpc / self._Fc) / self.n_classes + F_score_classwise = self._Fpc / self._Fc + + return F_score, F_score_classwise + + def reset(self): + self._Fpc = self._Fc = 0 + + +def db_eval_boundary(args): + """ + Compute mean,recall and decay from per-frame evaluation. + Calculates precision/recall for boundaries between foreground_mask and + gt_mask using morphological operators to speed it up. + + Arguments: + foreground_mask (ndarray): binary segmentation image. + gt_mask (ndarray): binary annotated image. 
+ + Returns: + F (float): boundaries F-measure + P (float): boundaries precision + R (float): boundaries recall + """ + + foreground_mask, gt_mask, ignore_mask, bound_th, class_id, pred_is_boundary = args + + assert np.atleast_3d(foreground_mask).shape[2] == 1 + + bound_pix = bound_th if bound_th >= 1 else \ + np.ceil(bound_th*np.linalg.norm(foreground_mask.shape)) + + # print(bound_pix) + # print(gt.shape) + # print(np.unique(gt)) + foreground_mask[ignore_mask] = 0 + gt_mask[ignore_mask] = 0 + + # Get the pixel boundaries of both masks + if pred_is_boundary: + fg_boundary = foreground_mask + else: + fg_boundary = seg2bmap(foreground_mask) + gt_boundary = seg2bmap(gt_mask) + + from skimage.morphology import disk + from cv2 import dilate + def binary_dilation(x, d): return dilate( + x.astype(np.uint8), d).astype(np.bool) + fg_dil = binary_dilation(fg_boundary, disk(bound_pix)) + gt_dil = binary_dilation(gt_boundary, disk(bound_pix)) + + # Get the intersection + gt_match = gt_boundary * fg_dil + fg_match = fg_boundary * gt_dil + + # Area of the intersection + n_fg = np.sum(fg_boundary) + n_gt = np.sum(gt_boundary) + + # % Compute precision and recall + if n_fg == 0 and n_gt > 0: + precision = 1 + recall = 0 + elif n_fg > 0 and n_gt == 0: + precision = 0 + recall = 1 + elif n_fg == 0 and n_gt == 0: + precision = 1 + recall = 1 + else: + precision = np.sum(fg_match) / float(n_fg) + recall = np.sum(gt_match) / float(n_gt) + + # Compute F measure + if precision + recall == 0: + F = 0 + else: + F = 2 * precision * recall / (precision + recall) + + return F, precision + + +def seg2bmap(seg, width=None, height=None): + """ + From a segmentation, compute a binary boundary map with 1 pixel wide + boundaries. The boundary pixels are offset by 1/2 pixel towards the + origin from the actual segment boundary. + + Arguments: + seg : Segments labeled from 1..k. + width : Width of desired bmap <= seg.shape[1] + height : Height of desired bmap <= seg.shape[0] + + Returns: + bmap (ndarray): Binary boundary map. + + David Martin + January 2003 + """ + + seg = seg.astype(np.bool) + seg[seg > 0] = 1 + + assert np.atleast_3d(seg).shape[2] == 1 + + width = seg.shape[1] if width is None else width + height = seg.shape[0] if height is None else height + + h, w = seg.shape[:2] + + ar1 = float(width) / float(height) + ar2 = float(w) / float(h) + + assert not (width > w | height > h | abs(ar1 - ar2) > 0.01),\ + 'Can''t convert %dx%d seg to %dx%d bmap.' 
% (w, h, width, height) + + e = np.zeros_like(seg) + s = np.zeros_like(seg) + se = np.zeros_like(seg) + + e[:, :-1] = seg[:, 1:] + s[:-1, :] = seg[1:, :] + se[:-1, :-1] = seg[1:, 1:] + + b = seg ^ e | seg ^ s | seg ^ se + b[-1, :] = seg[-1, :] ^ e[-1, :] + b[:, -1] = seg[:, -1] ^ s[:, -1] + b[-1, -1] = 0 + + if w == width and h == height: + bmap = b + else: + bmap = np.zeros((height, width)) + for x in range(w): + for y in range(h): + if b[y, x]: + j = 1 + floor((y - 1) + height / h) + i = 1 + floor((x - 1) + width / h) + bmap[j, i] = 1 + + return bmap \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/ade20k_evaluator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/ade20k_evaluator.py new file mode 100644 index 0000000..7879264 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/ade20k_evaluator.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) + + +import argparse +import os +import pdb + +import numpy as np + +from lib.utils.helpers.image_helper import ImageHelper +from lib.utils.tools.logger import Logger as Log +from lib.utils.tools.configer import Configer +from lib.metrics.running_score import RunningScore + + +class ADE20KEvaluator(object): + def __init__(self, configer): + self.configer = configer + self.seg_running_score = RunningScore(configer, ignore_index=255) + + def relabel(self, labelmap): + return (labelmap - 1).astype(np.uint8) + + def evaluate(self, pred_dir, gt_dir): + img_cnt = 0 + for filename in os.listdir(pred_dir): + pred_path = os.path.join(pred_dir, filename) + gt_path = os.path.join(gt_dir, filename) + predmap = ImageHelper.img2np(ImageHelper.read_image(pred_path, tool='pil', mode='P')) + gtmap = ImageHelper.img2np(ImageHelper.read_image(gt_path, tool='pil', mode='P')) + + if "pascal_context" in gt_dir or "ade" in gt_dir or "coco_stuff" in gt_dir: + predmap = self.relabel(predmap) + gtmap = self.relabel(gtmap) + + if "coco_stuff" in gt_dir or "woodscape" in gt_dir: + gtmap[gtmap == 0] = 255 + + self.seg_running_score.update(predmap[np.newaxis, :, :], gtmap[np.newaxis, :, :]) + img_cnt += 1 + + Log.info('Evaluate {} images'.format(img_cnt)) + Log.info('mIOU: {}'.format(self.seg_running_score.get_mean_iou())) + Log.info('Pixel ACC: {}'.format(self.seg_running_score.get_pixel_acc())) + Log.info('Class mIOU: {}'.format(self.seg_running_score.get_cls_iou())) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--configs', default=None, type=str, + dest='configs', help='The configs file of pose.') + parser.add_argument('--gt_dir', default=None, type=str, + dest='gt_dir', help='The groundtruth annotations.') + parser.add_argument('--pred_dir', default=None, type=str, + dest='pred_dir', help='The label dir of predict annotations.') + args = parser.parse_args() + + ade20k_evaluator = ADE20KEvaluator(Configer(configs=args.configs)) + ade20k_evaluator.evaluate(args.pred_dir, args.gt_dir) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/__init__.py 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/addToConfusionMatrix.pyx b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/addToConfusionMatrix.pyx new file mode 100644 index 0000000..a229b54 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/addToConfusionMatrix.pyx @@ -0,0 +1,44 @@ +# cython methods to speed-up evaluation + +import numpy as np +cimport cython +cimport numpy as np +import ctypes + +np.import_array() + +cdef extern from "addToConfusionMatrix_impl.c": + void addToConfusionMatrix( const unsigned char* f_prediction_p , + const unsigned char* f_groundTruth_p , + const unsigned int f_width_i , + const unsigned int f_height_i , + unsigned long long* f_confMatrix_p , + const unsigned int f_confMatDim_i ) + + +cdef tonumpyarray(unsigned long long* data, unsigned long long size): + if not (data and size >= 0): raise ValueError + return np.PyArray_SimpleNewFromData(2, [size, size], np.NPY_UINT64, data) + +@cython.boundscheck(False) +def cEvaluatePair( np.ndarray[np.uint8_t , ndim=2] predictionArr , + np.ndarray[np.uint8_t , ndim=2] groundTruthArr , + np.ndarray[np.uint64_t, ndim=2] confMatrix , + evalLabels ): + cdef np.ndarray[np.uint8_t , ndim=2, mode="c"] predictionArr_c + cdef np.ndarray[np.uint8_t , ndim=2, mode="c"] groundTruthArr_c + cdef np.ndarray[np.ulonglong_t, ndim=2, mode="c"] confMatrix_c + + predictionArr_c = np.ascontiguousarray(predictionArr , dtype=np.uint8 ) + groundTruthArr_c = np.ascontiguousarray(groundTruthArr, dtype=np.uint8 ) + confMatrix_c = np.ascontiguousarray(confMatrix , dtype=np.ulonglong) + + cdef np.uint32_t height_ui = predictionArr.shape[1] + cdef np.uint32_t width_ui = predictionArr.shape[0] + cdef np.uint32_t confMatDim_ui = confMatrix.shape[0] + + addToConfusionMatrix(&predictionArr_c[0,0], &groundTruthArr_c[0,0], height_ui, width_ui, &confMatrix_c[0,0], confMatDim_ui) + + confMatrix = np.ascontiguousarray(tonumpyarray(&confMatrix_c[0,0], confMatDim_ui)) + + return np.copy(confMatrix) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/addToConfusionMatrix_impl.c b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/addToConfusionMatrix_impl.c new file mode 100644 index 0000000..83ac1e3 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/addToConfusionMatrix_impl.c @@ -0,0 +1,17 @@ +// cython methods to speed-up evaluation + +void addToConfusionMatrix( const unsigned char* f_prediction_p , + const unsigned char* f_groundTruth_p , + const unsigned int f_width_i , + const unsigned int f_height_i , + unsigned long long* f_confMatrix_p , + const unsigned int f_confMatDim_i ) +{ + const unsigned int size_ui = f_height_i * f_width_i; + for (unsigned int i = 0; i < size_ui; ++i) + { + const unsigned char predPx = f_prediction_p [i]; + const unsigned char gtPx = f_groundTruth_p[i]; + f_confMatrix_p[f_confMatDim_i*gtPx + predPx] += 1u; + } +} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/csHelpers.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/csHelpers.py new file mode 100644 index 0000000..3665a61 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/csHelpers.py 
@@ -0,0 +1,127 @@ +#!/usr/bin/python +# +# Various helper methods and includes for Cityscapes +# + +import os, sys, getopt +import glob +import math +import json +from collections import namedtuple + +# Image processing +# Check if PIL is actually Pillow as expected +try: + from PIL import PILLOW_VERSION +except: + print("Please install the module 'Pillow' for image processing, e.g.") + print("pip install pillow") + sys.exit(-1) + +try: + import PIL.Image as Image + import PIL.ImageDraw as ImageDraw +except: + print("Failed to import the image processing packages.") + sys.exit(-1) + +# Numpy for datastructures +try: + import numpy as np +except: + print("Failed to import numpy package.") + sys.exit(-1) + +# Cityscapes modules +try: + from lib.metrics.cityscapes.helpers.annotation import Annotation + from lib.metrics.cityscapes.helpers.labels import labels, name2label, id2label, trainId2label, category2labels +except: + print("Failed to find all Cityscapes modules") + sys.exit(-1) + +# Print an error message and quit +def printError(message): + print('ERROR: ' + str(message)) + sys.exit(-1) + +# Class for colors +class colors: + RED = '\033[31;1m' + GREEN = '\033[32;1m' + YELLOW = '\033[33;1m' + BLUE = '\033[34;1m' + MAGENTA = '\033[35;1m' + CYAN = '\033[36;1m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + ENDC = '\033[0m' + +# Colored value output if colorized flag is activated. +def getColorEntry(val, args): + if not args.colorized: + return "" + if not isinstance(val, float) or math.isnan(val): + return colors.ENDC + if (val < .20): + return colors.RED + elif (val < .40): + return colors.YELLOW + elif (val < .60): + return colors.BLUE + elif (val < .80): + return colors.CYAN + else: + return colors.GREEN + +# Cityscapes files have a typical filename structure +# ___[_]. +# This class contains the individual elements as members +# For the sequence and frame number, the strings are returned, including leading zeros +CsFile = namedtuple( 'csFile' , [ 'city' , 'sequenceNb' , 'frameNb' , 'type' , 'type2' , 'ext' ] ) + +# Returns a CsFile object filled from the info in the given filename +def getCsFileInfo(fileName): + baseName = os.path.basename(fileName) + parts = baseName.split('_') + parts = parts[:-1] + parts[-1].split('.') + if not parts: + printError( 'Cannot parse given filename ({}). Does not seem to be a valid Cityscapes file.'.format(fileName) ) + if len(parts) == 5: + csFile = CsFile( *parts[:-1] , type2="" , ext=parts[-1] ) + elif len(parts) == 6: + csFile = CsFile( *parts ) + else: + printError( 'Found {} part(s) in given filename ({}). Expected 5 or 6.'.format(len(parts) , fileName) ) + + return csFile + +# Returns the part of Cityscapes filenames that is common to all data types +# e.g. for city_123456_123456_gtFine_polygons.json returns city_123456_123456 +def getCoreImageFileName(filename): + csFile = getCsFileInfo(filename) + return "{}_{}_{}".format( csFile.city , csFile.sequenceNb , csFile.frameNb ) + +# Returns the directory name for the given filename, e.g. 
+# fileName = "/foo/bar/foobar.txt" +# return value is "bar" +# Not much error checking though +def getDirectory(fileName): + dirName = os.path.dirname(fileName) + return os.path.basename(dirName) + +# Make sure that the given path exists +def ensurePath(path): + if not path: + return + if not os.path.isdir(path): + os.makedirs(path) + +# Write a dictionary as json file +def writeDict2JSON(dictName, fileName): + with open(fileName, 'w') as f: + f.write(json.dumps(dictName, default=lambda o: o.__dict__, sort_keys=True, indent=4)) + +# dummy main +if __name__ == "__main__": + printError("Only for include, not executable on its own.") diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/evalInstanceLevelSemanticLabeling.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/evalInstanceLevelSemanticLabeling.py new file mode 100644 index 0000000..e05de74 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/evalInstanceLevelSemanticLabeling.py @@ -0,0 +1,716 @@ +#!/usr/bin/python +# +# The evaluation script for instance-level img labeling. +# We use this script to evaluate your approach on the test set. +# You can use the script to evaluate on the validation set. +# +# Please check the description of the "getPrediction" method below +# and set the required environment variables as needed, such that +# this script can locate your results. +# If the default implementation of the method works, then it's most likely +# that our evaluation server will be able to process your results as well. +# +# To run this script, make sure that your results contain text files +# (one for each test set image) with the content: +# relPathPrediction1 labelIDPrediction1 confidencePrediction1 +# relPathPrediction2 labelIDPrediction2 confidencePrediction2 +# relPathPrediction3 labelIDPrediction3 confidencePrediction3 +# ... +# +# - The given paths "relPathPrediction" point to images that contain +# binary masks for the described predictions, where any non-zero is +# part of the predicted instance. The paths must not contain spaces, +# must be relative to the root directory and must point to locations +# within the root directory. +# - The label IDs "labelIDPrediction" specify the class of that mask, +# encoded as defined in labels.py. Note that the regular ID is used, +# not the train ID. +# - The field "confidencePrediction" is a float value that assigns a +# confidence score to the mask. +# +# Note that this tool creates a file named "gtInstances.json" during its +# first run. This file helps to speed up computation and should be deleted +# whenever anything changes in the ground truth annotations or anything +# goes wrong. + +# python imports +from __future__ import print_function +import os, sys +import fnmatch +from copy import deepcopy + +# Cityscapes imports +sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' , 'helpers' ) ) ) +from csHelpers import * +from instances2dict import instances2dict + + +################################### +# PLEASE READ THESE INSTRUCTIONS!!! +################################### +# Provide the prediction file for the given ground truth file. +# Please read the instructions above for a description of +# the result file. +# +# The current implementation expects the results to be in a certain root folder. 
+# This folder is one of the following with decreasing priority: +# - environment variable CITYSCAPES_RESULTS +# - environment variable CITYSCAPES_DATASET/results +# - ../../results/" +# (Remember to set the variables using "export CITYSCAPES_=".) +# +# Within the root folder, a matching prediction file is recursively searched. +# A file matches, if the filename follows the pattern +# _123456_123456*.txt +# for a ground truth filename +# _123456_123456_gtFine_instanceIds.png +def getPrediction( groundTruthFile , args ): + # determine the prediction path, if the method is first called + if not args.predictionPath: + rootPath = None + if 'CITYSCAPES_RESULTS' in os.environ: + rootPath = os.environ['CITYSCAPES_RESULTS'] + elif 'CITYSCAPES_DATASET' in os.environ: + rootPath = os.path.join( os.environ['CITYSCAPES_DATASET'] , "results" ) + else: + rootPath = os.path.join(os.path.dirname(os.path.realpath(__file__)),'..','..','results') + + if not os.path.isdir(rootPath): + printError("Could not find a result root folder. Please read the instructions of this method.") + + args.predictionPath = os.path.abspath(rootPath) + + # walk the prediction path, if not happened yet + if not args.predictionWalk: + walk = [] + for root, dirnames, filenames in os.walk(args.predictionPath): + walk.append( (root,filenames) ) + args.predictionWalk = walk + + csFile = getCsFileInfo(groundTruthFile) + filePattern = "{}_{}_{}*.txt".format( csFile.city , csFile.sequenceNb , csFile.frameNb ) + + predictionFile = None + for root, filenames in args.predictionWalk: + for filename in fnmatch.filter(filenames, filePattern): + if not predictionFile: + predictionFile = os.path.join(root, filename) + else: + printError("Found multiple predictions for ground truth {}".format(groundTruthFile)) + + if not predictionFile: + printError("Found no prediction for ground truth {}".format(groundTruthFile)) + + return predictionFile + + +###################### +# Parameters +###################### + + +# A dummy class to collect all bunch of data +class CArgs(object): + pass +# And a global object of that class +args = CArgs() + +# Where to look for Cityscapes +if 'CITYSCAPES_DATASET' in os.environ: + args.cityscapesPath = os.environ['CITYSCAPES_DATASET'] +else: + args.cityscapesPath = os.path.join(os.path.dirname(os.path.realpath(__file__)),'..','..') + +# Parameters that should be modified by user +args.exportFile = os.path.join( args.cityscapesPath , "evaluationResults" , "resultInstanceLevelSemanticLabeling.json" ) +args.groundTruthSearch = os.path.join( args.cityscapesPath , "gtFine" , "val" , "*", "*_gtFine_instanceIds.png" ) + +# overlaps for evaluation +args.overlaps = np.arange(0.5,1.,0.05) +# minimum region size for evaluation [pixels] +args.minRegionSizes = np.array( [ 100 , 1000 , 1000 ] ) +# distance thresholds [m] +args.distanceThs = np.array( [ float('inf') , 100 , 50 ] ) +# distance confidences +args.distanceConfs = np.array( [ -float('inf') , 0.5 , 0.5 ] ) + +args.gtInstancesFile = os.path.join(os.path.dirname(os.path.realpath(__file__)),'gtInstances.json') +args.distanceAvailable = False +args.JSONOutput = True +args.quiet = False +args.csv = False +args.colorized = True +args.instLabels = [] + +# store some parameters for finding predictions in the args variable +# the values are filled when the method getPrediction is first called +args.predictionPath = None +args.predictionWalk = None + + +# Determine the labels that have instances +def setInstanceLabels(args): + args.instLabels = [] + for label in labels: + if 
label.hasInstances and not label.ignoreInEval: + args.instLabels.append(label.name) + +# Read prediction info +# imgFile, predId, confidence +def readPredInfo(predInfoFileName,args): + predInfo = {} + if (not os.path.isfile(predInfoFileName)): + printError("Infofile '{}' for the predictions not found.".format(predInfoFileName)) + with open(predInfoFileName, 'r') as f: + for line in f: + splittedLine = line.split(" ") + if len(splittedLine) != 3: + printError( "Invalid prediction file. Expected content: relPathPrediction1 labelIDPrediction1 confidencePrediction1" ) + if os.path.isabs(splittedLine[0]): + printError( "Invalid prediction file. First entry in each line must be a relative path." ) + + filename = os.path.join( os.path.dirname(predInfoFileName),splittedLine[0] ) + filename = os.path.abspath( filename ) + + # check if that file is actually somewhere within the prediction root + if os.path.commonprefix( [filename,args.predictionPath] ) != args.predictionPath: + printError( "Predicted mask {} in prediction text file {} points outside of prediction path.".format(filename,predInfoFileName) ) + + imageInfo = {} + imageInfo["labelID"] = int(float(splittedLine[1])) + imageInfo["conf"] = float(splittedLine[2]) + predInfo[filename] = imageInfo + + return predInfo + +# Routine to read ground truth image +def readGTImage(gtImageFileName,args): + return Image.open(gtImageFileName) + +# either read or compute a dictionary of all ground truth instances +def getGtInstances(groundTruthList,args): + gtInstances = {} + # if there is a global statistics json, then load it + if (os.path.isfile(args.gtInstancesFile)): + if not args.quiet: + print("Loading ground truth instances from JSON.") + with open(args.gtInstancesFile) as json_file: + gtInstances = json.load(json_file) + # otherwise create it + else: + if (not args.quiet): + print("Creating ground truth instances from png files.") + gtInstances = instances2dict(groundTruthList,not args.quiet) + writeDict2JSON(gtInstances, args.gtInstancesFile) + + return gtInstances + +# Filter instances, ignore labels without instances +def filterGtInstances(singleImageInstances,args): + instanceDict = {} + for labelName in singleImageInstances: + if not labelName in args.instLabels: + continue + instanceDict[labelName] = singleImageInstances[labelName] + return instanceDict + +# match ground truth instances with predicted instances +def matchGtWithPreds(predictionList,groundTruthList,gtInstances,args): + matches = {} + if not args.quiet: + print("Matching {} pairs of images...".format(len(predictionList))) + + count = 0 + for (pred,gt) in zip(predictionList,groundTruthList): + # key for dicts + dictKey = os.path.abspath(gt) + + # Read input files + gtImage = readGTImage(gt,args) + predInfo = readPredInfo(pred,args) + + # Get and filter ground truth instances + unfilteredInstances = gtInstances[ dictKey ] + curGtInstancesOrig = filterGtInstances(unfilteredInstances,args) + + # Try to assign all predictions + (curGtInstances,curPredInstances) = assignGt2Preds(curGtInstancesOrig, gtImage, predInfo, args) + + # append to global dict + matches[ dictKey ] = {} + matches[ dictKey ]["groundTruth"] = curGtInstances + matches[ dictKey ]["prediction"] = curPredInstances + + count += 1 + if not args.quiet: + print("\rImages Processed: {}".format(count), end=' ') + sys.stdout.flush() + + if not args.quiet: + print("") + + return matches + +# For a given frame, assign all predicted instances to ground truth instances +def assignGt2Preds(gtInstancesOrig, gtImage, predInfo, 
args): + # In this method, we create two lists + # - predInstances: contains all predictions and their associated gt + # - gtInstances: contains all gt instances and their associated predictions + predInstances = {} + predInstCount = 0 + + # Create a prediction array for each class + for label in args.instLabels: + predInstances[label] = [] + + # We already know about the gt instances + # Add the matching information array + gtInstances = deepcopy(gtInstancesOrig) + for label in gtInstances: + for gt in gtInstances[label]: + gt["matchedPred"] = [] + + # Make the gt a numpy array + gtNp = np.array(gtImage) + + # Get a mask of void labels in the groundtruth + voidLabelIDList = [] + for label in labels: + if label.ignoreInEval: + voidLabelIDList.append(label.id) + boolVoid = np.in1d(gtNp, voidLabelIDList).reshape(gtNp.shape) + + # Loop through all prediction masks + for predImageFile in predInfo: + # Additional prediction info + labelID = predInfo[predImageFile]["labelID"] + predConf = predInfo[predImageFile]["conf"] + + # label name + labelName = id2label[int(labelID)].name + + # maybe we are not interested in that label + if not labelName in args.instLabels: + continue + + # Read the mask + predImage = Image.open(predImageFile) + predImage = predImage.convert("L") + predNp = np.array(predImage) + + # make the image really binary, i.e. everything non-zero is part of the prediction + boolPredInst = predNp != 0 + predPixelCount = np.count_nonzero( boolPredInst ) + + # skip if actually empty + if not predPixelCount: + continue + + # The information we want to collect for this instance + predInstance = {} + predInstance["imgName"] = predImageFile + predInstance["predID"] = predInstCount + predInstance["labelID"] = int(labelID) + predInstance["pixelCount"] = predPixelCount + predInstance["confidence"] = predConf + # Determine the number of pixels overlapping void + predInstance["voidIntersection"] = np.count_nonzero( np.logical_and(boolVoid, boolPredInst) ) + + # A list of all overlapping ground truth instances + matchedGt = [] + + # Loop through all ground truth instances with matching label + # This list contains all ground truth instances that distinguish groups + # We do not know, if a certain instance is actually a single object or a group + # e.g. car or cargroup + # However, for now we treat both the same and do the rest later + for (gtNum,gtInstance) in enumerate(gtInstancesOrig[labelName]): + + intersection = np.count_nonzero( np.logical_and( gtNp == gtInstance["instID"] , boolPredInst) ) + + # If they intersect add them as matches to both dicts + if (intersection > 0): + gtCopy = gtInstance.copy() + predCopy = predInstance.copy() + + # let the two know their intersection + gtCopy["intersection"] = intersection + predCopy["intersection"] = intersection + + # append ground truth to matches + matchedGt.append(gtCopy) + # append prediction to ground truth instance + gtInstances[labelName][gtNum]["matchedPred"].append(predCopy) + + predInstance["matchedGt"] = matchedGt + predInstCount += 1 + predInstances[labelName].append(predInstance) + + return (gtInstances,predInstances) + + +def evaluateMatches(matches, args): + # In the end, we need two vectors for each class and for each overlap + # The first vector (y_true) is binary and is 1, where the ground truth says true, + # and is 0 otherwise. + # The second vector (y_score) is float [0...1] and represents the confidence of + # the prediction. 
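+    # (Illustrative example, added for clarity and not from the original
+    #  script: with y_true = [1, 1, 0] and y_score = [0.9, 0.0, 0.7], the
+    #  first ground truth instance was matched by a prediction with
+    #  confidence 0.9, the second ground truth instance was missed, and one
+    #  prediction was a false positive with confidence 0.7; the average
+    #  precision for that class is then computed from the precision/recall
+    #  curve built from these two vectors.)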
+ # + # We represent the following cases as: + # | y_true | y_score + # gt instance with matched prediction | 1 | confidence + # gt instance w/o matched prediction | 1 | 0.0 + # false positive prediction | 0 | confidence + # + # The current implementation makes only sense for an overlap threshold >= 0.5, + # since only then, a single prediction can either be ignored or matched, but + # never both. Further, it can never match to two gt instances. + # For matching, we vary the overlap and do the following steps: + # 1.) remove all predictions that satisfy the overlap criterion with an ignore region (either void or *group) + # 2.) remove matches that do not satisfy the overlap + # 3.) mark non-matched predictions as false positive + + # AP + overlaps = args.overlaps + # region size + minRegionSizes = args.minRegionSizes + # distance thresholds + distThs = args.distanceThs + # distance confidences + distConfs = args.distanceConfs + # only keep the first, if distances are not available + if not args.distanceAvailable: + minRegionSizes = [ minRegionSizes[0] ] + distThs = [ distThs [0] ] + distConfs = [ distConfs [0] ] + + # last three must be of same size + if len(distThs) != len(minRegionSizes): + printError("Number of distance thresholds and region sizes different") + if len(distThs) != len(distConfs): + printError("Number of distance thresholds and confidences different") + + # Here we hold the results + # First dimension is class, second overlap + ap = np.zeros( (len(distThs) , len(args.instLabels) , len(overlaps)) , np.float ) + + for dI,(minRegionSize,distanceTh,distanceConf) in enumerate(zip(minRegionSizes,distThs,distConfs)): + for (oI,overlapTh) in enumerate(overlaps): + for (lI,labelName) in enumerate(args.instLabels): + y_true = np.empty( 0 ) + y_score = np.empty( 0 ) + # count hard false negatives + hardFns = 0 + # found at least one gt and predicted instance? 
+ haveGt = False + havePred = False + + for img in matches: + predInstances = matches[img]["prediction" ][labelName] + gtInstances = matches[img]["groundTruth"][labelName] + # filter groups in ground truth + gtInstances = [ gt for gt in gtInstances if gt["instID"]>=1000 and gt["pixelCount"]>=minRegionSize and gt["medDist"]<=distanceTh and gt["distConf"]>=distanceConf ] + + if gtInstances: + haveGt = True + if predInstances: + havePred = True + + curTrue = np.ones ( len(gtInstances) ) + curScore = np.ones ( len(gtInstances) ) * (-float("inf")) + curMatch = np.zeros( len(gtInstances) , dtype=np.bool ) + + # collect matches + for (gtI,gt) in enumerate(gtInstances): + foundMatch = False + for pred in gt["matchedPred"]: + overlap = float(pred["intersection"]) / (gt["pixelCount"]+pred["pixelCount"]-pred["intersection"]) + if overlap > overlapTh: + # the score + confidence = pred["confidence"] + + # if we already hat a prediction for this groundtruth + # the prediction with the lower score is automatically a false positive + if curMatch[gtI]: + maxScore = max( curScore[gtI] , confidence ) + minScore = min( curScore[gtI] , confidence ) + curScore[gtI] = maxScore + # append false positive + curTrue = np.append(curTrue,0) + curScore = np.append(curScore,minScore) + curMatch = np.append(curMatch,True) + # otherwise set score + else: + foundMatch = True + curMatch[gtI] = True + curScore[gtI] = confidence + + if not foundMatch: + hardFns += 1 + + # remove non-matched ground truth instances + curTrue = curTrue [ curMatch==True ] + curScore = curScore[ curMatch==True ] + + # collect non-matched predictions as false positive + for pred in predInstances: + foundGt = False + for gt in pred["matchedGt"]: + overlap = float(gt["intersection"]) / (gt["pixelCount"]+pred["pixelCount"]-gt["intersection"]) + if overlap > overlapTh: + foundGt = True + break + if not foundGt: + # collect number of void and *group pixels + nbIgnorePixels = pred["voidIntersection"] + for gt in pred["matchedGt"]: + # group? 
+ if gt["instID"] < 1000: + nbIgnorePixels += gt["intersection"] + # small ground truth instances + if gt["pixelCount"] < minRegionSize or gt["medDist"]>distanceTh or gt["distConf"]15.3f}".format(apAvg ) + sep + line += getColorEntry(ap50o , args) + sep + "{:>15.3f}".format(ap50o ) + sep + if args.distanceAvailable: + line += getColorEntry(ap50m , args) + sep + "{:>15.3f}".format(ap50m ) + sep + line += getColorEntry(ap100m, args) + sep + "{:>15.3f}".format(ap100m) + sep + line += getColorEntry(ap5050, args) + sep + "{:>15.3f}".format(ap5050) + sep + line += noCol + print(line) + + allApAvg = avgDict["allAp"] + allAp50o = avgDict["allAp50%"] + if args.distanceAvailable: + allAp50m = avgDict["allAp50m"] + allAp100m = avgDict["allAp100m"] + allAp5050 = avgDict["allAp50%50m"] + + if not args.csv: + print("-"*lineLen) + line = "{:<15}".format("average") + sep + col1 + line += getColorEntry(allApAvg , args) + sep + "{:>15.3f}".format(allApAvg) + sep + line += getColorEntry(allAp50o , args) + sep + "{:>15.3f}".format(allAp50o) + sep + if args.distanceAvailable: + line += getColorEntry(allAp50m , args) + sep + "{:>15.3f}".format(allAp50m) + sep + line += getColorEntry(allAp100m, args) + sep + "{:>15.3f}".format(allAp100m) + sep + line += getColorEntry(allAp5050, args) + sep + "{:>15.3f}".format(allAp5050) + sep + line += noCol + print(line) + print("") + +def prepareJSONDataForResults(avgDict, aps, args): + JSONData = {} + JSONData["averages"] = avgDict + JSONData["overlaps"] = args.overlaps.tolist() + JSONData["minRegionSizes"] = args.minRegionSizes.tolist() + JSONData["distanceThresholds"] = args.distanceThs.tolist() + JSONData["minStereoDensities"] = args.distanceConfs.tolist() + JSONData["instLabels"] = args.instLabels + JSONData["resultApMatrix"] = aps.tolist() + + return JSONData + +# Work through image list +def evaluateImgLists(predictionList, groundTruthList, args): + # determine labels of interest + setInstanceLabels(args) + # get dictionary of all ground truth instances + gtInstances = getGtInstances(groundTruthList,args) + # match predictions and ground truth + matches = matchGtWithPreds(predictionList,groundTruthList,gtInstances,args) + writeDict2JSON(matches,"matches.json") + # evaluate matches + apScores = evaluateMatches(matches, args) + # averages + avgDict = computeAverages(apScores,args) + # result dict + resDict = prepareJSONDataForResults(avgDict, apScores, args) + if args.JSONOutput: + # create output folder if necessary + path = os.path.dirname(args.exportFile) + ensurePath(path) + # Write APs to JSON + writeDict2JSON(resDict, args.exportFile) + + if not args.quiet: + # Print results + printResults(avgDict, args) + + return resDict + +# The main method +def main(): + global args + argv = sys.argv[1:] + + predictionImgList = [] + groundTruthImgList = [] + + # the image lists can either be provided as arguments + if (len(argv) > 3): + for arg in argv: + if ("gt" in arg or "groundtruth" in arg): + groundTruthImgList.append(arg) + elif ("pred" in arg): + predictionImgList.append(arg) + # however the no-argument way is prefered + elif len(argv) == 0: + # use the ground truth search string specified above + groundTruthImgList = glob.glob(args.groundTruthSearch) + if not groundTruthImgList: + printError("Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format(args.groundTruthSearch)) + # get the corresponding prediction for each ground truth imag + for gt in groundTruthImgList: + predictionImgList.append( getPrediction(gt,args) ) + + # print some info for user + print("Note that this tool uses the file '{}' to cache the ground truth instances.".format(args.gtInstancesFile)) + print("If anything goes wrong, or if you change the ground truth, please delete the file.") + + # evaluate + evaluateImgLists(predictionImgList, groundTruthImgList, args) + + return + +# call the main method +if __name__ == "__main__": + main() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/evalPixelLevelSemanticLabeling.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/evalPixelLevelSemanticLabeling.py new file mode 100644 index 0000000..f31c975 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/evalPixelLevelSemanticLabeling.py @@ -0,0 +1,683 @@ +#!/usr/bin/python +# +# The evaluation script for pixel-level img labeling. +# We use this script to evaluate your approach on the test set. +# You can use the script to evaluate on the validation set. +# +# Please check the description of the "getPrediction" method below +# and set the required environment variables as needed, such that +# this script can locate your results. +# If the default implementation of the method works, then it's most likely +# that our evaluation server will be able to process your results as well. +# +# Note that the script is a lot faster, if you enable cython support. +# WARNING: Cython only tested for Ubuntu 64bit OS. +# To enable cython, run +# setup.py build_ext --inplace +# +# To run this script, make sure that your results are images, +# where pixels encode the class IDs as defined in labels.py. +# Note that the regular ID is used, not the train ID. +# Further note that many classes are ignored from evaluation. +# Thus, authors are not expected to predict these classes and all +# pixels with a ground truth label that is ignored are ignored in +# evaluation. + +# python imports +from __future__ import print_function + +import fnmatch +import os +import platform +import sys + +try: + from itertools import izip +except ImportError: + izip = zip + +# Cityscapes imports +sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' , 'helpers' ) ) ) +from csHelpers import * + +# C Support +# Enable the cython support for faster evaluation +# Only tested for Ubuntu 64bit OS +CSUPPORT = True +# Check if C-Support is available for better performance +if CSUPPORT: + try: + import addToConfusionMatrix + except: + CSUPPORT = False + + +################################### +# PLEASE READ THESE INSTRUCTIONS!!! +################################### +# Provide the prediction file for the given ground truth file. +# +# The current implementation expects the results to be in a certain root folder. +# This folder is one of the following with decreasing priority: +# - environment variable CITYSCAPES_RESULTS +# - environment variable CITYSCAPES_DATASET/results +# - ../../results/" +# +# Within the root folder, a matching prediction file is recursively searched. 
+# A file matches, if the filename follows the pattern +# _123456_123456*.png +# for a ground truth filename +# _123456_123456_gtFine_labelIds.png +def getPrediction( args, groundTruthFile ): + # determine the prediction path, if the method is first called + if not args.predictionPath: + rootPath = None + if 'CITYSCAPES_RESULTS' in os.environ: + rootPath = os.environ['CITYSCAPES_RESULTS'] + elif 'CITYSCAPES_DATASET' in os.environ: + rootPath = os.path.join( os.environ['CITYSCAPES_DATASET'] , "results" ) + else: + rootPath = os.path.join(os.path.dirname(os.path.realpath(__file__)),'..','..','results') + + if not os.path.isdir(rootPath): + printError("Could not find a result root folder. Please read the instructions of this method.") + + args.predictionPath = rootPath + + # walk the prediction path, if not happened yet + if not args.predictionWalk: + walk = [] + for root, dirnames, filenames in os.walk(args.predictionPath): + walk.append( (root,filenames) ) + args.predictionWalk = walk + + csFile = getCsFileInfo(groundTruthFile) + filePattern = "{}_{}_{}*.png".format( csFile.city , csFile.sequenceNb , csFile.frameNb ) + + predictionFile = None + for root, filenames in args.predictionWalk: + for filename in fnmatch.filter(filenames, filePattern): + if not predictionFile: + predictionFile = os.path.join(root, filename) + else: + printError("Found multiple predictions for ground truth {}".format(groundTruthFile)) + + if not predictionFile: + printError("Found no prediction for ground truth {}".format(groundTruthFile)) + + return predictionFile + + +###################### +# Parameters +###################### + + +# A dummy class to collect all bunch of data +class CArgs(object): + pass +# And a global object of that class +args = CArgs() + +# Where to look for Cityscapes +if 'CITYSCAPES_DATASET' in os.environ: + args.cityscapesPath = os.environ['CITYSCAPES_DATASET'] +else: + args.cityscapesPath = os.path.join(os.path.dirname(os.path.realpath(__file__)),'..','..') + +if 'CITYSCAPES_EXPORT_DIR' in os.environ: + export_dir = os.environ['CITYSCAPES_EXPORT_DIR'] + if not os.path.isdir(export_dir): + raise ValueError("CITYSCAPES_EXPORT_DIR {} is not a directory".format(export_dir)) + args.exportFile = "{}/resultPixelLevelSemanticLabeling.json".format(export_dir) +else: + args.exportFile = os.path.join(args.cityscapesPath, "evaluationResults", "resultPixelLevelSemanticLabeling.json") +# Parameters that should be modified by user +args.groundTruthSearch = os.path.join( args.cityscapesPath , "gtFine" , "val" , "*", "*_gtFine_labelIds.png" ) + +# Remaining params +args.evalInstLevelScore = True +args.evalPixelAccuracy = False +args.evalLabels = [] +args.printRow = 5 +args.normalized = True +args.colorized = hasattr(sys.stderr, "isatty") and sys.stderr.isatty() and platform.system()=='Linux' +args.bold = colors.BOLD if args.colorized else "" +args.nocol = colors.ENDC if args.colorized else "" +args.JSONOutput = True +args.quiet = False + +args.avgClassSize = { + "bicycle" : 4672.3249222261 , + "caravan" : 36771.8241758242 , + "motorcycle" : 6298.7200839748 , + "rider" : 3930.4788056518 , + "bus" : 35732.1511111111 , + "train" : 67583.7075812274 , + "car" : 12794.0202738185 , + "person" : 3462.4756337644 , + "truck" : 27855.1264367816 , + "trailer" : 16926.9763313609 , +} + +# store some parameters for finding predictions in the args variable +# the values are filled when the method getPrediction is first called +args.predictionPath = None +args.predictionWalk = None + + +######################### +# 
Methods +######################### + + +# Generate empty confusion matrix and create list of relevant labels +def generateMatrix(args): + args.evalLabels = [] + for label in labels: + if (label.id < 0): + continue + # we append all found labels, regardless of being ignored + args.evalLabels.append(label.id) + maxId = max(args.evalLabels) + # We use longlong type to be sure that there are no overflows + return np.zeros(shape=(maxId+1, maxId+1),dtype=np.ulonglong) + +def generateInstanceStats(args): + instanceStats = {} + instanceStats["classes" ] = {} + instanceStats["categories"] = {} + for label in labels: + if label.hasInstances and not label.ignoreInEval: + instanceStats["classes"][label.name] = {} + instanceStats["classes"][label.name]["tp"] = 0.0 + instanceStats["classes"][label.name]["tpWeighted"] = 0.0 + instanceStats["classes"][label.name]["fn"] = 0.0 + instanceStats["classes"][label.name]["fnWeighted"] = 0.0 + for category in category2labels: + labelIds = [] + allInstances = True + for label in category2labels[category]: + if label.id < 0: + continue + if not label.hasInstances: + allInstances = False + break + labelIds.append(label.id) + if not allInstances: + continue + + instanceStats["categories"][category] = {} + instanceStats["categories"][category]["tp"] = 0.0 + instanceStats["categories"][category]["tpWeighted"] = 0.0 + instanceStats["categories"][category]["fn"] = 0.0 + instanceStats["categories"][category]["fnWeighted"] = 0.0 + instanceStats["categories"][category]["labelIds"] = labelIds + + return instanceStats + + +# Get absolute or normalized value from field in confusion matrix. +def getMatrixFieldValue(confMatrix, i, j, args): + if args.normalized: + rowSum = confMatrix[i].sum() + if (rowSum == 0): + return float('nan') + return float(confMatrix[i][j]) / rowSum + else: + return confMatrix[i][j] + +# Calculate and return IOU score for a particular label +def getIouScoreForLabel(label, confMatrix, args): + if id2label[label].ignoreInEval: + return float('nan') + + # the number of true positive pixels for this label + # the entry on the diagonal of the confusion matrix + tp = np.longlong(confMatrix[label,label]) + + # the number of false negative pixels for this label + # the row sum of the matching row in the confusion matrix + # minus the diagonal entry + fn = np.longlong(confMatrix[label,:].sum()) - tp + + # the number of false positive pixels for this labels + # Only pixels that are not on a pixel with ground truth label that is ignored + # The column sum of the corresponding column in the confusion matrix + # without the ignored rows and without the actual label of interest + notIgnored = [l for l in args.evalLabels if not id2label[l].ignoreInEval and not l==label] + fp = np.longlong(confMatrix[notIgnored,label].sum()) + + # the denominator of the IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + +# Calculate and return IOU score for a particular label +def getInstanceIouScoreForLabel(label, confMatrix, instStats, args): + if id2label[label].ignoreInEval: + return float('nan') + + labelName = id2label[label].name + if not labelName in instStats["classes"]: + return float('nan') + + tp = instStats["classes"][labelName]["tpWeighted"] + fn = instStats["classes"][labelName]["fnWeighted"] + # false postives computed as above + notIgnored = [l for l in args.evalLabels if not id2label[l].ignoreInEval and not l==label] + fp = np.longlong(confMatrix[notIgnored,label].sum()) + + # the denominator of the 
IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + +# Calculate prior for a particular class id. +def getPrior(label, confMatrix): + return float(confMatrix[label,:].sum()) / confMatrix.sum() + +# Get average of scores. +# Only computes the average over valid entries. +def getScoreAverage(scoreList, args): + validScores = 0 + scoreSum = 0.0 + for score in scoreList: + if not math.isnan(scoreList[score]): + validScores += 1 + scoreSum += scoreList[score] + if validScores == 0: + return float('nan') + return scoreSum / validScores + +# Calculate and return IOU score for a particular category +def getIouScoreForCategory(category, confMatrix, args): + # All labels in this category + labels = category2labels[category] + # The IDs of all valid labels in this category + labelIds = [label.id for label in labels if not label.ignoreInEval and label.id in args.evalLabels] + # If there are no valid labels, then return NaN + if not labelIds: + return float('nan') + + # the number of true positive pixels for this category + # this is the sum of all entries in the confusion matrix + # where row and column belong to a label ID of this category + tp = np.longlong(confMatrix[labelIds,:][:,labelIds].sum()) + + # the number of false negative pixels for this category + # that is the sum of all rows of labels within this category + # minus the number of true positive pixels + fn = np.longlong(confMatrix[labelIds,:].sum()) - tp + + # the number of false positive pixels for this category + # we count the column sum of all labels within this category + # while skipping the rows of ignored labels and of labels within this category + notIgnoredAndNotInCategory = [l for l in args.evalLabels if not id2label[l].ignoreInEval and id2label[l].category != category] + fp = np.longlong(confMatrix[notIgnoredAndNotInCategory,:][:,labelIds].sum()) + + # the denominator of the IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + +# Calculate and return IOU score for a particular category +def getInstanceIouScoreForCategory(category, confMatrix, instStats, args): + if not category in instStats["categories"]: + return float('nan') + labelIds = instStats["categories"][category]["labelIds"] + + tp = instStats["categories"][category]["tpWeighted"] + fn = instStats["categories"][category]["fnWeighted"] + + # the number of false positive pixels for this category + # same as above + notIgnoredAndNotInCategory = [l for l in args.evalLabels if not id2label[l].ignoreInEval and id2label[l].category != category] + fp = np.longlong(confMatrix[notIgnoredAndNotInCategory,:][:,labelIds].sum()) + + # the denominator of the IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + + +# create a dictionary containing all relevant results +def createResultDict( confMatrix, classScores, classInstScores, categoryScores, categoryInstScores, perImageStats, args ): + # write JSON result file + wholeData = {} + wholeData["confMatrix"] = confMatrix.tolist() + wholeData["priors"] = {} + wholeData["labels"] = {} + for label in args.evalLabels: + wholeData["priors"][id2label[label].name] = getPrior(label, confMatrix) + wholeData["labels"][id2label[label].name] = label + wholeData["classScores"] = classScores + wholeData["classInstScores"] = classInstScores + wholeData["categoryScores"] = categoryScores + wholeData["categoryInstScores"] = categoryInstScores + 
wholeData["averageScoreClasses"] = getScoreAverage(classScores, args) + wholeData["averageScoreInstClasses"] = getScoreAverage(classInstScores, args) + wholeData["averageScoreCategories"] = getScoreAverage(categoryScores, args) + wholeData["averageScoreInstCategories"] = getScoreAverage(categoryInstScores, args) + + if perImageStats: + wholeData["perImageScores"] = perImageStats + + return wholeData + +def writeJSONFile(wholeData, args): + path = os.path.dirname(args.exportFile) + ensurePath(path) + writeDict2JSON(wholeData, args.exportFile) + +# Print confusion matrix +def printConfMatrix(confMatrix, args): + # print line + print("\b{text:{fill}>{width}}".format(width=15, fill='-', text=" "), end=' ') + for label in args.evalLabels: + print("\b{text:{fill}>{width}}".format(width=args.printRow + 2, fill='-', text=" "), end=' ') + print("\b{text:{fill}>{width}}".format(width=args.printRow + 3, fill='-', text=" ")) + + # print label names + print("\b{text:>{width}} |".format(width=13, text=""), end=' ') + for label in args.evalLabels: + print("\b{text:^{width}} |".format(width=args.printRow, text=id2label[label].name[0]), end=' ') + print("\b{text:>{width}} |".format(width=6, text="Prior")) + + # print line + print("\b{text:{fill}>{width}}".format(width=15, fill='-', text=" "), end=' ') + for label in args.evalLabels: + print("\b{text:{fill}>{width}}".format(width=args.printRow + 2, fill='-', text=" "), end=' ') + print("\b{text:{fill}>{width}}".format(width=args.printRow + 3, fill='-', text=" ")) + + # print matrix + for x in range(0, confMatrix.shape[0]): + if (not x in args.evalLabels): + continue + # get prior of this label + prior = getPrior(x, confMatrix) + # skip if label does not exist in ground truth + if prior < 1e-9: + continue + + # print name + name = id2label[x].name + if len(name) > 13: + name = name[:13] + print("\b{text:>{width}} |".format(width=13,text=name), end=' ') + # print matrix content + for y in range(0, len(confMatrix[x])): + if (not y in args.evalLabels): + continue + matrixFieldValue = getMatrixFieldValue(confMatrix, x, y, args) + print(getColorEntry(matrixFieldValue, args) + "\b{text:>{width}.2f} ".format(width=args.printRow, text=matrixFieldValue) + args.nocol, end=' ') + # print prior + print(getColorEntry(prior, args) + "\b{text:>{width}.4f} ".format(width=6, text=prior) + args.nocol) + # print line + print("\b{text:{fill}>{width}}".format(width=15, fill='-', text=" "), end=' ') + for label in args.evalLabels: + print("\b{text:{fill}>{width}}".format(width=args.printRow + 2, fill='-', text=" "), end=' ') + print("\b{text:{fill}>{width}}".format(width=args.printRow + 3, fill='-', text=" "), end=' ') + +# Print intersection-over-union scores for all classes. +def printClassScores(scoreList, instScoreList, args): + if (args.quiet): + return + print(args.bold + "classes IoU nIoU" + args.nocol) + print("--------------------------------") + for label in args.evalLabels: + if (id2label[label].ignoreInEval): + continue + labelName = str(id2label[label].name) + iouStr = getColorEntry(scoreList[labelName], args) + "{val:>5.3f}".format(val=scoreList[labelName]) + args.nocol + niouStr = getColorEntry(instScoreList[labelName], args) + "{val:>5.3f}".format(val=instScoreList[labelName]) + args.nocol + print("{:<14}: ".format(labelName) + iouStr + " " + niouStr) + +# Print intersection-over-union scores for all categorys. 
+def printCategoryScores(scoreDict, instScoreDict, args): + if (args.quiet): + return + print(args.bold + "categories IoU nIoU" + args.nocol) + print("--------------------------------") + for categoryName in scoreDict: + if all( label.ignoreInEval for label in category2labels[categoryName] ): + continue + iouStr = getColorEntry(scoreDict[categoryName], args) + "{val:>5.3f}".format(val=scoreDict[categoryName]) + args.nocol + niouStr = getColorEntry(instScoreDict[categoryName], args) + "{val:>5.3f}".format(val=instScoreDict[categoryName]) + args.nocol + print("{:<14}: ".format(categoryName) + iouStr + " " + niouStr) + +# Evaluate image lists pairwise. +def evaluateImgLists(predictionImgList, groundTruthImgList, args): + if len(predictionImgList) != len(groundTruthImgList): + printError("List of images for prediction and groundtruth are not of equal size.") + confMatrix = generateMatrix(args) + instStats = generateInstanceStats(args) + perImageStats = {} + nbPixels = 0 + + if not args.quiet: + print("Evaluating {} pairs of images...".format(len(predictionImgList))) + + # Evaluate all pairs of images and save them into a matrix + for i in range(len(predictionImgList)): + predictionImgFileName = predictionImgList[i] + groundTruthImgFileName = groundTruthImgList[i] + #print "Evaluate ", predictionImgFileName, "<>", groundTruthImgFileName + nbPixels += evaluatePair(predictionImgFileName, groundTruthImgFileName, confMatrix, instStats, perImageStats, args) + + # sanity check + if confMatrix.sum() != nbPixels: + printError('Number of analyzed pixels and entries in confusion matrix disagree: contMatrix {}, pixels {}'.format(confMatrix.sum(),nbPixels)) + + if not args.quiet: + print("\rImages Processed: {}".format(i+1), end=' ') + sys.stdout.flush() + if not args.quiet: + print("\n") + + # sanity check + if confMatrix.sum() != nbPixels: + printError('Number of analyzed pixels and entries in confusion matrix disagree: contMatrix {}, pixels {}'.format(confMatrix.sum(),nbPixels)) + + # print confusion matrix + if (not args.quiet): + printConfMatrix(confMatrix, args) + + # Calculate IOU scores on class level from matrix + classScoreList = {} + for label in args.evalLabels: + labelName = id2label[label].name + classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args) + + # Calculate instance IOU scores on class level from matrix + classInstScoreList = {} + for label in args.evalLabels: + labelName = id2label[label].name + classInstScoreList[labelName] = getInstanceIouScoreForLabel(label, confMatrix, instStats, args) + + # Print IOU scores + if (not args.quiet): + print("") + print("") + printClassScores(classScoreList, classInstScoreList, args) + iouAvgStr = getColorEntry(getScoreAverage(classScoreList, args), args) + "{avg:5.3f}".format(avg=getScoreAverage(classScoreList, args)) + args.nocol + niouAvgStr = getColorEntry(getScoreAverage(classInstScoreList , args), args) + "{avg:5.3f}".format(avg=getScoreAverage(classInstScoreList , args)) + args.nocol + print("--------------------------------") + print("Score Average : " + iouAvgStr + " " + niouAvgStr) + print("--------------------------------") + print("") + + # Calculate IOU scores on category level from matrix + categoryScoreList = {} + for category in category2labels.keys(): + categoryScoreList[category] = getIouScoreForCategory(category,confMatrix,args) + + # Calculate instance IOU scores on category level from matrix + categoryInstScoreList = {} + for category in category2labels.keys(): + categoryInstScoreList[category] = 
getInstanceIouScoreForCategory(category,confMatrix,instStats,args) + + # Print IOU scores + if (not args.quiet): + print("") + printCategoryScores(categoryScoreList, categoryInstScoreList, args) + iouAvgStr = getColorEntry(getScoreAverage(categoryScoreList, args), args) + "{avg:5.3f}".format(avg=getScoreAverage(categoryScoreList, args)) + args.nocol + niouAvgStr = getColorEntry(getScoreAverage(categoryInstScoreList, args), args) + "{avg:5.3f}".format(avg=getScoreAverage(categoryInstScoreList, args)) + args.nocol + print("--------------------------------") + print("Score Average : " + iouAvgStr + " " + niouAvgStr) + print("--------------------------------") + print("") + + # write result file + allResultsDict = createResultDict( confMatrix, classScoreList, classInstScoreList, categoryScoreList, categoryInstScoreList, perImageStats, args ) + writeJSONFile( allResultsDict, args) + + # return confusion matrix + return allResultsDict + +# Main evaluation method. Evaluates pairs of prediction and ground truth +# images which are passed as arguments. +def evaluatePair(predictionImgFileName, groundTruthImgFileName, confMatrix, instanceStats, perImageStats, args): + # Loading all resources for evaluation. + try: + predictionImg = Image.open(predictionImgFileName) + predictionNp = np.array(predictionImg) + except: + printError("Unable to load " + predictionImgFileName) + try: + groundTruthImg = Image.open(groundTruthImgFileName) + groundTruthNp = np.array(groundTruthImg) + except: + printError("Unable to load " + groundTruthImgFileName) + # load ground truth instances, if needed + if args.evalInstLevelScore: + groundTruthInstanceImgFileName = groundTruthImgFileName.replace("labelIds","instanceIds") + try: + instanceImg = Image.open(groundTruthInstanceImgFileName) + instanceNp = np.array(instanceImg) + except: + printError("Unable to load " + groundTruthInstanceImgFileName) + + # Check for equal image sizes + if (predictionImg.size[0] != groundTruthImg.size[0]): + printError("Image widths of " + predictionImgFileName + " and " + groundTruthImgFileName + " are not equal.") + if (predictionImg.size[1] != groundTruthImg.size[1]): + printError("Image heights of " + predictionImgFileName + " and " + groundTruthImgFileName + " are not equal.") + if ( len(predictionNp.shape) != 2 ): + printError("Predicted image has multiple channels.") + + imgWidth = predictionImg.size[0] + imgHeight = predictionImg.size[1] + nbPixels = imgWidth*imgHeight + + # Evaluate images + if (CSUPPORT): + # using cython + confMatrix = addToConfusionMatrix.cEvaluatePair(predictionNp, groundTruthNp, confMatrix, args.evalLabels) + else: + # the slower python way + for (groundTruthImgPixel,predictionImgPixel) in izip(groundTruthImg.getdata(),predictionImg.getdata()): + if (not groundTruthImgPixel in args.evalLabels): + printError("Unknown label with id {:}".format(groundTruthImgPixel)) + + confMatrix[groundTruthImgPixel][predictionImgPixel] += 1 + + if args.evalInstLevelScore: + # Generate category masks + categoryMasks = {} + for category in instanceStats["categories"]: + categoryMasks[category] = np.in1d( predictionNp , instanceStats["categories"][category]["labelIds"] ).reshape(predictionNp.shape) + + instList = np.unique(instanceNp[instanceNp > 1000]) + for instId in instList: + labelId = int(instId/1000) + label = id2label[ labelId ] + if label.ignoreInEval: + continue + + mask = instanceNp==instId + instSize = np.count_nonzero( mask ) + + tp = np.count_nonzero( predictionNp[mask] == labelId ) + fn = instSize - tp + + weight = 
args.avgClassSize[label.name] / float(instSize) + tpWeighted = float(tp) * weight + fnWeighted = float(fn) * weight + + instanceStats["classes"][label.name]["tp"] += tp + instanceStats["classes"][label.name]["fn"] += fn + instanceStats["classes"][label.name]["tpWeighted"] += tpWeighted + instanceStats["classes"][label.name]["fnWeighted"] += fnWeighted + + category = label.category + if category in instanceStats["categories"]: + catTp = 0 + catTp = np.count_nonzero( np.logical_and( mask , categoryMasks[category] ) ) + catFn = instSize - catTp + + catTpWeighted = float(catTp) * weight + catFnWeighted = float(catFn) * weight + + instanceStats["categories"][category]["tp"] += catTp + instanceStats["categories"][category]["fn"] += catFn + instanceStats["categories"][category]["tpWeighted"] += catTpWeighted + instanceStats["categories"][category]["fnWeighted"] += catFnWeighted + + if args.evalPixelAccuracy: + notIgnoredLabels = [l for l in args.evalLabels if not id2label[l].ignoreInEval] + notIgnoredPixels = np.in1d( groundTruthNp , notIgnoredLabels , invert=True ).reshape(groundTruthNp.shape) + erroneousPixels = np.logical_and( notIgnoredPixels , ( predictionNp != groundTruthNp ) ) + perImageStats[predictionImgFileName] = {} + perImageStats[predictionImgFileName]["nbNotIgnoredPixels"] = np.count_nonzero(notIgnoredPixels) + perImageStats[predictionImgFileName]["nbCorrectPixels"] = np.count_nonzero(erroneousPixels) + + return nbPixels + +# The main method +def main(): + global args + argv = sys.argv[1:] + + predictionImgList = [] + groundTruthImgList = [] + + # the image lists can either be provided as arguments + if (len(argv) > 3): + for arg in argv: + if ("gt" in arg or "groundtruth" in arg): + groundTruthImgList.append(arg) + elif ("pred" in arg): + predictionImgList.append(arg) + # however the no-argument way is prefered + elif len(argv) == 0: + # use the ground truth search string specified above + groundTruthImgList = glob.glob(args.groundTruthSearch) + if not groundTruthImgList: + printError("Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format(args.groundTruthSearch)) + # get the corresponding prediction for each ground truth imag + for gt in groundTruthImgList: + predictionImgList.append( getPrediction(args,gt) ) + #print(predictionImgList[1]) + #print(groundTruthImgList[1]) + #exit() + #evaluate + evaluateImgLists(predictionImgList, groundTruthImgList, args) # evaluate two list of images + + return + +# call the main method +if __name__ == "__main__": + main() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/instance.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/instance.py new file mode 100644 index 0000000..dca3353 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/instance.py @@ -0,0 +1,50 @@ +#!/usr/bin/python +# +# Instance class +# + +class Instance(object): + instID = 0 + labelID = 0 + pixelCount = 0 + medDist = -1 + distConf = 0.0 + + def __init__(self, imgNp, instID): + if (instID == -1): + return + self.instID = int(instID) + self.labelID = int(self.getLabelID(instID)) + self.pixelCount = int(self.getInstancePixels(imgNp, instID)) + + def getLabelID(self, instID): + if (instID < 1000): + return instID + else: + return int(instID / 1000) + + def getInstancePixels(self, imgNp, instLabel): + return (imgNp == instLabel).sum() + + def toJSON(self): + return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) + + def toDict(self): + buildDict = {} + buildDict["instID"] = self.instID + buildDict["labelID"] = self.labelID + buildDict["pixelCount"] = self.pixelCount + buildDict["medDist"] = self.medDist + buildDict["distConf"] = self.distConf + return buildDict + + def fromJSON(self, data): + self.instID = int(data["instID"]) + self.labelID = int(data["labelID"]) + self.pixelCount = int(data["pixelCount"]) + if ("medDist" in data): + self.medDist = float(data["medDist"]) + self.distConf = float(data["distConf"]) + + def __str__(self): + return "("+str(self.instID)+")" \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/instances2dict.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/instances2dict.py new file mode 100644 index 0000000..065563b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/evaluation/instances2dict.py @@ -0,0 +1,64 @@ +#!/usr/bin/python +# +# Convert instances from png files to a dictionary +# + +from __future__ import print_function +import os, sys + +# Cityscapes imports +from instance import * +sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' 
, 'helpers' ) ) ) +from csHelpers import * + +def instances2dict(imageFileList, verbose=False): + imgCount = 0 + instanceDict = {} + + if not isinstance(imageFileList, list): + imageFileList = [imageFileList] + + if verbose: + print("Processing {} images...".format(len(imageFileList))) + + for imageFileName in imageFileList: + # Load image + img = Image.open(imageFileName) + + # Image as numpy array + imgNp = np.array(img) + + # Initialize label categories + instances = {} + for label in labels: + instances[label.name] = [] + + # Loop through all instance ids in instance image + for instanceId in np.unique(imgNp): + instanceObj = Instance(imgNp, instanceId) + + instances[id2label[instanceObj.labelID].name].append(instanceObj.toDict()) + + imgKey = os.path.abspath(imageFileName) + instanceDict[imgKey] = instances + imgCount += 1 + + if verbose: + print("\rImages Processed: {}".format(imgCount), end=' ') + sys.stdout.flush() + + if verbose: + print("") + + return instanceDict + +def main(argv): + fileList = [] + if (len(argv) > 2): + for arg in argv: + if ("png" in arg): + fileList.append(arg) + instances2dict(fileList, True) + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/annotation.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/annotation.py new file mode 100644 index 0000000..76fa549 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/annotation.py @@ -0,0 +1,238 @@ +#!/usr/bin/python +# +# Classes to store, read, and write annotations +# + +import os +import json +from collections import namedtuple + +# get current date and time +import datetime +import locale + +# A point in a polygon +Point = namedtuple('Point', ['x', 'y']) + +from abc import ABCMeta, abstractmethod + +# Type of an object +class CsObjectType(): + POLY = 1 # polygon + BBOX = 2 # bounding box + +# Abstract base class for annotation objects +class CsObject: + __metaclass__ = ABCMeta + + def __init__(self, objType): + self.objectType = objType + # the label + self.label = "" + + # If deleted or not + self.deleted = 0 + # If verified or not + self.verified = 0 + # The date string + self.date = "" + # The username + self.user = "" + # Draw the object + # Not read from or written to JSON + # Set to False if deleted object + # Might be set to False by the application for other reasons + self.draw = True + + @abstractmethod + def __str__(self): pass + + @abstractmethod + def fromJsonText(self, jsonText, objId=-1): pass + + @abstractmethod + def toJsonText(self): pass + + def updateDate( self ): + try: + locale.setlocale( locale.LC_ALL , 'en_US' ) + except locale.Error: + locale.setlocale( locale.LC_ALL , 'us_us' ) + except: + pass + self.date = datetime.datetime.now().strftime("%d-%b-%Y %H:%M:%S") + + # Mark the object as deleted + def delete(self): + self.deleted = 1 + self.draw = False + +# Class that contains the information of a single annotated object as polygon +class CsPoly(CsObject): + # Constructor + def __init__(self): + CsObject.__init__(self, CsObjectType.POLY) + # the polygon as list of points + self.polygon = [] + # the object ID + self.id = -1 + + def __str__(self): + polyText = "" + if self.polygon: + if len(self.polygon) <= 4: + for p in 
self.polygon: + polyText += '({},{}) '.format( p.x , p.y ) + else: + polyText += '({},{}) ({},{}) ... ({},{}) ({},{})'.format( + self.polygon[ 0].x , self.polygon[ 0].y , + self.polygon[ 1].x , self.polygon[ 1].y , + self.polygon[-2].x , self.polygon[-2].y , + self.polygon[-1].x , self.polygon[-1].y ) + else: + polyText = "none" + text = "Object: {} - {}".format( self.label , polyText ) + return text + + def fromJsonText(self, jsonText, objId): + self.id = objId + self.label = str(jsonText['label']) + self.polygon = [ Point(p[0],p[1]) for p in jsonText['polygon'] ] + if 'deleted' in jsonText.keys(): + self.deleted = jsonText['deleted'] + else: + self.deleted = 0 + if 'verified' in jsonText.keys(): + self.verified = jsonText['verified'] + else: + self.verified = 1 + if 'user' in jsonText.keys(): + self.user = jsonText['user'] + else: + self.user = '' + if 'date' in jsonText.keys(): + self.date = jsonText['date'] + else: + self.date = '' + if self.deleted == 1: + self.draw = False + else: + self.draw = True + + def toJsonText(self): + objDict = {} + objDict['label'] = self.label + objDict['id'] = self.id + objDict['deleted'] = self.deleted + objDict['verified'] = self.verified + objDict['user'] = self.user + objDict['date'] = self.date + objDict['polygon'] = [] + for pt in self.polygon: + objDict['polygon'].append([pt.x, pt.y]) + + return objDict + +# Class that contains the information of a single annotated object as bounding box +class CsBbox(CsObject): + # Constructor + def __init__(self): + CsObject.__init__(self, CsObjectType.BBOX) + # the polygon as list of points + self.bbox = [] + self.bboxVis = [] + + # the ID of the corresponding object + self.instanceId = -1 + + def __str__(self): + bboxText = "" + bboxText += '[(x1: {}, y1: {}), (w: {}, h: {})]'.format( + self.bbox[0] , self.bbox[1] , self.bbox[2] , self.bbox[3] ) + + bboxVisText = "" + bboxVisText += '[(x1: {}, y1: {}), (w: {}, h: {})]'.format( + self.bboxVis[0] , self.bboxVis[1] , self.bboxVis[2], self.bboxVis[3] ) + + text = "Object: {} - bbox {} - visible {}".format( self.label , bboxText, bboxVisText ) + return text + + def fromJsonText(self, jsonText, objId=-1): + self.bbox = jsonText['bbox'] + self.bboxVis = jsonText['bboxVis'] + self.label = str(jsonText['label']) + self.instanceId = jsonText['instanceId'] + + def toJsonText(self): + objDict = {} + objDict['label'] = self.label + objDict['instanceId'] = self.instanceId + objDict['bbox'] = self.bbox + objDict['bboxVis'] = self.bboxVis + + return objDict + +# The annotation of a whole image (doesn't support mixed annotations, i.e. 
combining CsPoly and CsBbox) +class Annotation: + # Constructor + def __init__(self, objType=CsObjectType.POLY): + # the width of that image and thus of the label image + self.imgWidth = 0 + # the height of that image and thus of the label image + self.imgHeight = 0 + # the list of objects + self.objects = [] + assert objType in CsObjectType.__dict__.values() + self.objectType = objType + + def toJson(self): + return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) + + def fromJsonText(self, jsonText): + jsonDict = json.loads(jsonText) + self.imgWidth = int(jsonDict['imgWidth']) + self.imgHeight = int(jsonDict['imgHeight']) + self.objects = [] + for objId, objIn in enumerate(jsonDict[ 'objects' ]): + if self.objectType == CsObjectType.POLY: + obj = CsPoly() + elif self.objectType == CsObjectType.BBOX: + obj = CsBbox() + obj.fromJsonText(objIn, objId) + self.objects.append(obj) + + def toJsonText(self): + jsonDict = {} + jsonDict['imgWidth'] = self.imgWidth + jsonDict['imgHeight'] = self.imgHeight + jsonDict['objects'] = [] + for obj in self.objects: + objDict = obj.toJsonText() + jsonDict['objects'].append(objDict) + + return jsonDict + + # Read a json formatted polygon file and return the annotation + def fromJsonFile(self, jsonFile): + if not os.path.isfile(jsonFile): + print('Given json file not found: {}'.format(jsonFile)) + return + with open(jsonFile, 'r') as f: + jsonText = f.read() + self.fromJsonText(jsonText) + + def toJsonFile(self, jsonFile): + with open(jsonFile, 'w') as f: + f.write(self.toJson()) + + +# a dummy example +if __name__ == "__main__": + obj = CsPoly() + obj.label = 'car' + obj.polygon.append( Point( 0 , 0 ) ) + obj.polygon.append( Point( 1 , 0 ) ) + obj.polygon.append( Point( 1 , 1 ) ) + obj.polygon.append( Point( 0 , 1 ) ) + + print(obj) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/csHelpers.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/csHelpers.py new file mode 100644 index 0000000..7f165d2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/csHelpers.py @@ -0,0 +1,128 @@ +#!/usr/bin/python +# +# Various helper methods and includes for Cityscapes +# + +# Python imports +import os, sys, getopt +import glob +import math +import json +from collections import namedtuple + +# Image processing +# Check if PIL is actually Pillow as expected +try: + from PIL import PILLOW_VERSION +except: + print("Please install the module 'Pillow' for image processing, e.g.") + print("pip install pillow") + sys.exit(-1) + +try: + import PIL.Image as Image + import PIL.ImageDraw as ImageDraw +except: + print("Failed to import the image processing packages.") + sys.exit(-1) + +# Numpy for datastructures +try: + import numpy as np +except: + print("Failed to import numpy package.") + sys.exit(-1) + +# Cityscapes modules +try: + from lib.metrics.cityscapes.helpers.annotation import Annotation + from lib.metrics.cityscapes.helpers.labels import labels, name2label, id2label, trainId2label, category2labels +except: + print("Failed to find all Cityscapes modules") + sys.exit(-1) + +# Print an error message and quit +def printError(message): + print('ERROR: ' + str(message)) + sys.exit(-1) + +# Class for colors +class colors: + RED = '\033[31;1m' + GREEN = '\033[32;1m' + YELLOW = '\033[33;1m' + BLUE = '\033[34;1m' + MAGENTA = '\033[35;1m' + CYAN = '\033[36;1m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + ENDC = '\033[0m' + +# Colored value output if colorized flag 
is activated. +def getColorEntry(val, args): + if not args.colorized: + return "" + if not isinstance(val, float) or math.isnan(val): + return colors.ENDC + if (val < .20): + return colors.RED + elif (val < .40): + return colors.YELLOW + elif (val < .60): + return colors.BLUE + elif (val < .80): + return colors.CYAN + else: + return colors.GREEN + +# Cityscapes files have a typical filename structure +# ___[_]. +# This class contains the individual elements as members +# For the sequence and frame number, the strings are returned, including leading zeros +CsFile = namedtuple( 'csFile' , [ 'city' , 'sequenceNb' , 'frameNb' , 'type' , 'type2' , 'ext' ] ) + +# Returns a CsFile object filled from the info in the given filename +def getCsFileInfo(fileName): + baseName = os.path.basename(fileName) + parts = baseName.split('_') + parts = parts[:-1] + parts[-1].split('.') + if not parts: + printError( 'Cannot parse given filename ({}). Does not seem to be a valid Cityscapes file.'.format(fileName) ) + if len(parts) == 5: + csFile = CsFile( *parts[:-1] , type2="" , ext=parts[-1] ) + elif len(parts) == 6: + csFile = CsFile( *parts ) + else: + printError( 'Found {} part(s) in given filename ({}). Expected 5 or 6.'.format(len(parts) , fileName) ) + + return csFile + +# Returns the part of Cityscapes filenames that is common to all data types +# e.g. for city_123456_123456_gtFine_polygons.json returns city_123456_123456 +def getCoreImageFileName(filename): + csFile = getCsFileInfo(filename) + return "{}_{}_{}".format( csFile.city , csFile.sequenceNb , csFile.frameNb ) + +# Returns the directory name for the given filename, e.g. +# fileName = "/foo/bar/foobar.txt" +# return value is "bar" +# Not much error checking though +def getDirectory(fileName): + dirName = os.path.dirname(fileName) + return os.path.basename(dirName) + +# Make sure that the given path exists +def ensurePath(path): + if not path: + return + if not os.path.isdir(path): + os.makedirs(path) + +# Write a dictionary as json file +def writeDict2JSON(dictName, fileName): + with open(fileName, 'w') as f: + f.write(json.dumps(dictName, default=lambda o: o.__dict__, sort_keys=True, indent=4)) + +# dummy main +if __name__ == "__main__": + printError("Only for include, not executable on its own.") diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/labels.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/labels.py new file mode 100644 index 0000000..4fa00f4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/labels.py @@ -0,0 +1,181 @@ +#!/usr/bin/python +# +# Cityscapes labels +# + +from collections import namedtuple + + +#-------------------------------------------------------------------------------- +# Definitions +#-------------------------------------------------------------------------------- + +# a label and all meta information +Label = namedtuple( 'Label' , [ + + 'name' , # The identifier of this label, e.g. 'car', 'person', ... . + # We use them to uniquely name a class + + 'id' , # An integer ID that is associated with this label. + # The IDs are used to represent the label in ground truth images + # An ID of -1 means that this label does not have an ID and thus + # is ignored when creating ground truth images (e.g. license plate). + # Do not modify these IDs, since exactly these IDs are expected by the + # evaluation server. + + 'trainId' , # Feel free to modify these IDs as suitable for your method. 
Then create + # ground truth images with train IDs, using the tools provided in the + # 'preparation' folder. However, make sure to validate or submit results + # to our evaluation server using the regular IDs above! + # For trainIds, multiple labels might have the same ID. Then, these labels + # are mapped to the same class in the ground truth images. For the inverse + # mapping, we use the label that is defined first in the list below. + # For example, mapping all void-type classes to the same ID in training, + # might make sense for some approaches. + # Max value is 255! + + 'category' , # The name of the category that this label belongs to + + 'categoryId' , # The ID of this category. Used to create ground truth images + # on category level. + + 'hasInstances', # Whether this label distinguishes between single instances or not + + 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored + # during evaluations or not + + 'color' , # The color of this label + ] ) + + +#-------------------------------------------------------------------------------- +# A list of all labels +#-------------------------------------------------------------------------------- + +# Please adapt the train IDs as appropriate for your approach. +# Note that you might want to ignore labels with ID 255 during training. +# Further note that the current train IDs are only a suggestion. You can use whatever you like. +# Make sure to provide your results using the original IDs and not the training IDs. +# Note that many IDs are ignored in evaluation and thus you never need to predict these! + +labels = [ + # name id trainId category catId hasInstances ignoreInEval color + Label( 'unlabeled' , 0 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'ego vehicle' , 1 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'rectification border' , 2 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'out of roi' , 3 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'static' , 4 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ), + Label( 'dynamic' , 5 , 255 , 'void' , 0 , False , True , (111, 74, 0) ), + Label( 'ground' , 6 , 255 , 'void' , 0 , False , True , ( 81, 0, 81) ), + Label( 'road' , 7 , 0 , 'flat' , 1 , False , False , (128, 64,128) ), + Label( 'sidewalk' , 8 , 1 , 'flat' , 1 , False , False , (244, 35,232) ), + Label( 'parking' , 9 , 255 , 'flat' , 1 , False , True , (250,170,160) ), + Label( 'rail track' , 10 , 255 , 'flat' , 1 , False , True , (230,150,140) ), + Label( 'building' , 11 , 2 , 'construction' , 2 , False , False , ( 70, 70, 70) ), + Label( 'wall' , 12 , 3 , 'construction' , 2 , False , False , (102,102,156) ), + Label( 'fence' , 13 , 4 , 'construction' , 2 , False , False , (190,153,153) ), + Label( 'guard rail' , 14 , 255 , 'construction' , 2 , False , True , (180,165,180) ), + Label( 'bridge' , 15 , 255 , 'construction' , 2 , False , True , (150,100,100) ), + Label( 'tunnel' , 16 , 255 , 'construction' , 2 , False , True , (150,120, 90) ), + Label( 'pole' , 17 , 5 , 'object' , 3 , False , False , (153,153,153) ), + Label( 'polegroup' , 18 , 255 , 'object' , 3 , False , True , (153,153,153) ), + Label( 'traffic light' , 19 , 6 , 'object' , 3 , False , False , (250,170, 30) ), + Label( 'traffic sign' , 20 , 7 , 'object' , 3 , False , False , (220,220, 0) ), + Label( 'vegetation' , 21 , 8 , 'nature' , 4 , False , False , (107,142, 35) ), + Label( 'terrain' , 22 , 9 , 'nature' , 4 , False , False , (152,251,152) ), + Label( 'sky' , 23 , 10 , 'sky' , 
5 , False , False , ( 70,130,180) ), + Label( 'person' , 24 , 11 , 'human' , 6 , True , False , (220, 20, 60) ), + Label( 'rider' , 25 , 12 , 'human' , 6 , True , False , (255, 0, 0) ), + Label( 'car' , 26 , 13 , 'vehicle' , 7 , True , False , ( 0, 0,142) ), + Label( 'truck' , 27 , 14 , 'vehicle' , 7 , True , False , ( 0, 0, 70) ), + Label( 'bus' , 28 , 15 , 'vehicle' , 7 , True , False , ( 0, 60,100) ), + Label( 'caravan' , 29 , 255 , 'vehicle' , 7 , True , True , ( 0, 0, 90) ), + Label( 'trailer' , 30 , 255 , 'vehicle' , 7 , True , True , ( 0, 0,110) ), + Label( 'train' , 31 , 16 , 'vehicle' , 7 , True , False , ( 0, 80,100) ), + Label( 'motorcycle' , 32 , 17 , 'vehicle' , 7 , True , False , ( 0, 0,230) ), + Label( 'bicycle' , 33 , 18 , 'vehicle' , 7 , True , False , (119, 11, 32) ), + Label( 'license plate' , -1 , -1 , 'vehicle' , 7 , False , True , ( 0, 0,142) ), +] + + +#-------------------------------------------------------------------------------- +# Create dictionaries for a fast lookup +#-------------------------------------------------------------------------------- + +# Please refer to the main method below for example usages! + +# name to label object +name2label = { label.name : label for label in labels } +# id to label object +id2label = { label.id : label for label in labels } +# trainId to label object +trainId2label = { label.trainId : label for label in reversed(labels) } +# category to list of label objects +category2labels = {} +for label in labels: + category = label.category + if category in category2labels: + category2labels[category].append(label) + else: + category2labels[category] = [label] + +#-------------------------------------------------------------------------------- +# Assure single instance name +#-------------------------------------------------------------------------------- + +# returns the label name that describes a single instance (if possible) +# e.g. 
input | output +# ---------------------- +# car | car +# cargroup | car +# foo | None +# foogroup | None +# skygroup | None +def assureSingleInstanceName( name ): + # if the name is known, it is not a group + if name in name2label: + return name + # test if the name actually denotes a group + if not name.endswith("group"): + return None + # remove group + name = name[:-len("group")] + # test if the new name exists + if not name in name2label: + return None + # test if the new name denotes a label that actually has instances + if not name2label[name].hasInstances: + return None + # all good then + return name + +#-------------------------------------------------------------------------------- +# Main for testing +#-------------------------------------------------------------------------------- + +# just a dummy main +if __name__ == "__main__": + # Print all the labels + print("List of cityscapes labels:") + print("") + print(" {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( 'name', 'id', 'trainId', 'category', 'categoryId', 'hasInstances', 'ignoreInEval' )) + print(" " + ('-' * 98)) + for label in labels: + print(" {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( label.name, label.id, label.trainId, label.category, label.categoryId, label.hasInstances, label.ignoreInEval )) + print("") + + print("Example usages:") + + # Map from name to label + name = 'car' + id = name2label[name].id + print("ID of label '{name}': {id}".format( name=name, id=id )) + + # Map from ID to label + category = id2label[id].category + print("Category of label with ID '{id}': {category}".format( id=id, category=category )) + + # Map from trainID to label + trainId = 0 + name = trainId2label[trainId].name + print("Name of label with trainID '{id}': {name}".format( id=trainId, name=name )) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/labels_cityPersons.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/labels_cityPersons.py new file mode 100644 index 0000000..91e5fd6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/helpers/labels_cityPersons.py @@ -0,0 +1,60 @@ +#!/usr/bin/python +# +# CityPersons (cp) labels +# + +from collections import namedtuple + + +#-------------------------------------------------------------------------------- +# Definitions +#-------------------------------------------------------------------------------- + +# a label and all meta information +LabelCp = namedtuple( 'LabelCp' , [ + + 'name' , # The identifier of this label, e.g. 'pedestrian', 'rider', ... . + # We use them to uniquely name a class + + 'id' , # An integer ID that is associated with this label. + # The IDs are used to represent the label in ground truth + + 'hasInstances', # Whether this label distinguishes between single instances or not + + 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored + # during evaluations or not + + 'color' , # The color of this label + ] ) + + +#-------------------------------------------------------------------------------- +# A list of all labels +#-------------------------------------------------------------------------------- + +# The 'ignore' label covers representations of humans, e.g. people on posters, reflections etc. +# Each annotation includes both the full bounding box (bbox) as well as a bounding box covering the visible area (bboxVis). +# The latter is obtained automatically from the segmentation masks. 
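+# Note: CityPersons uses only the six person-related classes listed below; 'ignore' (id 0) and
+# 'person group' (id 5) carry ignoreInEval=True and are therefore excluded from evaluation.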
+ +labelsCp = [ + # name id hasInstances ignoreInEval color + LabelCp( 'ignore' , 0 , False , True , (250,170, 30) ), + LabelCp( 'pedestrian' , 1 , True , False , (220, 20, 60) ), + LabelCp( 'rider' , 2 , True , False , ( 0, 0,142) ), + LabelCp( 'sitting person' , 3 , True , False , (107,142, 35) ), + LabelCp( 'person (other)' , 4 , True , False , (190,153,153) ), + LabelCp( 'person group' , 5 , False , True , (255, 0, 0) ), +] + + +#-------------------------------------------------------------------------------- +# Create dictionaries for a fast lookup +#-------------------------------------------------------------------------------- + +# Please refer to the main method below for example usages! + +# name to label object +name2labelCp = { label.name : label for label in labelsCp } +# id to label object +id2labelCp = { label.id : label for label in labelsCp } + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/make.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/make.sh new file mode 100644 index 0000000..41209bd --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/make.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +# check the enviroment info + +# PYTHON="/root/miniconda3/bin/python" +PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" +export PYTHONPATH="/msravcshare/yuyua/code/segmentation/openseg.pytorch":$PYTHONPATH + +cd ../../../ +${PYTHON} lib/metrics/cityscapes/setup.py build_ext --inplace diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/setup.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/setup.py new file mode 100644 index 0000000..abf3f31 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes/setup.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +# +# Enable cython support for eval metrics +# Run as +# setup.py build_ext --inplace +# +# WARNING: Only tested for Ubuntu 64bit OS. + +try: + from distutils.core import setup + from Cython.Build import cythonize +except: + print("Unable to setup. Please use pip to install: cython") + print("sudo pip install cython") +import os +import numpy + +os.environ["CC"] = "g++" +os.environ["CXX"] = "g++" + +setup(ext_modules = cythonize("lib/metrics/cityscapes/evaluation/addToConfusionMatrix.pyx"), + include_dirs=[numpy.get_include()]) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes_evaluator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes_evaluator.py new file mode 100644 index 0000000..076d020 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cityscapes_evaluator.py @@ -0,0 +1,710 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Evaluation of cityscapes. 
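+# Accumulates a label-id confusion matrix over prediction/ground-truth image pairs and reports
+# per-class IoU, instance-weighted nIoU, category-level scores and overall pixel accuracy.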
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import fnmatch +import platform +import pdb + +try: + from itertools import izip +except ImportError: + izip = zip + +# Cityscapes imports +try: + from lib.metrics.cityscapes.evaluation.csHelpers import * +except: + from cityscapes.evaluation.csHelpers import * + +# C Support +# Enable the cython support for faster evaluation, this is necessary for speeding up your model results +# Only tested for Ubuntu 64bit OS +CSUPPORT = True +# Check if C-Support is available for better performance +if CSUPPORT: + try: + import lib.metrics.cityscapes.evaluation.addToConfusionMatrix as addToConfusionMatrix + except: + CSUPPORT = False + + +# A class to collect all bunch of data +class CArgs(object): + def __init__(self, data_path=None, out_path=None, predict_path=None): + # Where to look for Cityscapes, note that data path is equal to gt path + if 'CITYSCAPES_DATASET' in os.environ: + self.cityscapesPath = os.environ['CITYSCAPES_DATASET'] + else: + self.cityscapesPath = os.path.join(data_path) + + if 'CITYSCAPES_EXPORT_DIR' in os.environ: + export_dir = os.environ['CITYSCAPES_EXPORT_DIR'] + if not os.path.isdir(export_dir): + raise ValueError("CITYSCAPES_EXPORT_DIR {} is not a directory".format(export_dir)) + self.exportFile = "{}/resultPixelLevelSemanticLabeling.json".format(export_dir) + else: + self.exportFile = os.path.join(out_path, "evaluationResults", "resultPixelLevelSemanticLabeling.json") + # Parameters that should be modified by user + self.groundTruthSearch = os.path.join( self.cityscapesPath, "*.png" ) + + # Remaining params + self.evalInstLevelScore = True + self.evalPixelAccuracy = True + self.evalLabels = [] + self.printRow = 5 + self.normalized = True + self.colorized = hasattr(sys.stderr, "isatty") and sys.stderr.isatty() and platform.system()=='Linux' + self.bold = colors.BOLD if self.colorized else "" + self.nocol = colors.ENDC if self.colorized else "" + self.JSONOutput = True + self.quiet = False + + self.avgClassSize = { + "bicycle" : 4672.3249222261 , + "caravan" : 36771.8241758242 , + "motorcycle" : 6298.7200839748 , + "rider" : 3930.4788056518 , + "bus" : 35732.1511111111 , + "train" : 67583.7075812274 , + "car" : 12794.0202738185 , + "person" : 3462.4756337644 , + "truck" : 27855.1264367816 , + "trailer" : 16926.9763313609 , + } + + # store some parameters for finding predictions in the args variable + # the values are filled when the method getPrediction is first called + self.predictionPath = predict_path + self.predictionWalk = None + + +## method part +def getPrediction( args, groundTruthFile ): + # determine the prediction path, if the method is first called + if not args.predictionPath: + rootPath = None + if 'CITYSCAPES_RESULTS' in os.environ: + rootPath = os.environ['CITYSCAPES_RESULTS'] + elif 'CITYSCAPES_DATASET' in os.environ: + rootPath = os.path.join( os.environ['CITYSCAPES_DATASET'] , "results" ) + else: + rootPath = os.path.join(os.path.dirname(os.path.realpath(__file__)),'..','..','results') + + if not os.path.isdir(rootPath): + printError("Could not find a result root folder. 
Please read the instructions of this method.") + + args.predictionPath = rootPath + + # walk the prediction path, if not happened yet + if not args.predictionWalk: + walk = [] + for root, dirnames, filenames in os.walk(args.predictionPath): + walk.append( (root,filenames) ) + args.predictionWalk = walk + + csFile = getCsFileInfo(groundTruthFile) + filePattern = "{}_{}_{}*.png".format( csFile.city , csFile.sequenceNb , csFile.frameNb ) + + predictionFile = None + for root, filenames in args.predictionWalk: + for filename in fnmatch.filter(filenames, filePattern): + if not predictionFile: + predictionFile = os.path.join(root, filename) + else: + printError("Found multiple predictions for ground truth {}".format(groundTruthFile)) + + if not predictionFile: + printError("Found no prediction for ground truth {}".format(groundTruthFile)) + + return predictionFile + +# Generate empty confusion matrix and create list of relevant labels +def generateMatrix(args): + args.evalLabels = [] + for label in labels: + if (label.id < 0): + continue + # we append all found labels, regardless of being ignored + args.evalLabels.append(label.id) + maxId = max(args.evalLabels) + # We use longlong type to be sure that there are no overflows + return np.zeros(shape=(maxId + 1, maxId + 1), dtype=np.ulonglong) + + +def generateInstanceStats(args): + instanceStats = {} + instanceStats["classes"] = {} + instanceStats["categories"] = {} + for label in labels: + if label.hasInstances and not label.ignoreInEval: + instanceStats["classes"][label.name] = {} + instanceStats["classes"][label.name]["tp"] = 0.0 + instanceStats["classes"][label.name]["tpWeighted"] = 0.0 + instanceStats["classes"][label.name]["fn"] = 0.0 + instanceStats["classes"][label.name]["fnWeighted"] = 0.0 + for category in category2labels: + labelIds = [] + allInstances = True + for label in category2labels[category]: + if label.id < 0: + continue + if not label.hasInstances: + allInstances = False + break + labelIds.append(label.id) + if not allInstances: + continue + + instanceStats["categories"][category] = {} + instanceStats["categories"][category]["tp"] = 0.0 + instanceStats["categories"][category]["tpWeighted"] = 0.0 + instanceStats["categories"][category]["fn"] = 0.0 + instanceStats["categories"][category]["fnWeighted"] = 0.0 + instanceStats["categories"][category]["labelIds"] = labelIds + + return instanceStats + + +# Get absolute or normalized value from field in confusion matrix. 
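+# With args.normalized set, the entry is divided by its row sum (the number of ground-truth
+# pixels of label i), so each printed row of the confusion matrix sums to one.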
+def getMatrixFieldValue(confMatrix, i, j, args): + if args.normalized: + rowSum = confMatrix[i].sum() + if (rowSum == 0): + return float('nan') + return float(confMatrix[i][j]) / rowSum + else: + return confMatrix[i][j] + + +# Calculate and return IOU score for a particular label +def getIouScoreForLabel(label, confMatrix, args): + if id2label[label].ignoreInEval: + return float('nan') + + # the number of true positive pixels for this label + # the entry on the diagonal of the confusion matrix + tp = np.longlong(confMatrix[label, label]) + + # the number of false negative pixels for this label + # the row sum of the matching row in the confusion matrix + # minus the diagonal entry + fn = np.longlong(confMatrix[label, :].sum()) - tp + + # the number of false positive pixels for this labels + # Only pixels that are not on a pixel with ground truth label that is ignored + # The column sum of the corresponding column in the confusion matrix + # without the ignored rows and without the actual label of interest + notIgnored = [l for l in args.evalLabels if not id2label[l].ignoreInEval and not l == label] + fp = np.longlong(confMatrix[notIgnored, label].sum()) + + # the denominator of the IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + + + + + +# Calculate and return IOU score for a particular label +def getInstanceIouScoreForLabel(label, confMatrix, instStats, args): + if id2label[label].ignoreInEval: + return float('nan') + + labelName = id2label[label].name + if not labelName in instStats["classes"]: + return float('nan') + + tp = instStats["classes"][labelName]["tpWeighted"] + fn = instStats["classes"][labelName]["fnWeighted"] + # false postives computed as above + notIgnored = [l for l in args.evalLabels if not id2label[l].ignoreInEval and not l == label] + fp = np.longlong(confMatrix[notIgnored, label].sum()) + + # the denominator of the IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + + +# Calculate prior for a particular class id. +def getPrior(label, confMatrix): + return float(confMatrix[label, :].sum()) / confMatrix.sum() + + +# Get average of scores. +# Only computes the average over valid entries. 
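+# NaN entries (labels that are ignored in evaluation or absent from the ground truth) are skipped.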
+def getScoreAverage(scoreList, args): + validScores = 0 + scoreSum = 0.0 + for score in scoreList: + if not math.isnan(scoreList[score]): + validScores += 1 + scoreSum += scoreList[score] + if validScores == 0: + return float('nan') + return scoreSum / validScores + + +# Calculate and return IOU score for a particular category +def getIouScoreForCategory(category, confMatrix, args): + # All labels in this category + labels = category2labels[category] + # The IDs of all valid labels in this category + labelIds = [label.id for label in labels if not label.ignoreInEval and label.id in args.evalLabels] + # If there are no valid labels, then return NaN + if not labelIds: + return float('nan') + + # the number of true positive pixels for this category + # this is the sum of all entries in the confusion matrix + # where row and column belong to a label ID of this category + tp = np.longlong(confMatrix[labelIds, :][:, labelIds].sum()) + + # the number of false negative pixels for this category + # that is the sum of all rows of labels within this category + # minus the number of true positive pixels + fn = np.longlong(confMatrix[labelIds, :].sum()) - tp + + # the number of false positive pixels for this category + # we count the column sum of all labels within this category + # while skipping the rows of ignored labels and of labels within this category + notIgnoredAndNotInCategory = [l for l in args.evalLabels if + not id2label[l].ignoreInEval and id2label[l].category != category] + fp = np.longlong(confMatrix[notIgnoredAndNotInCategory, :][:, labelIds].sum()) + + # the denominator of the IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + + +# Calculate and return IOU score for a particular category +def getInstanceIouScoreForCategory(category, confMatrix, instStats, args): + if not category in instStats["categories"]: + return float('nan') + labelIds = instStats["categories"][category]["labelIds"] + + tp = instStats["categories"][category]["tpWeighted"] + fn = instStats["categories"][category]["fnWeighted"] + + # the number of false positive pixels for this category + # same as above + notIgnoredAndNotInCategory = [l for l in args.evalLabels if + not id2label[l].ignoreInEval and id2label[l].category != category] + fp = np.longlong(confMatrix[notIgnoredAndNotInCategory, :][:, labelIds].sum()) + + # the denominator of the IOU score + denom = (tp + fp + fn) + if denom == 0: + return float('nan') + + # return IOU + return float(tp) / denom + + +# create a dictionary containing all relevant results +def createResultDict(confMatrix, classScores, classInstScores, categoryScores, categoryInstScores, + perImageStats, args): + # write JSON result file + wholeData = {} + wholeData["confMatrix"] = confMatrix.tolist() + wholeData["priors"] = {} + wholeData["labels"] = {} + for label in args.evalLabels: + wholeData["priors"][id2label[label].name] = getPrior(label, confMatrix) + wholeData["labels"][id2label[label].name] = label + wholeData["classScores"] = classScores + wholeData["classInstScores"] = classInstScores + wholeData["categoryScores"] = categoryScores + wholeData["categoryInstScores"] = categoryInstScores + wholeData["averageScoreClasses"] = getScoreAverage(classScores, args) + wholeData["averageScoreInstClasses"] = getScoreAverage(classInstScores, args) + wholeData["averageScoreCategories"] = getScoreAverage(categoryScores, args) + wholeData["averageScoreInstCategories"] = getScoreAverage(categoryInstScores, args) + + if 
perImageStats: + wholeData["perImageScores"] = perImageStats + + return wholeData + + +def writeJSONFile(wholeData, args): + path = os.path.dirname(args.exportFile) + ensurePath(path) + writeDict2JSON(wholeData, args.exportFile) + + +# Print confusion matrix +def printConfMatrix(confMatrix, args): + # print line + print("\b{text:{fill}>{width}}".format(width=15, fill='-', text=" "), end=' ') + for label in args.evalLabels: + print("\b{text:{fill}>{width}}".format(width=args.printRow + 2, fill='-', text=" "), end=' ') + print("\b{text:{fill}>{width}}".format(width=args.printRow + 3, fill='-', text=" ")) + + # print label names + print("\b{text:>{width}} |".format(width=13, text=""), end=' ') + for label in args.evalLabels: + print("\b{text:^{width}} |".format(width=args.printRow, text=id2label[label].name[0]), end=' ') + print("\b{text:>{width}} |".format(width=6, text="Prior")) + + # print line + print("\b{text:{fill}>{width}}".format(width=15, fill='-', text=" "), end=' ') + for label in args.evalLabels: + print("\b{text:{fill}>{width}}".format(width=args.printRow + 2, fill='-', text=" "), end=' ') + print("\b{text:{fill}>{width}}".format(width=args.printRow + 3, fill='-', text=" ")) + + # print matrix + for x in range(0, confMatrix.shape[0]): + if (not x in args.evalLabels): + continue + # get prior of this label + prior = getPrior(x, confMatrix) + # skip if label does not exist in ground truth + if prior < 1e-9: + continue + + # print name + name = id2label[x].name + if len(name) > 13: + name = name[:13] + print("\b{text:>{width}} |".format(width=13, text=name), end=' ') + # print matrix content + for y in range(0, len(confMatrix[x])): + if (not y in args.evalLabels): + continue + matrixFieldValue = getMatrixFieldValue(confMatrix, x, y, args) + print(getColorEntry(matrixFieldValue, args) + "\b{text:>{width}.2f} ".format(width=args.printRow, + text=matrixFieldValue) + args.nocol, + end=' ') + # print prior + print(getColorEntry(prior, args) + "\b{text:>{width}.4f} ".format(width=6, text=prior) + args.nocol) + # print line + print("\b{text:{fill}>{width}}".format(width=15, fill='-', text=" "), end=' ') + for label in args.evalLabels: + print("\b{text:{fill}>{width}}".format(width=args.printRow + 2, fill='-', text=" "), end=' ') + print("\b{text:{fill}>{width}}".format(width=args.printRow + 3, fill='-', text=" "), end=' ') + + +# Print intersection-over-union scores for all classes. +def printClassScores(scoreList, instScoreList, args): + if (args.quiet): + return + print(args.bold + "classes IoU nIoU" + args.nocol) + print("--------------------------------") + for label in args.evalLabels: + if (id2label[label].ignoreInEval): + continue + labelName = str(id2label[label].name) + iouStr = getColorEntry(scoreList[labelName], args) + "{val:>5.6f}".format( + val=scoreList[labelName]) + args.nocol + niouStr = getColorEntry(instScoreList[labelName], args) + "{val:>5.6f}".format( + val=instScoreList[labelName]) + args.nocol + print("{:<14}: ".format(labelName) + iouStr + " " + niouStr) + + +# Print intersection-over-union scores for all categorys. 
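+# 'IoU' is the plain intersection-over-union; 'nIoU' is the instance-weighted score built from
+# the per-instance tpWeighted/fnWeighted counts.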
+def printCategoryScores(scoreDict, instScoreDict, args): + if (args.quiet): + return + print(args.bold + "categories IoU nIoU" + args.nocol) + print("--------------------------------") + for categoryName in scoreDict: + if all(label.ignoreInEval for label in category2labels[categoryName]): + continue + iouStr = getColorEntry(scoreDict[categoryName], args) + "{val:>5.6f}".format( + val=scoreDict[categoryName]) + args.nocol + niouStr = getColorEntry(instScoreDict[categoryName], args) + "{val:>5.6f}".format( + val=instScoreDict[categoryName]) + args.nocol + print("{:<14}: ".format(categoryName) + iouStr + " " + niouStr) + + +class EvalPixel(): + def __init__(self, args, predictionImgList = None, groundTruthImgList = None): + self.args = args + self.predictionImgList = predictionImgList + self.groundTruthImgList = groundTruthImgList + if predictionImgList is None or groundTruthImgList is None: + self.groundTruthImgList, self.predictionImgList = self.getDefaultData(self.args) + + # evaluate image in two lists + def evaluateImgLists(self,predictionImgList, groundTruthImgList, args): + if len(predictionImgList) != len(groundTruthImgList): + printError("List of images for prediction and groundtruth are not of equal size.") + confMatrix = generateMatrix(args) + instStats = generateInstanceStats(args) + perImageStats = {} + nbPixels = 0 + + if not args.quiet: + print("Evaluating {} pairs of images...".format(len(predictionImgList))) + + # Evaluate all pairs of images and save them into a matrix + for i in range(len(predictionImgList)): + predictionImgFileName = predictionImgList[i] + groundTruthImgFileName = groundTruthImgList[i] + # print "Evaluate ", predictionImgFileName, "<>", groundTruthImgFileName + nbPixels += self.evaluatePair(predictionImgFileName, groundTruthImgFileName, confMatrix, instStats, + perImageStats, args) + + # sanity check + + if confMatrix.sum() != nbPixels: + pass + # printError( + # 'Number of analyzed pixels and entries in confusion matrix disagree: contMatrix {}, pixels {}'.format( + # confMatrix.sum(), nbPixels)) + + if not args.quiet: + print("\rImages Processed: {}".format(i + 1), end=' ') + sys.stdout.flush() + if not args.quiet: + print("\n") + + # sanity check + if confMatrix.sum() != nbPixels: + pass + # printError( + # 'Number of analyzed pixels and entries in confusion matrix disagree: contMatrix {}, pixels {}'.format( + # confMatrix.sum(), nbPixels)) + + # print confusion matrix + if (not args.quiet): + printConfMatrix(confMatrix, args) + + # print accuracy + # Calculate and return IOU score for a particular label + # pdb.set_trace() + acc = np.diag(confMatrix).sum() / confMatrix.sum() + print("pixel accuracy") + print(acc) + + # Calculate IOU scores on class level from matrix + classScoreList = {} + for label in args.evalLabels: + labelName = id2label[label].name + classScoreList[labelName] = getIouScoreForLabel(label, confMatrix, args) + + # Calculate instance IOU scores on class level from matrix + classInstScoreList = {} + for label in args.evalLabels: + labelName = id2label[label].name + classInstScoreList[labelName] = getInstanceIouScoreForLabel(label, confMatrix, instStats, args) + + # Print IOU scores + if (not args.quiet): + print("") + print("") + printClassScores(classScoreList, classInstScoreList, args) + iouAvgStr = getColorEntry(getScoreAverage(classScoreList, args), args) + "{avg:5.6f}".format( + avg=getScoreAverage(classScoreList, args)) + args.nocol + niouAvgStr = getColorEntry(getScoreAverage(classInstScoreList, args), args) + 
"{avg:5.6f}".format( + avg=getScoreAverage(classInstScoreList, args)) + args.nocol + print("--------------------------------") + print("Score Average : " + iouAvgStr + " " + niouAvgStr) + print("--------------------------------") + print("") + + # Calculate IOU scores on category level from matrix + categoryScoreList = {} + for category in category2labels.keys(): + categoryScoreList[category] = getIouScoreForCategory(category, confMatrix, args) + + # Calculate instance IOU scores on category level from matrix + categoryInstScoreList = {} + for category in category2labels.keys(): + categoryInstScoreList[category] = getInstanceIouScoreForCategory(category, confMatrix, instStats, args) + + # Print IOU scores + if (not args.quiet): + print("") + printCategoryScores(categoryScoreList, categoryInstScoreList, args) + iouAvgStr = getColorEntry(getScoreAverage(categoryScoreList, args), args) + "{avg:5.6f}".format( + avg=getScoreAverage(categoryScoreList, args)) + args.nocol + niouAvgStr = getColorEntry(getScoreAverage(categoryInstScoreList, args), args) + "{avg:5.6f}".format( + avg=getScoreAverage(categoryInstScoreList, args)) + args.nocol + print("--------------------------------") + print("Score Average : " + iouAvgStr + " " + niouAvgStr) + print("--------------------------------") + print("") + + # write result file + allResultsDict = createResultDict(confMatrix, classScoreList, classInstScoreList, categoryScoreList, + categoryInstScoreList, perImageStats, args) + # writeJSONFile(allResultsDict, args) + + # return confusion matrix + return allResultsDict + + # Main evaluation method. Evaluates pairs of prediction and ground truth + # images which are passed as arguments. + def evaluatePair(self,predictionImgFileName, groundTruthImgFileName, confMatrix, instanceStats, perImageStats, args): + # Loading all resources for evaluation. 
+ try: + predictionImg = Image.open(predictionImgFileName) + predictionNp = np.array(predictionImg) + except: + printError("Unable to load " + predictionImgFileName) + try: + groundTruthImg = Image.open(groundTruthImgFileName) + groundTruthNp = np.array(groundTruthImg) + except: + printError("Unable to load " + groundTruthImgFileName) + # load ground truth instances, if needed + if args.evalInstLevelScore: + groundTruthInstanceImgFileName = groundTruthImgFileName.replace("labelIds", "instanceIds") + try: + instanceImg = Image.open(groundTruthInstanceImgFileName) + instanceNp = np.array(instanceImg) + except: + printError("Unable to load " + groundTruthInstanceImgFileName) + + # Check for equal image sizes + if (predictionImg.size[0] != groundTruthImg.size[0]): + printError( + "Image widths of " + predictionImgFileName + " and " + groundTruthImgFileName + " are not equal.") + if (predictionImg.size[1] != groundTruthImg.size[1]): + printError( + "Image heights of " + predictionImgFileName + " and " + groundTruthImgFileName + " are not equal.") + if (len(predictionNp.shape) != 2): + printError("Predicted image has multiple channels.") + + imgWidth = predictionImg.size[0] + imgHeight = predictionImg.size[1] + nbPixels = imgWidth * imgHeight + + # Evaluate images + if (CSUPPORT): + # using cython + confMatrix = addToConfusionMatrix.cEvaluatePair(predictionNp, groundTruthNp, confMatrix, args.evalLabels) + + else: + # the slower python way + for (groundTruthImgPixel, predictionImgPixel) in izip(groundTruthImg.getdata(), predictionImg.getdata()): + if (not groundTruthImgPixel in args.evalLabels): + printError("Unknown label with id {:}".format(groundTruthImgPixel)) + + confMatrix[groundTruthImgPixel][predictionImgPixel] += 1 + + if args.evalInstLevelScore: + # Generate category masks + categoryMasks = {} + for category in instanceStats["categories"]: + categoryMasks[category] = np.in1d(predictionNp, + instanceStats["categories"][category]["labelIds"]).reshape( + predictionNp.shape) + + instList = np.unique(instanceNp[instanceNp > 1000]) + for instId in instList: + labelId = int(instId / 1000) + label = id2label[labelId] + if label.ignoreInEval: + continue + + mask = instanceNp == instId + instSize = np.count_nonzero(mask) + + tp = np.count_nonzero(predictionNp[mask] == labelId) + fn = instSize - tp + + weight = args.avgClassSize[label.name] / float(instSize) + tpWeighted = float(tp) * weight + fnWeighted = float(fn) * weight + + instanceStats["classes"][label.name]["tp"] += tp + instanceStats["classes"][label.name]["fn"] += fn + instanceStats["classes"][label.name]["tpWeighted"] += tpWeighted + instanceStats["classes"][label.name]["fnWeighted"] += fnWeighted + + category = label.category + if category in instanceStats["categories"]: + catTp = 0 + catTp = np.count_nonzero(np.logical_and(mask, categoryMasks[category])) + catFn = instSize - catTp + + catTpWeighted = float(catTp) * weight + catFnWeighted = float(catFn) * weight + + instanceStats["categories"][category]["tp"] += catTp + instanceStats["categories"][category]["fn"] += catFn + instanceStats["categories"][category]["tpWeighted"] += catTpWeighted + instanceStats["categories"][category]["fnWeighted"] += catFnWeighted + + if True: # evaluate pixel accuracy + notIgnoredLabels = [l for l in args.evalLabels if not id2label[l].ignoreInEval] + notIgnoredPixels = np.in1d(groundTruthNp, notIgnoredLabels, invert=True).reshape(groundTruthNp.shape) + erroneousPixels = np.logical_and(notIgnoredPixels, (predictionNp != groundTruthNp)) + 
perImageStats[predictionImgFileName] = {} + perImageStats[predictionImgFileName]["nbNotIgnoredPixels"] = np.count_nonzero(notIgnoredPixels) + perImageStats[predictionImgFileName]["nbCorrectPixels"] = np.count_nonzero(erroneousPixels) + # print("all pixel_count:"+str(perImageStats[predictionImgFileName]["nbNotIgnoredPixels"])) + # print("correct pixel_count:"+str(perImageStats[predictionImgFileName]["nbCorrectPixels"])) + # print("pixel_accuracy:"+str((float(perImageStats[predictionImgFileName]["nbCorrectPixels"])/perImageStats[predictionImgFileName]["nbNotIgnoredPixels"]))) + + return nbPixels + + + # launch the process + def run(self): + self.evaluateImgLists(self.predictionImgList, self.groundTruthImgList, self.args) + + # get the default data + def getDefaultData(self, args): + groundTruthImgList, predictionImgList = [], [] + groundTruthImgList = glob.glob(args.groundTruthSearch) + if not groundTruthImgList: + printError("Cannot find any ground truth images to use for evaluation. Searched for: {}".format( + args.groundTruthSearch)) + # get the corresponding prediction for each ground truth imag + for gt in groundTruthImgList: + predictionImgList.append(getPrediction(args, gt)) + return groundTruthImgList, predictionImgList + + +class CityscapesEvaluator(object): + + def evaluate(self, pred_dir=None, gt_dir=None): + """ + :param pred_dir: directory of model output results(must be consistent with val directory) + :param gt_dir: directory of cityscapes data(root) + :return: + """ + pred_path = pred_dir + data_path = gt_dir + print("evaluate the result...") + args = CArgs(data_path=data_path, out_path=data_path, predict_path=pred_path) + ob = EvalPixel(args) + ob.run() + + +if __name__ == '__main__': + # python cityscapes_evaluator.py --gt_dir ~/DataSet/CityScape/gtFine/val + # --pred_dir ~/Projects/PyTorchCV/val/results/seg/cityscapes/test_dir/image/label + parser = argparse.ArgumentParser() + parser.add_argument('--gt_dir', default=None, type=str, + dest='gt_dir', help='The directory of ground truth.') + parser.add_argument('--pred_dir', default=None, type=str, + dest='pred_dir', help='The directory of predicted labels.') + + args = parser.parse_args() + + cityscapes_evaluator = CityscapesEvaluator() + cityscapes_evaluator.evaluate(pred_dir=args.pred_dir, gt_dir=args.gt_dir) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cocostuff_evaluator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cocostuff_evaluator.py new file mode 100644 index 0000000..c60daac --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/cocostuff_evaluator.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: RainbowSecret(yuyua@microsoft.com) + + +import argparse +import os +import pdb + +import numpy as np + +from lib.utils.helpers.image_helper import ImageHelper +from lib.utils.tools.logger import Logger as Log +from lib.utils.tools.configer import Configer +from lib.metrics.running_score import RunningScore + + +class COCOStuffEvaluator(object): + def __init__(self, configer): + self.configer = configer + self.seg_running_score = RunningScore(configer) + self.id_to_trainid = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, + 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, + 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 27: 25, 28: 26, 31: 27, 32: 28, + 33: 29, 34: 30, 35: 31, 36: 32, 37: 33, 38: 34, 39: 35, 40: 36, 41: 37, + 42: 38, 43: 39, 44: 40, 46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, + 52: 47, 53: 48, 
54: 49, 55: 50, 56: 51, 57: 52, 58: 53, 59: 54, 60: 55, + 61: 56, 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64, + 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, 81: 72, 82: 73, + 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80, 92: 81, 93: 82, + 94: 83, 95: 84, 96: 85, 97: 86, 98: 87, 99: 88, 100: 89, 101: 90, 102: 91, + 103: 92, 104: 93, 105: 94, 106: 95, 107: 96, 108: 97, 109: 98, 110: 99, + 111: 100, 112: 101, 113: 102, 114: 103, 115: 104, 116: 105, 117: 106, 118: 107, + 119: 108, 120: 109, 121: 110, 122: 111, 123: 112, 124: 113, 125: 114, 126: 115, + 127: 116, 128: 117, 129: 118, 130: 119, 131: 120, 132: 121, 133: 122, 134: 123, + 135: 124, 136: 125, 137: 126, 138: 127, 139: 128, 140: 129, 141: 130, 142: 131, + 143: 132, 144: 133, 145: 134, 146: 135, 147: 136, 148: 137, 149: 138, 150: 139, + 151: 140, 152: 141, 153: 142, 154: 143, 155: 144, 156: 145, 157: 146, 158: 147, + 159: 148, 160: 149, 161: 150, 162: 151, 163: 152, 164: 153, 165: 154, 166: 155, + 167: 156, 168: 157, 169: 158, 170: 159, 171: 160, 172: 161, 173: 162, 174: 163, + 175: 164, 176: 165, 177: 166, 178: 167, 179: 168, 180: 169, 181: 170, 182: 171, + 12: 0, 26: 0, 29: 0, 30: 0, 45: 0, 66: 0, 68: 0, 69: 0, 71: 0, 83: 0, 91: 0} + + + def relabel(self, labelmap): + # label + label_copy = labelmap.copy() + for k, v in self.id_to_trainid.items(): + label_copy[labelmap == k] = v + return label_copy.astype(np.uint8) + + def reduce_one(self, labelmap): + return (labelmap - 1).astype(np.uint8) + + def add_one(self, labelmap): + return (labelmap + 1).astype(np.uint8) + + def evaluate(self, pred_dir, gt_dir): + img_cnt = 0 + for filename in os.listdir(pred_dir): + print(filename) + pred_path = os.path.join(pred_dir, filename) + gt_path = os.path.join(gt_dir, filename) + predmap = ImageHelper.img2np(ImageHelper.read_image(pred_path, tool='pil', mode='P')) + gtmap = ImageHelper.img2np(ImageHelper.read_image(gt_path, tool='pil', mode='P')) + + predmap = self.relabel(predmap) + gtmap = self.relabel(gtmap) + gtmap[gtmap == 0] = 255 + + self.seg_running_score.update(predmap[np.newaxis, :, :], gtmap[np.newaxis, :, :]) + img_cnt += 1 + + Log.info('Evaluate {} images'.format(img_cnt)) + Log.info('mIOU: {}'.format(self.seg_running_score.get_mean_iou())) + Log.info('Pixel ACC: {}'.format(self.seg_running_score.get_pixel_acc())) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--configs', default=None, type=str, + dest='configs', help='The configs file of pose.') + parser.add_argument('--gt_dir', default=None, type=str, + dest='gt_dir', help='The groundtruth annotations.') + parser.add_argument('--pred_dir', default=None, type=str, + dest='pred_dir', help='The label dir of predict annotations.') + args = parser.parse_args() + + cocostuff_evaluator = COCOStuffEvaluator(Configer(configs=args.configs)) + cocostuff_evaluator.evaluate(args.pred_dir, args.gt_dir) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/pascal_context_evaluator.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/pascal_context_evaluator.py new file mode 100644 index 0000000..455b5b6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/pascal_context_evaluator.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: RainbowSecret(yuyua@microsoft.com) + + +import argparse +import os +import pdb + +import numpy as np + +from lib.utils.helpers.image_helper import ImageHelper +from lib.utils.tools.logger import Logger as Log +from 
lib.utils.tools.configer import Configer +from lib.metrics.running_score import RunningScore + + +class PascalContextEvaluator(object): + def __init__(self, configer): + self.configer = configer + self.seg_running_score = RunningScore(configer) + + def relabel(self, labelmap): + return (labelmap - 1).astype(np.uint8) + + def evaluate(self, pred_dir, gt_dir): + img_cnt = 0 + for filename in os.listdir(pred_dir): + print(filename) + + pred_path = os.path.join(pred_dir, filename) + gt_path = os.path.join(gt_dir, filename) + predmap = ImageHelper.img2np(ImageHelper.read_image(pred_path, tool='pil', mode='P')) + gtmap = ImageHelper.img2np(ImageHelper.read_image(gt_path, tool='pil', mode='P')) + + predmap = self.relabel(predmap) + gtmap = self.relabel(gtmap) + + self.seg_running_score.update(predmap[np.newaxis, :, :], gtmap[np.newaxis, :, :]) + img_cnt += 1 + + Log.info('Evaluate {} images'.format(img_cnt)) + Log.info('mIOU: {}'.format(self.seg_running_score.get_mean_iou())) + Log.info('Pixel ACC: {}'.format(self.seg_running_score.get_pixel_acc())) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--configs', default=None, type=str, + dest='configs', help='The configs file of pose.') + parser.add_argument('--gt_dir', default=None, type=str, + dest='gt_dir', help='The groundtruth annotations.') + parser.add_argument('--pred_dir', default=None, type=str, + dest='pred_dir', help='The label dir of predict annotations.') + args = parser.parse_args() + + pcontext_evaluator = PascalContextEvaluator(Configer(configs=args.configs)) + pcontext_evaluator.evaluate(args.pred_dir, args.gt_dir) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/running_score.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/running_score.py new file mode 100644 index 0000000..af610cd --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/running_score.py @@ -0,0 +1,213 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: JingyiXie, RainbowSecret, Donny You +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb +import numpy as np +import models.protoseg_core.lib.utils.distributed as dist + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +class SimpleCounterRunningScore(object): + + def __init__(self, ignore_index=-1): + self.correct_count = 0 + self.total_count = 0 + self.reduced_counters = None + + def update(self, pred, gt): + self.reduced_counters = None + self.correct_count += (pred == gt).sum() + self.total_count += (gt != -1).sum() + + def reduce_scores(self): + counters = np.array([self.total_count, self.correct_count]) + if dist.is_distributed(): + counters = dist.all_reduce_numpy(counters) + self.reduced_counters = counters + + def get_mean_acc(self): + if self.reduced_counters is None: + self.reduce_scores() + total_count, correct_count = list(self.reduced_counters) + return correct_count / max(1, total_count) + + def reset(self): + self.correct_count = self.total_count = 0 + self.reduced_counters = None + + +class MultiLabelRunningScore(object): + """ + Suppose label[p] is N-dim multi-hot vector, and pred[p] is N-dim 
logits. THRESHOLD is the threshold. + We consider a location `p` as correct, if either is true: + 1) label[p] has at least one non-zero elements and label[p][argmax(pred[p])] == 1, i.e., prediction with highest confidence is correct. + 2) label[p] are all zeros, and all elements of pred[p] are lower than threshold. + """ + + def __init__(self, ignore_index=-1): + self.ignore_index = ignore_index + self.correct_count = 0 + self.total_count = 0 + self.reduced_counters = None + + def update(self, dir_pred, dir_gt, keep_mask): + self.reduced_counters = None + keep_mask = keep_mask & (dir_gt.sum(axis=-1) > 0) + dir_gt = dir_gt[keep_mask, :] + + dir_pred = dir_pred[keep_mask] + guess_index = dir_pred + no_offset_mask = dir_pred == dir_gt.shape[-1] + dir_pred[no_offset_mask] = 0 + + guess_index = np.arange(guess_index.shape[0]) * dir_gt.shape[-1] + guess_index + + correct = np.take( + dir_gt, + guess_index, + ) + + correct = ((correct != 0) & ~no_offset_mask).sum() + total = dir_gt.shape[0] + + self.total_count += total + self.correct_count += correct + + def reduce_scores(self): + counters = np.array([self.total_count, self.correct_count]) + if dist.is_distributed(): + counters = dist.all_reduce_numpy(counters) + self.reduced_counters = counters + + def _get_scores(self): + """Returns accuracy score evaluation result. + - overall accuracy + - mean accuracy + - mean IU + - fwavacc + """ + if self.reduced_counters is None: + self.reduce_scores() + total_count, correct_count = list(self.reduced_counters) + return correct_count / max(1, total_count), 0, 0, 0 + + def get_mean_iou(self): + return self._get_scores()[3] + + def get_pixel_acc(self): + return self._get_scores()[0] + + def get_mean_acc(self): + return self._get_scores()[1] + + def reset(self): + self.total_count = self.correct_count = 0 + self.reduced_counters = None + + + +class RunningScore(object): + + def __init__(self, configer, num_classes=None, ignore_index=None): + self.configer = configer + if num_classes is None: + self.n_classes = self.configer.get('data', 'num_classes') + else: + self.n_classes = num_classes + Log.info(self.n_classes) + self.ignore_index = ignore_index + self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) + self.reduced_confusion_matrix = None + + def get_F1_score(self): + assert self.n_classes == 2 + TN, FN, FP, TP = self.confusion_matrix.flatten() + precision = TP / (TP + FP) + recall = TP / (TP + FN) + return 2 / (1 / precision + 1 / recall), precision, recall + + def _fast_hist(self, label_true, label_pred, n_class): + mask = (label_true >= 0) & (label_true < n_class) + mask &= (label_pred >= 0) & (label_pred < n_class) + + if self.ignore_index is not None: + mask = mask & (label_true != self.ignore_index) + + hist = np.bincount( + n_class * label_true[mask].astype(int) + + label_pred[mask], minlength=n_class**2) + + # print(np.unique(label_true)) + # print(np.unique(label_pred)) + hist = hist.reshape(n_class, n_class) + + return hist + + def update(self, label_preds, label_trues): + self.reduced_confusion_matrix = None + for lt, lp in zip(label_trues, label_preds): + self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) + + def reduce_scores(self): + if dist.is_distributed(): + hist = dist.all_reduce_numpy(self.confusion_matrix) + else: + hist = self.confusion_matrix + self.reduced_confusion_matrix = hist + + def _get_scores(self): + """Returns accuracy score evaluation result. 
+ - overall accuracy + - mean accuracy + - mean IU + - fwavacc + """ + if self.reduced_confusion_matrix is None: + self.reduce_scores() + hist = self.reduced_confusion_matrix + + acc = np.diag(hist).sum() / hist.sum() + acc_cls_list = acc_cls = np.diag(hist) / hist.sum(axis=1) + # print(acc_cls) + # print('category-wise mean accuracy: ', acc_cls) + + acc_cls = np.nanmean(acc_cls) + iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) + # print(iu) + # print(np.nanmean(iu[iu > 0])) + # print('category-wise mean iou: ', iu) + + mean_iu = np.nanmean(iu) + freq = hist.sum(axis=1) / hist.sum() + fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() + cls_iu = dict(zip(range(self.n_classes), iu)) + + return acc, acc_cls_list, fwavacc, mean_iu, cls_iu + + def get_mean_iou(self): + return self._get_scores()[3] + + def get_pixel_acc(self): + return self._get_scores()[0] + + def get_mean_acc(self): + return self._get_scores()[1] + + def get_cls_iou(self): + return self._get_scores()[-1] + + def reset(self): + self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) + self.reduced_confusion_matrix = None diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/running_score_mp.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/running_score_mp.py new file mode 100644 index 0000000..6d0ccd7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/metrics/running_score_mp.py @@ -0,0 +1,99 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: JingyiXie, RainbowSecret, Donny You +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb +import numpy as np + +class RunningScore(object): + + def __init__(self, configer, num_classes=None, ignore_index=None): + self.configer = configer + if num_classes is None: + self.n_classes = self.configer.get('data', 'num_classes') + else: + self.n_classes = num_classes + self.ignore_index = ignore_index + self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) + + def get_F1_score(self): + assert self.n_classes == 2 + TN, FN, FP, TP = self.confusion_matrix.flatten() + precision = TP / (TP + FP) + recall = TP / (TP + FN) + return 2 / (1 / precision + 1 / recall), precision, recall + + def _fast_hist(self, label_true, label_pred, n_class): + mask = (label_true >= 0) & (label_true < n_class) + + if self.ignore_index is not None: + mask = mask & (label_true != self.ignore_index) + + hist = np.bincount( + n_class * label_true[mask].astype(int) + + label_pred[mask], minlength=n_class**2) + + # print(np.unique(label_true)) + # print(np.unique(label_pred)) + hist = hist.reshape(n_class, n_class) + + return hist + + def hist(self, label_preds, label_trues): + cm = 0. + for lt, lp in zip(label_trues, label_preds): + cm = cm + self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) + return cm + + def gather_hist(self, hists): + for x in hists: + self.confusion_matrix += x + + def _get_scores(self): + """Returns accuracy score evaluation result. 
+ - overall accuracy + - mean accuracy + - mean IU + - fwavacc + """ + hist = self.confusion_matrix + acc = np.diag(hist).sum() / hist.sum() + acc_cls = np.diag(hist) / hist.sum(axis=1) + # print('category-wise mean accuracy: ', acc_cls) + + acc_cls = np.nanmean(acc_cls) + iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) + # print('category-wise mean iou: ', iu) + + mean_iu = np.nanmean(iu) + freq = hist.sum(axis=1) / hist.sum() + fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() + cls_iu = dict(zip(range(self.n_classes), iu)) + + return acc, acc_cls, fwavacc, mean_iu, cls_iu + + def get_mean_iou(self): + return self._get_scores()[3] + + def get_cls_iu(self): + return self._get_scores()[4] + + def get_pixel_acc(self): + return self._get_scores()[0] + + def get_mean_acc(self): + return self._get_scores()[1] + + def reset(self): + self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/backbone_selector.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/backbone_selector.py new file mode 100644 index 0000000..9569963 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/backbone_selector.py @@ -0,0 +1,63 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Donny You, RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from models.protoseg_core.lib.models.backbones.resnet.resnet_backbone import ResNetBackbone +from models.protoseg_core.lib.models.backbones.hrnet.hrnet_backbone import HRNetBackbone +from models.protoseg_core.lib.models.backbones.pvt.pvt_backbone import PVTBackbone +from models.protoseg_core.lib.models.backbones.pvt.pcpvt_backbone import PCPVTBackbone +from models.protoseg_core.lib.models.backbones.pvt.svt_backbone import SVTBackbone +from models.protoseg_core.lib.models.backbones.mobilenet.mobilenet_v1 import MobileNetV1Backbone +from models.protoseg_core.lib.models.backbones.mobilenet.mobilenet_v2 import MobileNetV2Backbone +from models.protoseg_core.lib.models.backbones.mobilenet.mobilenet_v3 import MobileNetV3Backbone + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class BackboneSelector(object): + + def __init__(self, configer): + self.configer = configer + + def get_backbone(self, **params): + backbone = self.configer.get('network', 'backbone') + + model = None + if ('resnet' in backbone or 'resnext' in backbone or 'resnest' in backbone) and 'senet' not in backbone: + model = ResNetBackbone(self.configer)(**params) + + elif 'hrne' in backbone: + model = HRNetBackbone(self.configer)(**params) + + elif 'pcpvt' in backbone: + model = PCPVTBackbone(self.configer)(**params) + + elif 'pvt' in 
backbone: + model = PVTBackbone(self.configer)(**params) + + elif 'svt' in backbone: + model = SVTBackbone(self.configer)(**params) + + elif 'mobilenet_v1' in backbone: + model = MobileNetV1Backbone(self.configer)(**params) + elif 'mobilenet_v2' in backbone: + model = MobileNetV2Backbone(self.configer)(**params) + elif 'mobilenet_v3' in backbone: + model = MobileNetV3Backbone(self.configer)(**params) + + else: + Log.error('Backbone {} is invalid.'.format(backbone)) + exit(1) + + return model diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/hrnet_backbone.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/hrnet_backbone.py new file mode 100644 index 0000000..ec2cc8e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/hrnet_backbone.py @@ -0,0 +1,803 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# Modified by Rainbowsecret (yuyua@microsoft.com) +# "High-Resolution Representations for Labeling Pixels and Regions" +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pdb +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +if torch.__version__.startswith('1'): + relu_inplace = True +else: + relu_inplace = False + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, bn_type=None, bn_momentum=0.1): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes, momentum=bn_momentum) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes, momentum=bn_momentum) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = out + residual + out = self.relu_in(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, bn_type=None, bn_momentum=0.1): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes, momentum=bn_momentum) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes, momentum=bn_momentum) + self.conv3 = nn.Conv2d(planes, planes * 4, 
kernel_size=1, bias=False) + self.bn3 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * 4, momentum=bn_momentum) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = out + residual + out = self.relu_in(out) + + return out + + +class HighResolutionModule(nn.Module): + def __init__(self, num_branches, blocks, num_blocks, num_inchannels, + num_channels, fuse_method, multi_scale_output=True, bn_type=None, bn_momentum=0.1): + super(HighResolutionModule, self).__init__() + self._check_branches( + num_branches, blocks, num_blocks, num_inchannels, num_channels) + + self.num_inchannels = num_inchannels + self.fuse_method = fuse_method + self.num_branches = num_branches + + self.multi_scale_output = multi_scale_output + + self.branches = self._make_branches( + num_branches, blocks, num_blocks, num_channels, bn_type=bn_type, bn_momentum=bn_momentum) + self.fuse_layers = self._make_fuse_layers(bn_type=bn_type, bn_momentum=bn_momentum) + self.relu = nn.ReLU(inplace=False) + + def _check_branches(self, num_branches, blocks, num_blocks, + num_inchannels, num_channels): + if num_branches != len(num_blocks): + error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( + num_branches, len(num_blocks)) + Log.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( + num_branches, len(num_channels)) + Log.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_inchannels): + error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( + num_branches, len(num_inchannels)) + Log.error(error_msg) + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, + stride=1, bn_type=None, bn_momentum=0.1): + downsample = None + if stride != 1 or \ + self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.num_inchannels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, stride=stride, bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)( + num_channels[branch_index] * block.expansion, + momentum=bn_momentum + ), + ) + + layers = [] + layers.append( + block( + self.num_inchannels[branch_index], + num_channels[branch_index], + stride, + downsample, + bn_type=bn_type, + bn_momentum=bn_momentum + ) + ) + self.num_inchannels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.num_inchannels[branch_index], + num_channels[branch_index], + bn_type=bn_type, + bn_momentum=bn_momentum + ) + ) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels, bn_type, bn_momentum=0.1): + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels, bn_type=bn_type, bn_momentum=bn_momentum) + ) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self, bn_type, bn_momentum=0.1): + if self.num_branches == 1: + return None + + num_branches = self.num_branches + num_inchannels = 
self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_inchannels[i], + 1, + 1, + 0, + bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(num_inchannels[i], momentum=bn_momentum), + ) + ) + elif j == i: + fuse_layer.append(None) + else: + conv3x3s = [] + for k in range(i - j): + if k == i - j - 1: + num_outchannels_conv3x3 = num_inchannels[i] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(num_outchannels_conv3x3, + momentum=bn_momentum) + ) + ) + else: + num_outchannels_conv3x3 = num_inchannels[j] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(num_outchannels_conv3x3, + momentum=bn_momentum), + nn.ReLU(inplace=False) + ) + ) + fuse_layer.append(nn.Sequential(*conv3x3s)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def get_num_inchannels(self): + return self.num_inchannels + + def forward(self, x): + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + + for i in range(len(self.fuse_layers)): + y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y = y + x[j] + elif j > i: + width_output = x[i].shape[-1] + height_output = x[i].shape[-2] + y = y + F.interpolate( + self.fuse_layers[i][j](x[j]), + size=[height_output, width_output], + mode='bilinear', + align_corners=True) + else: + y = y + self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + + return x_fuse + + +blocks_dict = { + 'BASIC': BasicBlock, + 'BOTTLENECK': Bottleneck +} + + +class HighResolutionNet(nn.Module): + + def __init__(self, cfg, bn_type, bn_momentum, **kwargs): + self.inplanes = 64 + super(HighResolutionNet, self).__init__() + + if os.environ.get('full_res_stem'): + Log.info("using full-resolution stem with stride=1") + stem_stride = 1 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=stem_stride, padding=1, + bias=False) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(64, momentum=bn_momentum) + self.relu = nn.ReLU(inplace=False) + self.layer1 = self._make_layer(Bottleneck, 64, 64, 4, bn_type=bn_type, bn_momentum=bn_momentum) + else: + stem_stride = 2 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=stem_stride, padding=1, + bias=False) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(64, momentum=bn_momentum) + self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=stem_stride, padding=1, + bias=False) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(64, momentum=bn_momentum) + self.relu = nn.ReLU(inplace=False) + self.layer1 = self._make_layer(Bottleneck, 64, 64, 4, bn_type=bn_type, bn_momentum=bn_momentum) + + self.stage2_cfg = cfg['STAGE2'] + num_channels = self.stage2_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage2_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + + self.transition1 = self._make_transition_layer([256], num_channels, bn_type=bn_type, bn_momentum=bn_momentum) + + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels, bn_type=bn_type, 
bn_momentum=bn_momentum) + + self.stage3_cfg = cfg['STAGE3'] + num_channels = self.stage3_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage3_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition2 = self._make_transition_layer( + pre_stage_channels, num_channels, bn_type=bn_type, bn_momentum=bn_momentum) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels, bn_type=bn_type, bn_momentum=bn_momentum) + + self.stage4_cfg = cfg['STAGE4'] + num_channels = self.stage4_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage4_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition3 = self._make_transition_layer( + pre_stage_channels, num_channels, bn_type=bn_type, bn_momentum=bn_momentum) + + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=True, bn_type=bn_type, bn_momentum=bn_momentum) + + if os.environ.get('keep_imagenet_head'): + self.incre_modules, self.downsamp_modules, \ + self.final_layer = self._make_head(pre_stage_channels, bn_type=bn_type, bn_momentum=bn_momentum) + + def _make_head(self, pre_stage_channels, bn_type, bn_momentum): + head_block = Bottleneck + head_channels = [32, 64, 128, 256] + + Log.info("pre_stage_channels: {}".format(pre_stage_channels)) + Log.info("head_channels: {}".format(head_channels)) + + # Increasing the #channels on each resolution + # from C, 2C, 4C, 8C to 128, 256, 512, 1024 + incre_modules = [] + for i, channels in enumerate(pre_stage_channels): + incre_module = self._make_layer(head_block, + channels, + head_channels[i], + 1, + bn_type=bn_type, + bn_momentum=bn_momentum + ) + incre_modules.append(incre_module) + incre_modules = nn.ModuleList(incre_modules) + + # downsampling modules + downsamp_modules = [] + for i in range(len(pre_stage_channels) - 1): + in_channels = head_channels[i] * head_block.expansion + out_channels = head_channels[i + 1] * head_block.expansion + + downsamp_module = nn.Sequential( + nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(out_channels, momentum=bn_momentum), + nn.ReLU(inplace=False) + ) + downsamp_modules.append(downsamp_module) + downsamp_modules = nn.ModuleList(downsamp_modules) + + final_layer = nn.Sequential( + nn.Conv2d( + in_channels=head_channels[3] * head_block.expansion, + out_channels=2048, + kernel_size=1, + stride=1, + padding=0 + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(2048, momentum=bn_momentum), + nn.ReLU(inplace=False) + ) + return incre_modules, downsamp_modules, final_layer + + def _make_transition_layer( + self, num_channels_pre_layer, num_channels_cur_layer, bn_type, bn_momentum): + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + nn.Conv2d( + num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, + 1, + 1, + bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(num_channels_cur_layer[i], momentum=bn_momentum), + nn.ReLU(inplace=False) + ) + ) + else: + transition_layers.append(None) + else: + conv3x3s = [] + for j in range(i + 1 - num_branches_pre): + inchannels = num_channels_pre_layer[-1] + outchannels = 
num_channels_cur_layer[i] \ + if j == i - num_branches_pre else inchannels + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + inchannels, + outchannels, + 3, + 2, + 1, + bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(outchannels, momentum=bn_momentum), + nn.ReLU(inplace=False) + ) + ) + transition_layers.append(nn.Sequential(*conv3x3s)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1, bn_type=None, bn_momentum=0.1): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * block.expansion, momentum=bn_momentum) + ) + + layers = [] + layers.append(block(inplanes, planes, stride, downsample, bn_type=bn_type, bn_momentum=bn_momentum)) + + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(inplanes, planes, bn_type=bn_type, bn_momentum=bn_momentum)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, num_inchannels, + multi_scale_output=True, bn_type=None, bn_momentum=0.1): + num_modules = layer_config['NUM_MODULES'] + num_branches = layer_config['NUM_BRANCHES'] + num_blocks = layer_config['NUM_BLOCKS'] + num_channels = layer_config['NUM_CHANNELS'] + block = blocks_dict[layer_config['BLOCK']] + fuse_method = layer_config['FUSE_METHOD'] + + modules = [] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output = False + else: + reset_multi_scale_output = True + + modules.append( + HighResolutionModule( + num_branches, + block, + num_blocks, + num_inchannels, + num_channels, + fuse_method, + reset_multi_scale_output, + bn_type, + bn_momentum + ) + ) + num_inchannels = modules[-1].get_num_inchannels() + + return nn.Sequential(*modules), num_inchannels + + def forward(self, x): + + if os.environ.get('full_res_stem'): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + else: + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + + x = self.layer1(x) + x_list = [] + for i in range(self.stage2_cfg['NUM_BRANCHES']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['NUM_BRANCHES']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + if os.environ.get('drop_stage4'): + return y_list + + x_list = [] + for i in range(self.stage4_cfg['NUM_BRANCHES']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + if os.environ.get('keep_imagenet_head'): + # Classification Head + x_list = [] + y = self.incre_modules[0](y_list[0]) + x_list.append(y) + for i in range(len(self.downsamp_modules)): + y = self.incre_modules[i + 1](y_list[i + 1]) + \ + self.downsamp_modules[i](y) + x_list.append(y) + + y = self.final_layer(y) + del x_list[-1] + x_list.append(y) + + return x_list + + return y_list + + +class HighResolutionNext(nn.Module): + + def __init__(self, cfg, bn_type, **kwargs): + super(HighResolutionNext, self).__init__() + # stem net + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, 
stride=2, padding=1, + bias=False) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(64) + self.relu = nn.ReLU(relu_inplace) + + self.stage1_cfg = cfg['STAGE1'] + num_channels = self.stage1_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage1_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition0 = self._make_transition_layer([64], num_channels, bn_type=bn_type) + self.stage1, pre_stage_channels = self._make_stage( + self.stage1_cfg, num_channels, bn_type=bn_type) + + self.stage2_cfg = cfg['STAGE2'] + num_channels = self.stage2_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage2_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition1 = self._make_transition_layer( + pre_stage_channels, num_channels, bn_type=bn_type) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels, bn_type=bn_type) + + self.stage3_cfg = cfg['STAGE3'] + num_channels = self.stage3_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage3_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition2 = self._make_transition_layer( + pre_stage_channels, num_channels, bn_type=bn_type) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels, bn_type=bn_type) + + self.stage4_cfg = cfg['STAGE4'] + num_channels = self.stage4_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage4_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition3 = self._make_transition_layer( + pre_stage_channels, num_channels, bn_type=bn_type) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=True, bn_type=bn_type) + + def _make_transition_layer( + self, num_channels_pre_layer, num_channels_cur_layer, bn_type): + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + nn.Conv2d( + num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, 1, 1, bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(num_channels_cur_layer[i]), + nn.ReLU(relu_inplace) + ) + ) + else: + transition_layers.append(None) + else: + conv3x3s = [] + for j in range(i + 1 - num_branches_pre): + inchannels = num_channels_pre_layer[-1] + outchannels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else inchannels + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + inchannels, outchannels, 3, 2, 1, bias=False + ), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(outchannels), + nn.ReLU(relu_inplace) + ) + ) + transition_layers.append(nn.Sequential(*conv3x3s)) + + return nn.ModuleList(transition_layers) + + def _make_stage(self, layer_config, num_inchannels, + multi_scale_output=True, bn_type=None): + num_modules = layer_config['NUM_MODULES'] + num_branches = layer_config['NUM_BRANCHES'] + num_blocks = layer_config['NUM_BLOCKS'] + num_channels = layer_config['NUM_CHANNELS'] + block = blocks_dict[layer_config['BLOCK']] + fuse_method = layer_config['FUSE_METHOD'] + + modules = [] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output = False + else: + 
reset_multi_scale_output = True + + modules.append( + HighResolutionModule( + num_branches, + block, + num_blocks, + num_inchannels, + num_channels, + fuse_method, + reset_multi_scale_output, + bn_type + ) + ) + num_inchannels = modules[-1].get_num_inchannels() + + return nn.Sequential(*modules), num_inchannels + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + x_list = [] + for i in range(self.stage1_cfg['NUM_BRANCHES']): + if self.transition0[i] is not None: + x_list.append(self.transition0[i](x)) + else: + x_list.append(x) + y_list = self.stage1(x_list) + + x_list = [] + for i in range(self.stage2_cfg['NUM_BRANCHES']): + if self.transition1[i] is not None: + if i == 0: + x_list.append(self.transition1[i](y_list[0])) + else: + x_list.append(self.transition1[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['NUM_BRANCHES']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['NUM_BRANCHES']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + x = self.stage4(x_list) + return x + + +class HRNetBackbone(object): + def __init__(self, configer): + self.configer = configer + + def __call__(self): + arch = self.configer.get('network', 'backbone') + resume = self.configer.get('network', 'resume') + from models.protoseg_core.lib.models.backbones.hrnet.hrnet_config import MODEL_CONFIGS + + if arch == 'hrnet18': + arch_net = HighResolutionNet(MODEL_CONFIGS['hrnet18'], + bn_type='torchsyncbn', + bn_momentum=0.1) + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='hrnet') + + elif arch == 'hrnet32': + arch_net = HighResolutionNet(MODEL_CONFIGS['hrnet32'], + bn_type='torchsyncbn', + bn_momentum=0.1) + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='hrnet') + + elif arch == 'hrnet48': + arch_net = HighResolutionNet(MODEL_CONFIGS['hrnet48'], + bn_type='torchsyncbn', + bn_momentum=0.1) + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='hrnet') + + elif arch == 'hrnet64': + arch_net = HighResolutionNet(MODEL_CONFIGS['hrnet64'], + bn_type='torchsyncbn', + bn_momentum=0.1) + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='hrnet') + + elif arch == 'hrnet2x20': + arch_net = HighResolutionNext(MODEL_CONFIGS['hrnet2x20'], + bn_type=self.configer.get('network', 'bn_type')) + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='hrnet') + + else: + raise Exception('Architecture undefined!') + + return arch_net diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/hrnet_config.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/hrnet_config.py new file mode 100644 index 0000000..fa22ea4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/hrnet/hrnet_config.py @@ -0,0 +1,181 @@ +# 
------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Create by Bin Xiao (Bin.Xiao@microsoft.com) +# Modified by Ke Sun (sunk@mail.ustc.edu.cn), Rainbowsecret (yuyua@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from yacs.config import CfgNode as CN + +# configs for HRNet64 +HRNET_64 = CN() +HRNET_64.STEM_INPLANES = 64 +HRNET_64.FINAL_CONV_KERNEL = 1 +HRNET_64.WITH_HEAD = True + +HRNET_64.STAGE2 = CN() +HRNET_64.STAGE2.NUM_MODULES = 1 +HRNET_64.STAGE2.NUM_BRANCHES = 2 +HRNET_64.STAGE2.NUM_BLOCKS = [4, 4] +HRNET_64.STAGE2.NUM_CHANNELS = [64, 128] +HRNET_64.STAGE2.BLOCK = 'BASIC' +HRNET_64.STAGE2.FUSE_METHOD = 'SUM' + +HRNET_64.STAGE3 = CN() +HRNET_64.STAGE3.NUM_MODULES = 4 +HRNET_64.STAGE3.NUM_BRANCHES = 3 +HRNET_64.STAGE3.NUM_BLOCKS = [4, 4, 4] +HRNET_64.STAGE3.NUM_CHANNELS = [64, 128, 256] +HRNET_64.STAGE3.BLOCK = 'BASIC' +HRNET_64.STAGE3.FUSE_METHOD = 'SUM' + +HRNET_64.STAGE4 = CN() +HRNET_64.STAGE4.NUM_MODULES = 3 +HRNET_64.STAGE4.NUM_BRANCHES = 4 +HRNET_64.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] +HRNET_64.STAGE4.NUM_CHANNELS = [64, 128, 256, 512] +HRNET_64.STAGE4.BLOCK = 'BASIC' +HRNET_64.STAGE4.FUSE_METHOD = 'SUM' + + +# configs for HRNet48 +HRNET_48 = CN() +HRNET_48.STEM_INPLANES = 64 +HRNET_48.FINAL_CONV_KERNEL = 1 +HRNET_48.WITH_HEAD = True + +HRNET_48.STAGE2 = CN() +HRNET_48.STAGE2.NUM_MODULES = 1 +HRNET_48.STAGE2.NUM_BRANCHES = 2 +HRNET_48.STAGE2.NUM_BLOCKS = [4, 4] +HRNET_48.STAGE2.NUM_CHANNELS = [48, 96] +HRNET_48.STAGE2.BLOCK = 'BASIC' +HRNET_48.STAGE2.FUSE_METHOD = 'SUM' + +HRNET_48.STAGE3 = CN() +HRNET_48.STAGE3.NUM_MODULES = 4 +HRNET_48.STAGE3.NUM_BRANCHES = 3 +HRNET_48.STAGE3.NUM_BLOCKS = [4, 4, 4] +HRNET_48.STAGE3.NUM_CHANNELS = [48, 96, 192] +HRNET_48.STAGE3.BLOCK = 'BASIC' +HRNET_48.STAGE3.FUSE_METHOD = 'SUM' + +HRNET_48.STAGE4 = CN() +HRNET_48.STAGE4.NUM_MODULES = 3 +HRNET_48.STAGE4.NUM_BRANCHES = 4 +HRNET_48.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] +HRNET_48.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] +HRNET_48.STAGE4.BLOCK = 'BASIC' +HRNET_48.STAGE4.FUSE_METHOD = 'SUM' + + +# configs for HRNet32 +HRNET_32 = CN() +HRNET_32.PRETRAINED_LAYERS = ['*'] +HRNET_32.STEM_INPLANES = 64 +HRNET_32.FINAL_CONV_KERNEL = 1 +HRNET_32.WITH_HEAD = True + +HRNET_32.STAGE2 = CN() +HRNET_32.STAGE2.NUM_MODULES = 1 +HRNET_32.STAGE2.NUM_BRANCHES = 2 +HRNET_32.STAGE2.NUM_BLOCKS = [4, 4] +HRNET_32.STAGE2.NUM_CHANNELS = [32, 64] +HRNET_32.STAGE2.BLOCK = 'BASIC' +HRNET_32.STAGE2.FUSE_METHOD = 'SUM' + +HRNET_32.STAGE3 = CN() +HRNET_32.STAGE3.NUM_MODULES = 4 +HRNET_32.STAGE3.NUM_BRANCHES = 3 +HRNET_32.STAGE3.NUM_BLOCKS = [4, 4, 4] +HRNET_32.STAGE3.NUM_CHANNELS = [32, 64, 128] +HRNET_32.STAGE3.BLOCK = 'BASIC' +HRNET_32.STAGE3.FUSE_METHOD = 'SUM' + +HRNET_32.STAGE4 = CN() +HRNET_32.STAGE4.NUM_MODULES = 3 +HRNET_32.STAGE4.NUM_BRANCHES = 4 +HRNET_32.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] +HRNET_32.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] +HRNET_32.STAGE4.BLOCK = 'BASIC' +HRNET_32.STAGE4.FUSE_METHOD = 'SUM' + + +# configs for HRNet18 +HRNET_18 = CN() +HRNET_18.PRETRAINED_LAYERS = ['*'] +HRNET_18.STEM_INPLANES = 64 +HRNET_18.FINAL_CONV_KERNEL = 1 +HRNET_18.WITH_HEAD = True + +HRNET_18.STAGE2 = CN() +HRNET_18.STAGE2.NUM_MODULES = 1 +HRNET_18.STAGE2.NUM_BRANCHES = 2 +HRNET_18.STAGE2.NUM_BLOCKS = [4, 4] +HRNET_18.STAGE2.NUM_CHANNELS = [18, 36] +HRNET_18.STAGE2.BLOCK = 'BASIC' 
+HRNET_18.STAGE2.FUSE_METHOD = 'SUM' + +HRNET_18.STAGE3 = CN() +HRNET_18.STAGE3.NUM_MODULES = 4 +HRNET_18.STAGE3.NUM_BRANCHES = 3 +HRNET_18.STAGE3.NUM_BLOCKS = [4, 4, 4] +HRNET_18.STAGE3.NUM_CHANNELS = [18, 36, 72] +HRNET_18.STAGE3.BLOCK = 'BASIC' +HRNET_18.STAGE3.FUSE_METHOD = 'SUM' + +HRNET_18.STAGE4 = CN() +HRNET_18.STAGE4.NUM_MODULES = 3 +HRNET_18.STAGE4.NUM_BRANCHES = 4 +HRNET_18.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] +HRNET_18.STAGE4.NUM_CHANNELS = [18, 36, 72, 144] +HRNET_18.STAGE4.BLOCK = 'BASIC' +HRNET_18.STAGE4.FUSE_METHOD = 'SUM' + +# configs for HRNet2x20 +HRNET2X_20 = CN() +HRNET2X_20.FINAL_CONV_KERNEL = 1 + +HRNET2X_20.STAGE1 = CN() +HRNET2X_20.STAGE1.NUM_MODULES = 1 +HRNET2X_20.STAGE1.NUM_BRANCHES = 2 +HRNET2X_20.STAGE1.NUM_BLOCKS = [4, 4] +HRNET2X_20.STAGE1.NUM_CHANNELS = [32, 64] +HRNET2X_20.STAGE1.BLOCK = 'BOTTLENECK' +HRNET2X_20.STAGE1.FUSE_METHOD = 'SUM' + +HRNET2X_20.STAGE2 = CN() +HRNET2X_20.STAGE2.NUM_MODULES = 1 +HRNET2X_20.STAGE2.NUM_BRANCHES = 3 +HRNET2X_20.STAGE2.NUM_BLOCKS = [4, 4, 4] +HRNET2X_20.STAGE2.NUM_CHANNELS = [20, 40, 80] +HRNET2X_20.STAGE2.BLOCK = 'BASIC' +HRNET2X_20.STAGE2.FUSE_METHOD = 'SUM' + +HRNET2X_20.STAGE3 = CN() +HRNET2X_20.STAGE3.NUM_MODULES = 4 +HRNET2X_20.STAGE3.NUM_BRANCHES = 4 +HRNET2X_20.STAGE3.NUM_BLOCKS = [4, 4, 4, 4] +HRNET2X_20.STAGE3.NUM_CHANNELS = [20, 40, 80, 160] +HRNET2X_20.STAGE3.BLOCK = 'BASIC' +HRNET2X_20.STAGE3.FUSE_METHOD = 'SUM' + +HRNET2X_20.STAGE4 = CN() +HRNET2X_20.STAGE4.NUM_MODULES = 3 +HRNET2X_20.STAGE4.NUM_BRANCHES = 5 +HRNET2X_20.STAGE4.NUM_BLOCKS = [4, 4, 4, 4, 4] +HRNET2X_20.STAGE4.NUM_CHANNELS = [20, 40, 80, 160, 320] +HRNET2X_20.STAGE4.BLOCK = 'BASIC' +HRNET2X_20.STAGE4.FUSE_METHOD = 'SUM' + +MODEL_CONFIGS = { + 'hrnet18': HRNET_18, + 'hrnet32': HRNET_32, + 'hrnet48': HRNET_48, + 'hrnet64': HRNET_64, + 'hrnet2x20': HRNET2X_20, +} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v1.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v1.py new file mode 100644 index 0000000..fb3015f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v1.py @@ -0,0 +1,90 @@ +import torch.nn as nn +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +class MobileNetV1(nn.Module): + def __init__(self, alpha=1.0, input_size=224, include_top=False): + self.alpha = alpha + self.input_size = input_size + self.include_top = include_top + super(MobileNetV1, self).__init__() + + def conv_bn(inp, oup, stride): + oup = int(oup * self.alpha) + return nn.Sequential( + nn.ConstantPad2d((0, 1, 0, 1), 0), + nn.Conv2d(inp, oup, 3, stride, 0, bias=False), + nn.BatchNorm2d(oup, eps=1e-3), + nn.ReLU6(inplace=True) + ) + + def conv_dw(inp, oup, stride): + inp = int(inp * self.alpha) + oup = int(oup * self.alpha) + if stride == 2: + return nn.Sequential( + # DepthwiseConv2D + nn.ConstantPad2d((0, 1, 0, 1), 0), + nn.Conv2d(inp, inp, 3, stride, 0, groups=inp, bias=False), + nn.BatchNorm2d(inp, eps=1e-3), + nn.ReLU6(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup, eps=1e-3), + nn.ReLU6(inplace=True), + ) + else: + return nn.Sequential( + # DepthwiseConv2D + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp, eps=1e-3), + nn.ReLU6(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup, eps=1e-3), + nn.ReLU6(inplace=True), + ) + + self.model = 
nn.Sequential( + conv_bn(3, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + ) + + def forward(self, x): + x = self.model(x) + + return x + + +class MobileNetV1Backbone(object): + def __init__(self, configer): + self.configer = configer + + def __call__(self): + arch = self.configer.get('network', 'backbone') + resume = self.configer.get('network', 'resume') + + if arch == 'mobilenet_v1': + arch_net = MobileNetV1() + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='mobilenet_v1') + + else: + raise Exception('Architecture undefined!') + + return arch_net diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v2.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v2.py new file mode 100644 index 0000000..45e42e7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v2.py @@ -0,0 +1,154 @@ +import torch.nn as nn +import math +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
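+    # Illustrative sketch (hypothetical width multipliers, not values fixed by this repo):
+    #   >>> _make_divisible(32 * 0.75, 8)   # 24 is already a multiple of 8
+    #   24
+    #   >>> _make_divisible(16 * 0.7, 8)    # 11.2 rounds down to 8, below 0.9 * 11.2, so bump by the divisor
+    #   16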
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def conv_3x3_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + assert stride in [1, 2] + + hidden_dim = round(inp * expand_ratio) + self.identity = stride == 1 and inp == oup + + if expand_ratio == 1: + self.conv = nn.Sequential( + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + else: + self.conv = nn.Sequential( + # pw + nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.identity: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, num_classes=1000, width_mult=1.): + super(MobileNetV2, self).__init__() + # setting of inverted residual blocks + self.cfgs = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8) + layers = [conv_3x3_bn(3, input_channel, 2)] + # building inverted residual blocks + block = InvertedResidual + for t, c, n, s in self.cfgs: + output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) + for i in range(n): + layers.append(block(input_channel, output_channel, s if i == 0 else 1, t)) + input_channel = output_channel + self.features = nn.Sequential(*layers) + # building last several layers + output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280 + self.conv = conv_1x1_bn(input_channel, output_channel) + + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.conv(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + + +class MobileNetV2Backbone(object): + def __init__(self, configer): + self.configer = configer + + def __call__(self): + arch = self.configer.get('network', 'backbone') + resume = self.configer.get('network', 'resume') + + if arch == 'mobilenet_v2': + arch_net = MobileNetV2() + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='mobilenet_v2') + + else: + raise Exception('Architecture undefined!') + + return arch_net diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v3.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v3.py new file mode 100644 index 0000000..fb660e6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/mobilenet/mobilenet_v3.py @@ -0,0 +1,254 @@ +import torch.nn as nn +import math +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +__all__ = ['mobilenetv3_large', 'mobilenetv3_small'] + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
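+    # Illustrative sketch: SELayer below calls _make_divisible(channel // reduction, 8); for a
+    # hypothetical channel=40 and reduction=4 it asks for 10 hidden units, which rounds to 8,
+    # falls below 0.9 * 10, and is therefore bumped up to 16 by the guard.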
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class h_sigmoid(nn.Module): + def __init__(self, inplace=True): + super(h_sigmoid, self).__init__() + self.relu = nn.ReLU6(inplace=inplace) + + def forward(self, x): + return self.relu(x + 3) / 6 + + +class h_swish(nn.Module): + def __init__(self, inplace=True): + super(h_swish, self).__init__() + self.sigmoid = h_sigmoid(inplace=inplace) + + def forward(self, x): + return x * self.sigmoid(x) + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=4): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, _make_divisible(channel // reduction, 8)), + nn.ReLU(inplace=True), + nn.Linear(_make_divisible(channel // reduction, 8), channel), + h_sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y + + +def conv_3x3_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + h_swish() + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + h_swish() + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs): + super(InvertedResidual, self).__init__() + assert stride in [1, 2] + + self.identity = stride == 1 and inp == oup + + if inp == hidden_dim: + self.conv = nn.Sequential( + # dw + nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + h_swish() if use_hs else nn.ReLU(inplace=True), + # Squeeze-and-Excite + SELayer(hidden_dim) if use_se else nn.Identity(), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + else: + self.conv = nn.Sequential( + # pw + nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), + nn.BatchNorm2d(hidden_dim), + h_swish() if use_hs else nn.ReLU(inplace=True), + # dw + nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + # Squeeze-and-Excite + SELayer(hidden_dim) if use_se else nn.Identity(), + h_swish() if use_hs else nn.ReLU(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.identity: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV3(nn.Module): + def __init__(self, cfgs, mode, num_classes=1000, width_mult=1.): + super(MobileNetV3, self).__init__() + # setting of inverted residual blocks + self.cfgs = cfgs + assert mode in ['large', 'small'] + + # building first layer + input_channel = _make_divisible(16 * width_mult, 8) + layers = [conv_3x3_bn(3, input_channel, 2)] + # building inverted residual blocks + block = InvertedResidual + for k, t, c, use_se, use_hs, s in self.cfgs: + output_channel = _make_divisible(c * width_mult, 8) + exp_size = _make_divisible(input_channel * t, 8) + layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs)) + input_channel = output_channel + self.features = nn.Sequential(*layers) + # building last several layers + self.conv = conv_1x1_bn(input_channel, exp_size) + output_channel = {'large': 1280, 'small': 1024} + output_channel = _make_divisible(output_channel[mode] * width_mult, 8) if width_mult > 1.0 else output_channel[mode] + + 
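+        # Note: the classification head of the original MobileNetV3 is omitted in this backbone,
+        # so the 1280/1024-dim output_channel computed above is left unused; forward() returns the
+        # feature map produced by self.conv for the downstream segmentation head.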
self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.conv(x) + + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + +class MobileNetV3Backbone(object): + def __init__(self, configer): + self.configer = configer + + def __call__(self): + arch = self.configer.get('network', 'backbone') + resume = self.configer.get('network', 'resume') + + if arch == 'mobilenet_v3': + cfgs = [ + # k, t, c, SE, HS, s + [3, 1, 16, 0, 0, 1], + [3, 4, 24, 0, 0, 2], + [3, 3, 24, 0, 0, 1], + [5, 3, 40, 1, 0, 2], + [5, 3, 40, 1, 0, 1], + [5, 3, 40, 1, 0, 1], + [3, 6, 80, 0, 1, 2], + [3, 2.5, 80, 0, 1, 1], + [3, 2.3, 80, 0, 1, 1], + [3, 2.3, 80, 0, 1, 1], + [3, 6, 112, 1, 1, 1], + [3, 6, 112, 1, 1, 1], + [5, 6, 160, 1, 1, 2], + [5, 6, 160, 1, 1, 1], + [5, 6, 160, 1, 1, 1] + ] + arch_net = MobileNetV3(cfgs, mode='large') + if resume is None: + arch_net = ModuleHelper.load_model(arch_net, + pretrained=self.configer.get('network', 'pretrained'), + all_match=False, + network='mobilenet_v3') + + else: + raise Exception('Architecture undefined!') + + return arch_net + + + +def mobilenetv3_large(**kwargs): + """ + Constructs a MobileNetV3-Large model + """ + cfgs = [ + # k, t, c, SE, HS, s + [3, 1, 16, 0, 0, 1], + [3, 4, 24, 0, 0, 2], + [3, 3, 24, 0, 0, 1], + [5, 3, 40, 1, 0, 2], + [5, 3, 40, 1, 0, 1], + [5, 3, 40, 1, 0, 1], + [3, 6, 80, 0, 1, 2], + [3, 2.5, 80, 0, 1, 1], + [3, 2.3, 80, 0, 1, 1], + [3, 2.3, 80, 0, 1, 1], + [3, 6, 112, 1, 1, 1], + [3, 6, 112, 1, 1, 1], + [5, 6, 160, 1, 1, 2], + [5, 6, 160, 1, 1, 1], + [5, 6, 160, 1, 1, 1] + ] + return MobileNetV3(cfgs, mode='large', **kwargs) + + +def mobilenetv3_small(**kwargs): + """ + Constructs a MobileNetV3-Small model + """ + cfgs = [ + # k, t, c, SE, HS, s + [3, 1, 16, 1, 0, 2], + [3, 4.5, 24, 0, 0, 2], + [3, 3.67, 24, 0, 0, 1], + [5, 4, 40, 1, 1, 2], + [5, 6, 40, 1, 1, 1], + [5, 6, 40, 1, 1, 1], + [5, 3, 48, 1, 1, 1], + [5, 3, 48, 1, 1, 1], + [5, 6, 96, 1, 1, 2], + [5, 6, 96, 1, 1, 1], + [5, 6, 96, 1, 1, 1], + ] + + return MobileNetV3(cfgs, mode='small', **kwargs) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/pcpvt_backbone.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/pcpvt_backbone.py new file mode 100644 index 0000000..3b9cd49 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/pcpvt_backbone.py @@ -0,0 +1,330 @@ +from functools import partial + +import torch +import torch.nn as nn +import torch.nn.functional as F +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + +__all__ = [ + 'pvt_tiny', 'pvt_small', 'pvt_medium', 'pvt_large' +] + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def 
forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1): + super().__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + def forward(self, x, H, W): + B, N, C = x.shape + q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3) + + if self.sr_ratio > 1: + x_ = x.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + else: + kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + k, v = kv[0], kv[1] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Block(nn.Module): + + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.img_size = img_size + self.patch_size = patch_size + assert img_size[0] % patch_size[0] == 0 and img_size[1] % patch_size[1] == 0, \ + f"img_size {img_size} should be divided by patch_size {patch_size}." 
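+        # Illustrative sketch (assuming an input of img_size=224): with the pcpvt_* settings below,
+        # stage 1 uses patch_size=4, so the grid is 224 // 4 = 56 x 56 and num_patches = 3136;
+        # later stages halve the grid again with patch_size=2.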
+ self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + self.norm = nn.LayerNorm(embed_dim) + + def forward(self, x): + B, C, H, W = x.shape + + x = self.proj(x).flatten(2).transpose(1, 2) + x = self.norm(x) + H, W = H // self.patch_size[0], W // self.patch_size[1] + + return x, (H, W) + + +class PosCNN(nn.Module): + def __init__(self, in_chans, embed_dim=768, s=1): + super(PosCNN, self).__init__() + self.proj = nn.Sequential(nn.Conv2d(in_chans, embed_dim, 3, s, 1, bias=True, groups=embed_dim), ) + self.s = s + + def forward(self, x, H, W): + B, N, C = x.shape + feat_token = x + cnn_feat = feat_token.transpose(1, 2).view(B, C, H, W) + if self.s == 1: + x = self.proj(cnn_feat) + cnn_feat + else: + x = self.proj(cnn_feat) + x = x.flatten(2).transpose(1, 2) + return x + + def no_weight_decay(self): + return ['proj.%d.weight' % i for i in range(4)] + +class PyramidVisionTransformer(nn.Module): + def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0., + attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]): + super().__init__() + self.num_classes = num_classes + self.depths = depths + self.patch_embeds = nn.ModuleList() + self.pos_drops = nn.ModuleList() + self.blocks = nn.ModuleList() + + # patch_embed + for i in range(len(depths)): + if i == 0: + self.patch_embeds.append(PatchEmbed(img_size, patch_size, in_chans, embed_dims[i])) + else: + self.patch_embeds.append( + PatchEmbed(img_size // patch_size // 2 ** (i - 1), 2, embed_dims[i - 1], embed_dims[i])) + self.pos_drops.append(nn.Dropout(p=drop_rate)) + + # pos_embed + # self.pos_embed1 = nn.Parameter(torch.zeros(1, self.patch_embed1.num_patches, embed_dims[0])) + # self.pos_drop1 = nn.Dropout(p=drop_rate) + # self.pos_embed2 = nn.Parameter(torch.zeros(1, self.patch_embed2.num_patches, embed_dims[1])) + # self.pos_drop2 = nn.Dropout(p=drop_rate) + # self.pos_embed3 = nn.Parameter(torch.zeros(1, self.patch_embed3.num_patches, embed_dims[2])) + # self.pos_drop3 = nn.Dropout(p=drop_rate) + # self.pos_embed4 = nn.Parameter(torch.zeros(1, self.patch_embed4.num_patches, embed_dims[3])) + # self.pos_drop4 = nn.Dropout(p=drop_rate) + self.pos_block = nn.ModuleList( + [PosCNN(embed_dim, embed_dim) for embed_dim in embed_dims] + ) + + # transformer encoder + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + cur = 0 + for k in range(len(depths)): + _block = nn.ModuleList([Block( + dim=embed_dims[k], num_heads=num_heads[k], mlp_ratio=mlp_ratios[k], qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[k]) + for i in range(depths[k])]) + self.blocks.append(_block) + cur += depths[k] + + # init weights + self.apply(self._init_weights) + + def _init_weights(self, m): + import math + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + 
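+                # He (Kaiming) normal initialisation in fan-out mode: std = sqrt(2 / fan_out).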
m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + + def no_weight_decay(self): + return set(['pos_block.' + n for n, p in self.pos_block.named_parameters()]) + + def forward_features(self, x): + outs = [] + B = x.shape[0] + + for i in range(len(self.depths)): + x, (H, W) = self.patch_embeds[i](x) + x = self.pos_drops[i](x) + for j, blk in enumerate(self.blocks[i]): + x = blk(x, H, W) + if j == 0: + x = self.pos_block[i](x, H, W) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + + outs.append(x) + + return outs + + def forward(self, x): + outs = self.forward_features(x) + + return outs + + +def _conv_filter(state_dict, patch_size=16): + """ convert patch embedding weight from manual patchify + linear proj to conv""" + out_dict = {} + for k, v in state_dict.items(): + if 'patch_embed.proj.weight' in k: + v = v.reshape((v.shape[0], 3, patch_size, patch_size)) + out_dict[k] = v + + return out_dict + + +def pvt_tiny(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], drop_rate=0.1, + drop_path_rate=0.1, + **kwargs) + return model + + +def pvt_small(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], drop_rate=0.0, + drop_path_rate=0.1, + **kwargs) + + return model + + +def pvt_medium(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], + sr_ratios=[8, 4, 2, 1], + # drop_rate=0.0, drop_path_rate=0.05) + **kwargs) + + return model + + +def pvt_large(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], + sr_ratios=[8, 4, 2, 1], + # drop_rate=0.0, drop_path_rate=0.02) + **kwargs) + + return model + + +class PCPVTBackbone(object): + def __init__(self, configer): + self.configer = configer + + def __call__(self): + arch = self.configer.get('network', 'backbone') + + if arch == 'pcpvt_tiny': + model = pvt_tiny(configer=self.configer) + elif arch == 'pcpvt_small': + model = pvt_small(configer=self.configer) + elif arch == 'pcpvt_medium': + model = pvt_medium(configer=self.configer) + elif arch == 'pcpvt_large': + model 
= pvt_large(configer=self.configer) + + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="pcpvt") + + return model diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/pvt_backbone.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/pvt_backbone.py new file mode 100644 index 0000000..73d81fd --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/pvt_backbone.py @@ -0,0 +1,351 @@ +from functools import partial + +import torch +import torch.nn as nn +import torch.nn.functional as F +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + +__all__ = [ + 'pvt_tiny', 'pvt_small', 'pvt_medium', 'pvt_large' +] + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1): + super().__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + def forward(self, x, H, W): + B, N, C = x.shape + q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3) + + if self.sr_ratio > 1: + x_ = x.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + else: + kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + k, v = kv[0], kv[1] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Block(nn.Module): + + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.img_size = img_size + self.patch_size = patch_size + assert img_size[0] % patch_size[0] == 0 and img_size[1] % patch_size[1] == 0, \ + f"img_size {img_size} should be divided by patch_size {patch_size}." + self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + self.norm = nn.LayerNorm(embed_dim) + + def forward(self, x): + B, C, H, W = x.shape + + x = self.proj(x).flatten(2).transpose(1, 2) + x = self.norm(x) + H, W = H // self.patch_size[0], W // self.patch_size[1] + + return x, (H, W) + + +class PyramidVisionTransformer(nn.Module): + def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0., + attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]): + super().__init__() + self.num_classes = num_classes + self.depths = depths + + # patch_embed + self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, + embed_dim=embed_dims[0]) + self.patch_embed2 = PatchEmbed(img_size=img_size // patch_size, patch_size=2, in_chans=embed_dims[0], + embed_dim=embed_dims[1]) + self.patch_embed3 = PatchEmbed(img_size=img_size // patch_size // 2, patch_size=2, in_chans=embed_dims[1], + embed_dim=embed_dims[2]) + self.patch_embed4 = PatchEmbed(img_size=img_size // patch_size // 4, patch_size=2, in_chans=embed_dims[2], + embed_dim=embed_dims[3]) + + # pos_embed + self.pos_embed1 = nn.Parameter(torch.zeros(1, self.patch_embed1.num_patches, embed_dims[0])) + self.pos_drop1 = nn.Dropout(p=drop_rate) + self.pos_embed2 = nn.Parameter(torch.zeros(1, self.patch_embed2.num_patches, embed_dims[1])) + self.pos_drop2 = nn.Dropout(p=drop_rate) + self.pos_embed3 = nn.Parameter(torch.zeros(1, self.patch_embed3.num_patches, embed_dims[2])) + self.pos_drop3 = nn.Dropout(p=drop_rate) + self.pos_embed4 = nn.Parameter(torch.zeros(1, self.patch_embed4.num_patches, embed_dims[3])) + self.pos_drop4 = nn.Dropout(p=drop_rate) + + # transformer encoder + en_dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + cur = 0 + self.block1 = nn.ModuleList([Block( + dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=en_dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[0]) + for i in range(depths[0])]) + + cur += depths[0] + self.block2 = nn.ModuleList([Block( + dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=en_dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[1]) + for i in range(depths[1])]) + + cur += 
depths[1] + self.block3 = nn.ModuleList([Block( + dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=en_dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[2]) + for i in range(depths[2])]) + + cur += depths[2] + self.block4 = nn.ModuleList([Block( + dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=en_dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[3]) + for i in range(depths[3])]) + + # init weights + trunc_normal_(self.pos_embed1, std=.02) + trunc_normal_(self.pos_embed2, std=.02) + trunc_normal_(self.pos_embed3, std=.02) + trunc_normal_(self.pos_embed4, std=.02) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def _get_pos_embed(self, pos_embed, patch_embed, H, W): + if H * W == self.patch_embed1.num_patches: + return pos_embed + else: + return F.interpolate( + pos_embed.reshape(1, patch_embed.H, patch_embed.W, -1).permute(0, 3, 1, 2), + size=(H, W), mode="bilinear").reshape(1, -1, H * W).permute(0, 2, 1) + + def forward_features(self, x): + outs = [] + + B = x.shape[0] + + # stage 1 + x, (H, W) = self.patch_embed1(x) # 3->64 H/4, W/4 + pos_embed1 = self._get_pos_embed(self.pos_embed1, self.patch_embed1, H, W) + x = x + pos_embed1 + x = self.pos_drop1(x) + for blk in self.block1: # 64 H/4, W/4 + x = blk(x, H, W) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 2 + x, (H, W) = self.patch_embed2(x) # 64->128 H/8, W/8 + pos_embed2 = self._get_pos_embed(self.pos_embed2, self.patch_embed2, H, W) + x = x + pos_embed2 + x = self.pos_drop2(x) + for blk in self.block2: # 128 H/8, W/8 + x = blk(x, H, W) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 3 + x, (H, W) = self.patch_embed3(x) # 128->320 H/16, W/16 + pos_embed3 = self._get_pos_embed(self.pos_embed3, self.patch_embed3, H, W) + x = x + pos_embed3 + x = self.pos_drop3(x) + for blk in self.block3: # 320 H/16, W/16 + x = blk(x, H, W) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 4 + x, (H, W) = self.patch_embed4(x) # 320->512 H/32, W/32 + pos_embed4 = self._get_pos_embed(self.pos_embed4, self.patch_embed4, H, W) + x = x + pos_embed4 + x = self.pos_drop4(x) + for blk in self.block4: # 512 H/32, W/32 + x = blk(x, H, W) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + return outs + + def forward(self, x): + outs = self.forward_features(x) + + return outs + + +def _conv_filter(state_dict, patch_size=16): + """ convert patch embedding weight from manual patchify + linear proj to conv""" + out_dict = {} + for k, v in state_dict.items(): + if 'patch_embed.proj.weight' in k: + v = v.reshape((v.shape[0], 3, patch_size, patch_size)) + out_dict[k] = v + + return out_dict + + +def pvt_tiny(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + 
mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], drop_rate=0.1, + drop_path_rate=0.1, + **kwargs) + return model + + +def pvt_small(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], drop_rate=0.0, + drop_path_rate=0.1, + **kwargs) + + return model + + +def pvt_medium(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], + sr_ratios=[8, 4, 2, 1], + # drop_rate=0.0, drop_path_rate=0.05) + **kwargs) + + return model + + +def pvt_large(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], + mlp_ratios=[8, 8, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], + sr_ratios=[8, 4, 2, 1], + # drop_rate=0.0, drop_path_rate=0.02) + **kwargs) + + return model + + +class PVTBackbone(object): + def __init__(self, configer): + self.configer = configer + + def __call__(self): + arch = self.configer.get('network', 'backbone') + + if arch == 'pvt_tiny': + model = pvt_tiny(configer=self.configer) + elif arch == 'pvt_small': + model = pvt_small(configer=self.configer) + elif arch == 'pvt_medium': + model = pvt_medium(configer=self.configer) + elif arch == 'pvt_large': + model = pvt_large(configer=self.configer) + + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="pvt") + + return model diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/svt_backbone.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/svt_backbone.py new file mode 100644 index 0000000..57eb225 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/pvt/svt_backbone.py @@ -0,0 +1,365 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from functools import partial + +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from timm.models.registry import register_model +from timm.models.vision_transformer import _cfg +from timm.models.vision_transformer import Block as TimmBlock +from timm.models.vision_transformer import Attention as TimmAttention + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) 
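+        # Note: a single Dropout module is defined here and applied twice in
+        # forward(), once after the activation and once after fc2.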
+ self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class GroupAttention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., ws=1): + assert ws != 1 + super(GroupAttention, self).__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.ws = ws + + def forward(self, x, H, W): + B, N, C = x.shape + x = x.view(B, H, W, C) + pad_l = pad_t = 0 + pad_r = (self.ws - W % self.ws) % self.ws + pad_b = (self.ws - H % self.ws) % self.ws + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + _h, _w = Hp // self.ws, Wp // self.ws + mask = torch.zeros((1, Hp, Wp), device=x.device) + mask[:, -pad_b:, :].fill_(1) + mask[:, :, -pad_r:].fill_(1) + + x = x.reshape(B, _h, self.ws, _w, self.ws, C).transpose(2, 3) # B, _h, _w, ws, ws, C + mask = mask.reshape(1, _h, self.ws, _w, self.ws).transpose(2, 3).reshape(1, _h * _w, self.ws * self.ws) + attn_mask = mask.unsqueeze(2) - mask.unsqueeze(3) # 1, _h*_w, ws*ws, ws*ws + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-1000.0)).masked_fill(attn_mask == 0, float(0.0)) + qkv = self.qkv(x).reshape(B, _h * _w, self.ws * self.ws, 3, self.num_heads, + C // self.num_heads).permute(3, 0, 1, 4, 2, 5) # n_h, B, _w*_h, nhead, ws*ws, dim + q, k, v = qkv[0], qkv[1], qkv[2] # B, _h*_w, n_head, ws*ws, dim_head + attn = (q @ k.transpose(-2, -1)) * self.scale # B, _h*_w, n_head, ws*ws, ws*ws + attn = attn + attn_mask.unsqueeze(2) + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) # attn @v -> B, _h*_w, n_head, ws*ws, dim_head + attn = (attn @ v).transpose(2, 3).reshape(B, _h, _w, self.ws, self.ws, C) + x = attn.transpose(2, 3).reshape(B, _h * self.ws, _w * self.ws, C) + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + x = x.reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1): + super().__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." 
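+        # Note: this is PVT-style spatial-reduction attention; when sr_ratio > 1,
+        # keys and values are computed from a feature map downsampled by a strided
+        # conv (self.sr below), which keeps full attention affordable at high
+        # spatial resolutions.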
+ + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + def forward(self, x, H, W): + B, N, C = x.shape + q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3) + + if self.sr_ratio > 1: + x_ = x.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + else: + kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + k, v = kv[0], kv[1] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class GroupBlock(TimmBlock): + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1, ws=1): + super(GroupBlock, self).__init__(dim, num_heads, mlp_ratio, qkv_bias, qk_scale, drop, attn_drop, + drop_path, act_layer, norm_layer) + del self.attn + if ws == 1: + self.attn = Attention(dim, num_heads, qkv_bias, qk_scale, attn_drop, drop, sr_ratio) + else: + self.attn = GroupAttention(dim, num_heads, qkv_bias, qk_scale, attn_drop, drop, ws) + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.img_size = img_size + self.patch_size = patch_size + assert img_size[0] % patch_size[0] == 0 and img_size[1] % patch_size[1] == 0, \ + f"img_size {img_size} should be divided by patch_size {patch_size}." 
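+        # Note: patchification is a single strided conv (proj) that also performs
+        # the linear projection to embed_dim, followed by LayerNorm over the tokens;
+        # H and W below are the resulting token-grid dimensions.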
+ self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + self.norm = nn.LayerNorm(embed_dim) + + def forward(self, x): + B, C, H, W = x.shape + + x = self.proj(x).flatten(2).transpose(1, 2) + x = self.norm(x) + H, W = H // self.patch_size[0], W // self.patch_size[1] + + return x, (H, W) + + +# PEG from https://arxiv.org/abs/2102.10882 +class PosCNN(nn.Module): + def __init__(self, in_chans, embed_dim=768, s=1): + super(PosCNN, self).__init__() + self.proj = nn.Sequential(nn.Conv2d(in_chans, embed_dim, 3, s, 1, bias=True, groups=embed_dim), ) + self.s = s + + def forward(self, x, H, W): + B, N, C = x.shape + feat_token = x + cnn_feat = feat_token.transpose(1, 2).view(B, C, H, W) + if self.s == 1: + x = self.proj(cnn_feat) + cnn_feat + else: + x = self.proj(cnn_feat) + x = x.flatten(2).transpose(1, 2) + return x + + def no_weight_decay(self): + return ['proj.%d.weight' % i for i in range(4)] + + +class PyramidVisionTransformer(nn.Module): + def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0., + attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], block_cls=GroupBlock, wss=[7,7,7]): + super().__init__() + self.num_classes = num_classes + self.depths = depths + self.wss = wss + + # patch_embed + self.patch_embeds = nn.ModuleList() + self.pos_embeds = nn.ParameterList() + self.pos_drops = nn.ModuleList() + self.blocks = nn.ModuleList() + + for i in range(len(depths)): + if i == 0: + self.patch_embeds.append(PatchEmbed(img_size, patch_size, in_chans, embed_dims[i])) + else: + self.patch_embeds.append( + PatchEmbed(img_size // patch_size // 2 ** (i - 1), 2, embed_dims[i - 1], embed_dims[i])) + self.pos_drops.append(nn.Dropout(p=drop_rate)) + + self.pos_block = nn.ModuleList( + [PosCNN(embed_dim, embed_dim) for embed_dim in embed_dims] + ) + + # transformer encoder + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + cur = 0 + self.blocks = nn.ModuleList() + for k in range(len(depths)): + _block = nn.ModuleList([block_cls( + dim=embed_dims[k], num_heads=num_heads[k], mlp_ratio=mlp_ratios[k], qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[k], ws=1 if i % 2 == 1 else wss[k]) for i in range(depths[k])]) + self.blocks.append(_block) + cur += depths[k] + self.apply(self._init_weights) + + def no_weight_decay(self): + return set(['pos_block.' 
+ n for n, p in self.pos_block.named_parameters()]) + + def reset_drop_path(self, drop_path_rate): + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))] + cur = 0 + for k in range(len(self.depths)): + for i in range(self.depths[k]): + self.blocks[k][i].drop_path.drop_prob = dpr[cur + i] + cur += self.depths[k] + + def _init_weights(self, m): + import math + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + + def forward_features(self, x): + outs = [] + B = x.shape[0] + + for i in range(len(self.depths)): + x, (H, W) = self.patch_embeds[i](x) + x = self.pos_drops[i](x) + for j, blk in enumerate(self.blocks[i]): + x = blk(x, H, W) + if j == 0: + x = self.pos_block[i](x, H, W) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + + outs.append(x) + + return outs + + def forward(self, x): + outs = self.forward_features(x) + + return outs + + +def _conv_filter(state_dict, patch_size=16): + """ convert patch embedding weight from manual patchify + linear proj to conv""" + out_dict = {} + for k, v in state_dict.items(): + if 'patch_embed.proj.weight' in k: + v = v.reshape((v.shape[0], 3, patch_size, patch_size)) + out_dict[k] = v + + return out_dict + + +def svt_small(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[64, 128, 256, 512], num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 10, 4], + sr_ratios=[8, 4, 2, 1], drop_rate=0.0, + drop_path_rate=0.2, + wss=[7, 7, 7, 7], + **kwargs) + + return model + + +def svt_base(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[96, 192, 384, 768], num_heads=[3, 6, 12, 24], + mlp_ratios=[4, 4, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 18, 2], + sr_ratios=[8, 4, 2, 1], + wss=[7, 7, 7, 7], + drop_path_rate=0.2, + **kwargs) + + return model + + +def svt_large(configer, **kwargs): + img_size = configer.get('train', 'data_transformer')['input_size'][0] + num_classes = configer.get('data', 'num_classes') + model = PyramidVisionTransformer(img_size=img_size, num_classes=num_classes, + patch_size=4, embed_dims=[128, 256, 512, 1024], num_heads=[4, 8, 16, 32], + mlp_ratios=[4, 4, 4, 4], qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 18, 2], + sr_ratios=[8, 4, 2, 1], + wss=[7, 7, 7, 7], + drop_path_rate=0.3, + **kwargs) + + return model + + +class SVTBackbone(object): + def __init__(self, configer): + self.configer = configer + + def __call__(self): + arch = self.configer.get('network', 'backbone') + + if arch == 'svt_small': + model = svt_small(configer=self.configer) + elif arch == 
'svt_base': + model = svt_base(configer=self.configer) + elif arch == 'svt_large': + model = svt_large(configer=self.configer) + + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="svt") + + return model diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/dcn_resnet_models.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/dcn_resnet_models.py new file mode 100644 index 0000000..29e6ae8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/dcn_resnet_models.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Deformable ConvNets v2: More Deformable, Better Results +# Modified by: RainbowSecret(yuyua@microsoft.com) +# Select Seg Model for img segmentation. + +import pdb +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from collections import OrderedDict + +from lib.models.tools.module_helper import ModuleHelper +from lib.extensions.dcn import ModulatedDeformConv, ModulatedDeformRoIPoolingPack, DeformConv + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): + "3x3 convolution with padding" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + bn_type=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride, dilation) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + assert not with_cp + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = out + residual + out = self.relu_in(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + with_dcn=False, + num_deformable_groups=1, + dcn_offset_lr_mult=0.1, + use_regular_conv_on_stride=False, + use_modulated_dcn=False, + bn_type=None): + """Bottleneck block. + If style is "pytorch", the stride-two layer is the 3x3 conv layer, + if it is "caffe", the stride-two layer is the first 1x1 conv layer. 
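+        When `with_dcn` is True, the 3x3 convolution is replaced by a deformable
+        convolution: with `use_modulated_dcn` the modulated (DCNv2-style) variant is
+        used, with an extra conv predicting offsets plus a sigmoid mask; otherwise a
+        plain DeformConv with a predicted offset field is used.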
+ """ + super(Bottleneck, self).__init__() + conv1_stride = 1 + conv2_stride = stride + + self.conv1 = nn.Conv2d( + inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + + self.with_dcn = with_dcn + self.use_modulated_dcn = use_modulated_dcn + if use_regular_conv_on_stride and stride > 1: + self.with_dcn = False + if self.with_dcn: + print("--->> use {}dcn in block where c_in={} and c_out={}".format( + 'modulated ' if self.use_modulated_dcn else '', planes, inplanes)) + if use_modulated_dcn: + self.conv_offset_mask = nn.Conv2d( + planes, + num_deformable_groups * 27, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation) + self.conv_offset_mask.lr_mult = dcn_offset_lr_mult + self.conv_offset_mask.zero_init = True + + self.conv2 = ModulatedDeformConv(planes, planes, 3, stride=conv2_stride, + padding=dilation, dilation=dilation, + deformable_groups=num_deformable_groups, no_bias=True) + else: + self.conv2_offset = nn.Conv2d( + planes, + num_deformable_groups * 18, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation) + self.conv2_offset.lr_mult = dcn_offset_lr_mult + self.conv2_offset.zero_init = True + + self.conv2 = DeformConv(planes, planes, (3, 3), stride=conv2_stride, + padding=dilation, dilation=dilation, + num_deformable_groups=num_deformable_groups) + else: + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * self.expansion) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + if self.with_dcn: + if self.use_modulated_dcn: + offset_mask = self.conv_offset_mask(out) + offset1, offset2, mask_raw = torch.chunk(offset_mask, 3, dim=1) + offset = torch.cat((offset1, offset2), dim=1) + mask = torch.sigmoid(mask_raw) + out = self.conv2(out, offset, mask) + else: + offset = self.conv2_offset(out) + # add bias to the offset to solve the bug of dilation rates within dcn. 
+ dilation = self.conv2.dilation[0] + bias_w = torch.cuda.FloatTensor([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]) * (dilation - 1) + bias_h = bias_w.permute(1, 0) + bias_w.requires_grad = False + bias_h.requires_grad = False + offset += torch.cat([bias_h.reshape(-1), bias_w.reshape(-1)]).view(1, -1, 1, 1) + out = self.conv2(out, offset) + else: + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = out + residual + return out + + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu_in(out) + + return out + + +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='pytorch', + with_cp=False, + with_dcn=False, + dcn_offset_lr_mult=0.1, + use_regular_conv_on_stride=False, + use_modulated_dcn=False, + bn_type=None): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + dilation, + downsample, + style=style, + with_cp=with_cp, + with_dcn=with_dcn, + dcn_offset_lr_mult=dcn_offset_lr_mult, + use_regular_conv_on_stride=use_regular_conv_on_stride, + use_modulated_dcn=use_modulated_dcn, + bn_type=bn_type)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp, with_dcn=with_dcn, + dcn_offset_lr_mult=dcn_offset_lr_mult, use_regular_conv_on_stride=use_regular_conv_on_stride, + use_modulated_dcn=use_modulated_dcn, bn_type=bn_type)) + + return nn.Sequential(*layers) + + +class DCNResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. 
+ """ + def __init__(self, + block, + layers, + deep_base=True, + bn_type=None): + super(DCNResNet, self).__init__() + # if depth not in self.arch_settings: + # raise KeyError('invalid depth {} for resnet'.format(depth)) + # assert num_stages >= 1 and num_stages <= 4 + # block, stage_blocks = self.arch_settings[depth] + # stage_blocks = stage_blocks[:num_stages] + # assert len(strides) == len(dilations) == num_stages + # assert max(out_indices) < num_stages + self.style = 'pytorch' + self.inplanes = 128 if deep_base else 64 + if deep_base: + self.resinit = nn.Sequential(OrderedDict([ + ('conv1', nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)), + ('bn1', ModuleHelper.BatchNorm2d(bn_type=bn_type)(64)), + ('relu1', nn.ReLU(inplace=False)), + ('conv2', nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)), + ('bn2', ModuleHelper.BatchNorm2d(bn_type=bn_type)(64)), + ('relu2', nn.ReLU(inplace=False)), + ('conv3', nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)), + ('bn3', ModuleHelper.BatchNorm2d(bn_type=bn_type)(self.inplanes)), + ('relu3', nn.ReLU(inplace=False))] + )) + else: + self.resinit = nn.Sequential(OrderedDict([ + ('conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)), + ('bn1', ModuleHelper.BatchNorm2d(bn_type=bn_type)(self.inplanes)), + ('relu1', nn.ReLU(inplace=False))] + )) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.layer1 = make_res_layer( + block, + self.inplanes, + 64, + layers[0], + style=self.style, + with_dcn=False, + use_modulated_dcn=False, + bn_type=bn_type) + + self.layer2 = make_res_layer( + block, + 256, + 128, + layers[1], + stride=2, + style=self.style, + with_dcn=False, + use_modulated_dcn=False, + bn_type=bn_type) + + self.layer3 = make_res_layer( + block, + 512, + 256, + layers[2], + stride=2, + style=self.style, + with_dcn=True, + use_modulated_dcn=False, + bn_type=bn_type) + + self.layer4 = make_res_layer( + block, + 1024, + 512, + layers[3], + stride=2, + style=self.style, + with_dcn=True, + use_modulated_dcn=False, + bn_type=bn_type) + + + def forward(self, x): + x = self.resinit(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + return x + + + +class DCNResNetModels(object): + + def __init__(self, configer): + self.configer = configer + + def deepbase_dcn_resnet50(self, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = DCNResNet(Bottleneck, [3, 4, 6, 3], deep_base=True, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, + all_match=False, + pretrained=self.configer.get('network', 'pretrained'), + network="dcnet") + return model + + def deepbase_dcn_resnet101(self, **kwargs): + """Constructs a ResNet-101 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = DCNResNet(Bottleneck, [3, 4, 23, 3], deep_base=True, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, + all_match=False, + pretrained=self.configer.get('network', 'pretrained'), + network="dcnet") + return model \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnest_models.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnest_models.py new file mode 100644 index 0000000..1e21a21 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnest_models.py @@ -0,0 +1,438 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## Email: zhanghang0704@gmail.com +## Copyright (c) 2020 +## +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import math +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn import Conv2d, Module, Linear, ReLU +from torch.nn.modules.utils import _pair + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + +__all__ = ['ResNeSt', 'Bottleneck', 'SKConv2d'] + + +class DropBlock2D(object): + def __init__(self, *args, **kwargs): + raise NotImplementedError + +class SplAtConv2d(Module): + """Split-Attention Conv2d + """ + def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), + dilation=(1, 1), groups=1, bias=True, + radix=2, reduction_factor=4, + rectify=False, rectify_avg=False, bn_type=None, + dropblock_prob=0.0, **kwargs): + super(SplAtConv2d, self).__init__() + padding = _pair(padding) + self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) + self.rectify_avg = rectify_avg + inter_channels = max(in_channels*radix//reduction_factor, 32) + self.radix = radix + self.cardinality = groups + self.channels = channels + self.dropblock_prob = dropblock_prob + if self.rectify: + from rfconv import RFConv2d + self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, + groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) + else: + self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, + groups=groups*radix, bias=bias, **kwargs) + self.use_bn = bn_type is not None + self.bn0 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(channels*radix) + self.relu = ReLU(inplace=False) + self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(inter_channels) + self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) + if dropblock_prob > 0.0: + self.dropblock = DropBlock2D(dropblock_prob, 3) + self.rsoftmax = rSoftMax(radix, groups) + + def forward(self, x): + x = self.conv(x) + if self.use_bn: + x = self.bn0(x) + if self.dropblock_prob > 0.0: + x = self.dropblock(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + if self.radix > 1: + splited = torch.split(x, rchannel//self.radix, dim=1) + gap = sum(splited) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + if self.use_bn: + gap = self.bn1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = torch.split(atten, rchannel//self.radix, dim=1) + out = sum([att*split for 
(att, split) in zip(attens, splited)]) + else: + out = atten * x + return out.contiguous() + +class rSoftMax(nn.Module): + def __init__(self, radix, cardinality): + super().__init__() + self.radix = radix + self.cardinality = cardinality + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + +class DropBlock2D(object): + def __init__(self, *args, **kwargs): + raise NotImplementedError + +class GlobalAvgPool2d(nn.Module): + def __init__(self): + """Global average pooling over the input's spatial dimensions""" + super(GlobalAvgPool2d, self).__init__() + + def forward(self, inputs): + return F.adaptive_avg_pool2d(inputs, 1).view(inputs.size(0), -1) + +class Bottleneck(nn.Module): + """ResNet Bottleneck + """ + # pylint: disable=unused-argument + expansion = 4 + def __init__(self, inplanes, planes, stride=1, downsample=None, + radix=1, cardinality=1, bottleneck_width=64, + avd=False, avd_first=False, dilation=1, is_first=False, + rectified_conv=False, rectify_avg=False, + bn_type=None, dropblock_prob=0.0, last_gamma=False): + super(Bottleneck, self).__init__() + group_width = int(planes * (bottleneck_width / 64.)) * cardinality + self.conv1 = nn.Conv2d(inplanes, group_width, kernel_size=1, bias=False) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(group_width) + self.dropblock_prob = dropblock_prob + self.radix = radix + self.avd = avd and (stride > 1 or is_first) + self.avd_first = avd_first + + if self.avd: + self.avd_layer = nn.AvgPool2d(3, stride, padding=1) + stride = 1 + + if dropblock_prob > 0.0: + self.dropblock1 = DropBlock2D(dropblock_prob, 3) + if radix == 1: + self.dropblock2 = DropBlock2D(dropblock_prob, 3) + self.dropblock3 = DropBlock2D(dropblock_prob, 3) + + if radix > 1: + self.conv2 = SplAtConv2d( + group_width, group_width, kernel_size=3, + stride=stride, padding=dilation, + dilation=dilation, groups=cardinality, bias=False, + radix=radix, rectify=rectified_conv, + rectify_avg=rectify_avg, + bn_type=bn_type, + dropblock_prob=dropblock_prob) + elif rectified_conv: + from rfconv import RFConv2d + self.conv2 = RFConv2d( + group_width, group_width, kernel_size=3, stride=stride, + padding=dilation, dilation=dilation, + groups=cardinality, bias=False, + average_mode=rectify_avg) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(group_width) + else: + self.conv2 = nn.Conv2d( + group_width, group_width, kernel_size=3, stride=stride, + padding=dilation, dilation=dilation, + groups=cardinality, bias=False) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(group_width) + + self.conv3 = nn.Conv2d( + group_width, planes * 4, kernel_size=1, bias=False) + self.bn3 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes*4) + + if last_gamma: + from torch.nn.init import zeros_ + zeros_(self.bn3.weight) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.downsample = downsample + self.dilation = dilation + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + if self.dropblock_prob > 0.0: + out = self.dropblock1(out) + out = self.relu(out) + + if self.avd and self.avd_first: + out = self.avd_layer(out) + + out = self.conv2(out) + if self.radix == 1: + out = self.bn2(out) + if self.dropblock_prob > 0.0: + out = self.dropblock2(out) + out = self.relu(out) + + if self.avd and not self.avd_first: + out = 
self.avd_layer(out) + + out = self.conv3(out) + out = self.bn3(out) + if self.dropblock_prob > 0.0: + out = self.dropblock3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = out + residual + out = self.relu_in(out) + + return out + +class ResNeSt(nn.Module): + # pylint: disable=unused-variable + def __init__(self, block, layers, radix=1, groups=1, bottleneck_width=64, + num_classes=1000, dilated=False, dilation=1, + deep_stem=False, stem_width=64, avg_down=False, + rectified_conv=False, rectify_avg=False, + avd=False, avd_first=False, + final_drop=0.0, dropblock_prob=0, + last_gamma=False, bn_type=None): + self.cardinality = groups + self.bottleneck_width = bottleneck_width + # ResNet-D params + self.inplanes = stem_width*2 if deep_stem else 64 + self.avg_down = avg_down + self.last_gamma = last_gamma + # ResNeSt params + self.radix = radix + self.avd = avd + self.avd_first = avd_first + + super(ResNeSt, self).__init__() + self.rectified_conv = rectified_conv + self.rectify_avg = rectify_avg + if rectified_conv: + from rfconv import RFConv2d + conv_layer = RFConv2d + else: + conv_layer = nn.Conv2d + conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {} + if deep_stem: + self.conv1 = nn.Sequential( + conv_layer(3, stem_width, kernel_size=3, stride=2, padding=1, bias=False, **conv_kwargs), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(stem_width), + nn.ReLU(inplace=False), + conv_layer(stem_width, stem_width, kernel_size=3, stride=1, padding=1, bias=False, **conv_kwargs), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(stem_width), + nn.ReLU(inplace=False), + conv_layer(stem_width, stem_width*2, kernel_size=3, stride=1, padding=1, bias=False, **conv_kwargs), + ) + else: + self.conv1 = conv_layer(3, 64, kernel_size=7, stride=2, padding=3, + bias=False, **conv_kwargs) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(self.inplanes) + self.relu = nn.ReLU(inplace=False) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change. + self.layer1 = self._make_layer(block, 64, layers[0], bn_type=bn_type, is_first=False) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, bn_type=bn_type) + + if dilated or dilation == 4: + self.layer3 = self._make_layer(block, 256, layers[2], stride=1, + dilation=2, bn_type=bn_type, + dropblock_prob=dropblock_prob) + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, + dilation=4, bn_type=bn_type, + dropblock_prob=dropblock_prob) + elif dilation==2: + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilation=1, bn_type=bn_type, + dropblock_prob=dropblock_prob) + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, + dilation=2, bn_type=bn_type, + dropblock_prob=dropblock_prob) + else: + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + bn_type=bn_type, + dropblock_prob=dropblock_prob) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + bn_type=bn_type, + dropblock_prob=dropblock_prob) + self.avgpool = GlobalAvgPool2d() + self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, ModuleHelper.BatchNorm2d(bn_type=bn_type, ret_cls=True)): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_type=None, + dropblock_prob=0.0, is_first=True): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + down_layers = [] + if self.avg_down: + if dilation == 1: + down_layers.append(nn.AvgPool2d(kernel_size=stride, stride=stride, + ceil_mode=True, count_include_pad=False)) + else: + down_layers.append(nn.AvgPool2d(kernel_size=1, stride=1, + ceil_mode=True, count_include_pad=False)) + down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=1, bias=False)) + else: + down_layers.append(nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False)) + down_layers.append(ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * block.expansion)) + downsample = nn.Sequential(*down_layers) + + layers = [] + if dilation == 1 or dilation == 2: + layers.append(block(self.inplanes, planes, stride, downsample=downsample, + radix=self.radix, cardinality=self.cardinality, + bottleneck_width=self.bottleneck_width, + avd=self.avd, avd_first=self.avd_first, + dilation=1, is_first=is_first, rectified_conv=self.rectified_conv, + rectify_avg=self.rectify_avg, + bn_type=bn_type, dropblock_prob=dropblock_prob, + last_gamma=self.last_gamma)) + elif dilation == 4: + layers.append(block(self.inplanes, planes, stride, downsample=downsample, + radix=self.radix, cardinality=self.cardinality, + bottleneck_width=self.bottleneck_width, + avd=self.avd, avd_first=self.avd_first, + dilation=2, is_first=is_first, rectified_conv=self.rectified_conv, + rectify_avg=self.rectify_avg, + bn_type=bn_type, dropblock_prob=dropblock_prob, + last_gamma=self.last_gamma)) + else: + raise RuntimeError("=> unknown dilation size: {}".format(dilation)) + + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, + radix=self.radix, cardinality=self.cardinality, + bottleneck_width=self.bottleneck_width, + avd=self.avd, avd_first=self.avd_first, + dilation=dilation, rectified_conv=self.rectified_conv, + rectify_avg=self.rectify_avg, + bn_type=bn_type, dropblock_prob=dropblock_prob, + last_gamma=self.last_gamma)) + + return nn.Sequential(*layers) + + def forward(self, x): + tuple_features = list() + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + tuple_features.append(x) + x = self.maxpool(x) + tuple_features.append(x) + x = self.layer1(x) + tuple_features.append(x) + x = self.layer2(x) + tuple_features.append(x) + x = self.layer3(x) + tuple_features.append(x) + x = self.layer4(x) + tuple_features.append(x) + + return tuple_features + + +class ResNeStModels(object): + + def __init__(self, configer): + self.configer = configer + + def resnest50(self, **kwargs): + model = ResNeSt(Bottleneck, [3, 4, 6, 3], + radix=2, groups=1, bottleneck_width=64, dilated=True, dilation=4, + deep_stem=False, stem_width=32, avg_down=True, + avd=True, avd_first=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnest") + return model + + def deepbase_resnest50(self, **kwargs): + model = ResNeSt(Bottleneck, [3, 4, 6, 3], + radix=2, groups=1, bottleneck_width=64, dilated=True, dilation=4, + deep_stem=True, stem_width=32, avg_down=True, + avd=True, avd_first=False, + 
bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnest") + return model + + def resnest101(self, **kwargs): + model = ResNeSt(Bottleneck, [3, 4, 23, 3], + radix=2, groups=1, bottleneck_width=64, dilated=True, dilation=4, + deep_stem=False, stem_width=64, avg_down=True, + avd=True, avd_first=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnest") + return model + + def deepbase_resnest101(self, **kwargs): + model = ResNeSt(Bottleneck, [3, 4, 23, 3], + radix=2, groups=1, bottleneck_width=64, dilated=True, dilation=4, + deep_stem=True, stem_width=64, avg_down=True, + avd=True, avd_first=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnest") + return model + + def deepbase_resnest200(self, **kwargs): + model = ResNeSt(Bottleneck, [3, 24, 36, 3], + radix=2, groups=1, bottleneck_width=64, dilated=True, dilation=4, + deep_stem=True, stem_width=64, avg_down=True, + avd=True, avd_first=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnest") + return model + + def deepbase_resnest269(self, **kwargs): + model = ResNeSt(Bottleneck, [3, 30, 48, 8], + radix=2, groups=1, bottleneck_width=64, dilated=True, dilation=4, + deep_stem=True, stem_width=64, avg_down=True, + avd=True, avd_first=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnest") + return model \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnet_backbone.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnet_backbone.py new file mode 100644 index 0000000..12fdaab --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnet_backbone.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pdb +import torch +import torch.nn as nn + +from models.protoseg_core.lib.models.backbones.resnet.resnet_models import ResNetModels +from models.protoseg_core.lib.models.backbones.resnet.resnext_models import ResNextModels +from models.protoseg_core.lib.models.backbones.resnet.resnest_models import ResNeStModels + +# if torch.__version__[:3] == '0.4': +# from lib.models.backbones.resnet.dcn_resnet_models import DCNResNetModels + +class NormalResnetBackbone(nn.Module): + def __init__(self, orig_resnet): + super(NormalResnetBackbone, self).__init__() + + self.num_features = 2048 + # take pretrained resnet, except AvgPool and FC + self.resinit = orig_resnet.resinit + self.maxpool = orig_resnet.maxpool + self.layer1 = orig_resnet.layer1 + self.layer2 = orig_resnet.layer2 + self.layer3 = orig_resnet.layer3 + self.layer4 = orig_resnet.layer4 + + def get_num_features(self): + return self.num_features + + def forward(self, x): + tuple_features = list() + x = 
self.resinit(x) + tuple_features.append(x) + x = self.maxpool(x) + tuple_features.append(x) + x = self.layer1(x) + tuple_features.append(x) + x = self.layer2(x) + tuple_features.append(x) + x = self.layer3(x) + tuple_features.append(x) + x = self.layer4(x) + tuple_features.append(x) + + return tuple_features + + +class DilatedResnetBackbone(nn.Module): + def __init__(self, orig_resnet, dilate_scale=8, multi_grid=(1, 2, 4)): + super(DilatedResnetBackbone, self).__init__() + + self.num_features = 2048 + from functools import partial + + if dilate_scale == 8: + orig_resnet.layer3.apply(partial(self._nostride_dilate, dilate=2)) + if multi_grid is None: + orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=4)) + else: + for i, r in enumerate(multi_grid): + orig_resnet.layer4[i].apply(partial(self._nostride_dilate, dilate=int(4 * r))) + + elif dilate_scale == 16: + if multi_grid is None: + orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=2)) + else: + for i, r in enumerate(multi_grid): + orig_resnet.layer4[i].apply(partial(self._nostride_dilate, dilate=int(2 * r))) + + # Take pretrained resnet, except AvgPool and FC + self.resinit = orig_resnet.resinit + self.maxpool = orig_resnet.maxpool + self.layer1 = orig_resnet.layer1 + self.layer2 = orig_resnet.layer2 + self.layer3 = orig_resnet.layer3 + self.layer4 = orig_resnet.layer4 + + def _nostride_dilate(self, m, dilate): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + # the convolution with stride + if m.stride == (2, 2): + m.stride = (1, 1) + if m.kernel_size == (3, 3): + m.dilation = (dilate // 2, dilate // 2) + m.padding = (dilate // 2, dilate // 2) + # other convoluions + else: + if m.kernel_size == (3, 3): + m.dilation = (dilate, dilate) + m.padding = (dilate, dilate) + + def get_num_features(self): + return self.num_features + + def forward(self, x): + tuple_features = list() + x = self.resinit(x) + tuple_features.append(x) + x = self.maxpool(x) + tuple_features.append(x) + x = self.layer1(x) + tuple_features.append(x) + x = self.layer2(x) + tuple_features.append(x) + x = self.layer3(x) + tuple_features.append(x) + x = self.layer4(x) + tuple_features.append(x) + + return tuple_features + + +class ResNetBackbone(object): + def __init__(self, configer): + self.configer = configer + self.resnet_models = ResNetModels(self.configer) + self.resnext_models = ResNextModels(self.configer) + self.resnest_models = ResNeStModels(self.configer) + + # if torch.__version__[:3] == '0.4': + # self.dcn_resnet_models = DCNResNetModels(self.configer) + + def __call__(self): + arch = self.configer.get('network', 'backbone') + multi_grid = None + if self.configer.exists('network', 'multi_grid'): + multi_grid = self.configer.get('network', 'multi_grid') + + if arch == 'deepbase_resnet18': + orig_resnet = self.resnet_models.deepbase_resnet18() + arch_net = NormalResnetBackbone(orig_resnet) + arch_net.num_features = 512 + + elif arch == 'deepbase_resnet18_dilated8': + orig_resnet = self.resnet_models.deepbase_resnet18() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + arch_net.num_features = 512 + + elif arch == 'deepbase_resnet18_dilated16': + orig_resnet = self.resnet_models.deepbase_resnet18() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) + arch_net.num_features = 512 + + elif arch == 'resnet34': + orig_resnet = self.resnet_models.resnet34() + arch_net = NormalResnetBackbone(orig_resnet) + arch_net.num_features = 512 + + elif arch == 
'resnet34_dilated8': + orig_resnet = self.resnet_models.resnet34() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + arch_net.num_features = 512 + + elif arch == 'resnet34_dilated16': + orig_resnet = self.resnet_models.resnet34() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) + arch_net.num_features = 512 + + elif arch == 'resnet50': + orig_resnet = self.resnet_models.resnet50() + arch_net = NormalResnetBackbone(orig_resnet) + + elif arch == 'resnet50_dilated8': + orig_resnet = self.resnet_models.resnet50() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'resnet50_dilated16': + orig_resnet = self.resnet_models.resnet50() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) + + elif arch == 'deepbase_resnet50': + orig_resnet = self.resnet_models.deepbase_resnet50() + arch_net = NormalResnetBackbone(orig_resnet) + + elif arch == 'deepbase_resnet50_dilated8': + orig_resnet = self.resnet_models.deepbase_resnet50() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'deepbase_resnet50_dilated16': + orig_resnet = self.resnet_models.deepbase_resnet50() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) + + elif arch == 'resnet101': + orig_resnet = self.resnet_models.resnet101() + arch_net = NormalResnetBackbone(orig_resnet) + + elif arch == 'resnet101_dilated8': + orig_resnet = self.resnet_models.resnet101() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'resnet101_dilated16': + orig_resnet = self.resnet_models.resnet101() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) + + elif arch == 'deepbase_resnet101': + orig_resnet = self.resnet_models.deepbase_resnet101() + arch_net = NormalResnetBackbone(orig_resnet) + + elif arch == 'deepbase_resnet101_dilated8': + orig_resnet = self.resnet_models.deepbase_resnet101() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'deepbase_resnet101_dilated16': + orig_resnet = self.resnet_models.deepbase_resnet101() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) + + elif arch == 'deepbase_resnet152_dilated8': + orig_resnet = self.resnet_models.deepbase_resnet152() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'deepbase_resnet152_dilated16': + orig_resnet = self.resnet_models.deepbase_resnet152() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) + + # resnext models + elif arch == 'resnext101_32x8d_dilated8': + orig_resnet = self.resnext_models.resnext101_32x8d() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'resnext101_32x16d_dilated8': + orig_resnet = self.resnext_models.resnext101_32x16d() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'resnext101_32x32d_dilated8': + orig_resnet = self.resnext_models.resnext101_32x32d() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + elif arch == 'resnext101_32x48d_dilated8': + orig_resnet = self.resnext_models.resnext101_32x48d() + arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) + + # deformable 
resnet models + # elif arch == 'deepbase_dcn_resnet50_dilated8': + # if torch.__version__[:3] != '0.4': + # raise NotImplementedError + # orig_dcn_resnet = self.dcn_resnet_models.deepbase_dcn_resnet50() + # arch_net = DilatedResnetBackbone(orig_dcn_resnet, dilate_scale=8, multi_grid=multi_grid) + + # elif arch == 'deepbase_dcn_resnet50_dilated16': + # if torch.__version__[:3] != '0.4': + # raise NotImplementedError + # orig_dcn_resnet = self.dcn_resnet_models.deepbase_dcn_resnet50() + # arch_net = DilatedResnetBackbone(orig_dcn_resnet, dilate_scale=16, multi_grid=multi_grid) + + # elif arch == 'deepbase_dcn_resnet101_dilated8': + # if torch.__version__[:3] != '0.4': + # raise NotImplementedError + # orig_dcn_resnet = self.dcn_resnet_models.deepbase_dcn_resnet101() + # arch_net = DilatedResnetBackbone(orig_dcn_resnet, dilate_scale=8, multi_grid=multi_grid) + + # elif arch == 'deepbase_dcn_resnet101_dilated16': + # if torch.__version__[:3] != '0.4': + # raise NotImplementedError + # orig_dcn_resnet = self.dcn_resnet_models.deepbase_dcn_resnet101() + # arch_net = DilatedResnetBackbone(orig_dcn_resnet, dilate_scale=16, multi_grid=multi_grid) + + elif arch == 'wide_resnet16_dilated8': + arch_net = self.resnet_models.wide_resnet16() + + elif arch == 'wide_resnet20_dilated8': + arch_net = self.resnet_models.wide_resnet20() + + elif arch == 'wide_resnet38_dilated8': + arch_net = self.resnet_models.wide_resnet38() + + # ResNeSt series: https://github.com/zhanghang1989/ResNeSt/blob/master/resnest/torch/resnest.py + elif arch == 'deepbase_resnest50_dilated8': + arch_net = self.resnest_models.deepbase_resnest50() + + elif arch == 'deepbase_resnest101_dilated8': + arch_net = self.resnest_models.deepbase_resnest101() + + elif arch == 'deepbase_resnest200_dilated8': + arch_net = self.resnest_models.deepbase_resnest200() + + elif arch == 'deepbase_resnest269_dilated8': + arch_net = self.resnest_models.deepbase_resnest269() + + else: + raise Exception('Architecture undefined!') + + return arch_net diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnet_models.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnet_models.py new file mode 100644 index 0000000..20fbd5a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnet_models.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com), Xiangtai(lxtpku@pku.edu.cn) +# Modified by: RainbowSecret(yuyua@microsoft.com) +# Select Seg Model for img segmentation. 
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +import torch.nn as nn + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.models.backbones.resnet.wide_resnet_models import WiderResNetA2 + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, bn_type=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = out + residual + out = self.relu_in(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, bn_type=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * 4) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = out + residual + out = self.relu_in(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, deep_base=False, bn_type=None): + super(ResNet, self).__init__() + self.inplanes = 128 if deep_base else 64 + if deep_base: + self.resinit = nn.Sequential(OrderedDict([ + ('conv1', nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)), + ('bn1', ModuleHelper.BatchNorm2d(bn_type=bn_type)(64)), + ('relu1', nn.ReLU(inplace=False)), + ('conv2', nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)), + ('bn2', ModuleHelper.BatchNorm2d(bn_type=bn_type)(64)), + ('relu2', nn.ReLU(inplace=False)), + ('conv3', nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)), + ('bn3', 
ModuleHelper.BatchNorm2d(bn_type=bn_type)(self.inplanes)), + ('relu3', nn.ReLU(inplace=False))] + )) + else: + self.resinit = nn.Sequential(OrderedDict([ + ('conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)), + ('bn1', ModuleHelper.BatchNorm2d(bn_type=bn_type)(self.inplanes)), + ('relu1', nn.ReLU(inplace=False))] + )) + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change. + + self.layer1 = self._make_layer(block, 64, layers[0], bn_type=bn_type) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, bn_type=bn_type) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, bn_type=bn_type) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, bn_type=bn_type) + self.avgpool = nn.AvgPool2d(7, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, ModuleHelper.BatchNorm2d(bn_type=bn_type, ret_cls=True)): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1, bn_type=None): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, bn_type=bn_type)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, bn_type=bn_type)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.resinit(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +class ResNetModels(object): + + def __init__(self, configer): + self.configer = configer + + def resnet18(self, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], deep_base=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def deepbase_resnet18(self, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], deep_base=True, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def resnet34(self, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], deep_base=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def deepbase_resnet34(self, **kwargs): + """Constructs a ResNet-34 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], deep_base=True, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def resnet50(self, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], deep_base=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def deepbase_resnet50(self, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], deep_base=True, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def resnet101(self, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], deep_base=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def deepbase_resnet101(self, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], deep_base=True, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained')) + return model + + def resnet152(self, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], deep_base=False, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, all_match=False, pretrained=self.configer.get('network', 'pretrained'), network="resnet152") + return model + + def deepbase_resnet152(self, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on Places + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], deep_base=True, + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, all_match=False, pretrained=self.configer.get('network', 'pretrained'), network="resnet152") + return model + + def wide_resnet16(self, **kwargs): + """Constructs a WideResNet-16 model. + """ + model = WiderResNetA2([1, 1, 1, 1, 1, 1], + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="wide_resnet") + return model + + def wide_resnet20(self, **kwargs): + """Constructs a WideResNet-20 model. + """ + model = WiderResNetA2([1, 1, 1, 3, 1, 1], + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="wide_resnet") + return model + + def wide_resnet38(self, **kwargs): + """Constructs a WideResNet-38 model. 
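+ Builds a WiderResNetA2 backbone with structure [3, 3, 6, 3, 1, 1]; weights are loaded through ModuleHelper.load_model when a pretrained path is configured.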
+ """ + model = WiderResNetA2([3, 3, 6, 3, 1, 1], + bn_type=self.configer.get('network', 'bn_type'), **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="wide_resnet") + return model diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnext_models.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnext_models.py new file mode 100644 index 0000000..0acec5c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/resnext_models.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +import torch.nn as nn + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, bn_type=None): + super(BasicBlock, self).__init__() + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out = out + identity + out = self.relu_in(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, bn_type=None): + super(Bottleneck, self).__init__() + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = 
conv1x1(inplanes, width) + self.bn1 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * self.expansion) + self.relu = nn.ReLU(inplace=False) + self.relu_in = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out = out + identity + out = self.relu_in(out) + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, + bn_type=None): + super(ResNet, self).__init__() + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + + self.resinit = nn.Sequential(OrderedDict([ + ('conv1', nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)), + ('bn1', ModuleHelper.BatchNorm2d(bn_type=bn_type)(self.inplanes)), + ('relu1', nn.ReLU(inplace=False))] + )) + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0], bn_type=bn_type) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0], bn_type=bn_type) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1], bn_type=bn_type) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2], bn_type=bn_type) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
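+ # (Only applied when zero_init_residual=True, after the Kaiming/BN initialization above.)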
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False, bn_type=None): + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, bn_type=bn_type)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + bn_type=bn_type)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.resinit(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.reshape(x.size(0), -1) + x = self.fc(x) + + return x + + +def ResNext(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], + progress=progress) + model.load_state_dict(state_dict) + return model + + +class ResNextModels(object): + + def __init__(self, configer): + self.configer = configer + + + def resnext101_32x8d(self, **kwargs): + """Constructs a ResNeXt-101 32x8d model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + pretrained = False + progress = False + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + model = ResNext('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, bn_type=self.configer.get('network', 'bn_type'), + **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnext") + return model + + + def resnext101_32x16d(self, **kwargs): + """Constructs a ResNeXt-101 32x16d model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + pretrained = False + progress = False + kwargs['groups'] = 32 + kwargs['width_per_group'] = 16 + model = ResNext('resnext101_32x16d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, bn_type=self.configer.get('network', 'bn_type'), + **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnext") + return model + + + def resnext101_32x32d(self, **kwargs): + """Constructs a ResNeXt-101 32x32d model. 
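+ Uses groups=32 and width_per_group=32 on a [3, 4, 23, 3] Bottleneck layout.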
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + pretrained = False + progress = False + kwargs['groups'] = 32 + kwargs['width_per_group'] = 32 + model = ResNext('resnext101_32x32d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, bn_type=self.configer.get('network', 'bn_type'), + **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnext") + return model + + + def resnext101_32x48d(self, **kwargs): + """Constructs a ResNeXt-101 32x48d model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + pretrained = False + progress = False + kwargs['groups'] = 32 + kwargs['width_per_group'] = 48 + model = ResNext('resnext101_32x48d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, bn_type=self.configer.get('network', 'bn_type'), + **kwargs) + model = ModuleHelper.load_model(model, pretrained=self.configer.get('network', 'pretrained'), + all_match=False, network="resnext") + return model diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/wide_resnet_models.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/wide_resnet_models.py new file mode 100644 index 0000000..d9aaf13 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/wide_resnet_models.py @@ -0,0 +1,198 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import torch.nn as nn +from collections import OrderedDict +from functools import partial + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + + +class GlobalAvgPool2d(nn.Module): + def __init__(self): + """Global average pooling over the input's spatial dimensions""" + super(GlobalAvgPool2d, self).__init__() + + def forward(self, inputs): + in_size = inputs.size() + return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) + + +class IdentityResidualBlock(nn.Module): + def __init__(self, + in_channels, + channels, + stride=1, + dilation=1, + groups=1, + bn_type=None, + dropout=None): + """Configurable identity-mapping residual block + + Parameters + ---------- + in_channels : int + Number of input channels. + channels : list of int + Number of channels in the internal feature maps. Can either have two or three elements: if three construct + a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then + `3 x 3` then `1 x 1` convolutions. + stride : int + Stride of the first `3 x 3` convolution + dilation : int + Dilation to apply to the `3 x 3` convolutions. + groups : int + Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with + bottleneck blocks. + bn_type : callable + Function to create normalization / activation Module. + dropout: callable + Function to create Dropout Module. 
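+ Note: the block is pre-activated (BN + ReLU runs before the convolutions) and the optionally projected shortcut is added to the output in place.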
+ """ + super(IdentityResidualBlock, self).__init__() + + # Check parameters for inconsistencies + if len(channels) != 2 and len(channels) != 3: + raise ValueError("channels must contain either two or three values") + if len(channels) == 2 and groups != 1: + raise ValueError("groups > 1 are only valid if len(channels) == 3") + + is_bottleneck = len(channels) == 3 + need_proj_conv = stride != 1 or in_channels != channels[-1] + + self.bn1 = ModuleHelper.BNReLU(in_channels, bn_type=bn_type) + if not is_bottleneck: + layers = [ + ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, + dilation=dilation)), + ("bn2", ModuleHelper.BNReLU(channels[0], bn_type=bn_type)), + ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, + dilation=dilation)) + ] + if dropout is not None: + layers = layers[0:2] + [("dropout", dropout())] + layers[2:] + else: + layers = [ + ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), + ("bn2", ModuleHelper.BNReLU(channels[0], bn_type=bn_type)), + ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, + groups=groups, dilation=dilation)), + ("bn3", ModuleHelper.BNReLU(channels[1], bn_type=bn_type)), + ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) + ] + if dropout is not None: + layers = layers[0:4] + [("dropout", dropout())] + layers[4:] + self.convs = nn.Sequential(OrderedDict(layers)) + + if need_proj_conv: + self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) + + def forward(self, x): + if hasattr(self, "proj_conv"): + bn1 = self.bn1(x) + shortcut = self.proj_conv(bn1) + else: + shortcut = x.clone() + bn1 = self.bn1(x) + + out = self.convs(bn1) + out.add_(shortcut) + + return out + + +class WiderResNetA2(nn.Module): + def __init__(self, + structure=[3, 3, 6, 3, 1, 1], + bn_type=None, + classes=0, + dilation=True): + """Wider ResNet with pre-activation (identity mapping) blocks + + This variant uses down-sampling by max-pooling in the first two blocks and by strided convolution in the others. + + Parameters + ---------- + structure : list of int + Number of residual blocks in each of the six modules of the network. + bn_type : callable + Function to create normalization / activation Module. + classes : int + If not `0` also include global average pooling and a fully-connected layer with `classes` outputs at the end + of the network. + dilation : bool + If `True` apply dilation to the last three modules and change the down-sampling factor from 32 to 8. 
+ """ + super(WiderResNetA2, self).__init__() + self.structure = structure + self.dilation = dilation + + if len(structure) != 6: + raise ValueError("Expected a structure with six values") + + # Initial layers + self.mod1 = nn.Sequential(OrderedDict([ + ("conv1", nn.Conv2d(3, 64, 3, stride=1, padding=1, bias=False)) + ])) + + # Groups of residual blocks + in_channels = 64 + channels = [(128, 128), (256, 256), (512, 512), (512, 1024), (512, 1024, 2048), (1024, 2048, 4096)] + for mod_id, num in enumerate(structure): + # Create blocks for module + blocks = [] + for block_id in range(num): + if not dilation: + dil = 1 + stride = 2 if block_id == 0 and 2 <= mod_id <= 4 else 1 + else: + if mod_id == 3: + dil = 2 + elif mod_id > 3: + dil = 4 + else: + dil = 1 + stride = 2 if block_id == 0 and mod_id == 2 else 1 + + if mod_id == 4: + drop = None + elif mod_id == 5: + drop = None + else: + drop = None + + blocks.append(( + "block%d" % (block_id + 1), + IdentityResidualBlock(in_channels, channels[mod_id], bn_type=bn_type, stride=stride, dilation=dil, + dropout=drop) + )) + + # Update channels and p_keep + in_channels = channels[mod_id][-1] + + # Create module + if mod_id < 2: + self.add_module("pool%d" % (mod_id + 2), nn.MaxPool2d(3, stride=2, padding=1, ceil_mode=True)) + self.add_module("mod%d" % (mod_id + 2), nn.Sequential(OrderedDict(blocks))) + + self.bn_out = ModuleHelper.BNReLU(in_channels, bn_type=bn_type) + + + def forward(self, img): + tuple_features = list() + out = self.mod1(img) + out = self.mod2(self.pool2(out)) + out = self.mod3(self.pool3(out)) + out = self.mod4(out) + tuple_features.append(out) + out = self.mod5(out) + tuple_features.append(out) + out = self.mod6(out) + tuple_features.append(out) + out = self.mod7(out) + out = self.bn_out(out) + tuple_features.append(out) + return tuple_features diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/wsl_resnext_models.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/wsl_resnext_models.py new file mode 100644 index 0000000..d28f8ad --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/resnet/wsl_resnext_models.py @@ -0,0 +1,78 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +# Optional list of dependencies required by the package +dependencies = ['torch', 'torchvision'] + +from torch.hub import load_state_dict_from_url +from torchvision.models.resnet import ResNet, Bottleneck + + +model_urls = { + 'resnext101_32x8d': 'https://download.pytorch.org/models/ig_resnext101_32x8-c38310e5.pth', + 'resnext101_32x16d': 'https://download.pytorch.org/models/ig_resnext101_32x16-c6f796b0.pth', + 'resnext101_32x32d': 'https://download.pytorch.org/models/ig_resnext101_32x32-e4b90b00.pth', + 'resnext101_32x48d': 'https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth', +} + + +def _resnext(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + state_dict = load_state_dict_from_url(model_urls[arch], progress=progress) + model.load_state_dict(state_dict) + return model + + +def resnext101_32x8d_wsl(progress=True, **kwargs): + """Constructs a ResNeXt-101 32x8 model pre-trained on weakly-supervised data + and finetuned on ImageNet from Figure 5 in + `"Exploring the Limits of Weakly Supervised Pretraining" `_ + + Args: + progress (bool): If True, displays a progress bar of the download to stderr. + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnext('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], True, progress, **kwargs) + + +def resnext101_32x16d_wsl(progress=True, **kwargs): + """Constructs a ResNeXt-101 32x16 model pre-trained on weakly-supervised data + and finetuned on ImageNet from Figure 5 in + `"Exploring the Limits of Weakly Supervised Pretraining" `_ + + Args: + progress (bool): If True, displays a progress bar of the download to stderr. + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 16 + return _resnext('resnext101_32x16d', Bottleneck, [3, 4, 23, 3], True, progress, **kwargs) + + +def resnext101_32x32d_wsl(progress=True, **kwargs): + """Constructs a ResNeXt-101 32x32 model pre-trained on weakly-supervised data + and finetuned on ImageNet from Figure 5 in + `"Exploring the Limits of Weakly Supervised Pretraining" `_ + + Args: + progress (bool): If True, displays a progress bar of the download to stderr. + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 32 + return _resnext('resnext101_32x32d', Bottleneck, [3, 4, 23, 3], True, progress, **kwargs) + + +def resnext101_32x48d_wsl(progress=True, **kwargs): + """Constructs a ResNeXt-101 32x48 model pre-trained on weakly-supervised data + and finetuned on ImageNet from Figure 5 in + `"Exploring the Limits of Weakly Supervised Pretraining" `_ + + Args: + progress (bool): If True, displays a progress bar of the download to stderr. 
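+
+ Example (a minimal sketch; downloading the weights requires network access):
+ >>> model = resnext101_32x48d_wsl(progress=True)
+ >>> model.eval()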
+ """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 48 + return _resnext('resnext101_32x48d', Bottleneck, [3, 4, 23, 3], True, progress, **kwargs) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/helper.py new file mode 100644 index 0000000..f4fbb19 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/helper.py @@ -0,0 +1,18 @@ +import torch.nn as nn + + +class IntermediateSequential(nn.Sequential): + def __init__(self, *args, return_intermediate=True): + super().__init__(*args) + self.return_intermediate = return_intermediate + + def forward(self, input): + if not self.return_intermediate: + return super().forward(input) + + intermediate_outputs = {} + output = input + for name, module in self.named_children(): + output = intermediate_outputs[name] = module(output) + + return output, intermediate_outputs \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/position_encoding.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/position_encoding.py new file mode 100644 index 0000000..340af78 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/position_encoding.py @@ -0,0 +1,41 @@ +import torch +import torch.nn as nn + + +class FixedPositionalEncoding(nn.Module): + def __init__(self, embedding_dim, max_length=5000): + super(FixedPositionalEncoding, self).__init__() + + pe = torch.zeros(max_length, embedding_dim) + position = torch.arange(0, max_length, dtype=torch.float).unsqueeze(1) + div_term = torch.exp( + torch.arange(0, embedding_dim, 2).float() + * (-torch.log(torch.tensor(10000.0)) / embedding_dim) + ) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + x = x + self.pe[: x.size(0), :] + return x + + +class LearnedPositionalEncoding(nn.Module): + def __init__(self, max_position_embeddings, embedding_dim, seq_length): + super(LearnedPositionalEncoding, self).__init__() + self.pe = nn.Embedding(max_position_embeddings, embedding_dim) + self.seq_length = seq_length + + self.register_buffer( + "position_ids", + torch.arange(max_position_embeddings).expand((1, -1)), + ) + + def forward(self, x, position_ids=None): + if position_ids is None: + position_ids = self.position_ids[:, : self.seq_length] + + position_embeddings = self.pe(position_ids) + return x + position_embeddings \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/setr.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/setr.py new file mode 100644 index 0000000..27bf2c0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/setr.py @@ -0,0 +1,721 @@ +import torch +import torch.nn as nn +from lib.models.backbones.vit.transformer import TransformerModel +from lib.models.backbones.vit.position_encoding import ( + FixedPositionalEncoding, + LearnedPositionalEncoding, +) +from lib.models.backbones.vit.helper import IntermediateSequential + +__all__ = [ + 'SETR_Naive_S', + 'SETR_Naive_L', + 'SETR_Naive_H', + 'SETR_PUP_S', + 'SETR_PUP_L', + 'SETR_PUP_H', + 'SETR_MLA_S', + 'SETR_MLA_L', + 'SETR_MLA_H', +] + +class SegmentationTransformer(nn.Module): + def __init__( + self, + img_dim, + patch_dim, + num_channels, + embedding_dim, + num_heads, + num_layers, + 
hidden_dim, + dropout_rate=0.0, + attn_dropout_rate=0.0, + conv_patch_representation=False, + positional_encoding_type="learned", + ): + super(SegmentationTransformer, self).__init__() + + assert embedding_dim % num_heads == 0 + assert img_dim % patch_dim == 0 + + self.img_dim = img_dim + self.embedding_dim = embedding_dim + self.num_heads = num_heads + self.patch_dim = patch_dim + self.num_channels = num_channels + self.dropout_rate = dropout_rate + self.attn_dropout_rate = attn_dropout_rate + self.conv_patch_representation = conv_patch_representation + + self.num_patches = int((img_dim // patch_dim) ** 2) + self.seq_length = self.num_patches + self.flatten_dim = patch_dim * patch_dim * num_channels + + self.linear_encoding = nn.Linear(self.flatten_dim, embedding_dim) + if positional_encoding_type == "learned": + self.position_encoding = LearnedPositionalEncoding( + self.seq_length, self.embedding_dim, self.seq_length + ) + elif positional_encoding_type == "fixed": + self.position_encoding = FixedPositionalEncoding( + self.embedding_dim, + ) + + self.pe_dropout = nn.Dropout(p=self.dropout_rate) + + self.transformer = TransformerModel( + embedding_dim, + num_layers, + num_heads, + hidden_dim, + self.dropout_rate, + self.attn_dropout_rate, + ) + self.pre_head_ln = nn.LayerNorm(embedding_dim) + + if self.conv_patch_representation: + self.conv_x = nn.Conv2d( + self.num_channels, + self.embedding_dim, + kernel_size=(self.patch_dim, self.patch_dim), + stride=(self.patch_dim, self.patch_dim), + padding=self._get_padding( + 'VALID', (self.patch_dim, self.patch_dim), + ), + ) + else: + self.conv_x = None + + def _init_decode(self): + raise NotImplementedError("Should be implemented in child class!!") + + def encode(self, x): + n, c, h, w = x.shape + if self.conv_patch_representation: + # combine embedding w/ conv patch distribution + x = self.conv_x(x) + x = x.permute(0, 2, 3, 1).contiguous() + x = x.view(x.size(0), -1, self.embedding_dim) + else: + x = ( + x.unfold(2, self.patch_dim, self.patch_dim) + .unfold(3, self.patch_dim, self.patch_dim) + .contiguous() + ) + x = x.view(n, c, -1, self.patch_dim ** 2) + x = x.permute(0, 2, 3, 1).contiguous() + x = x.view(x.size(0), -1, self.flatten_dim) + x = self.linear_encoding(x) + + x = self.position_encoding(x) + x = self.pe_dropout(x) + + # apply transformer + x, intmd_x = self.transformer(x) + x = self.pre_head_ln(x) + + return x, intmd_x + + def decode(self, x): + raise NotImplementedError("Should be implemented in child class!!") + + def forward(self, x, auxillary_output_layers=None): + encoder_output, intmd_encoder_outputs = self.encode(x) + decoder_output = self.decode( + encoder_output, intmd_encoder_outputs, auxillary_output_layers + ) + + if auxillary_output_layers is not None: + auxillary_outputs = {} + for i in auxillary_output_layers: + val = str(2 * i - 1) + _key = 'Z' + str(i) + auxillary_outputs[_key] = intmd_encoder_outputs[val] + + return decoder_output, auxillary_outputs + + return decoder_output + + def _get_padding(self, padding_type, kernel_size): + assert padding_type in ['SAME', 'VALID'] + if padding_type == 'SAME': + _list = [(k - 1) // 2 for k in kernel_size] + return tuple(_list) + return tuple(0 for _ in kernel_size) + + def _reshape_output(self, x): + x = x.view( + x.size(0), + int(self.img_dim / self.patch_dim), + int(self.img_dim / self.patch_dim), + self.embedding_dim, + ) + x = x.permute(0, 3, 1, 2).contiguous() + return x + + +class SETR_Naive(SegmentationTransformer): + def __init__( + self, + img_dim, + patch_dim, + 
num_channels, + num_classes, + embedding_dim, + num_heads, + num_layers, + hidden_dim, + dropout_rate=0.0, + attn_dropout_rate=0.0, + conv_patch_representation=False, + positional_encoding_type="learned", + ): + super(SETR_Naive, self).__init__( + img_dim=img_dim, + patch_dim=patch_dim, + num_channels=num_channels, + embedding_dim=embedding_dim, + num_heads=num_heads, + num_layers=num_layers, + hidden_dim=hidden_dim, + dropout_rate=dropout_rate, + attn_dropout_rate=attn_dropout_rate, + conv_patch_representation=conv_patch_representation, + positional_encoding_type=positional_encoding_type, + ) + + self.num_classes = num_classes + self._init_decode() + + def _init_decode(self): + self.conv1 = nn.Conv2d( + in_channels=self.embedding_dim, + out_channels=self.embedding_dim, + kernel_size=1, + stride=1, + padding=self._get_padding('VALID', (1, 1),), + ) + self.bn1 = nn.BatchNorm2d(self.embedding_dim) + self.act1 = nn.ReLU() + self.conv2 = nn.Conv2d( + in_channels=self.embedding_dim, + out_channels=self.num_classes, + kernel_size=1, + stride=1, + padding=self._get_padding('VALID', (1, 1),), + ) + self.upsample = nn.Upsample( + scale_factor=self.patch_dim, mode='bilinear' + ) + + def decode(self, x, intmd_x, intmd_layers=None): + x = self._reshape_output(x) + x = self.conv1(x) + x = self.bn1(x) + x = self.act1(x) + x = self.conv2(x) + x = self.upsample(x) + return x + + +class SETR_PUP(SegmentationTransformer): + def __init__( + self, + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim, + num_heads, + num_layers, + hidden_dim, + dropout_rate=0.0, + attn_dropout_rate=0.0, + conv_patch_representation=False, + positional_encoding_type="learned", + ): + super(SETR_PUP, self).__init__( + img_dim=img_dim, + patch_dim=patch_dim, + num_channels=num_channels, + embedding_dim=embedding_dim, + num_heads=num_heads, + num_layers=num_layers, + hidden_dim=hidden_dim, + dropout_rate=dropout_rate, + attn_dropout_rate=attn_dropout_rate, + conv_patch_representation=conv_patch_representation, + positional_encoding_type=positional_encoding_type, + ) + + self.num_classes = num_classes + self._init_decode() + + def _init_decode(self): + extra_in_channels = int(self.embedding_dim / 4) + in_channels = [ + self.embedding_dim, + extra_in_channels, + extra_in_channels, + extra_in_channels, + extra_in_channels, + ] + out_channels = [ + extra_in_channels, + extra_in_channels, + extra_in_channels, + extra_in_channels, + self.num_classes, + ] + + modules = [] + for i, (in_channel, out_channel) in enumerate( + zip(in_channels, out_channels) + ): + modules.append( + nn.Conv2d( + in_channels=in_channel, + out_channels=out_channel, + kernel_size=1, + stride=1, + padding=self._get_padding('VALID', (1, 1),), + ) + ) + if i != 4: + modules.append(nn.Upsample(scale_factor=2, mode='bilinear')) + self.decode_net = IntermediateSequential( + *modules, return_intermediate=False + ) + + def decode(self, x, intmd_x, intmd_layers=None): + x = self._reshape_output(x) + x = self.decode_net(x) + return x + + +class SETR_MLA(SegmentationTransformer): + def __init__( + self, + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim, + num_heads, + num_layers, + hidden_dim, + dropout_rate=0.0, + attn_dropout_rate=0.0, + conv_patch_representation=False, + positional_encoding_type="learned", + ): + super(SETR_MLA, self).__init__( + img_dim=img_dim, + patch_dim=patch_dim, + num_channels=num_channels, + embedding_dim=embedding_dim, + num_heads=num_heads, + num_layers=num_layers, + hidden_dim=hidden_dim, + 
dropout_rate=dropout_rate, + attn_dropout_rate=attn_dropout_rate, + conv_patch_representation=conv_patch_representation, + positional_encoding_type=positional_encoding_type, + ) + + self.num_classes = num_classes + self._init_decode() + + def _init_decode(self): + self.net1_in, self.net1_intmd, self.net1_out = self._define_agg_net() + self.net2_in, self.net2_intmd, self.net2_out = self._define_agg_net() + self.net3_in, self.net3_intmd, self.net3_out = self._define_agg_net() + self.net4_in, self.net4_intmd, self.net4_out = self._define_agg_net() + + # fmt: off + self.output_net = IntermediateSequential(return_intermediate=False) + self.output_net.add_module( + "conv_1", + nn.Conv2d( + in_channels=self.embedding_dim, out_channels=self.num_classes, + kernel_size=1, stride=1, + padding=self._get_padding('VALID', (1, 1),), + ) + ) + self.output_net.add_module( + "upsample_1", + nn.Upsample(scale_factor=4, mode='bilinear') + ) + # fmt: on + + def decode(self, x, intmd_x, intmd_layers=None): + assert intmd_layers is not None, "pass the intermediate layers for MLA" + + encoder_outputs = {} + all_keys = [] + for i in intmd_layers: + val = str(2 * i - 1) + _key = 'Z' + str(i) + all_keys.append(_key) + encoder_outputs[_key] = intmd_x[val] + all_keys.reverse() + + temp_x = encoder_outputs[all_keys[0]] + temp_x = self._reshape_output(temp_x) + key0_intmd_in = self.net1_in(temp_x) + key0_out = self.net1_out(key0_intmd_in) + + temp_x = encoder_outputs[all_keys[1]] + temp_x = self._reshape_output(temp_x) + key1_in = self.net2_in(temp_x) + key1_intmd_in = key1_in + key0_intmd_in + key1_intmd_out = self.net2_intmd(key1_intmd_in) + key1_out = self.net2_out(key1_intmd_out) + + temp_x = encoder_outputs[all_keys[2]] + temp_x = self._reshape_output(temp_x) + key2_in = self.net3_in(temp_x) + key2_intmd_in = key2_in + key1_intmd_in + key2_intmd_out = self.net3_intmd(key2_intmd_in) + key2_out = self.net3_out(key2_intmd_out) + + temp_x = encoder_outputs[all_keys[3]] + temp_x = self._reshape_output(temp_x) + key3_in = self.net4_in(temp_x) + key3_intmd_in = key3_in + key2_intmd_in + key3_intmd_out = self.net4_intmd(key3_intmd_in) + key3_out = self.net4_out(key3_intmd_out) + + out = torch.cat((key0_out, key1_out, key2_out, key3_out), dim=1) + out = self.output_net(out) + return out + + # fmt: off + def _define_agg_net(self): + model_in = IntermediateSequential(return_intermediate=False) + model_in.add_module( + "layer_1", + nn.Conv2d( + self.embedding_dim, int(self.embedding_dim / 2), 1, 1, + padding=self._get_padding('VALID', (1, 1),), + ), + ) + + model_intmd = IntermediateSequential(return_intermediate=False) + model_intmd.add_module( + "layer_intmd", + nn.Conv2d( + int(self.embedding_dim / 2), int(self.embedding_dim / 2), 3, 1, + padding=self._get_padding('SAME', (3, 3),), + ), + ) + + model_out = IntermediateSequential(return_intermediate=False) + model_out.add_module( + "layer_2", + nn.Conv2d( + int(self.embedding_dim / 2), int(self.embedding_dim / 2), 3, 1, + padding=self._get_padding('SAME', (3, 3),), + ), + ) + model_out.add_module( + "layer_3", + nn.Conv2d( + int(self.embedding_dim / 2), int(self.embedding_dim / 4), 3, 1, + padding=self._get_padding('SAME', (3, 3),), + ), + ) + model_out.add_module( + "upsample", nn.Upsample(scale_factor=4, mode='bilinear') + ) + return model_in, model_intmd, model_out + # fmt: on + + +def SETR_Naive_S(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + 
img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = None + model = SETR_Naive( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=768, + num_heads=12, + num_layers=12, + hidden_dim=3072, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_Naive_L(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = [10, 15, 20] + model = SETR_Naive( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=1024, + num_heads=16, + num_layers=24, + hidden_dim=4096, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_Naive_H(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = None + model = SETR_Naive( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=1280, + num_heads=16, + num_layers=32, + hidden_dim=5120, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_PUP_S(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = None + model = SETR_PUP( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=768, + num_heads=12, + num_layers=12, + hidden_dim=3072, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_PUP_L(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = [10, 15, 20, 24] + model = SETR_PUP( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=1024, + num_heads=16, + num_layers=24, + hidden_dim=4096, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_PUP_H(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = [10, 15, 20, 24] + model = SETR_PUP( + img_dim, + patch_dim, + 
num_channels, + num_classes, + embedding_dim=1280, + num_heads=16, + num_layers=32, + hidden_dim=5120, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_MLA_S(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = None + model = SETR_MLA( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=768, + num_heads=12, + num_layers=12, + hidden_dim=3072, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_MLA_L(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = [6, 12, 18, 24] + model = SETR_MLA( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=1024, + num_heads=16, + num_layers=24, + hidden_dim=4096, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model + + +def SETR_MLA_H(dataset='cityscapes', _conv_repr=False, _pe_type="learned"): + if dataset.lower() == 'cityscapes': + img_dim = 768 + num_classes = 19 + elif dataset.lower() == 'ade20k': + img_dim = 512 + num_classes = 150 + elif dataset.lower() == 'pascal': + img_dim = 480 + num_classes = 59 + + num_channels = 3 + patch_dim = 16 + aux_layers = [8, 16, 24, 32] + model = SETR_MLA( + img_dim, + patch_dim, + num_channels, + num_classes, + embedding_dim=1280, + num_heads=16, + num_layers=32, + hidden_dim=5120, + dropout_rate=0.1, + attn_dropout_rate=0.1, + conv_patch_representation=_conv_repr, + positional_encoding_type=_pe_type, + ) + + return aux_layers, model \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/transformer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/transformer.py new file mode 100644 index 0000000..639daf1 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/backbones/vit/transformer.py @@ -0,0 +1,119 @@ +import torch.nn as nn +from lib.models.backbones.vit.helper import IntermediateSequential + + +class SelfAttention(nn.Module): + def __init__( + self, dim, heads=8, qkv_bias=False, qk_scale=None, dropout_rate=0.0 + ): + super().__init__() + self.num_heads = heads + head_dim = dim // heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(dropout_rate) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(dropout_rate) + + def forward(self, x): + B, N, C = x.shape + qkv = ( + self.qkv(x) + .reshape(B, N, 3, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + q, k, v = ( + qkv[0], + qkv[1], + qkv[2], + ) # make torchscript happy (cannot use tensor as tuple) + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x 
= self.proj(x) + x = self.proj_drop(x) + return x + + +class Residual(nn.Module): + def __init__(self, fn): + super().__init__() + self.fn = fn + + def forward(self, x): + return self.fn(x) + x + + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x): + return self.fn(self.norm(x)) + + +class PreNormDrop(nn.Module): + def __init__(self, dim, dropout_rate, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.dropout = nn.Dropout(p=dropout_rate) + self.fn = fn + + def forward(self, x): + return self.dropout(self.fn(self.norm(x))) + + +class FeedForward(nn.Module): + def __init__(self, dim, hidden_dim, dropout_rate): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + nn.GELU(), + nn.Dropout(p=dropout_rate), + nn.Linear(hidden_dim, dim), + nn.Dropout(p=dropout_rate), + ) + + def forward(self, x): + return self.net(x) + + +class TransformerModel(nn.Module): + def __init__( + self, + dim, + depth, + heads, + mlp_dim, + dropout_rate=0.1, + attn_dropout_rate=0.1, + ): + super().__init__() + layers = [] + for _ in range(depth): + layers.extend( + [ + Residual( + PreNormDrop( + dim, + dropout_rate, + SelfAttention( + dim, heads=heads, dropout_rate=attn_dropout_rate + ), + ) + ), + Residual( + PreNorm(dim, FeedForward(dim, mlp_dim, dropout_rate)) + ), + ] + ) + self.net = IntermediateSequential(*layers) + + def forward(self, x): + return self.net(x) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/model_manager.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/model_manager.py new file mode 100644 index 0000000..25ed0a0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/model_manager.py @@ -0,0 +1,99 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Microsoft Research +## Author: RainbowSecret, LangHuang, JingyiXie, JianyuanGuo +## Copyright (c) 2019 +## yuyua@microsoft.com +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# Our approaches including FCN baseline, HRNet, OCNet, ISA, OCR +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# FCN baseline +from models.protoseg_core.lib.models.nets.fcnet import FcnNet + +# OCR +from models.protoseg_core.lib.models.nets.ocrnet import SpatialOCRNet, ASPOCRNet +from models.protoseg_core.lib.models.nets.ideal_ocrnet import IdealSpatialOCRNet, IdealSpatialOCRNetB, IdealSpatialOCRNetC, IdealGatherOCRNet, IdealDistributeOCRNet + +# HRNet +from models.protoseg_core.lib.models.nets.hrnet import HRNet_W48, HRNet_W48_CONTRAST +from models.protoseg_core.lib.models.nets.hrnet import HRNet_W48_OCR, HRNet_W48_OCR_B, HRNet_W48_OCR_B_HA, HRNet_W48_OCR_CONTRAST, HRNet_W48_MEM, HRNet_W48_Proto + +# OCNet +from models.protoseg_core.lib.models.nets.ocnet import BaseOCNet, AspOCNet + +# ISA Net +from models.protoseg_core.lib.models.nets.isanet import ISANet + +# CE2P +from models.protoseg_core.lib.models.nets.ce2pnet import CE2P_OCRNet, CE2P_IdealOCRNet, CE2P_ASPOCR + +# SegFix +from models.protoseg_core.lib.models.nets.segfix import SegFix_HRNet + +from 
models.protoseg_core.lib.utils.tools.logger import Logger as Log + +from models.protoseg_core.lib.models.nets.deeplab import DeepLabV3, DeepLabV3Contrast + +from models.protoseg_core.lib.models.nets.ms_ocrnet import MscaleOCR + +SEG_MODEL_DICT = { + # SegFix + 'segfix_hrnet': SegFix_HRNet, + # OCNet series + 'base_ocnet': BaseOCNet, + 'asp_ocnet': AspOCNet, + # ISA Net + 'isanet': ISANet, + # OCR series + 'spatial_ocrnet': SpatialOCRNet, + 'spatial_asp_ocrnet': ASPOCRNet, + # OCR series with ground-truth + 'ideal_spatial_ocrnet': IdealSpatialOCRNet, + 'ideal_spatial_ocrnet_b': IdealSpatialOCRNetB, + 'ideal_spatial_ocrnet_c': IdealSpatialOCRNetC, + 'ideal_gather_ocrnet': IdealGatherOCRNet, + 'ideal_distribute_ocrnet': IdealDistributeOCRNet, + # HRNet series + 'hrnet_w48': HRNet_W48, + 'hrnet_w48_ocr': HRNet_W48_OCR, + 'hrnet_w48_ocr_b': HRNet_W48_OCR_B, + # CE2P series + 'ce2p_asp_ocrnet': CE2P_ASPOCR, + 'ce2p_ocrnet': CE2P_OCRNet, + 'ce2p_ideal_ocrnet': CE2P_IdealOCRNet, + # baseline series + 'fcnet': FcnNet, + 'hrnet_w48_contrast': HRNet_W48_CONTRAST, + 'hrnet_w48_ocr_contrast': HRNet_W48_OCR_CONTRAST, + 'hrnet_w48_mem': HRNet_W48_MEM, + 'deeplab_v3': DeepLabV3, + 'deeplab_v3_contrast': DeepLabV3Contrast, + 'ms_ocr': MscaleOCR, + 'hrnet_w48_ocr_b_ha': HRNet_W48_OCR_B_HA, + 'hrnet_w48_proto': HRNet_W48_Proto +} + + +class ModelManager(object): + def __init__(self, configer): + self.configer = configer + + def semantic_segmentor(self): + model_name = self.configer.get('network', 'model_name') + + if model_name not in SEG_MODEL_DICT: + Log.error('Model: {} not valid!'.format(model_name)) + exit(1) + + model = SEG_MODEL_DICT[model_name](self.configer) + + return model diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/asp_oc_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/asp_oc_block.py new file mode 100644 index 0000000..09bdcb7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/asp_oc_block.py @@ -0,0 +1,110 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable + +from lib.models.modules.base_oc_block import BaseOC_Context_Module +from lib.models.tools.module_helper import ModuleHelper + + +class ASP_OC_Module(nn.Module): + def __init__(self, features, out_features=256, dilations=(12, 24, 36), bn_type=None, dropout=0.1): + super(ASP_OC_Module, self).__init__() + self.context = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True), + ModuleHelper.BNReLU(out_features, bn_type=bn_type), + BaseOC_Context_Module(in_channels=out_features, out_channels=out_features, + key_channels=out_features//2, value_channels=out_features//2, + dropout=0, sizes=([2]), bn_type=bn_type)) + self.conv2 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(out_features, bn_type=bn_type)) + 
self.conv3 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), + ModuleHelper.BNReLU(out_features, bn_type=bn_type)) + self.conv4 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), + ModuleHelper.BNReLU(out_features, bn_type=bn_type)) + self.conv5 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), + ModuleHelper.BNReLU(out_features, bn_type=bn_type)) + + self.conv_bn_dropout = nn.Sequential( + nn.Conv2d(out_features * 5, out_features * 2, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(out_features * 2, bn_type=bn_type), + nn.Dropout2d(dropout) + ) + + def _cat_each(self, feat1, feat2, feat3, feat4, feat5): + assert(len(feat1)==len(feat2)) + z = [] + for i in range(len(feat1)): + z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1)) + return z + + def forward(self, x): + if isinstance(x, Variable): + _, _, h, w = x.size() + elif isinstance(x, tuple) or isinstance(x, list): + _, _, h, w = x[0].size() + else: + raise RuntimeError('unknown input type') + + feat1 = self.context(x) + feat2 = self.conv2(x) + feat3 = self.conv3(x) + feat4 = self.conv4(x) + feat5 = self.conv5(x) + + if isinstance(x, Variable): + out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) + elif isinstance(x, tuple) or isinstance(x, list): + out = self._cat_each(feat1, feat2, feat3, feat4, feat5) + else: + raise RuntimeError('unknown input type') + + output = self.conv_bn_dropout(out) + return output + + +if __name__ == "__main__": + os.environ["CUDA_VISIBLE_DEVICES"] = '0' + feats = torch.randn((1, 2048, 128, 128)).cuda() + + conv_3x3 = nn.Sequential( + nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type='torchsyncbn'), + ) + aspoc_infer = ASP_OC_Module(512, + 256, + bn_type='torchsyncbn') + + aspoc_infer.eval() + conv_3x3.eval() + aspoc_infer.cuda() + conv_3x3.cuda() + + def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + avg_time = 0 + avg_mem = 0 + import time + # with torch.no_grad() + for i in range(100): + start_time = time.time() + outputs = conv_3x3(feats) + outputs = aspoc_infer(outputs) + torch.cuda.synchronize() + avg_time += (time.time() - start_time) + avg_mem += (torch.cuda.memory_allocated()-feats.element_size() * feats.nelement()) + + print("Average Parameters : {}".format(count_parameters(aspoc_infer)+count_parameters(conv_3x3))) + print("Average Running Time: {}".format(avg_time/100)) + print("Average GPU Memory: {}".format(avg_mem/100)) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/aspp_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/aspp_block.py new file mode 100644 index 0000000..ec86ffc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/aspp_block.py @@ -0,0 +1,133 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import torch +import torch.nn as nn +import torch.nn.functional as F + +from 
lib.models.tools.module_helper import ModuleHelper +from functools import partial + + +class ASPPModuleV2(nn.Module): + """ + Reference: + Chen, Liang-Chieh, et al. + *"Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs."* + """ + def __init__(self, features, inner_features=512, out_features=512, dilations=(12, 24, 36), bn_type=None, dropout=0.1): + super(ASPPModuleV2, self).__init__() + self.conv_1x1 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + self.conv_3x3_1 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + self.conv_3x3_2 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + self.conv_3x3_3 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + self.fuse = nn.Sequential( + nn.Conv2d(inner_features * 4, out_features, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(out_features, bn_type=bn_type), + nn.Dropout2d(dropout), + ) + + def forward(self, x): + _, _, h, w = x.size() + feat1 = self.conv_1x1(x) + feat2 = self.conv_3x3_1(x) + feat3 = self.conv_3x3_2(x) + feat4 = self.conv_3x3_3(x) + out = torch.cat((feat1, feat2, feat3, feat4), 1) + out = self.fuse(out) + return out + +class ASPPModule(nn.Module): + """ + Reference: + Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."* + """ + def __init__(self, features, inner_features=256, out_features=256, dilations=(12, 24, 36), bn_type=None, dropout=0.1): + super(ASPPModule, self).__init__() + + self.conv_gp = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, + bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + + self.conv_1x1 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + self.conv_3x3_1 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + self.conv_3x3_2 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + self.conv_3x3_3 = nn.Sequential( + nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), + ModuleHelper.BNReLU(inner_features, bn_type=bn_type)) + + self.fuse = nn.Sequential( + nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(out_features, bn_type=bn_type), + nn.Dropout2d(dropout), + ) + + def forward(self, x): + _, _, h, w = x.size() + feat_gp = F.interpolate(self.conv_gp(x), size=(h, w), mode='bilinear', align_corners=True) + feat1 = self.conv_1x1(x) + feat2 = self.conv_3x3_1(x) + feat3 = self.conv_3x3_2(x) + feat4 = self.conv_3x3_3(x) + out = torch.cat((feat_gp, feat1, feat2, feat3, feat4), 1) + out = self.fuse(out) + 
return out + + +if __name__ == "__main__": + os.environ["CUDA_VISIBLE_DEVICES"] = '0' + custom_bn_type = os.environ.get('bn_type', 'inplace_abn') + + if int(os.environ.get('eval_os_8', 1)): + print("Complexity Evaluation Results for ASPP with input shape [2048 X 128 X 128]") + feats = torch.randn((1, 2048, 128, 128)).cuda() + aspp_infer = ASPPModule(2048, 256, 256, bn_type=custom_bn_type) + else: + print("Complexity Evaluation Results for ASPP with input shape [720 X 256 X 512]") + feats = torch.randn((1, 720, 256, 512)).cuda() + aspp_infer = ASPPModule(720, 256, 256, bn_type=custom_bn_type) + + aspp_infer.eval() + aspp_infer.cuda() + + def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + avg_time = 0 + avg_mem = 0 + import time + with torch.no_grad(): + for i in range(100): + start_time = time.time() + outputs = aspp_infer(feats) + torch.cuda.synchronize() + avg_time += (time.time() - start_time) + avg_mem += (torch.cuda.max_memory_allocated()-feats.element_size() * feats.nelement()) + + print("Average Parameters : {}".format(count_parameters(aspp_infer))) + print("Average Running Time: {}".format(avg_time/100)) + print("Average GPU Memory: {:.2f} MB".format(avg_mem / 100 / 2**20)) + print("\n\n") \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/base_oc_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/base_oc_block.py new file mode 100644 index 0000000..f6f658e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/base_oc_block.py @@ -0,0 +1,234 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Modified from: https://github.com/AlexHex7/Non-local_pytorch +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## Ocnet: Object context network for scene parsing +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import sys +import pdb +import torch +from torch import nn +from torch.nn import functional as F + +from lib.models.tools.module_helper import ModuleHelper + + +class _SelfAttentionBlock(nn.Module): + ''' + The basic implementation for self-attention block/non-local block + Input: + N X C X H X W + Parameters: + in_channels : the dimension of the input feature map + key_channels : the dimension after the key/query transform + value_channels : the dimension after the value transform + scale : choose the scale to downsample the input feature maps (save memory cost) + Return: + N X C X H X W + position-aware context features.(w/o concate or add with the input) + ''' + def __init__(self, + in_channels, + key_channels, + value_channels, + out_channels=None, + scale=1, + bn_type=None): + super(_SelfAttentionBlock, self).__init__() + self.scale = scale + self.in_channels = in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + if out_channels == None: + self.out_channels = in_channels + self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) + self.f_key = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, 
stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_query = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + + self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels, + kernel_size=1, stride=1, padding=0) + self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels, + kernel_size=1, stride=1, padding=0) + nn.init.constant_(self.W.weight, 0) + nn.init.constant_(self.W.bias, 0) + + def forward(self, x): + batch_size, h, w = x.size(0), x.size(2), x.size(3) + if self.scale > 1: + x = self.pool(x) + value = self.f_value(x).view(batch_size, self.value_channels, -1) + value = value.permute(0, 2, 1) + query = self.f_query(x).view(batch_size, self.key_channels, -1) + query = query.permute(0, 2, 1) + key = self.f_key(x).view(batch_size, self.key_channels, -1) + + sim_map = torch.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = context.view(batch_size, self.value_channels, *x.size()[2:]) + context = self.W(context) + if self.scale > 1: + context = F.interpolate(input=context, size=(h, w), mode='bilinear', align_corners=True) + return context + + +class SelfAttentionBlock2D(_SelfAttentionBlock): + def __init__(self, + in_channels, + key_channels, + value_channels, + out_channels=None, + scale=1, + bn_type=None): + super(SelfAttentionBlock2D, self).__init__(in_channels, + key_channels, + value_channels, + out_channels, + scale, bn_type) + + +class BaseOC_Module(nn.Module): + """ + Implementation of the BaseOC module + Parameters: + in_features / out_features: the channels of the input / output feature maps. + dropout: we choose 0.05 as the default value. + size: you can apply multiple sizes. Here we only use one size. + Return: + features fused with Object context information. + """ + def __init__(self, + in_channels, + out_channels, + key_channels, + value_channels, + dropout, + sizes=([1]), + bn_type=None): + super(BaseOC_Module, self).__init__() + self.stages = [] + self.stages = nn.ModuleList([self._make_stage(in_channels, in_channels, + key_channels, value_channels, size, bn_type) for size in sizes]) + self.conv_bn_dropout = nn.Sequential( + nn.Conv2d(2*in_channels, out_channels, kernel_size=1, padding=0), + ModuleHelper.BNReLU(out_channels, bn_type=bn_type), + nn.Dropout2d(dropout) + ) + + def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size, bn_type): + return SelfAttentionBlock2D(in_channels, + key_channels, + value_channels, + output_channels, + size, + bn_type=bn_type) + + def forward(self, feats): + priors = [stage(feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + output = self.conv_bn_dropout(torch.cat([context, feats], 1)) + return output + + +class BaseOC_Context_Module(nn.Module): + """ + Output only the context features. + Parameters: + in_features / out_features: the channels of the input / output feature maps. 
+ dropout: specify the dropout ratio + fusion: We provide two different fusion method, "concat" or "add" + size: we find that directly learn the attention weights on even 1/8 feature maps is hard. + Return: + features after "concat" or "add" + """ + def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout=0, sizes=([1]), bn_type=None): + super(BaseOC_Context_Module, self).__init__() + self.stages = [] + self.stages = nn.ModuleList([self._make_stage(in_channels, out_channels, + key_channels, value_channels, size, bn_type) for size in sizes]) + self.conv_bn_dropout = nn.Sequential( + ModuleHelper.BNReLU(out_channels, bn_type=bn_type), + nn.Dropout2d(dropout), + ) + + def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size, bn_type): + return SelfAttentionBlock2D(in_channels, + key_channels, + value_channels, + output_channels, + size, bn_type=bn_type) + + def forward(self, feats): + priors = [stage(feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + output = self.conv_bn_dropout(context) + return output + +if __name__ == "__main__": + os.environ["CUDA_VISIBLE_DEVICES"] = '0' + feats = torch.randn((1, 2048, 128, 128)).cuda() + + conv_3x3 = nn.Sequential( + nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type='torchsyncbn'), + ) + baseoc_infer = BaseOC_Module(in_channels=512, + out_channels=512, + key_channels=256, + value_channels=256, + sizes=([1]), + dropout=0, + bn_type='torchsyncbn') + baseoc_infer.eval() + conv_3x3.eval() + baseoc_infer.cuda() + conv_3x3.cuda() + + def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + avg_time = 0 + avg_mem = 0 + import time + with torch.no_grad(): + for i in range(100): + start_time = time.time() + outputs = conv_3x3(feats) + outputs = baseoc_infer(outputs) + torch.cuda.synchronize() + avg_time += (time.time() - start_time) + avg_mem += (torch.cuda.max_memory_allocated()-feats.element_size() * feats.nelement()) + + print("Average Parameters : {}".format(count_parameters(baseoc_infer)+count_parameters(conv_3x3))) + print("Average Running Time: {}".format(avg_time/100)) + print("Average GPU Memory: {:.2f} MB".format(avg_mem / 100 / 2**20)) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/basic.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/basic.py new file mode 100644 index 0000000..75d5d82 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/basic.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn +from collections import OrderedDict + + +class SeparableConv2d(nn.Module): + def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, relu_first=True, + bias=False, norm_layer=nn.BatchNorm2d): + super().__init__() + depthwise = nn.Conv2d(inplanes, inplanes, kernel_size, + stride=stride, padding=dilation, + dilation=dilation, groups=inplanes, bias=bias) + bn_depth = norm_layer(inplanes) + pointwise = nn.Conv2d(inplanes, planes, 1, bias=bias) + bn_point = norm_layer(planes) + + if relu_first: + self.block = nn.Sequential(OrderedDict([('relu', nn.ReLU()), + ('depthwise', depthwise), + ('bn_depth', bn_depth), + ('pointwise', pointwise), + ('bn_point', bn_point) + ])) + else: + self.block = nn.Sequential(OrderedDict([('depthwise', depthwise), + ('bn_depth', bn_depth), + ('relu1', nn.ReLU(inplace=True)), + ('pointwise', pointwise), + ('bn_point', 
bn_point), + ('relu2', nn.ReLU(inplace=True)) + ])) + + def forward(self, x): + return self.block(x) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/contrast.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/contrast.py new file mode 100644 index 0000000..d8634d4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/contrast.py @@ -0,0 +1,29 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def momentum_update(old_value, new_value, momentum, debug=False): + update = momentum * old_value + (1 - momentum) * new_value + if debug: + print("old prot: {:.3f} x |{:.3f}|, new val: {:.3f} x |{:.3f}|, result= |{:.3f}|".format( + momentum, torch.norm(old_value, p=2), (1 - momentum), torch.norm(new_value, p=2), + torch.norm(update, p=2))) + return update + + +def l2_normalize(x): + return F.normalize(x, p=2, dim=-1) + + +class ProjectionHead(nn.Module): + def __init__(self, dim_in, proj_dim=256): + super(ProjectionHead, self).__init__() + + self.proj = self.mlp2 = nn.Sequential( + nn.Conv2d(dim_in, dim_in, 1), + nn.ReLU(inplace=True), + nn.Conv2d(dim_in, proj_dim, 1)) + + def forward(self, x): + return l2_normalize(self.proj(x)) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/decoder_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/decoder_block.py new file mode 100644 index 0000000..b84ebd5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/decoder_block.py @@ -0,0 +1,239 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Jianyuan Guo, Rainbowsecret +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import torch +from torch import nn +from torch.nn import functional as F + +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class SEModule(nn.Module): + """Squeeze and Extraction module""" + + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, + padding=0) + self.relu = nn.ReLU(inplace=False) + self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, + padding=0) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + return module_input * x + + +class ASPPModule(nn.Module): + """Atrous Spatial Pyramid Pooling module based on DeepLab v3 settings""" + + def __init__(self, in_dim, out_dim, d_rate=[12, 24, 36], bn_type=None): + super(ASPPModule, self).__init__() + self.b0 = nn.Sequential(nn.Conv2d(in_dim, out_dim, kernel_size=1, + bias=False), + ModuleHelper.BNReLU(out_dim, bn_type=bn_type) + ) + self.b1 = nn.Sequential(nn.Conv2d(in_dim, out_dim, kernel_size=3, + padding=d_rate[0], + dilation=d_rate[0], bias=False), + ModuleHelper.BNReLU(out_dim, bn_type=bn_type) + ) + self.b2 = nn.Sequential(nn.Conv2d(in_dim, out_dim, kernel_size=3, + padding=d_rate[1], + dilation=d_rate[1], bias=False), + ModuleHelper.BNReLU(out_dim, bn_type=bn_type) + ) + self.b3 = nn.Sequential(nn.Conv2d(in_dim, out_dim, kernel_size=3, + padding=d_rate[2], + dilation=d_rate[2], bias=False), + 
ModuleHelper.BNReLU(out_dim, bn_type=bn_type) + ) + self.b4 = nn.Sequential(nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_dim, out_dim, kernel_size=1, + padding=0, bias=False), + ModuleHelper.BNReLU(out_dim, bn_type=bn_type) + ) + + self.project = nn.Sequential( + nn.Conv2d(5 * out_dim, out_dim, kernel_size=3, padding=1, + bias=False), + ModuleHelper.BNReLU(out_dim, bn_type=bn_type) + ) + + def forward(self, x): + h, w = x.size()[2:] + feat0 = self.b0(x) + feat1 = self.b1(x) + feat2 = self.b2(x) + feat3 = self.b3(x) + feat4 = F.interpolate(self.b4(x), size=(h, w), mode='bilinear', + align_corners=True) + + out = torch.cat((feat0, feat1, feat2, feat3, feat4), dim=1) + return self.project(out) + +class DeepLabHead_MobileNet_V1(nn.Module): + """Segmentation head based on DeepLab v3""" + + def __init__(self, num_classes, bn_type=None): + super(DeepLabHead_MobileNet_V1, self).__init__() + # main pipeline + self.layer_aspp = ASPPModule(1024, 512, bn_type=bn_type) + self.refine = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, + padding=1, stride=1, bias=False), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(512), + nn.Conv2d(512, num_classes, kernel_size=1, + stride=1, bias=True)) + + def forward(self, x): + # aspp module + x_aspp = self.layer_aspp(x) + # refine module + x_seg = self.refine(x_aspp) + + return x_seg + +class DeepLabHead_MobileNet_V3(nn.Module): + """Segmentation head based on DeepLab v3""" + + def __init__(self, num_classes, bn_type=None): + super(DeepLabHead_MobileNet_V3, self).__init__() + # main pipeline + self.layer_aspp = ASPPModule(960, 512, bn_type=bn_type) + self.refine = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, + padding=1, stride=1, bias=False), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(512), + nn.Conv2d(512, num_classes, kernel_size=1, + stride=1, bias=True)) + + def forward(self, x): + # aspp module + x_aspp = self.layer_aspp(x) + # refine module + x_seg = self.refine(x_aspp) + + return x_seg + +class DeepLabHead_MobileNet(nn.Module): + """Segmentation head based on DeepLab v3""" + + def __init__(self, num_classes, bn_type=None): + super(DeepLabHead_MobileNet, self).__init__() + # main pipeline + self.layer_aspp = ASPPModule(1280, 512, bn_type=bn_type) + self.refine = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, + padding=1, stride=1, bias=False), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(512), + nn.Conv2d(512, num_classes, kernel_size=1, + stride=1, bias=True)) + + def forward(self, x): + # aspp module + x_aspp = self.layer_aspp(x) + # refine module + x_seg = self.refine(x_aspp) + + return x_seg + + +class DeepLabHead(nn.Module): + """Segmentation head based on DeepLab v3""" + + def __init__(self, num_classes, bn_type=None): + super(DeepLabHead, self).__init__() + # auxiliary loss + self.layer_dsn = nn.Sequential(nn.Conv2d(1024, 256, kernel_size=3, + stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=bn_type), + nn.Conv2d(256, num_classes, + kernel_size=1, stride=1, + padding=0, bias=True)) + # main pipeline + self.layer_aspp = ASPPModule(2048, 512, bn_type=bn_type) + self.refine = nn.Sequential(nn.Conv2d(512, 512, kernel_size=3, + padding=1, stride=1, bias=False), + ModuleHelper.BatchNorm2d(bn_type=bn_type)(512), + nn.Conv2d(512, num_classes, kernel_size=1, + stride=1, bias=True)) + + def forward(self, x): + # auxiliary supervision + x_dsn = self.layer_dsn(x[2]) + # aspp module + x_aspp = self.layer_aspp(x[3]) + # refine module + x_seg = self.refine(x_aspp) + + return [x_seg, x_dsn] + + +class Decoder_Module(nn.Module): + + def __init__(self, 
bn_type=None, inplane1=512, inplane2=256, outplane=128): + super(Decoder_Module, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(inplane1, 256, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(256, bn_type=bn_type), + ) + self.conv2 = nn.Sequential( + nn.Conv2d(inplane2, 48, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(48, bn_type=bn_type), + ) + self.conv3 = nn.Sequential( + nn.Conv2d(304, outplane, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(outplane, bn_type=bn_type), + nn.Conv2d(outplane, outplane, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(outplane, bn_type=bn_type), + ) + + def forward(self, xt, xl): + _, _, h, w = xl.size() + xt = F.interpolate(xt, size=(h, w), mode='bilinear', align_corners=True) + xl = self.conv2(xl) + x = torch.cat([xt, xl], dim=1) + x = self.conv3(x) + return x + + +class CE2P_Decoder_Module(nn.Module): + + def __init__(self, num_classes, dropout=0, bn_type=None, inplane1=512, inplane2=256): + super(CE2P_Decoder_Module, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(inplane1, 256, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(256, bn_type=bn_type), + ) + self.conv2 = nn.Sequential( + nn.Conv2d(inplane2, 48, kernel_size=1, stride=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(48, bn_type=bn_type), + ) + self.conv3 = nn.Sequential( + nn.Conv2d(304, 256, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(256, bn_type=bn_type), + nn.Conv2d(256, 256, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(256, bn_type=bn_type), + nn.Dropout2d(dropout), + ) + + self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True) + + def forward(self, xt, xl): + _, _, h, w = xl.size() + xt = F.interpolate(self.conv1(xt), size=(h, w), mode='bilinear', align_corners=True) + xl = self.conv2(xl) + x = torch.cat([xt, xl], dim=1) + x = self.conv3(x) + seg = self.conv4(x) + return seg, x diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/edge_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/edge_block.py new file mode 100644 index 0000000..b3fcf01 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/edge_block.py @@ -0,0 +1,56 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Reproduce model writed by RainbowSecret +## Created by: Jianyuan Guo +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import torch +from torch import nn +from torch.nn import functional as F + +from lib.models.tools.module_helper import ModuleHelper + + +class Edge_Module(nn.Module): + def __init__(self, mid_fea, out_fea, bn_type=None, factor=1): + super(Edge_Module, self).__init__() + + self.conv1 = nn.Sequential( + nn.Conv2d(factor*256, mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(mid_fea, bn_type=bn_type), + ) + self.conv2 = nn.Sequential( + nn.Conv2d(factor*512, mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(mid_fea, bn_type=bn_type), + ) + self.conv3 = nn.Sequential( + nn.Conv2d(factor*1024, mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(mid_fea, bn_type=bn_type), + ) + + 
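+        # Added note: conv1/conv2/conv3 above are 1x1 reductions that bring the
+        # three backbone stages (factor*256, factor*512 and factor*1024 channels)
+        # down to mid_fea channels; conv4 below is a shared 3x3 predictor that
+        # turns each of them into an out_fea-channel edge map, and conv5 fuses the
+        # three upsampled edge maps into the final prediction (see forward()).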
self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True) + self.conv5 = nn.Conv2d(out_fea*3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True) + + def forward(self, x1, x2, x3): + _, _, h, w = x1.size() + + edge1_fea = self.conv1(x1) + edge1 = self.conv4(edge1_fea) + edge2_fea = self.conv2(x2) + edge2 = self.conv4(edge2_fea) + edge3_fea = self.conv3(x3) + edge3 = self.conv4(edge3_fea) + + edge2_fea = F.interpolate(edge2_fea, size=(h, w), mode='bilinear', align_corners=True) + edge3_fea = F.interpolate(edge3_fea, size=(h, w), mode='bilinear', align_corners=True) + edge2 = F.interpolate(edge2, size=(h, w), mode='bilinear', align_corners=True) + edge3 = F.interpolate(edge3, size=(h, w), mode='bilinear', align_corners=True) + + edge_fea = torch.cat([edge1_fea, edge2_fea, edge3_fea], dim=1) + edge = torch.cat([edge1, edge2, edge3], dim=1) + edge = self.conv5(edge) + + return edge, edge_fea \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/hanet_attention.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/hanet_attention.py new file mode 100644 index 0000000..1a38e7a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/hanet_attention.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math +from models.protoseg_core.lib.models.modules.pos_embedding import PosEmbedding1D, PosEncoding1D +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + +def Upsample(x, size): + """ + Wrapper Around the Upsample Call + """ + return nn.functional.interpolate(x, size=size, mode='bilinear', + align_corners=True) + + +class HANet_Conv(nn.Module): + + def __init__(self, in_channel, out_channel, kernel_size=3, r_factor=64, layer=3, pos_injection=2, is_encoding=1, + pos_rfactor=8, pooling='mean', dropout_prob=0.0, pos_noise=0.0, bn_type=None): + super(HANet_Conv, self).__init__() + + self.pooling = pooling + self.pos_injection = pos_injection + self.layer = layer + self.dropout_prob = dropout_prob + self.sigmoid = nn.Sigmoid() + + if r_factor > 0: + mid_1_channel = math.ceil(in_channel / r_factor) + elif r_factor < 0: + r_factor = r_factor * -1 + mid_1_channel = in_channel * r_factor + + if self.dropout_prob > 0: + self.dropout = nn.Dropout2d(self.dropout_prob) + + self.attention_first = nn.Sequential( + nn.Conv1d(in_channels=in_channel, out_channels=mid_1_channel, + kernel_size=1, stride=1, padding=0, bias=False), + ModuleHelper.BNReLU(mid_1_channel, bn_type=bn_type), + nn.ReLU(inplace=True)) + + if layer == 2: + self.attention_second = nn.Sequential( + nn.Conv1d(in_channels=mid_1_channel, out_channels=out_channel, + kernel_size=kernel_size, stride=1, padding=kernel_size // 2, bias=True)) + elif layer == 3: + mid_2_channel = (mid_1_channel * 2) + self.attention_second = nn.Sequential( + nn.Conv1d(in_channels=mid_1_channel, out_channels=mid_2_channel, + kernel_size=3, stride=1, padding=1, bias=True), + ModuleHelper.BNReLU(mid_2_channel, bn_type=bn_type), + nn.ReLU(inplace=True)) + self.attention_third = nn.Sequential( + nn.Conv1d(in_channels=mid_2_channel, out_channels=out_channel, + kernel_size=kernel_size, stride=1, padding=kernel_size // 2, bias=True)) + + if self.pooling == 'mean': + # print("##### average pooling") + self.rowpool = nn.AdaptiveAvgPool2d((128 // pos_rfactor, 1)) + else: + # print("##### max pooling") + self.rowpool = nn.AdaptiveMaxPool2d((128 // pos_rfactor, 1)) + + if pos_rfactor > 0: + if 
is_encoding == 0: + if self.pos_injection == 1: + self.pos_emb1d_1st = PosEmbedding1D(pos_rfactor, dim=in_channel, pos_noise=pos_noise) + elif self.pos_injection == 2: + self.pos_emb1d_2nd = PosEmbedding1D(pos_rfactor, dim=mid_1_channel, pos_noise=pos_noise) + elif is_encoding == 1: + if self.pos_injection == 1: + self.pos_emb1d_1st = PosEncoding1D(pos_rfactor, dim=in_channel, pos_noise=pos_noise) + elif self.pos_injection == 2: + self.pos_emb1d_2nd = PosEncoding1D(pos_rfactor, dim=mid_1_channel, pos_noise=pos_noise) + else: + print("Not supported position encoding") + exit() + + def forward(self, x, out, pos=None, return_attention=False, return_posmap=False, attention_loss=False): + """ + inputs : + x : input feature maps( B X C X W X H) + returns : + out : self attention value + input feature + attention: B X N X N (N is Width*Height) + """ + H = out.size(2) + x1d = self.rowpool(x).squeeze(3) + + if pos is not None and self.pos_injection == 1: + if return_posmap: + x1d, pos_map1 = self.pos_emb1d_1st(x1d, pos, True) + else: + x1d = self.pos_emb1d_1st(x1d, pos) + + if self.dropout_prob > 0: + x1d = self.dropout(x1d) + x1d = self.attention_first(x1d) + + if pos is not None and self.pos_injection == 2: + if return_posmap: + x1d, pos_map2 = self.pos_emb1d_2nd(x1d, pos, True) + else: + x1d = self.pos_emb1d_2nd(x1d, pos) + + x1d = self.attention_second(x1d) + + if self.layer == 3: + x1d = self.attention_third(x1d) + if attention_loss: + last_attention = x1d + x1d = self.sigmoid(x1d) + else: + if attention_loss: + last_attention = x1d + x1d = self.sigmoid(x1d) + + x1d = F.interpolate(x1d, size=H, mode='linear') + out = torch.mul(out, x1d.unsqueeze(3)) + + if return_attention: + if return_posmap: + if self.pos_injection == 1: + pos_map = (pos_map1) + elif self.pos_injection == 2: + pos_map = (pos_map2) + return out, x1d, pos_map + else: + return out, x1d + else: + if attention_loss: + return out, last_attention + else: + return out diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/infer_time.sh b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/infer_time.sh new file mode 100644 index 0000000..901790c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/infer_time.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile + +export PYTHONPATH="../../../":$PYTHONPATH +export eval_os_8=1 +export bn_type="inplace_abn" + +# ${PYTHON} -m lib.models.modules.psp_block +# ${PYTHON} -m lib.models.modules.aspp_block +# ${PYTHON} -m lib.models.modules.base_oc_block +export isa_type="base_oc" +${PYTHON} -m lib.models.modules.isa_block +${PYTHON} -m lib.models.modules.spatial_ocr_block diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/isa_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/isa_block.py new file mode 100644 index 0000000..e28c7c2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/isa_block.py @@ -0,0 +1,188 @@ +import torch +import math +from torch import nn +from torch.nn import functional as F +import numpy as np + +from lib.models.tools.module_helper import ModuleHelper + + +class SelfAttentionBlock2D(nn.Module): + ''' + The basic implementation for self-attention block/non-local block + Input: + N X C X H X W + Parameters: + in_channels : the dimension of the input feature map + key_channels : the dimension after the key/query transform + value_channels : the dimension after the value transform + scale : choose the scale to downsample the input feature maps (save memory cost) + Return: + N X C X H X W + position-aware context features.(w/o concate or add with the input) + ''' + def __init__(self, in_channels, key_channels, value_channels, out_channels=None, bn_type=None): + super(SelfAttentionBlock2D, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + if out_channels == None: + self.out_channels = in_channels + self.f_key = nn.Sequential( + nn.Conv2d(self.in_channels, self.key_channels, kernel_size=1, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(self.key_channels, self.key_channels, kernel_size=1, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_query = nn.Sequential( + nn.Conv2d(self.in_channels, self.key_channels, kernel_size=1, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(self.key_channels, self.key_channels, kernel_size=1, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + + self.f_value = nn.Conv2d(self.in_channels, self.value_channels, kernel_size=1, bias=False) + self.W = nn.Sequential( + nn.Conv2d(self.value_channels, self.out_channels, kernel_size=1, bias=False), + ModuleHelper.BNReLU(self.out_channels, bn_type=bn_type) + ) + + + + def forward(self, x): + batch_size, h, w = x.size(0), x.size(2), x.size(3) + + value = self.f_value(x).view(batch_size, self.value_channels, -1) + value = value.permute(0, 2, 1) + query = self.f_query(x).view(batch_size, self.key_channels, -1) + query = query.permute(0, 2, 1) + key = self.f_key(x).view(batch_size, self.key_channels, -1) + + sim_map = torch.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = context.view(batch_size, self.value_channels, h, w) + context = self.W(context) + return context + + +class ISA_Block(nn.Module): + def __init__(self, in_channels, key_channels, value_channels, out_channels, down_factor=[8,8], bn_type=None): + super(ISA_Block, self).__init__() + self.out_channels = out_channels + assert isinstance(down_factor, (tuple, list)) and len(down_factor) == 
2 + self.down_factor = down_factor + self.long_range_sa = SelfAttentionBlock2D(in_channels, key_channels, value_channels, out_channels, bn_type=bn_type) + self.short_range_sa = SelfAttentionBlock2D(out_channels, key_channels, value_channels, out_channels, bn_type=bn_type) + + def forward(self, x): + n, c, h, w = x.size() + dh, dw = self.down_factor # down_factor for h and w, respectively + + out_h, out_w = math.ceil(h / dh), math.ceil(w / dw) + # pad the feature if the size is not divisible + pad_h, pad_w = out_h * dh - h, out_w * dw - w + if pad_h > 0 or pad_w > 0: # padding in both left&right sides + feats = F.pad(x, (pad_w//2, pad_w - pad_w//2, pad_h//2, pad_h - pad_h//2)) + else: + feats = x + + # long range attention + feats = feats.view(n, c, out_h, dh, out_w, dw) + feats = feats.permute(0, 3, 5, 1, 2, 4).contiguous().view(-1, c, out_h, out_w) + feats = self.long_range_sa(feats) + c = self.out_channels + + # short range attention + feats = feats.view(n, dh, dw, c, out_h, out_w) + feats = feats.permute(0, 4, 5, 3, 1, 2).contiguous().view(-1, c, dh, dw) + feats = self.short_range_sa(feats) + feats = feats.view(n, out_h, out_w, c, dh, dw).permute(0, 3, 1, 4, 2, 5) + feats = feats.contiguous().view(n, c, dh * out_h, dw * out_w) + + # remove padding + if pad_h > 0 or pad_w > 0: + feats = feats[:, :, pad_h//2:pad_h//2 + h, pad_w//2:pad_w//2 + w] + + return feats + + +class ISA_Module(nn.Module): + def __init__(self, in_channels, key_channels, value_channels, out_channels, down_factors=[[8,8]], dropout=0, bn_type=None): + super(ISA_Module, self).__init__() + + assert isinstance(down_factors, (tuple, list)) + self.down_factors = down_factors + + self.stages = nn.ModuleList([ + ISA_Block(in_channels, key_channels, value_channels, out_channels, d, bn_type) for d in down_factors + ]) + + concat_channels = in_channels + out_channels + if len(self.down_factors) > 1: + self.up_conv = nn.Sequential( + nn.Conv2d(in_channels, len(self.down_factors) * out_channels, kernel_size=1, padding=0, bias=False), + ModuleHelper.BNReLU(len(self.down_factors) * out_channels, bn_type=bn_type), + ) + concat_channels = out_channels * len(self.down_factors) * 2 + + self.conv_bn = nn.Sequential( + nn.Conv2d(concat_channels, out_channels, kernel_size=1, bias=False), + ModuleHelper.BNReLU(out_channels, bn_type=bn_type), + nn.Dropout2d(dropout), + ) + + def forward(self, x): + priors = [stage(x) for stage in self.stages] + if len(self.down_factors) == 1: + context = priors[0] + else: + context = torch.cat(priors, dim=1) + x = self.up_conv(x) + # residual connection + return self.conv_bn(torch.cat([x, context], dim=1)) + + +if __name__ == "__main__": + import os + os.environ["CUDA_VISIBLE_DEVICES"] = '0' + feats = torch.randn((1, 2048, 128, 128)).cuda() + mem = torch.cuda.max_memory_allocated() + conv_3x3 = nn.Sequential( + nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type='torchsyncbn'), + ) + baseoc_infer = ISA_Module(in_channels=512, + key_channels=256, + value_channels=512, + out_channels=512, + dropout=0, + bn_type='torchsyncbn') + baseoc_infer.eval() + baseoc_infer.cuda() + conv_3x3.eval() + conv_3x3.cuda() + + def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + avg_time = 0 + avg_mem = 0 + import time + with torch.no_grad(): + for i in range(110): + start_time = time.time() + outputs = conv_3x3(feats) + outputs = baseoc_infer(outputs) + torch.cuda.synchronize() + if i >= 10: + avg_time += (time.time() - start_time) + 
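+                # Added note: the timing skips the first 10 of the 110 iterations
+                # as GPU warm-up, so the averages printed below are taken over the
+                # remaining 100 runs.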
avg_mem += (torch.cuda.max_memory_allocated()-feats.element_size() * feats.nelement()) + + print("Average Parameters : {}".format(count_parameters(baseoc_infer))) + print("Average Running Time: {}".format(avg_time/100)) + print("Average GPU Memory: {:.2f} MB".format(avg_mem / 100 / 2**20)) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/norm.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/norm.py new file mode 100644 index 0000000..616373c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/norm.py @@ -0,0 +1,60 @@ +import torch +import math +import warnings + + +def _no_grad_trunc_normal_(tensor, mean, std, a, b): + # Cut & paste from PyTorch official master until it's in a few official releases - RW + # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " + "The distribution of values may be incorrect.", + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * l - 1, 2 * u - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): + # type: (Tensor, float, float, float, float) -> Tensor + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. 
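+    Internally, a value is drawn uniformly between the normal CDF values of
+    :math:`a` and :math:`b` and then mapped back through the inverse normal CDF
+    (implemented with ``erfinv``), so samples land inside :math:`[a, b]` by
+    construction.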
+ Args: + tensor: an n-dimensional `torch.Tensor` + mean: the mean of the normal distribution + std: the standard deviation of the normal distribution + a: the minimum cutoff value + b: the maximum cutoff value + Examples: + >>> w = torch.empty(3, 5) + >>> nn.init.trunc_normal_(w) + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/offset_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/offset_block.py new file mode 100644 index 0000000..ef194eb --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/offset_block.py @@ -0,0 +1,53 @@ +import torch +import math +import pdb +from torch import nn +from torch.nn import functional as F +import numpy as np + +from lib.models.tools.module_helper import ModuleHelper + + +class OffsetBlock(nn.Module): + ''' + This module takes relative offset as input and outputs feature at each position (coordinate + offset) + ''' + def __init__(self): + super(OffsetBlock, self).__init__() + self.coord_map = None + self.norm_factor = None + + def _gen_coord_map(self, H, W): + coord_vecs = [torch.arange(length, dtype=torch.float).cuda() for length in (H, W)] + coord_h, coord_w = torch.meshgrid(coord_vecs) + return coord_h, coord_w + + def forward(self, x, offset_map): + n, c, h, w = x.size() + + if self.coord_map is None or self.coord_map[0].size() != offset_map.size()[2:]: + self.coord_map = self._gen_coord_map(h, w) + self.norm_factor = torch.cuda.FloatTensor([(w-1) / 2, (h-1) / 2]) + + # offset to absolute coordinate + grid_h = offset_map[:, 0] + self.coord_map[0] # (N, H, W) + grid_w = offset_map[:, 1] + self.coord_map[1] # (N, H, W) + + # scale to [-1, 1], order of grid: [x, y] (i.e., [w, h]) + grid = torch.stack([grid_w, grid_h], dim=-1) / self.norm_factor - 1. 
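+        # Added note: offset_map is expressed in pixels; adding it to the absolute
+        # coordinate map and dividing by (size - 1) / 2 before subtracting 1
+        # rescales the sampling locations to the [-1, 1] range that F.grid_sample
+        # expects, with the last dimension ordered as (x, y), i.e. (w, h).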
# (N, H, W, 2) + + # use grid to obtain output feature + feats = F.grid_sample(x, grid, padding_mode='border') # (N, C, H, W) + + return feats + + +class OffsetModule(nn.Module): + def __init__(self): + super(OffsetModule, self).__init__() + self.offset_block = OffsetBlock() + + def forward(self, x, offset): + # sample + x_out = self.offset_block(x, offset) + return x_out diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/pos_embedding.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/pos_embedding.py new file mode 100644 index 0000000..f3e99a9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/pos_embedding.py @@ -0,0 +1,143 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +def initialize_embedding(*models): + """ + Initialize Model Weights + """ + for model in models: + for module in model.modules(): + if isinstance(module, nn.Embedding): + module.weight.data.zero_() # original + + +def Upsample(x, size): + """ + Wrapper Around the Upsample Call + """ + return nn.functional.interpolate(x, size=size, mode='bilinear', + align_corners=True) + + +def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None): + ''' Sinusoid position encoding table ''' + + def cal_angle(position, hid_idx): + if d_hid > 50: + cycle = 10 + elif d_hid > 5: + cycle = 100 + else: + cycle = 10000 + cycle = 10 if d_hid > 50 else 100 + return position / np.power(cycle, 2 * (hid_idx // 2) / d_hid) + + def get_posi_angle_vec(position): + return [cal_angle(position, hid_j) for hid_j in range(d_hid)] + + sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)]) + sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i + sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 + if padding_idx is not None: + # zero vector for padding dimension + sinusoid_table[padding_idx] = 0. 
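+    # Added note (illustrative): the returned table has one row per position and
+    # one column per channel, with sin in the even columns and cos in the odd
+    # ones. For example, PosEncoding1D below with pos_rfactor=4 builds a
+    # (128 // 4 + 1) x dim = 33 x dim table, matching the "33 X 80" shape noted
+    # in its return_posmap comment.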
+ return torch.FloatTensor(sinusoid_table) + + +class PosEmbedding2D(nn.Module): + + def __init__(self, pos_rfactor, dim): + super(PosEmbedding2D, self).__init__() + + self.pos_layer_h = nn.Embedding((128 // pos_rfactor) + 1, dim) + self.pos_layer_w = nn.Embedding((128 // pos_rfactor) + 1, dim) + initialize_embedding(self.pos_layer_h) + initialize_embedding(self.pos_layer_w) + + def forward(self, x, pos): + pos_h, pos_w = pos + pos_h = pos_h.unsqueeze(1) + pos_w = pos_w.unsqueeze(1) + pos_h = nn.functional.interpolate(pos_h.float(), size=x.shape[2:], mode='nearest').long() # B X 1 X H X W + pos_w = nn.functional.interpolate(pos_w.float(), size=x.shape[2:], mode='nearest').long() # B X 1 X H X W + pos_h = self.pos_layer_h(pos_h).transpose(1, 4).squeeze(4) # B X 1 X H X W X C + pos_w = self.pos_layer_w(pos_w).transpose(1, 4).squeeze(4) # B X 1 X H X W X C + x = x + pos_h + pos_w + return x + + +class PosEncoding1D(nn.Module): + + def __init__(self, pos_rfactor, dim, pos_noise=0.0): + super(PosEncoding1D, self).__init__() + print("use PosEncoding1D") + self.sel_index = torch.tensor([0]).cuda() + pos_enc = (get_sinusoid_encoding_table((128 // pos_rfactor) + 1, dim) + 1) + self.pos_layer = nn.Embedding.from_pretrained(embeddings=pos_enc, freeze=True) + self.pos_noise = pos_noise + self.noise_clamp = 16 // pos_rfactor # 4: 4, 8: 2, 16: 1 + + self.pos_rfactor = pos_rfactor + if pos_noise > 0.0: + self.min = 0.0 # torch.tensor([0]).cuda() + self.max = 128 // pos_rfactor # torch.tensor([128//pos_rfactor]).cuda() + self.noise = torch.distributions.normal.Normal(torch.tensor([0.0]), torch.tensor([pos_noise])) + + def forward(self, x, pos, return_posmap=False): + pos_h, _ = pos # B X H X W + pos_h = pos_h // self.pos_rfactor + pos_h = pos_h.index_select(2, self.sel_index).unsqueeze(1).squeeze(3) # B X 1 X H + pos_h = nn.functional.interpolate(pos_h.float(), size=x.shape[2], mode='nearest').long() # B X 1 X 48 + + if self.training is True and self.pos_noise > 0.0: + # pos_h = pos_h + (self.noise.sample(pos_h.shape).squeeze(3).cuda()//1).long() + pos_h = pos_h + torch.clamp((self.noise.sample(pos_h.shape).squeeze(3).cuda() // 1).long(), + min=-self.noise_clamp, max=self.noise_clamp) + pos_h = torch.clamp(pos_h, min=self.min, max=self.max) + # pos_h = torch.where(pos_h < self.min_tensor, self.min_tensor, pos_h) + # pos_h = torch.where(pos_h > self.max_tensor, self.max_tensor, pos_h) + + pos_h = self.pos_layer(pos_h).transpose(1, 3).squeeze(3) # B X 1 X 48 X 80 > B X 80 X 48 X 1 + x = x + pos_h + if return_posmap: + return x, self.pos_layer.weight # 33 X 80 + return x + + +class PosEmbedding1D(nn.Module): + + def __init__(self, pos_rfactor, dim, pos_noise=0.0): + super(PosEmbedding1D, self).__init__() + print("use PosEmbedding1D") + self.sel_index = torch.tensor([0]).cuda() + self.pos_layer = nn.Embedding((128 // pos_rfactor) + 1, dim) + initialize_embedding(self.pos_layer) + self.pos_noise = pos_noise + self.pos_rfactor = pos_rfactor + self.noise_clamp = 16 // pos_rfactor # 4: 4, 8: 2, 16: 1 + + if pos_noise > 0.0: + self.min = 0.0 # torch.tensor([0]).cuda() + self.max = 128 // pos_rfactor # torch.tensor([128//pos_rfactor]).cuda() + self.noise = torch.distributions.normal.Normal(torch.tensor([0.0]), torch.tensor([pos_noise])) + + def forward(self, x, pos, return_posmap=False): + pos_h, _ = pos # B X H X W + pos_h = pos_h // self.pos_rfactor + pos_h = pos_h.index_select(2, self.sel_index).unsqueeze(1).squeeze(3) # B X 1 X H + pos_h = nn.functional.interpolate(pos_h.float(), size=x.shape[2], 
mode='nearest').long() # B X 1 X 48 + + if self.training is True and self.pos_noise > 0.0: + # pos_h = pos_h + (self.noise.sample(pos_h.shape).squeeze(3).cuda()//1).long() + pos_h = pos_h + torch.clamp((self.noise.sample(pos_h.shape).squeeze(3).cuda() // 1).long(), + min=-self.noise_clamp, max=self.noise_clamp) + pos_h = torch.clamp(pos_h, min=self.min, max=self.max) + + pos_h = self.pos_layer(pos_h).transpose(1, 3).squeeze(3) # B X 1 X 48 X 80 > B X 80 X 48 X 1 + x = x + pos_h + if return_posmap: + return x, self.pos_layer.weight # 33 X 80 + return x diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/projection.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/projection.py new file mode 100644 index 0000000..94ba558 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/projection.py @@ -0,0 +1,24 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class ProjectionHead(nn.Module): + def __init__(self, dim_in, proj_dim=256, proj='convmlp', bn_type='torchsyncbn'): + super(ProjectionHead, self).__init__() + + Log.info('proj_dim: {}'.format(proj_dim)) + + if proj == 'linear': + self.proj = nn.Conv2d(dim_in, proj_dim, kernel_size=1) + elif proj == 'convmlp': + self.proj = nn.Sequential( + nn.Conv2d(dim_in, dim_in, kernel_size=1), + ModuleHelper.BNReLU(dim_in, bn_type=bn_type), + nn.Conv2d(dim_in, proj_dim, kernel_size=1) + ) + + def forward(self, x): + return F.normalize(self.proj(x), p=2, dim=1) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/psp_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/psp_block.py new file mode 100644 index 0000000..6a88ef6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/psp_block.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: speedinghzl +# deeplabv3 res101 (synchronized BN version) + +import os +import torch +import torch.nn as nn +import torch.nn.functional as F + +from lib.models.backbones.backbone_selector import BackboneSelector +from lib.models.tools.module_helper import ModuleHelper + + +class PSPModule(nn.Module): + """ + Reference: + Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* + """ + def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6), bn_type=None): + super(PSPModule, self).__init__() + + self.stages = [] + self.stages = nn.ModuleList([self._make_stage(features, out_features, size, bn_type) for size in sizes]) + self.bottleneck = nn.Sequential( + nn.Conv2d(features+len(sizes)*out_features, out_features, kernel_size=3, padding=1, dilation=1, bias=False), + ModuleHelper.BNReLU(out_features, bn_type=bn_type), + nn.Dropout2d(0.1) + ) + + def _make_stage(self, features, out_features, size, bn_type): + prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) + conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) + bn = ModuleHelper.BNReLU(out_features, bn_type=bn_type) + return nn.Sequential(prior, conv, bn) + + def forward(self, feats): + h, w = feats.size(2), feats.size(3) + priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] + [feats] + bottle = self.bottleneck(torch.cat(priors, 1)) + return bottle + + +if __name__ == "__main__": + os.environ["CUDA_VISIBLE_DEVICES"] = '0' + custom_bn_type = os.environ.get('bn_type', 'inplace_abn') + + if int(os.environ.get('eval_os_8', 1)): + print("Complexity Evaluation Results for PPM with input shape [2048 X 128 X 128]") + feats = torch.randn((1, 2048, 128, 128)).cuda() + psp_infer = PSPModule(2048, bn_type=custom_bn_type) + else: + print("Complexity Evaluation Results for PPM with input shape [720 X 256 X 512]") + feats = torch.randn((1, 720, 256, 512)).cuda() + psp_infer = PSPModule(720, bn_type=custom_bn_type) + + psp_infer.eval() + psp_infer.cuda() + + def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + avg_time = 0 + avg_mem = 0 + import time + with torch.no_grad(): + for i in range(100): + start_time = time.time() + outputs = psp_infer(feats) + torch.cuda.synchronize() + avg_time += (time.time() - start_time) + avg_mem += (torch.cuda.max_memory_allocated()) + + print("Average Parameters : {}".format(count_parameters(psp_infer))) + print("Average Running Time: {}".format(avg_time/100)) + print("Average GPU Memory: {:.2f} MB".format(avg_mem / 100 / 2**20)) + print("\n\n") \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/seg_basic.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/seg_basic.py new file mode 100644 index 0000000..d9bb153 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/seg_basic.py @@ -0,0 +1,17 @@ +import torch.nn as nn +from lib.models.tools.module_helper import ModuleHelper + + +class _FCNHead(nn.Module): + def __init__(self, in_channels, channels): + super(_FCNHead, self).__init__() + inter_channels = in_channels // 4 + self.block = nn.Sequential( + nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), + ModuleHelper.BNReLU(inter_channels, bn_type='torchsyncbn'), + nn.Dropout(0.1), + nn.Conv2d(inter_channels, channels, 1) + ) + + def forward(self, x): + return self.block(x) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/sinkhorn.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/sinkhorn.py new file mode 100644 index 0000000..601f9c2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/sinkhorn.py @@ -0,0 +1,85 @@ +import torch +import torch.nn.functional as F + + +def distributed_sinkhorn(out, sinkhorn_iterations=3, epsilon=0.05): + L = torch.exp(out / 
epsilon).t() # K x B + B = L.shape[1] + K = L.shape[0] + + # make the matrix sums to 1 + sum_L = torch.sum(L) + L /= sum_L + + for _ in range(sinkhorn_iterations): + L /= torch.sum(L, dim=1, keepdim=True) + L /= K + + L /= torch.sum(L, dim=0, keepdim=True) + L /= B + + L *= B + L = L.t() + + indexs = torch.argmax(L, dim=1) + # L = torch.nn.functional.one_hot(indexs, num_classes=L.shape[1]).float() + L = F.gumbel_softmax(L, tau=0.5, hard=True) + + return L, indexs + + +def distributed_greenkhorn(out, sinkhorn_iterations=100, epsilon=0.05): + L = torch.exp(out / epsilon).t() + K = L.shape[0] + B = L.shape[1] + + # make the matrix sums to 1 + sum_L = torch.sum(L) + L /= sum_L + + r = torch.ones((K,), dtype=L.dtype).to(L.device) / K + c = torch.ones((B,), dtype=L.dtype).to(L.device) / B + + r_sum = torch.sum(L, axis=1) + c_sum = torch.sum(L, axis=0) + + r_gain = r_sum - r + r * torch.log(r / r_sum + 1e-5) + c_gain = c_sum - c + c * torch.log(c / c_sum + 1e-5) + + for _ in range(sinkhorn_iterations): + i = torch.argmax(r_gain) + j = torch.argmax(c_gain) + r_gain_max = r_gain[i] + c_gain_max = c_gain[j] + + if r_gain_max > c_gain_max: + scaling = r[i] / r_sum[i] + old_row = L[i, :] + new_row = old_row * scaling + L[i, :] = new_row + + L = L / torch.sum(L) + r_sum = torch.sum(L, axis=1) + c_sum = torch.sum(L, axis=0) + + r_gain = r_sum - r + r * torch.log(r / r_sum + 1e-5) + c_gain = c_sum - c + c * torch.log(c / c_sum + 1e-5) + else: + scaling = c[j] / c_sum[j] + old_col = L[:, j] + new_col = old_col * scaling + L[:, j] = new_col + + L = L / torch.sum(L) + r_sum = torch.sum(L, axis=1) + c_sum = torch.sum(L, axis=0) + + r_gain = r_sum - r + r * torch.log(r / r_sum + 1e-5) + c_gain = c_sum - c + c * torch.log(c / c_sum + 1e-5) + + L = L.t() + + indexs = torch.argmax(L, dim=1) + G = F.gumbel_softmax(L, tau=0.5, hard=True) + + return L, indexs \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/spatial_ocr_block.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/spatial_ocr_block.py new file mode 100644 index 0000000..9c8ee3b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/spatial_ocr_block.py @@ -0,0 +1,439 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import pdb +import math +import torch +from torch import nn +from torch.autograd import Variable +from torch.nn import functional as F + +from lib.models.tools.module_helper import ModuleHelper + + +def label_to_onehot(gt, num_classes, ignore_index=-1): + ''' + gt: ground truth with size (N, H, W) + num_classes: the number of classes of different label + ''' + N, H, W = gt.size() + x = gt + x[x == ignore_index] = num_classes + # convert label into onehot format + onehot = torch.zeros(N, x.size(1), x.size(2), num_classes + 1).cuda() + onehot = onehot.scatter_(-1, x.unsqueeze(-1), 1) + + return onehot.permute(0, 3, 1, 2) + + +class SpatialGather_Module(nn.Module): + """ + Aggregate the context features according to the initial predicted probability distribution. + Employ the soft-weighted method to aggregate the context. 
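+    In the default (no ground truth) path this amounts to, for every class k,
+    context[b, :, k, 0] = sum_i softmax(scale * probs)[b, k, i] * feats[b, :, i],
+    i.e. a soft, class-wise weighted average of the pixel features, returning a
+    tensor of shape (batch, channels, num_classes, 1).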
+ """ + + def __init__(self, cls_num=0, scale=1, use_gt=False): + super(SpatialGather_Module, self).__init__() + self.cls_num = cls_num + self.scale = scale + self.use_gt = use_gt + self.relu = nn.ReLU(inplace=True) + + def forward(self, feats, probs, gt_probs=None): + if self.use_gt and gt_probs is not None: + gt_probs = label_to_onehot(gt_probs.squeeze(1).type(torch.cuda.LongTensor), probs.size(1)) + batch_size, c, h, w = gt_probs.size(0), gt_probs.size(1), gt_probs.size(2), gt_probs.size(3) + gt_probs = gt_probs.view(batch_size, c, -1) + feats = feats.view(batch_size, feats.size(1), -1) + feats = feats.permute(0, 2, 1) # batch x hw x c + gt_probs = F.normalize(gt_probs, p=1, dim=2) # batch x k x hw + ocr_context = torch.matmul(gt_probs, feats).permute(0, 2, 1).unsqueeze(3) # batch x k x c + return ocr_context + else: + batch_size, c, h, w = probs.size(0), probs.size(1), probs.size(2), probs.size(3) + probs = probs.view(batch_size, c, -1) + feats = feats.view(batch_size, feats.size(1), -1) + feats = feats.permute(0, 2, 1) # batch x hw x c + probs = F.softmax(self.scale * probs, dim=2) # batch x k x hw + ocr_context = torch.matmul(probs, feats).permute(0, 2, 1).unsqueeze(3) # batch x k x c + return ocr_context + + +class PyramidSpatialGather_Module(nn.Module): + """ + Aggregate the context features according to the initial predicted probability distribution. + Employ the soft-weighted method to aggregate the context. + """ + + def __init__(self, cls_num=0, scales=[1, 2, 4]): + super(PyramidSpatialGather_Module, self).__init__() + self.cls_num = cls_num + self.scales = scales + self.relu = nn.ReLU(inplace=True) + + def _compute_single_scale(self, feats, probs, dh, dw): + batch_size, k, h, w = probs.size(0), probs.size(1), probs.size(2), probs.size(3) + c = feats.size(1) + + out_h, out_w = math.ceil(h / dh), math.ceil(w / dw) + pad_h, pad_w = out_h * dh - h, out_w * dw - w + if pad_h > 0 or pad_w > 0: # padding in both left&right sides + feats = F.pad(feats, (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) + probs = F.pad(probs, (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) + + feats = feats.view(batch_size, c, out_h, dh, out_w, dw).permute(0, 3, 5, 1, 2, 4) + feats = feats.contiguous().view(batch_size, dh * dw, c, out_h, out_w) + + probs = probs.view(batch_size, k, out_h, dh, out_w, dw).permute(0, 3, 5, 1, 2, 4) + probs = probs.contiguous().view(batch_size, dh * dw, k, out_h, out_w) + + feats = feats.view(batch_size, dh * dw, c, -1) + probs = probs.view(batch_size, dh * dw, k, -1) + feats = feats.permute(0, 1, 3, 2) + + probs = F.softmax(probs, dim=3) # batch x k x hw + cc = torch.matmul(probs, feats).view(batch_size, -1, c) # batch x k x c + + return cc.permute(0, 2, 1).unsqueeze(3) + + def forward(self, feats, probs): + ocr_list = [] + for scale in self.scales: + ocr_tmp = self._compute_single_scale(feats, probs, scale, scale) + ocr_list.append(ocr_tmp) + pyramid_ocr = torch.cat(ocr_list, 2) + return pyramid_ocr + + +class _ObjectAttentionBlock(nn.Module): + ''' + The basic implementation for object context block + Input: + N X C X H X W + Parameters: + in_channels : the dimension of the input feature map + key_channels : the dimension after the key/query transform + scale : choose the scale to downsample the input feature maps (save memory cost) + use_gt : whether use the ground truth label map to compute the similarity map + fetch_attention : whether return the estimated similarity map + bn_type : specify the bn type + Return: + N X C X H X W + ''' 
+ + def __init__(self, + in_channels, + key_channels, + scale=1, + use_gt=False, + use_bg=False, + fetch_attention=False, + bn_type=None): + super(_ObjectAttentionBlock, self).__init__() + self.scale = scale + self.in_channels = in_channels + self.key_channels = key_channels + self.use_gt = use_gt + self.use_bg = use_bg + self.fetch_attention = fetch_attention + self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) + self.f_pixel = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_object = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_down = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_up = nn.Sequential( + nn.Conv2d(in_channels=self.key_channels, out_channels=self.in_channels, + kernel_size=1, stride=1, padding=0), + ModuleHelper.BNReLU(self.in_channels, bn_type=bn_type), + ) + + def forward(self, x, proxy, gt_label=None): + batch_size, h, w = x.size(0), x.size(2), x.size(3) + if self.scale > 1: + x = self.pool(x) + + query = self.f_pixel(x).view(batch_size, self.key_channels, -1) + query = query.permute(0, 2, 1) + key = self.f_object(proxy).view(batch_size, self.key_channels, -1) + value = self.f_down(proxy).view(batch_size, self.key_channels, -1) + value = value.permute(0, 2, 1) + + if self.use_gt and gt_label is not None: + gt_label = label_to_onehot(gt_label.squeeze(1).type(torch.cuda.LongTensor), proxy.size(2) - 1) + sim_map = gt_label[:, :, :, :].permute(0, 2, 3, 1).view(batch_size, h * w, -1) + if self.use_bg: + bg_sim_map = 1.0 - sim_map + bg_sim_map = F.normalize(bg_sim_map, p=1, dim=-1) + sim_map = F.normalize(sim_map, p=1, dim=-1) + else: + sim_map = torch.matmul(query, key) + sim_map = (self.key_channels ** -.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + # add bg context ... 
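+ # sim_map: N x (HW) x K attention weights over the K object regions;
+ # value:   N x K x key_channels object descriptors. The matmul below
+ # redistributes the gathered object context back to every pixel
+ # (the OCR "distribution" step).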
+ context = torch.matmul(sim_map, value) # hw x k x k x c + context = context.permute(0, 2, 1).contiguous() + context = context.view(batch_size, self.key_channels, *x.size()[2:]) + context = self.f_up(context) + if self.scale > 1: + context = F.interpolate(input=context, size=(h, w), mode='bilinear', align_corners=True) + + if self.use_bg: + bg_context = torch.matmul(bg_sim_map, value) + bg_context = bg_context.permute(0, 2, 1).contiguous() + bg_context = bg_context.view(batch_size, self.key_channels, *x.size()[2:]) + bg_context = self.f_up(bg_context) + bg_context = F.interpolate(input=bg_context, size=(h, w), mode='bilinear', align_corners=True) + return context, bg_context + else: + if self.fetch_attention: + return context, sim_map + else: + return context + + +class ObjectAttentionBlock2D(_ObjectAttentionBlock): + def __init__(self, + in_channels, + key_channels, + scale=1, + use_gt=False, + use_bg=False, + fetch_attention=False, + bn_type=None): + super(ObjectAttentionBlock2D, self).__init__(in_channels, + key_channels, + scale, + use_gt, + use_bg, + fetch_attention, + bn_type=bn_type) + + +class SpatialOCR_Module(nn.Module): + """ + Implementation of the OCR module: + We aggregate the global object representation to update the representation for each pixel. + + use_gt=True: whether use the ground-truth label to compute the ideal object contextual representations. + use_bg=True: use the ground-truth label to compute the ideal background context to augment the representations. + use_oc=True: use object context or not. + """ + + def __init__(self, + in_channels, + key_channels, + out_channels, + scale=1, + dropout=0.1, + use_gt=False, + use_bg=False, + use_oc=True, + fetch_attention=False, + bn_type=None): + super(SpatialOCR_Module, self).__init__() + self.use_gt = use_gt + self.use_bg = use_bg + self.use_oc = use_oc + self.fetch_attention = fetch_attention + self.object_context_block = ObjectAttentionBlock2D(in_channels, + key_channels, + scale, + use_gt, + use_bg, + fetch_attention, + bn_type) + if self.use_bg: + if self.use_oc: + _in_channels = 3 * in_channels + else: + _in_channels = 2 * in_channels + else: + _in_channels = 2 * in_channels + + self.conv_bn_dropout = nn.Sequential( + nn.Conv2d(_in_channels, out_channels, kernel_size=1, padding=0), + ModuleHelper.BNReLU(out_channels, bn_type=bn_type), + nn.Dropout2d(dropout) + ) + + def forward(self, feats, proxy_feats, gt_label=None): + if self.use_gt and gt_label is not None: + if self.use_bg: + context, bg_context = self.object_context_block(feats, proxy_feats, gt_label) + else: + context = self.object_context_block(feats, proxy_feats, gt_label) + else: + if self.fetch_attention: + context, sim_map = self.object_context_block(feats, proxy_feats) + else: + context = self.object_context_block(feats, proxy_feats) + + if self.use_bg: + if self.use_oc: + output = self.conv_bn_dropout(torch.cat([context, bg_context, feats], 1)) + else: + output = self.conv_bn_dropout(torch.cat([bg_context, feats], 1)) + else: + output = self.conv_bn_dropout(torch.cat([context, feats], 1)) + + if self.fetch_attention: + return output, sim_map + else: + return output + + +class SpatialOCR_Context(nn.Module): + """ + Implementation of the FastOC module: + We aggregate the global object representation to update the representation for each pixel. 
+ """ + + def __init__(self, in_channels, key_channels, scale=1, dropout=0, bn_type=None, ): + super(SpatialOCR_Context, self).__init__() + self.object_context_block = ObjectAttentionBlock2D(in_channels, + key_channels, + scale, + bn_type=bn_type) + + def forward(self, feats, proxy_feats): + context = self.object_context_block(feats, proxy_feats) + return context + + +class SpatialOCR_ASP_Module(nn.Module): + def __init__(self, features, hidden_features=256, out_features=512, dilations=(12, 24, 36), num_classes=19, + bn_type=None, dropout=0.1): + super(SpatialOCR_ASP_Module, self).__init__() + from lib.models.modules.spatial_ocr_block import SpatialOCR_Context + self.context = nn.Sequential( + nn.Conv2d(features, hidden_features, kernel_size=3, padding=1, dilation=1, bias=True), + ModuleHelper.BNReLU(hidden_features, bn_type=bn_type), + SpatialOCR_Context(in_channels=hidden_features, + key_channels=hidden_features // 2, scale=1, bn_type=bn_type), + ) + self.conv2 = nn.Sequential( + nn.Conv2d(features, hidden_features, kernel_size=1, padding=0, dilation=1, bias=True), + ModuleHelper.BNReLU(hidden_features, bn_type=bn_type), ) + self.conv3 = nn.Sequential( + nn.Conv2d(features, hidden_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=True), + ModuleHelper.BNReLU(hidden_features, bn_type=bn_type), ) + self.conv4 = nn.Sequential( + nn.Conv2d(features, hidden_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=True), + ModuleHelper.BNReLU(hidden_features, bn_type=bn_type), ) + self.conv5 = nn.Sequential( + nn.Conv2d(features, hidden_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=True), + ModuleHelper.BNReLU(hidden_features, bn_type=bn_type), ) + self.conv_bn_dropout = nn.Sequential( + nn.Conv2d(hidden_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=True), + ModuleHelper.BNReLU(out_features, bn_type=bn_type), + nn.Dropout2d(dropout) + ) + self.object_head = SpatialGather_Module(num_classes) + + def _cat_each(self, feat1, feat2, feat3, feat4, feat5): + assert (len(feat1) == len(feat2)) + z = [] + for i in range(len(feat1)): + z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1)) + return z + + def forward(self, x, probs): + if isinstance(x, Variable): + _, _, h, w = x.size() + elif isinstance(x, tuple) or isinstance(x, list): + _, _, h, w = x[0].size() + else: + raise RuntimeError('unknown input type') + + feat1 = self.context[0](x) + feat1 = self.context[1](feat1) + proxy_feats = self.object_head(feat1, probs) + feat1 = self.context[2](feat1, proxy_feats) + feat2 = self.conv2(x) + feat3 = self.conv3(x) + feat4 = self.conv4(x) + feat5 = self.conv5(x) + + if isinstance(x, Variable): + out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) + elif isinstance(x, tuple) or isinstance(x, list): + out = self._cat_each(feat1, feat2, feat3, feat4, feat5) + else: + raise RuntimeError('unknown input type') + + output = self.conv_bn_dropout(out) + return output + + +if __name__ == "__main__": + os.environ["CUDA_VISIBLE_DEVICES"] = '0' + probs = torch.randn((1, 19, 128, 128)).cuda() + feats = torch.randn((1, 2048, 128, 128)).cuda() + + conv_3x3 = nn.Sequential( + nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type='torchsyncbn'), + ) + + ocp_gather_infer = SpatialGather_Module(19) + ocp_distr_infer = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0, + bn_type='torchsyncbn') + ocp_gather_infer.eval() + 
ocp_gather_infer.cuda() + ocp_distr_infer.eval() + ocp_distr_infer.cuda() + conv_3x3.eval() + conv_3x3.cuda() + + + def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + + avg_time = 0 + avg_mem = 0 + import time + + with torch.no_grad(): + for i in range(100): + start_time = time.time() + feats_ = conv_3x3(feats) + ocp_feats = ocp_gather_infer(feats_, probs) + outputs = ocp_distr_infer(feats_, ocp_feats) + torch.cuda.synchronize() + avg_time += (time.time() - start_time) + avg_mem += (torch.cuda.max_memory_allocated() - feats.element_size() * feats.nelement()) + + print("Average Parameters : {}".format(count_parameters(ocp_distr_infer) + count_parameters(conv_3x3))) + print("Average Running Time: {}".format(avg_time / 100)) + print("Average GPU Memory: {:.2f} MB".format(avg_mem / 100 / 2 ** 20)) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/time_log b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/time_log new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/trans_layer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/trans_layer.py new file mode 100644 index 0000000..17cf719 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/trans_layer.py @@ -0,0 +1,241 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from functools import partial +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +import math +from lib.utils.tools.logger import Logger as Log +from lib.models.tools.module_helper import ModuleHelper +from lib.models.modules.basic import SeparableConv2d + + +def make_sine_position_embedding(d_model, size, temperature=10000, + scale=2 * math.pi): + h, w = size, size + area = torch.ones(1, h, w) # [b, h, w] + y_embed = area.cumsum(1, dtype=torch.float32) + x_embed = area.cumsum(2, dtype=torch.float32) + + one_direction_feats = d_model // 2 + + eps = 1e-6 + y_embed = y_embed / (y_embed[:, -1:, :] + eps) * scale + x_embed = x_embed / (x_embed[:, :, -1:] + eps) * scale + + dim_t = torch.arange(one_direction_feats, dtype=torch.float32) + dim_t = temperature ** (2 * (dim_t // 2) / one_direction_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack( + (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) + pos_y = torch.stack( + (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2).contiguous() + pos = pos.flatten(2).permute(0, 2, 1).contiguous() + return pos + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1): + super().__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." 
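+ # Cross-attention: queries come from the high-level feature, keys/values from
+ # the low-level feature. With sr_ratio > 1 the key/value map is first
+ # downsampled by a strided conv (PVT-style spatial reduction), so the attention
+ # matrix stays B x heads x HW x (HW / sr_ratio**2) instead of B x heads x HW x HW.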
+ + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + def forward(self, low_feature, h_feature, H, W): + B, N, C = h_feature.shape + q = self.q(h_feature).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3) + + if self.sr_ratio > 1: + x_ = low_feature.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + else: + kv = self.kv(low_feature).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + k, v = kv[0], kv[1] + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + low_feature = (attn @ v).transpose(1, 2).reshape(B, N, C) + low_feature = self.proj(low_feature) + low_feature = self.proj_drop(low_feature) + + return low_feature + + +class SubPixelConv(nn.Module): + def __init__(self, img_size=224, patch_size=2, in_chans=768, embed_dim=768): + super().__init__() + self.img_size = to_2tuple(img_size) + self.patch_size = to_2tuple(patch_size) + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.upsample = nn.Upsample(scale_factor=self.patch_size[0], align_corners=False, mode='bilinear') + self.upsample_proj = nn.Conv2d(in_chans, embed_dim, kernel_size=3, stride=1, padding=1, bias=True) + # self.upsample_proj = SeparableConv2d(in_chans, embed_dim, 3) + # self.upsample_proj = nn.Sequential( + # nn.Conv2d(in_chans, in_chans, kernel_size=3, stride=1, padding=1, bias=True), + # ModuleHelper.BNReLU(in_chans, bn_type='torchbn'), + # nn.Conv2d(in_chans, embed_dim, kernel_size=1) + # ) + self.norm = nn.LayerNorm(embed_dim) + self.apply(self._init_weights) + + def _init_weights(self, m): + import math + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + + def forward(self, x, norm=True): + B, C, H, W = x.shape + + x = self.upsample(x) + x = self.upsample_proj(x).flatten(2).transpose(1, 2) + if norm: + x = self.norm(x) + + H, W = H * self.patch_size[0], W * self.patch_size[1] + + return x, (H, W) + + +class ImmediaUpsample(nn.Module): + def __init__(self, factor=2, in_chans=768, embed_dim=768, num_classes=60): + super().__init__() + + self.conv = nn.Conv2d(in_channels=in_chans, out_channels=num_classes, kernel_size=1, stride=1) + self.upsample = nn.Upsample(scale_factor=factor, mode='bilinear') + + def forward(self, x): + x = self.conv(x) + x = self.upsample(x) + + return x + + +class AlignedModule(nn.Module): + + def __init__(self, inplane, outplane, num_heads, mlp_ratio, sr_ratio): + super(AlignedModule, 
self).__init__() + self.reset_h = nn.Conv2d(inplane, outplane, 1, bias=False) + self.flow_make = nn.Conv2d(outplane * 2, 2, kernel_size=3, padding=1, bias=False) + + def forward(self, x): + low_feature, h_feature = x + h, w = low_feature.size()[2:] + size = (h, w) + + h_feature = self.reset_h(h_feature) + h_feature_orign = h_feature + h_feature = F.upsample(h_feature, size=size, mode="bilinear", align_corners=True) + flow_in = torch.cat([h_feature, low_feature], 1) + flow = self.flow_make(flow_in) + h_feature = self.flow_warp(h_feature_orign, flow, size=size) + + return h_feature.flatten(2).transpose(1, 2), (h, w) + + def flow_warp(self, input, flow, size): + out_h, out_w = size + n, c, h, w = input.size() + + norm = torch.tensor([[[[out_w, out_h]]]]).type_as(input).to(input.device) + h = torch.linspace(-1.0, 1.0, out_h).view(-1, 1).repeat(1, out_w) + w = torch.linspace(-1.0, 1.0, out_w).repeat(out_h, 1) + grid = torch.cat((w.unsqueeze(2), h.unsqueeze(2)), 2) + grid = grid.repeat(n, 1, 1, 1).type_as(input).to(input.device) + grid = grid + flow.permute(0, 2, 3, 1) / norm + + output = F.grid_sample(input, grid) + return output + +# att low_feature + flow wrap high feature +# class AlignedModule(nn.Module): + +# def __init__(self, inplane, outplane, num_heads, mlp_ratio, sr_ratio): +# super(AlignedModule, self).__init__() +# self.reset_h = nn.Conv2d(inplane, outplane, 1, bias=False) +# self.norm_h = partial(nn.LayerNorm, eps=1e-6)(outplane) +# self.norm_l = partial(nn.LayerNorm, eps=1e-6)(outplane) +# self.context_att = Attention(dim=outplane, num_heads=num_heads, sr_ratio=sr_ratio) +# self.flow_make = nn.Conv2d(outplane*2, 2, kernel_size=3, padding=1, bias=False) + +# def forward(self, x): +# low_feature, h_feature = x +# B, _, h, w = low_feature.size() +# size = (h, w) + +# h_feature = self.reset_h(h_feature) +# h_feature_orign = h_feature +# h_feature = F.upsample(h_feature, size=size, mode="bilinear", align_corners=True) +# low_feature = self.context_att(self.norm_l(low_feature.flatten(2).transpose(1, 2)), self.norm_h(h_feature.flatten(2).transpose(1, 2)), h, w) +# low_feature = low_feature.reshape(B, h, w, -1).permute(0, 3, 1, 2).contiguous() + +# flow_in = torch.cat([h_feature, low_feature], 1) +# flow = self.flow_make(flow_in) +# h_feature = self.flow_warp(h_feature_orign, flow, size=size) + +# return low_feature, h_feature.flatten(2).transpose(1, 2), (h, w) + +# def flow_warp(self, input, flow, size): +# out_h, out_w = size +# n, c, h, w = input.size() + +# norm = torch.tensor([[[[out_w, out_h]]]]).type_as(input).to(input.device) +# h = torch.linspace(-1.0, 1.0, out_h).view(-1, 1).repeat(1, out_w) +# w = torch.linspace(-1.0, 1.0, out_w).repeat(out_h, 1) +# grid = torch.cat((w.unsqueeze(2), h.unsqueeze(2)), 2) +# grid = grid.repeat(n, 1, 1, 1).type_as(input).to(input.device) +# grid = grid + flow.permute(0, 2, 3, 1) / norm + +# output = F.grid_sample(input, grid) +# return output diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/vit_trans_layer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/vit_trans_layer.py new file mode 100644 index 0000000..d71c7fa --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/modules/vit_trans_layer.py @@ -0,0 +1,307 @@ +import torch +import torch.nn as nn +from lib.models.modules.norm import trunc_normal_ +import math +import torch.nn.functional as F +from lib.utils.tools.logger import Logger as Log + + +def drop_path(x, drop_prob: float = 0., training: bool = False): + """Drop paths (Stochastic Depth) 
per sample (when applied in main path of residual blocks). + This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for + changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + """ + if drop_prob == 0. or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) + random_tensor.floor_() # binarize + output = x.div(keep_prob) * random_tensor + return output + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + """ + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +class Mlp(nn.Module): + # two mlp, fc-relu-drop-fc-relu-drop + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention_Encoder(nn.Module): + def __init__(self, dim, kv_reduced_dim=None, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., + proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + if kv_reduced_dim is not None and type(kv_reduced_dim) == int: + self.fc_k = nn.Linear() + + def forward(self, x): + B, N, C = x.shape + # qkv shape [3, N, num_head, HW, C//num_head] + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # [N, num_head, HW, C//num_head] + + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Attention_Decoder(nn.Module): + def __init__(self, dim, num_heads=1, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights + self.scale = qk_scale or head_dim ** -0.5 + + self.fc_q = nn.Linear(dim, dim * 1, bias=qkv_bias) + self.fc_kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, q, x): + # q:[B,12,256] x:[B,HW,256] + B, N, C = 
x.shape + n_class = q.shape[1] + + q = self.fc_q(q).reshape(B, self.num_heads, n_class, C // self.num_heads) + kv = self.fc_kv(x).reshape(B, N, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + k, v = kv[0], kv[1] # [B, num_head, HW, 256/num_head] + + attn1 = (q @ k.transpose(-2, -1)) * self.scale # [B, num_head, 12, HW] + attn2 = attn1.softmax(dim=-1) + attn3 = self.attn_drop(attn2) # [B, num_head, 11, HW] + + x = (attn3 @ v).reshape(B, n_class, C) + x = self.proj(x) + x = self.proj_drop(x) # [B, 12, 256] + + # attn = attn3.permute(0, 2, 1, 3) + attn = attn1.permute(0, 2, 1, 3) + # attn = attn2.permute(0, 2, 1, 3) + return attn, x + + +class Block_Encoder(nn.Module): + + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention_Encoder( + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class Block_Decoder(nn.Module): + + def __init__(self, dim, num_heads, feat_HxW, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., + attn_drop=0., drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.norm1 = norm_layer(dim) + self.norm1_clsembed = norm_layer(dim) + + self.attn = Attention_Decoder( + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + self.norm3 = norm_layer(dim) + self.norm4 = norm_layer(256) + + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + self.mlp2 = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + self.mlp3 = Mlp(in_features=feat_HxW, hidden_features=feat_HxW * 3, act_layer=act_layer, drop=drop) + + def forward(self, query, feat): + # query:[B,12,256] feat:[B,12,HW] + attn, query = self.attn(self.norm1_clsembed(query), self.norm1(feat)) + query = query + self.drop_path(query) + query = query + self.drop_path(self.mlp(self.norm2(query))) + + feat = feat + self.drop_path(feat) + feat = feat + self.drop_path(self.mlp2(self.norm3(feat))) + + attn = attn + self.drop_path(attn) + attn = attn + self.drop_path(self.mlp3(self.norm4(attn))) + + return attn, query, feat + + +class HybridEmbed(nn.Module): + """ CNN Feature Map Embedding + Extract feature map from CNN, flatten, project to embedding dim. 
+ """ + + def __init__(self, input_dim=2048, embed_dim=768): + super().__init__() + self.proj = nn.Linear(input_dim, embed_dim) + + def forward(self, x): + x = x.flatten(2).transpose(1, 2) + x = self.proj(x) + return x + + +class VisionTransformer(nn.Module): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, input_dim=2048, embed_dim=768, depth=12, num_patches=32 * 32, nclass=12, + decoder_feat_HxW=1024, num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, + drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm): + super().__init__() + + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) + self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) + self.pos_drop = nn.Dropout(p=drop_rate) + + self.cls_embed = nn.Parameter(torch.zeros(1, nclass, embed_dim)) + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule + self.blocks_encoder = nn.ModuleList([ + Block_Encoder( + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) + for i in range(depth)]) + + self.blocks_decoder = nn.ModuleList([ + Block_Decoder( + dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, feat_HxW=decoder_feat_HxW, qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) + for i in range(depth)]) + + self.norm = norm_layer(embed_dim) + + self.hybrid_embed = HybridEmbed(input_dim, embed_dim) + + trunc_normal_(self.pos_embed, std=.02) + trunc_normal_(self.cls_token, std=.02) + trunc_normal_(self.cls_embed, std=.02) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token', 'cls_embed'} + + def forward_encoder(self, x, h, w): + B = x.shape[0] + cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks + x = torch.cat((cls_tokens, x), dim=1) + + pos_embed = self.pos_embed + pos_embed = self.resize_pos_embed(x, pos_embed, h, w) + x = x + pos_embed + x = self.pos_drop(x) + for blk in self.blocks_encoder: + x = blk(x) + + x = self.norm(x) + return x[:, 0], x[:, 1:] + + def resize_pos_embed(self, x, pos_embed, h, w): + # if x.shape[1] == pos_embed.shape[1]: + # return pos_embed + + # n, hw, c = x.shape + # x_h = x_w = int(math.sqrt(hw - 1)) + # assert x_h * x_w == hw - 1 + + cls_pos_embed, feat_pos_embed = pos_embed[:, 0:1, :], pos_embed[:, 1:, :] + feat_h = feat_w = int(math.sqrt(feat_pos_embed.shape[1])) + assert feat_h * feat_w == feat_pos_embed.shape[1] + feat_pos_embed = feat_pos_embed.reshape(feat_pos_embed.shape[0], feat_h, feat_w, -1).permute(0, 3, 1, + 2) # [n,c,h,w] + feat_pos_embed = F.interpolate(feat_pos_embed, (h, w), mode='bilinear', align_corners=True).permute(0, 2, 3, + 1) \ + .reshape(feat_pos_embed.shape[0], h * w, -1) + + new_pos_embed = torch.cat([cls_pos_embed, feat_pos_embed], dim=1) + assert new_pos_embed.shape[1] == x.shape[1] + return new_pos_embed + + def forward_decoder(self, x): + attns_list = [] + feat = x + B = feat.shape[0] + + for idx, blk in enumerate(self.blocks_decoder): + if 
idx == 0: + query = self.cls_embed.expand(B, -1, -1) + else: + query += self.cls_embed.expand(B, -1, -1) + attn, query, feat = blk(query, feat) + attns_list.append(attn) + + return attns_list + + def forward(self, x, use_decoder=False): + ''' + x: [N,C,H,W] + ''' + pass diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ce2pnet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ce2pnet.py new file mode 100644 index 0000000..4ee4ede --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ce2pnet.py @@ -0,0 +1,262 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Jianyuan Guo, RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class CE2P_ASPOCR(nn.Module): + def __init__(self, configer): + self.inplanes = 128 + super(CE2P_ASPOCR, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + from lib.models.modules.edge_block import Edge_Module + from lib.models.modules.decoder_block import CE2P_Decoder_Module + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + self.edgelayer = Edge_Module(256, 2, bn_type=self.configer.get('network', 'bn_type'), factor=2) + self.decoder = CE2P_Decoder_Module(self.num_classes, + dropout=0.1, + bn_type=self.configer.get('network', 'bn_type'), + inplane1=512, + inplane2=512) + else: + in_channels = [1024, 2048] + self.edgelayer = Edge_Module(256, 2, bn_type=self.configer.get('network', 'bn_type'), factor=1) + self.decoder = CE2P_Decoder_Module(self.num_classes, + dropout=0.1, + bn_type=self.configer.get('network', 'bn_type'), + inplane1=512, + inplane2=256) + + # extra added layers + from lib.models.modules.spatial_ocr_block import SpatialOCR_ASP_Module + self.asp_ocr_head = SpatialOCR_ASP_Module(features=2048, + hidden_features=256, + out_features=512, + dilations=(6, 12, 18), + num_classes=self.num_classes, + bn_type=self.configer.get('network', 'bn_type')) + + self.cls = nn.Sequential( + nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(256, self.num_classes, kernel_size=1, padding=0, dilation=1, bias=True) + ) + + self.dsn = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1, bias=False), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.1), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) # x: list output from conv2_x, conv3_x, conv4_x, conv5_x + seg_dsn = self.dsn(x[-2]) + edge_out, edge_fea = self.edgelayer(x[-4], x[-3], x[-2]) + x5 = x[-1] + x_hr = self.asp_ocr_head(x5, 
seg_dsn) + seg_out1, x_hr = self.decoder(x_hr, x[-4]) + x_hr = torch.cat([x_hr, edge_fea], dim=1) + seg_out2 = self.cls(x_hr) + + seg_dsn = F.interpolate(seg_dsn, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + seg_out2 = F.interpolate(seg_out2, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + seg_out1 = F.interpolate(seg_out1, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + edge_out = F.interpolate(edge_out, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + + return seg_out1, edge_out, seg_dsn, seg_out2 + + +class CE2P_OCRNet(nn.Module): + def __init__(self, configer): + self.inplanes = 128 + super(CE2P_OCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + from lib.models.modules.edge_block import Edge_Module + from lib.models.modules.decoder_block import Decoder_Module + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + self.edgelayer = Edge_Module(256, 2, bn_type=self.configer.get('network', 'bn_type'), factor=2) + self.decoder = Decoder_Module(self.num_classes, + dropout=0.1, + bn_type=self.configer.get('network', 'bn_type'), + inplane1=512, + inplane2=512) + else: + in_channels = [1024, 2048] + self.edgelayer = Edge_Module(256, 2, bn_type=self.configer.get('network', 'bn_type'), factor=1) + self.decoder = Decoder_Module(self.num_classes, + dropout=0.1, + bn_type=self.configer.get('network', 'bn_type'), + inplane1=512, + inplane2=256) + + # extra added layers + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=2048, + key_channels=256, + out_channels=512, + scale=1, + dropout=0, + bn_type=self.configer.get('network', 'bn_type')) + + self.cls = nn.Sequential( + nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(256, self.num_classes, kernel_size=1, padding=0, dilation=1, bias=True) + ) + + self.dsn = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1, bias=False), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.1), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) # x: list output from conv2_x, conv3_x, conv4_x, conv5_x + seg_dsn = self.dsn(x[-2]) + edge_out, edge_fea = self.edgelayer(x[-4], x[-3], x[-2]) + x5 = x[-1] + context = self.spatial_context_head(x5, seg_dsn) + x_hr = self.spatial_ocr_head(x5, context) + seg_out1, x_hr = self.decoder(x_hr, x[-4]) + x_hr = torch.cat([x_hr, edge_fea], dim=1) + seg_out2 = self.cls(x_hr) + + seg_dsn = F.interpolate(seg_dsn, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + seg_out2 = F.interpolate(seg_out2, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + seg_out1 = F.interpolate(seg_out1, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + edge_out = F.interpolate(edge_out, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + + return seg_out1, edge_out, seg_dsn, seg_out2 + + +class CE2P_IdealOCRNet(nn.Module): + def __init__(self, configer): + 
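+ # "Ideal" OCR variant: the gather/distribute heads below are built with
+ # use_gt=True, so the object context is derived from the ground-truth label map
+ # passed to forward(x_, label_). This acts as an oracle upper bound rather than
+ # a model usable at inference time.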
self.inplanes = 128 + super(CE2P_IdealOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + from lib.models.modules.edge_block import Edge_Module + from lib.models.modules.decoder_block import Decoder_Module + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + self.edgelayer = Edge_Module(256, 2, bn_type=self.configer.get('network', 'bn_type'), factor=2) + self.decoder = Decoder_Module(self.num_classes, + dropout=0.1, + bn_type=self.configer.get('network', 'bn_type'), + inplane1=512, + inplane2=512) + else: + in_channels = [1024, 2048] + self.edgelayer = Edge_Module(256, 2, bn_type=self.configer.get('network', 'bn_type'), factor=1) + self.decoder = Decoder_Module(self.num_classes, + dropout=0.1, + bn_type=self.configer.get('network', 'bn_type'), + inplane1=512, + inplane2=256) + + # extra added layers + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes, use_gt=True) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=2048, + key_channels=256, + out_channels=512, + scale=1, + dropout=0, + use_gt=True, + bn_type=self.configer.get('network', 'bn_type')) + + self.cls = nn.Sequential( + nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(256, self.num_classes, kernel_size=1, padding=0, dilation=1, bias=True) + ) + + self.dsn = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1, bias=False), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.1), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_, label_): + x = self.backbone(x_) # x: list output from conv2_x, conv3_x, conv4_x, conv5_x + seg_dsn = self.dsn(x[-2]) + edge_out, edge_fea = self.edgelayer(x[-4], x[-3], x[-2]) + x5 = x[-1] + + label = F.interpolate(input=label_.unsqueeze(1).type(torch.cuda.FloatTensor), size=(x5.size(2), x5.size(3)), mode="nearest") + context = self.spatial_context_head(x5, seg_dsn, label) + x_hr = self.spatial_ocr_head(x5, context, label) + + seg_out1, x_hr = self.decoder(x_hr, x[-4]) + x_hr = torch.cat([x_hr, edge_fea], dim=1) + seg_out2 = self.cls(x_hr) + + seg_dsn = F.interpolate(seg_dsn, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + seg_out2 = F.interpolate(seg_out2, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + seg_out1 = F.interpolate(seg_out1, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + edge_out = F.interpolate(edge_out, + size=(x_.size(2), x_.size(3)), + mode="bilinear", + align_corners=True) + + return seg_out1, edge_out, seg_dsn, seg_out2 + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/deeplab.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/deeplab.py new file mode 100644 index 0000000..4e628e7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/deeplab.py @@ -0,0 +1,64 @@ +import torch.nn as nn + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.modules.decoder_block import DeepLabHead +from models.protoseg_core.lib.models.modules.projection import ProjectionHead + + 
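+# Hypothetical usage sketch (added comment, not part of the original file):
+# `configer` is assumed to provide 'data.num_classes', 'network.backbone',
+# 'network.bn_type' and, for the contrastive variant, 'contrast.proj_dim'.
+#
+#     model = DeepLabV3Contrast(configer).cuda().eval()
+#     with torch.no_grad():
+#         out = model(torch.randn(2, 3, 512, 512).cuda())
+#     out['seg'], out['seg_aux'], out['embed']
+#
+# The plain DeepLabV3 below returns an (aux_logits, logits) tuple instead of a dict.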
+class DeepLabV3Contrast(nn.Module): + def __init__(self, configer): + super(DeepLabV3Contrast, self).__init__() + + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + self.proj_dim = self.configer.get('contrast', 'proj_dim') + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + + self.proj_head = ProjectionHead(dim_in=in_channels[1], proj_dim=self.proj_dim) + + self.decoder = DeepLabHead(num_classes=self.num_classes, bn_type=self.configer.get('network', 'bn_type')) + + for modules in [self.proj_head, self.decoder]: + for m in modules.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x_, with_embed=False, is_eval=False): + x = self.backbone(x_) + + embedding = self.proj_head(x[-1]) + + x = self.decoder(x[-4:]) + + return {'embed': embedding, 'seg_aux': x[1], 'seg': x[0]} + +class DeepLabV3(nn.Module): + def __init__(self, configer): + super(DeepLabV3, self).__init__() + + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + self.decoder = DeepLabHead(num_classes=self.num_classes, bn_type=self.configer.get('network', 'bn_type')) + + for m in self.decoder.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x_): + x = self.backbone(x_) + + x = self.decoder(x[-4:]) + + return x[1], x[0] diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/fcnet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/fcnet.py new file mode 100644 index 0000000..7ffaad9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/fcnet.py @@ -0,0 +1,106 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import pdb +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class FcnNet(nn.Module): + def __init__(self, configer): + self.inplanes = 128 + super(FcnNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + elif "mobilenetv2" in self.configer.get('network', 'backbone'): + in_channels = [160, 320] + else: + in_channels = [1024, 2048] + self.cls_head = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=False) + ) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, 
padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=False) + ) + + if "mobilenetv2" in self.configer.get('network', 'backbone'): + self.cls_head = nn.Sequential( + nn.Conv2d(in_channels[1], 256, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=False) + ) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 128, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(128, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(128, self.num_classes, kernel_size=1, stride=1, padding=0, bias=False) + ) + + def forward(self, x_): + x = self.backbone(x_) + aux_x = self.dsn_head(x[-2]) + x = self.cls_head(x[-1]) + aux_x = F.interpolate(aux_x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return aux_x, x + + +class FcnNet_wo_dsn(nn.Module): + def __init__(self, configer): + self.inplanes = 128 + super(FcnNet_wo_dsn, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + elif "mobilenetv2" in self.configer.get('network', 'backbone'): + in_channels = [160, 320] + else: + in_channels = [1024, 2048] + self.cls_head = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + if "mobilenetv2" in self.configer.get('network', 'backbone'): + self.cls_head = nn.Sequential( + nn.Conv2d(in_channels[1], 256, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=False) + ) + + def forward(self, x_): + x = self.backbone(x_) + x = self.cls_head(x[-1]) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/hrnet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/hrnet.py new file mode 100644 index 0000000..0c27592 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/hrnet.py @@ -0,0 +1,480 @@ +import os +import pdb +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributed as dist + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.models.modules.projection import ProjectionHead +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.models.modules.hanet_attention import HANet_Conv +from models.protoseg_core.lib.models.modules.contrast import momentum_update, l2_normalize, ProjectionHead +from models.protoseg_core.lib.models.modules.sinkhorn import distributed_sinkhorn +from 
timm.models.layers import trunc_normal_ +from einops import rearrange, repeat + + +class HRNet_W48(nn.Module): + """ + deep high-resolution representation learning for human pose estimation, CVPR2019 + """ + + def __init__(self, configer): + super(HRNet_W48, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + in_channels = 720 # 48 + 96 + 192 + 384 + self.cls_head = nn.Sequential( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(in_channels, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(in_channels, self.num_classes, kernel_size=1, stride=1, padding=0, bias=False) + ) + + def forward(self, x_): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + feats = torch.cat([feat1, feat2, feat3, feat4], 1) + out = self.cls_head(feats) + out = F.interpolate(out, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return out + + +class HRNet_W48_Proto(nn.Module): + """ + deep high-resolution representation learning for human pose estimation, CVPR2019 + """ + + def __init__(self, configer): + super(HRNet_W48_Proto, self).__init__() + self.configer = configer + self.gamma = self.configer.get('protoseg', 'gamma') + self.num_prototype = self.configer.get('protoseg', 'num_prototype') + self.use_prototype = self.configer.get('protoseg', 'use_prototype') + self.update_prototype = self.configer.get('protoseg', 'update_prototype') + self.pretrain_prototype = self.configer.get('protoseg', 'pretrain_prototype') + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + in_channels = 720 + self.cls_head = nn.Sequential( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(in_channels, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10) + ) + + self.prototypes = nn.Parameter(torch.zeros(self.num_classes, self.num_prototype, in_channels), + requires_grad=True) + + self.proj_head = ProjectionHead(in_channels, in_channels) + self.feat_norm = nn.LayerNorm(in_channels) + self.mask_norm = nn.LayerNorm(self.num_classes) + + trunc_normal_(self.prototypes, std=0.02) + + def prototype_learning(self, _c, out_seg, gt_seg, masks): + pred_seg = torch.max(out_seg, 1)[1] + mask = (gt_seg == pred_seg.view(-1)) + + cosine_similarity = torch.mm(_c, self.prototypes.view(-1, self.prototypes.shape[-1]).t()) + + proto_logits = cosine_similarity + proto_target = gt_seg.clone().float() + + # clustering for each class + protos = self.prototypes.data.clone() + for k in range(self.num_classes): + init_q = masks[..., k] + init_q = init_q[gt_seg == k, ...] + if init_q.shape[0] == 0: + continue + + q, indexs = distributed_sinkhorn(init_q) + + m_k = mask[gt_seg == k] + + c_k = _c[gt_seg == k, ...] 
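+ # q: n x num_prototype soft assignment of the class-k pixels to that class's
+ # prototypes (from distributed_sinkhorn); m_k keeps only pixels the current
+ # prediction classifies correctly, so the momentum update of the prototypes
+ # below is driven by reliably segmented pixels only.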
+ + m_k_tile = repeat(m_k, 'n -> n tile', tile=self.num_prototype) + + m_q = q * m_k_tile # n x self.num_prototype + + c_k_tile = repeat(m_k, 'n -> n tile', tile=c_k.shape[-1]) + + c_q = c_k * c_k_tile # n x embedding_dim + + f = m_q.transpose(0, 1) @ c_q # self.num_prototype x embedding_dim + + n = torch.sum(m_q, dim=0) + + if torch.sum(n) > 0 and self.update_prototype is True: + f = F.normalize(f, p=2, dim=-1) + + new_value = momentum_update(old_value=protos[k, n != 0, :], new_value=f[n != 0, :], + momentum=self.gamma, debug=False) + protos[k, n != 0, :] = new_value + + proto_target[gt_seg == k] = indexs.float() + (self.num_prototype * k) + + self.prototypes = nn.Parameter(l2_normalize(protos), + requires_grad=False) + + if dist.is_available() and dist.is_initialized(): + protos = self.prototypes.data.clone() + dist.all_reduce(protos.div_(dist.get_world_size())) + self.prototypes = nn.Parameter(protos, requires_grad=False) + + return proto_logits, proto_target + + def forward(self, x_, gt_semantic_seg=None, pretrain_prototype=False): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + feats = torch.cat([feat1, feat2, feat3, feat4], 1) + c = self.cls_head(feats) + + c = self.proj_head(c) + _c = rearrange(c, 'b c h w -> (b h w) c') + _c = self.feat_norm(_c) + _c = l2_normalize(_c) + + self.prototypes.data.copy_(l2_normalize(self.prototypes)) + + # n: h*w, k: num_class, m: num_prototype + masks = torch.einsum('nd,kmd->nmk', _c, self.prototypes) + + out_seg = torch.amax(masks, dim=1) + out_seg = self.mask_norm(out_seg) + out_seg = rearrange(out_seg, "(b h w) k -> b k h w", b=feats.shape[0], h=feats.shape[2]) + + if pretrain_prototype is False and self.use_prototype is True and gt_semantic_seg is not None: + gt_seg = F.interpolate(gt_semantic_seg.float(), size=feats.size()[2:], mode='nearest').view(-1) + contrast_logits, contrast_target = self.prototype_learning(_c, out_seg, gt_seg, masks) + return {'seg': out_seg, 'logits': contrast_logits, 'target': contrast_target} + + return out_seg + + +class HRNet_W48_CONTRAST(nn.Module): + """ + deep high-resolution representation learning for human pose estimation, CVPR2019 + """ + + def __init__(self, configer): + super(HRNet_W48_CONTRAST, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + self.proj_dim = self.configer.get('contrast', 'proj_dim') + + # extra added layers + in_channels = 720 # 48 + 96 + 192 + 384 + self.cls_head = nn.Sequential( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(in_channels, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.10), + nn.Conv2d(in_channels, self.num_classes, kernel_size=1, stride=1, padding=0, bias=False) + ) + + self.proj_head = ProjectionHead(dim_in=in_channels, proj_dim=self.proj_dim) + + def forward(self, x_, with_embed=False, is_eval=False): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + feats = 
torch.cat([feat1, feat2, feat3, feat4], 1) + out = self.cls_head(feats) + + emb = self.proj_head(feats) + return {'seg': out, 'embed': emb} + + +class HRNet_W48_OCR_CONTRAST(nn.Module): + def __init__(self, configer): + super(HRNet_W48_OCR_CONTRAST, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + self.proj_dim = self.configer.get('contrast', 'proj_dim') + + in_channels = 720 + self.conv3x3 = nn.Sequential( + nn.Conv2d(in_channels, 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module + self.ocr_gather_head = SpatialGather_Module(self.num_classes) + from lib.models.modules.spatial_ocr_block import SpatialOCR_Module + self.ocr_distri_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + bn_type=self.configer.get('network', 'bn_type')) + self.cls_head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.aux_head = nn.Sequential( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(in_channels, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(in_channels, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + self.proj_head = ProjectionHead(dim_in=in_channels, proj_dim=self.proj_dim) + + def forward(self, x_, with_embed=False, is_eval=False): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + feats = torch.cat([feat1, feat2, feat3, feat4], 1) + out_aux = self.aux_head(feats) + + emb = self.proj_head(feats) + + feats = self.conv3x3(feats) + + context = self.ocr_gather_head(feats, out_aux) + feats = self.ocr_distri_head(feats, context) + + out = self.cls_head(feats) + + return {'seg': out, 'seg_aux': out_aux, 'embed': emb} + + +class HRNet_W48_MEM(nn.Module): + def __init__(self, configer, dim=256, m=0.999, with_masked_ppm=False): + super(HRNet_W48_MEM, self).__init__() + self.configer = configer + self.m = m + self.r = self.configer.get('contrast', 'memory_size') + self.with_masked_ppm = with_masked_ppm + + num_classes = self.configer.get('data', 'num_classes') + + self.encoder_q = HRNet_W48_CONTRAST(configer) + + self.register_buffer("segment_queue", torch.randn(num_classes, self.r, dim)) + self.segment_queue = nn.functional.normalize(self.segment_queue, p=2, dim=2) + self.register_buffer("segment_queue_ptr", torch.zeros(num_classes, dtype=torch.long)) + + self.register_buffer("pixel_queue", torch.randn(num_classes, self.r, dim)) + self.pixel_queue = nn.functional.normalize(self.pixel_queue, p=2, dim=2) + self.register_buffer("pixel_queue_ptr", torch.zeros(num_classes, dtype=torch.long)) + + @torch.no_grad() + def _momentum_update_key_encoder(self): + for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()): + param_k.data = param_k.data * self.m + param_q.data * (1. 
- self.m) + + def forward(self, im_q, lb_q=None, with_embed=True, is_eval=False): + if is_eval is True or lb_q is None: + ret = self.encoder_q(im_q, with_embed=with_embed) + return ret + + ret = self.encoder_q(im_q) + + q = ret['embed'] + out = ret['seg'] + + return {'seg': out, 'embed': q, 'key': q.detach(), 'lb_key': lb_q.detach()} + + +class HRNet_W48_OCR(nn.Module): + def __init__(self, configer): + super(HRNet_W48_OCR, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + in_channels = 720 + self.conv3x3 = nn.Sequential( + nn.Conv2d(in_channels, 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module + self.ocr_gather_head = SpatialGather_Module(self.num_classes) + from lib.models.modules.spatial_ocr_block import SpatialOCR_Module + self.ocr_distri_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + bn_type=self.configer.get('network', 'bn_type')) + self.cls_head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.aux_head = nn.Sequential( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(in_channels, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(in_channels, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + feats = torch.cat([feat1, feat2, feat3, feat4], 1) + out_aux = self.aux_head(feats) + + feats = self.conv3x3(feats) + + context = self.ocr_gather_head(feats, out_aux) + feats = self.ocr_distri_head(feats, context) + + out = self.cls_head(feats) + + out_aux = F.interpolate(out_aux, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + out = F.interpolate(out, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return out_aux, out + + +class HRNet_W48_OCR_B(nn.Module): + """ + Considering that the 3x3 convolution on the 4x resolution feature map is expensive, + we can decrease the intermediate channels from 512 to 256 w/o performance loss. 
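+ Rough back-of-the-envelope reasoning (not a measured number): the 3x3 projection at 1/4 resolution costs + in proportion to C_in * C_out and the OCR distribution head roughly to C * key_C, so moving from 512/256 + to 256/128 channels trims those terms by about 2x and 4x respectively.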
+ """ + + def __init__(self, configer): + super(HRNet_W48_OCR_B, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + in_channels = 720 # 48 + 96 + 192 + 384 + self.conv3x3 = nn.Sequential( + nn.Conv2d(in_channels, 256, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module + self.ocr_gather_head = SpatialGather_Module(self.num_classes) + from lib.models.modules.spatial_ocr_block import SpatialOCR_Module + self.ocr_distri_head = SpatialOCR_Module(in_channels=256, + key_channels=128, + out_channels=256, + scale=1, + dropout=0.05, + bn_type=self.configer.get('network', 'bn_type')) + + self.cls_head = nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.aux_head = nn.Sequential( + nn.Conv2d(in_channels, 256, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + feats = torch.cat([feat1, feat2, feat3, feat4], 1) + out_aux = self.aux_head(feats) + + feats = self.conv3x3(feats) + + context = self.ocr_gather_head(feats, out_aux) + feats = self.ocr_distri_head(feats, context) + + out = self.cls_head(feats) + + out_aux = F.interpolate(out_aux, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + out = F.interpolate(out, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return out_aux, out + + +class HRNet_W48_OCR_B_HA(nn.Module): + """ + Considering that the 3x3 convolution on the 4x resolution feature map is expensive, + we can decrease the intermediate channels from 512 to 256 w/o performance loss. 
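+ Compared with HRNet_W48_OCR_B, this variant also runs a HANet_Conv height-attention branch over each + backbone stream (48/96/192/384 channels) and adds it back residually before the streams are upsampled, + concatenated, and passed to the OCR heads.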
+ """ + + def __init__(self, configer): + super(HRNet_W48_OCR_B_HA, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + in_channels = 720 # 48 + 96 + 192 + 384 + self.conv3x3 = nn.Sequential( + nn.Conv2d(in_channels, 256, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module + self.ocr_gather_head = SpatialGather_Module(self.num_classes) + from lib.models.modules.spatial_ocr_block import SpatialOCR_Module + self.ocr_distri_head = SpatialOCR_Module(in_channels=256, + key_channels=128, + out_channels=256, + scale=1, + dropout=0.05, + bn_type=self.configer.get('network', 'bn_type')) + self.cls_head = nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.aux_head = nn.Sequential( + nn.Conv2d(in_channels, 256, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + self.ha1 = HANet_Conv(384, 384, bn_type=self.configer.get('network', 'bn_type')) + self.ha2 = HANet_Conv(192, 192, bn_type=self.configer.get('network', 'bn_type')) + self.ha3 = HANet_Conv(96, 96, bn_type=self.configer.get('network', 'bn_type')) + self.ha4 = HANet_Conv(48, 48, bn_type=self.configer.get('network', 'bn_type')) + + def forward(self, x_): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + # apply the height-attention module whose channel width matches each stream (48/96/192/384) + x[0] = x[0] + self.ha4(x[0]) + x[1] = x[1] + self.ha3(x[1]) + x[2] = x[2] + self.ha2(x[2]) + x[3] = x[3] + self.ha1(x[3]) + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + feats = torch.cat([feat1, feat2, feat3, feat4], 1) + out_aux = self.aux_head(feats) + + feats = self.conv3x3(feats) + + context = self.ocr_gather_head(feats, out_aux) + feats = self.ocr_distri_head(feats, context) + + out = self.cls_head(feats) + + out_aux = F.interpolate(out_aux, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + out = F.interpolate(out, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return out_aux, out diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ideal_ocrnet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ideal_ocrnet.py new file mode 100644 index 0000000..841804b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ideal_ocrnet.py @@ -0,0 +1,263 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +import pdb +import torch +import torch.nn as nn +from torch.nn import functional as F + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class IdealSpatialOCRNet(nn.Module): + """ + augment the representations with the ground-truth object context.
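+ Note: this is an oracle variant. forward() takes the ground-truth label map together with the image, + downsamples it to the feature resolution with nearest-neighbour interpolation, and hands it to + SpatialGather_Module / SpatialOCR_Module (both built with use_gt=True), so the object context is pooled + from the true object regions rather than from the predicted soft regions.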
+ """ + def __init__(self, configer): + super(IdealSpatialOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.conv_3x3 = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes, use_gt=True) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + use_gt=True, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_, label_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.conv_3x3(x[-1]) + label = F.interpolate(input=label_.unsqueeze(1).type(torch.cuda.FloatTensor), size=(x.size(2), x.size(3)), mode="nearest") + context = self.spatial_context_head(x, x_dsn, label) + x = self.spatial_ocr_head(x, context, label) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x + + +class IdealSpatialOCRNetB(nn.Module): + """ + augment the representations with both the ground-truth background context and object context. 
+ """ + def __init__(self, configer): + super(IdealSpatialOCRNetB, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.conv_3x3 = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes, use_gt=True) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + use_gt=True, + use_bg=True, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_, label_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.conv_3x3(x[-1]) + label = F.interpolate(input=label_.unsqueeze(1).type(torch.cuda.FloatTensor), size=(x.size(2), x.size(3)), mode="nearest") + context = self.spatial_context_head(x, x_dsn, label) + x = self.spatial_ocr_head(x, context, label) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x + + +class IdealSpatialOCRNetC(nn.Module): + """ + augment the representations with only the ground-truth background context. 
+ """ + def __init__(self, configer): + super(IdealSpatialOCRNetC, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.conv_3x3 = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes, use_gt=True) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + use_gt=True, + use_bg=True, + use_oc=False, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_, label_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.conv_3x3(x[-1]) + label = F.interpolate(input=label_.unsqueeze(1).type(torch.cuda.FloatTensor), size=(x.size(2), x.size(3)), mode="nearest") + context = self.spatial_context_head(x, x_dsn, label) + x = self.spatial_ocr_head(x, context, label) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x + + +class IdealGatherOCRNet(nn.Module): + def __init__(self, configer): + super(IdealGatherOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.conv_3x3 = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes, use_gt=True) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + use_gt=False, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_, label_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.conv_3x3(x[-1]) + label = F.interpolate(input=label_.unsqueeze(1).type(torch.cuda.FloatTensor), size=(x.size(2), x.size(3)), mode="nearest") + context = 
self.spatial_context_head(x, x_dsn, label) + x = self.spatial_ocr_head(x, context) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x + + +class IdealDistributeOCRNet(nn.Module): + def __init__(self, configer): + super(IdealDistributeOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.conv_3x3 = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes, use_gt=False) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + use_gt=True, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_, label_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.conv_3x3(x[-1]) + label = F.interpolate(input=label_.unsqueeze(1).type(torch.cuda.FloatTensor), size=(x.size(2), x.size(3)), mode="nearest") + context = self.spatial_context_head(x, x_dsn) + x = self.spatial_ocr_head(x, context, label) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/isanet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/isanet.py new file mode 100644 index 0000000..6cb9f2a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/isanet.py @@ -0,0 +1,45 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class ISANet(nn.Module): + """ + Interlaced Sparse Self-Attention for Semantic Segmentation + """ + def __init__(self, configer): + self.inplanes = 128 + super(ISANet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + bn_type = self.configer.get('network', 'bn_type') + factors = self.configer.get('network', 'factors') + from lib.models.modules.isa_block import ISA_Module + self.isa_head = nn.Sequential( + nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1, bias=False), + ModuleHelper.BNReLU(512, bn_type=bn_type), + ISA_Module(in_channels=512, key_channels=256, value_channels=512, + out_channels=512, down_factors=factors, 
dropout=0.05, bn_type=bn_type), + ) + self.cls_head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1, bias=False), + ModuleHelper.BNReLU(512, bn_type=bn_type), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True), + ) + + def forward(self, x_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.isa_head(x[-1]) + x = self.cls_head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ms_ocrnet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ms_ocrnet.py new file mode 100644 index 0000000..6c4e3c9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ms_ocrnet.py @@ -0,0 +1,257 @@ +import torch +from torch import nn +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from collections import OrderedDict +import torch.nn.functional as F + + +class OCR_block(nn.Module): + """ + Some of the code in this class is borrowed from: + https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/HRNet-OCR + """ + + def __init__(self, configer, high_level_ch): + super(OCR_block, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + + ocr_mid_channels = 256 + ocr_key_channels = 128 + self.conv3x3_ocr = nn.Sequential( + nn.Conv2d(high_level_ch, ocr_mid_channels, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(ocr_mid_channels, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.spatial_ocr_block import SpatialGather_Module + self.ocr_gather_head = SpatialGather_Module(self.num_classes) + from lib.models.modules.spatial_ocr_block import SpatialOCR_Module + self.ocr_distri_head = SpatialOCR_Module(in_channels=ocr_mid_channels, + key_channels=ocr_key_channels, + out_channels=ocr_mid_channels, + scale=1, + dropout=0.05, + bn_type=self.configer.get('network', 'bn_type')) + + self.cls_head = nn.Conv2d(ocr_mid_channels, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.aux_head = nn.Sequential( + nn.Conv2d(high_level_ch, 256, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(256, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, high_level_features): + feats = self.conv3x3_ocr(high_level_features) + aux_out = self.aux_head(high_level_features) + context = self.ocr_gather_head(feats, aux_out) + ocr_feats = self.ocr_distri_head(feats, context) + cls_out = self.cls_head(ocr_feats) + return cls_out, aux_out, ocr_feats + + +def make_attn_head(in_ch, out_ch, bn_type=None): + bot_ch = 256 + + od = OrderedDict([('conv0', nn.Conv2d(in_ch, bot_ch, kernel_size=3, + padding=1, bias=False)), + ('bn0', ModuleHelper.BatchNorm2d(bn_type=bn_type)(bot_ch)), + ('re0', nn.ReLU(inplace=True))]) + + if True: # cfg.MODEL.MSCALE_INNER_3x3: + od['conv1'] = nn.Conv2d(bot_ch, bot_ch, kernel_size=3, padding=1, + bias=False) + od['bn1'] = ModuleHelper.BatchNorm2d(bn_type=bn_type)(bot_ch) + od['re1'] = nn.ReLU(inplace=True) + + if 
False: # cfg.MODEL.MSCALE_DROPOUT: + od['drop'] = nn.Dropout(0.5) + + od['conv2'] = nn.Conv2d(bot_ch, out_ch, kernel_size=1, bias=False) + od['sig'] = nn.Sigmoid() + + attn_head = nn.Sequential(od) + # init_attn(attn_head) + return attn_head + + + def Upsample(x, size): + """ + Wrapper Around the Upsample Call + """ + return nn.functional.interpolate(x, size=size, mode='bilinear', + align_corners=False) + + + def fmt_scale(prefix, scale): + """ + format scale name + :prefix: a string that is the beginning of the field name + :scale: a scale value (0.25, 0.5, 1.0, 2.0) + """ + + scale_str = str(float(scale)) + # drop the decimal point so keys look like 'pred_05x', matching the literal keys used in two_scale_forward + scale_str = scale_str.replace('.', '') + return f'{prefix}_{scale_str}x' + + + class MscaleOCR(nn.Module): + """ + OCR net + """ + + def __init__(self, configer, criterion=None): + super(MscaleOCR, self).__init__() + self.configer = configer + self.backbone = BackboneSelector(configer).get_backbone() + self.ocr = OCR_block(configer, 720) + self.scale_attn = make_attn_head(in_ch=256, out_ch=1, bn_type=self.configer.get('network', 'bn_type')) + + def _fwd(self, x): + x_size = x.size()[2:] + + x = self.backbone(x) + _, _, h, w = x[0].size() + + feat1 = x[0] + feat2 = F.interpolate(x[1], size=(h, w), mode="bilinear", align_corners=True) + feat3 = F.interpolate(x[2], size=(h, w), mode="bilinear", align_corners=True) + feat4 = F.interpolate(x[3], size=(h, w), mode="bilinear", align_corners=True) + + high_level_features = torch.cat([feat1, feat2, feat3, feat4], 1) + cls_out, aux_out, ocr_mid_feats = self.ocr(high_level_features) + attn = self.scale_attn(ocr_mid_feats) + + aux_out = Upsample(aux_out, x_size) + cls_out = Upsample(cls_out, x_size) + attn = Upsample(attn, x_size) + + return {'cls_out': cls_out, + 'aux_out': aux_out, + 'logit_attn': attn} + + def nscale_forward(self, inputs, scales): + """ + Hierarchical attention, primarily used for getting best inference + results. + We use attention at multiple scales, giving priority to the lower + resolutions. For example, if we have 4 scales {0.5, 1.0, 1.5, 2.0}, + then evaluation is done as follows: + p_joint = attn_1.5 * p_1.5 + (1 - attn_1.5) * down(p_2.0) + p_joint = attn_1.0 * p_1.0 + (1 - attn_1.0) * down(p_joint) + p_joint = up(attn_0.5 * p_0.5) + (1 - up(attn_0.5)) * p_joint + The target scale is always 1.0, and 1.0 is expected to be part of the + list of scales. When predictions are done at greater than 1.0 scale, + the predictions are downsampled before combining with the next lower + scale.
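+ Concretely, with the scales [0.5, 1.0, 2.0] referenced in forward() below, the loop runs 2.0 -> 1.0 -> 0.5 + and reduces to: + p_joint = attn_1.0 * p_1.0 + (1 - attn_1.0) * down(p_2.0) + p_joint = up(attn_0.5 * p_0.5) + (1 - up(attn_0.5)) * p_joint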
+ Inputs: + scales - a list of scales to evaluate + inputs - dict containing 'images', the input, and 'gts', the ground + truth mask + Output: + If training, return loss, else return prediction + attention + """ + x_1x = inputs['images'] + + assert 1.0 in scales, 'expected 1.0 to be the target scale' + # Lower resolution provides attention for higher rez predictions, + # so we evaluate in order: high to low + scales = sorted(scales, reverse=True) + + pred = None + aux = None + output_dict = {} + + for s in scales: + x = torch.nn.functional.interpolate(x_1x, scale_factor=s, mode='bilinear', align_corners=False, + recompute_scale_factor=True) + outs = self._fwd(x) + cls_out = outs['cls_out'] + attn_out = outs['logit_attn'] + aux_out = outs['aux_out'] + + output_dict[fmt_scale('pred', s)] = cls_out + if s != 2.0: + output_dict[fmt_scale('attn', s)] = attn_out + + if pred is None: + pred = cls_out + aux = aux_out + elif s >= 1.0: + # downscale previous + pred = torch.nn.functional.interpolate(pred, size=(cls_out.size(2), cls_out.size(3)), mode='bilinear', + align_corners=False) + pred = attn_out * cls_out + (1 - attn_out) * pred + aux = torch.nn.functional.interpolate(aux, size=(cls_out.size(2), cls_out.size(3)), mode='bilinear', + align_corners=False) + aux = attn_out * aux_out + (1 - attn_out) * aux + else: + # s < 1.0: upscale current + cls_out = attn_out * cls_out + aux_out = attn_out * aux_out + + cls_out = torch.nn.functional.interpolate(cls_out, size=(pred.size(2), pred.size(3)), mode='bilinear', + align_corners=False) + aux_out = torch.nn.functional.interpolate(aux_out, size=(pred.size(2), pred.size(3)), mode='bilinear', + align_corners=False) + attn_out = torch.nn.functional.interpolate(attn_out, size=(pred.size(2), pred.size(3)), mode='bilinear', + align_corners=False) + + pred = cls_out + (1 - attn_out) * pred + aux = aux_out + (1 - attn_out) * aux + + output_dict['pred'] = pred + return output_dict + + def two_scale_forward(self, inputs): + """ + Do we supervised both aux outputs, lo and high scale? + Should attention be used to combine the aux output? + Normally we only supervise the combined 1x output + If we use attention to combine the aux outputs, then + we can use normal weighting for aux vs. 
cls outputs + """ + x_1x = inputs + + x_lo = torch.nn.functional.interpolate(x_1x, scale_factor=0.5, mode='bilinear', + align_corners=False, recompute_scale_factor=True) + lo_outs = self._fwd(x_lo) + pred_05x = lo_outs['cls_out'] + p_lo = pred_05x + aux_lo = lo_outs['aux_out'] + logit_attn = lo_outs['logit_attn'] + attn_05x = logit_attn + + hi_outs = self._fwd(x_1x) + pred_10x = hi_outs['cls_out'] + p_1x = pred_10x + aux_1x = hi_outs['aux_out'] + + p_lo = logit_attn * p_lo + aux_lo = logit_attn * aux_lo + p_lo = torch.nn.functional.interpolate(p_lo, size=(p_1x.size(2), p_1x.size(3)), mode='bilinear', + align_corners=False) + aux_lo = torch.nn.functional.interpolate(aux_lo, size=(p_1x.size(2), p_1x.size(3)), mode='bilinear', + align_corners=False) + + logit_attn = torch.nn.functional.interpolate(logit_attn, size=(p_1x.size(2), p_1x.size(3)), mode='bilinear', + align_corners=False) + + # combine lo and hi predictions with attention + joint_pred = p_lo + (1 - logit_attn) * p_1x + joint_aux = aux_lo + (1 - logit_attn) * aux_1x + + output_dict = { + 'pred': joint_pred, + 'aux': joint_aux, + 'pred_05x': pred_05x, + 'pred_10x': pred_10x, + 'attn_05x': attn_05x, + } + return output_dict + + def forward(self, inputs): + + # if not self.training: + # return self.nscale_forward(inputs, [0.5, 1.0, 2.0]) + + return self.two_scale_forward(inputs) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocnet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocnet.py new file mode 100644 index 0000000..d02205c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocnet.py @@ -0,0 +1,100 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import torch.nn as nn +import torch.nn.functional as F + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class BaseOCNet(nn.Module): + """ + OCNet: Object Context Network for Scene Parsing + """ + def __init__(self, configer): + self.inplanes = 128 + super(BaseOCNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.oc_module_pre = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + from lib.models.modules.base_oc_block import BaseOC_Module + self.oc_module = BaseOC_Module(in_channels=512, + out_channels=512, + key_channels=256, + value_channels=256, + dropout=0.05, + sizes=([1]), + bn_type=self.configer.get('network', 'bn_type')) + self.cls = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + 
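+ # forward() below: the dsn head produces an auxiliary prediction from the penultimate backbone stage + # (in_channels[0] channels), the last stage (in_channels[1] channels) is projected by oc_module_pre and + # refined with BaseOC_Module self-attention context before the final classifier, and both outputs are + # bilinearly upsampled to the input resolution.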
+ def forward(self, x_): + x = self.backbone(x_) + x_dsn = self.dsn(x[-2]) + x = self.oc_module_pre(x[-1]) + x = self.oc_module(x) + x = self.cls(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x + + +class AspOCNet(nn.Module): + """ + OCNet: Object Context Network for Scene Parsing + """ + def __init__(self, configer): + self.inplanes = 128 + super(AspOCNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + from lib.models.modules.asp_oc_block import ASP_OC_Module + self.context = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ASP_OC_Module(512, 256, bn_type=self.configer.get('network', 'bn_type')), + ) + self.cls = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) + aux_x = self.dsn(x[-2]) + x = self.context(x[-1]) + x = self.cls(x) + aux_x = F.interpolate(aux_x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return aux_x, x diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocrnet.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocrnet.py new file mode 100644 index 0000000..b782dc1 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocrnet.py @@ -0,0 +1,111 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +import pdb +import torch +import torch.nn as nn +from torch.nn import functional as F + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class SpatialOCRNet(nn.Module): + """ + Object-Contextual Representations for Semantic Segmentation, + Yuan, Yuhui and Chen, Xilin and Wang, Jingdong + """ + def __init__(self, configer): + self.inplanes = 128 + super(SpatialOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.conv_3x3 = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + + from 
lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.conv_3x3(x[-1]) + context = self.spatial_context_head(x, x_dsn) + x = self.spatial_ocr_head(x, context) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x + + +class ASPOCRNet(nn.Module): + """ + Object-Contextual Representations for Semantic Segmentation, + Yuan, Yuhui and Chen, Xilin and Wang, Jingdong + """ + def __init__(self, configer): + self.inplanes = 128 + super(ASPOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + + # we should increase the dilation rates as the output stride is larger + from lib.models.modules.spatial_ocr_block import SpatialOCR_ASP_Module + self.asp_ocr_head = SpatialOCR_ASP_Module(features=2048, + hidden_features=256, + out_features=256, + num_classes=self.num_classes, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.1), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.asp_ocr_head(x[-1], x_dsn) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocrnet_ha.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocrnet_ha.py new file mode 100644 index 0000000..7b52ce6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/ocrnet_ha.py @@ -0,0 +1,111 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +import pdb +import torch +import torch.nn as nn +from torch.nn import 
functional as F + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper + + +class SpatialOCRNet(nn.Module): + """ + Object-Contextual Representations for Semantic Segmentation, + Yuan, Yuhui and Chen, Xilin and Wang, Jingdong + """ + def __init__(self, configer): + self.inplanes = 128 + super(SpatialOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + self.conv_3x3 = nn.Sequential( + nn.Conv2d(in_channels[1], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + ) + + from lib.models.modules.spatial_ocr_block import SpatialGather_Module, SpatialOCR_Module + self.spatial_context_head = SpatialGather_Module(self.num_classes) + self.spatial_ocr_head = SpatialOCR_Module(in_channels=512, + key_channels=256, + out_channels=512, + scale=1, + dropout=0.05, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.05), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.conv_3x3(x[-1]) + context = self.spatial_context_head(x, x_dsn) + x = self.spatial_ocr_head(x, context) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x + + +class ASPOCRNet(nn.Module): + """ + Object-Contextual Representations for Semantic Segmentation, + Yuan, Yuhui and Chen, Xilin and Wang, Jingdong + """ + def __init__(self, configer): + self.inplanes = 128 + super(ASPOCRNet, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.backbone = BackboneSelector(configer).get_backbone() + + # extra added layers + if "wide_resnet38" in self.configer.get('network', 'backbone'): + in_channels = [2048, 4096] + else: + in_channels = [1024, 2048] + + # we should increase the dilation rates as the output stride is larger + from lib.models.modules.spatial_ocr_block import SpatialOCR_ASP_Module + self.asp_ocr_head = SpatialOCR_ASP_Module(features=2048, + hidden_features=256, + out_features=256, + num_classes=self.num_classes, + bn_type=self.configer.get('network', 'bn_type')) + + self.head = nn.Conv2d(256, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + self.dsn_head = nn.Sequential( + nn.Conv2d(in_channels[0], 512, kernel_size=3, stride=1, padding=1), + ModuleHelper.BNReLU(512, bn_type=self.configer.get('network', 'bn_type')), + nn.Dropout2d(0.1), + nn.Conv2d(512, self.num_classes, kernel_size=1, stride=1, padding=0, bias=True) + ) + + def forward(self, x_): + x = self.backbone(x_) + x_dsn = self.dsn_head(x[-2]) + x = self.asp_ocr_head(x[-1], x_dsn) + x = self.head(x) + x_dsn = F.interpolate(x_dsn, size=(x_.size(2), x_.size(3)), 
mode="bilinear", align_corners=True) + x = F.interpolate(x, size=(x_.size(2), x_.size(3)), mode="bilinear", align_corners=True) + return x_dsn, x diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/segfix.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/segfix.py new file mode 100644 index 0000000..3264fa5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/segfix.py @@ -0,0 +1,89 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import pdb +import cv2 +import os +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.utils.helpers.offset_helper import DTOffsetConfig +from models.protoseg_core.lib.models.backbones.hrnet.hrnet_backbone import BasicBlock + + +class SegFix_HRNet(nn.Module): + def __init__(self, configer): + super(SegFix_HRNet, self).__init__() + self.configer = configer + self.backbone = BackboneSelector(configer).get_backbone() + backbone_name = self.configer.get('network', 'backbone') + width = int(backbone_name[-2:]) + if 'hrnet2x' in backbone_name: + in_channels = width * 31 + else: + in_channels = width * 15 + + num_masks = 2 + num_directions = DTOffsetConfig.num_classes + + mid_channels = 256 + + self.dir_head = nn.Sequential( + nn.Conv2d(in_channels, + mid_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False), + ModuleHelper.BNReLU(mid_channels, + bn_type=self.configer.get( + 'network', 'bn_type')), + nn.Conv2d(mid_channels, + num_directions, + kernel_size=1, + stride=1, + padding=0, + bias=False)) + self.mask_head = nn.Sequential( + nn.Conv2d(in_channels, + mid_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False), + ModuleHelper.BNReLU(mid_channels, + bn_type=self.configer.get( + 'network', 'bn_type')), + nn.Conv2d(mid_channels, + num_masks, + kernel_size=1, + stride=1, + padding=0, + bias=False)) + + def forward(self, x_): + x = self.backbone(x_) + _, _, h, w = x[0].size() + + feat1 = x[0] + for i in range(1, len(x)): + x[i] = F.interpolate(x[i], + size=(h, w), + mode='bilinear', + align_corners=True) + + feats = torch.cat(x, 1) + mask_map = self.mask_head(feats) + dir_map = self.dir_head(feats) + return mask_map, dir_map diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/semantic_fpn.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/semantic_fpn.py new file mode 100644 index 0000000..9e5795e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/nets/semantic_fpn.py @@ -0,0 +1,206 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from functools import partial +from models.protoseg_core.lib.models.tools.module_helper import ModuleHelper +from models.protoseg_core.lib.models.backbones.backbone_selector import BackboneSelector +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +import numpy as np + + +class FPN(nn.Module): + def __init__(self, in_channels, + out_channels=256, + num_outs=4, + start_level=0, + end_level=-1, + 
add_extra_convs=False, + extra_convs_on_inputs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + upsample_cfg=dict(mode='nearest')): + super(FPN, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.relu_before_extra_convs = relu_before_extra_convs + self.no_norm_on_lateral = no_norm_on_lateral + self.fp16_enabled = False + self.upsample_cfg = upsample_cfg.copy() + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + assert isinstance(add_extra_convs, (str, bool)) + if isinstance(add_extra_convs, str): + # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' + assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') + elif add_extra_convs: # True + if extra_convs_on_inputs: + # For compatibility with previous release + # TODO: deprecate `extra_convs_on_inputs` + self.add_extra_convs = 'on_input' + else: + self.add_extra_convs = 'on_output' + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = nn.Conv2d( + in_channels[i], + out_channels, + 1) + fpn_conv = nn.Conv2d( + out_channels, + out_channels, + 3, + padding=1) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if self.add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + if i == 0 and self.add_extra_convs == 'on_input': + in_channels = self.in_channels[self.backbone_end_level - 1] + else: + in_channels = out_channels + extra_fpn_conv = nn.Conv2d( + in_channels, + out_channels, + 3, + stride=2, + padding=1) + self.fpn_convs.append(extra_fpn_conv) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # xavier_uniform_ initializes a tensor in place; pass the conv weight, not the module + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + # In some cases, fixing `scale factor` (e.g. 2) is preferred, but + # it cannot co-exist with `size` in `F.interpolate`.
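+ # Top-down merge: each lateral above is a 1x1 conv to out_channels; here the coarser map is upsampled + # to the finer map's spatial size and summed into it, and the per-level 3x3 fpn_convs then run on the + # merged maps. For example, FPN(in_channels=[64, 128, 320, 512], out_channels=256, num_outs=4), as + # SemanticFPN builds below, returns a tuple of four 256-channel maps at the input resolutions.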
+ if 'scale_factor' in self.upsample_cfg: + laterals[i - 1] += F.interpolate(laterals[i], + **self.upsample_cfg) + else: + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] += F.interpolate( + laterals[i], size=prev_shape, **self.upsample_cfg) + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + if self.add_extra_convs == 'on_input': + extra_source = inputs[self.backbone_end_level - 1] + elif self.add_extra_convs == 'on_lateral': + extra_source = laterals[-1] + elif self.add_extra_convs == 'on_output': + extra_source = outs[-1] + else: + raise NotImplementedError + outs.append(self.fpn_convs[used_backbone_levels](extra_source)) + for i in range(used_backbone_levels + 1, self.num_outs): + if self.relu_before_extra_convs: + outs.append(self.fpn_convs[i](F.relu(outs[-1]))) + else: + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) + + +class SemanticFPNDecoder(nn.Module): + def __init__(self, feature_strides, num_classes): + super(SemanticFPNDecoder, self).__init__() + self.in_channels = [64, 128, 320, 512] + self.feature_strides = feature_strides + self.scale_heads = nn.ModuleList() + self.channels = 128 + for i in range(len(feature_strides)): + head_length = max( + 1, + int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) + scale_head = [] + for k in range(head_length): + scale_head.append( + nn.Conv2d( + 256 if k == 0 else self.channels, + self.channels, + kernel_size=3, + padding=1)) + scale_head.append(ModuleHelper.BNReLU(self.channels, bn_type='torchsyncbn')) + if feature_strides[i] != feature_strides[0]: + scale_head.append( + nn.Upsample( + scale_factor=2, + mode='bilinear', + align_corners=False)) + self.scale_heads.append(nn.Sequential(*scale_head)) + + self.cls_seg = nn.Conv2d(self.channels, num_classes, kernel_size=1) + + def forward(self, x): + output = self.scale_heads[0](x[0]) + for i in range(1, len(self.feature_strides)): + output = output + nn.functional.interpolate( + self.scale_heads[i](x[i]), + size=output.shape[2:], + mode='bilinear', + align_corners=False) + + output = self.cls_seg(output) + return output + + +class SemanticFPN(nn.Module): + def __init__(self, configer): + super(SemanticFPN, self).__init__() + self.configer = configer + self.num_classes = self.configer.get('data', 'num_classes') + self.arch = self.configer.get('network', 'backbone') + self.backbone = BackboneSelector(configer).get_backbone() + self.neck = FPN(in_channels=[64, 128, 320, 512], out_channels=256) + self.decoder = SemanticFPNDecoder(feature_strides=[4, 8, 16, 32], + num_classes=self.num_classes) + + def forward(self, x): + x = self.backbone(x) + x = self.neck(x) + x = self.decoder(x) + + return x diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/tools/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/tools/module_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/tools/module_helper.py new file mode 100644 index 0000000..edaa6a6 --- /dev/null +++ 
b/BiSTNet-NTIRE2023/models/protoseg_core/lib/models/tools/module_helper.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os +import pdb + +import torch +import torch.nn as nn + +try: + from urllib import urlretrieve +except ImportError: + from urllib.request import urlretrieve + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from torch.nn.functional import interpolate + + +class ModuleHelper(object): + + @staticmethod + def BNReLU(num_features, bn_type=None, **kwargs): + if bn_type == 'torchbn': + return nn.Sequential( + nn.BatchNorm2d(num_features, **kwargs), + nn.ReLU() + ) + elif bn_type == 'torchsyncbn': + return nn.Sequential( + nn.SyncBatchNorm(num_features, **kwargs), + nn.ReLU() + ) + elif bn_type == 'syncbn': + from lib.extensions.syncbn.module import BatchNorm2d + return nn.Sequential( + BatchNorm2d(num_features, **kwargs), + nn.ReLU() + ) + elif bn_type == 'sn': + from lib.extensions.switchablenorms.switchable_norm import SwitchNorm2d + return nn.Sequential( + SwitchNorm2d(num_features, **kwargs), + nn.ReLU() + ) + elif bn_type == 'gn': + return nn.Sequential( + nn.GroupNorm(num_groups=8, num_channels=num_features, **kwargs), + nn.ReLU() + ) + elif bn_type == 'fn': + Log.error('Not support Filter-Response-Normalization: {}.'.format(bn_type)) + exit(1) + elif bn_type == 'inplace_abn': + torch_ver = torch.__version__[:3] + # Log.info('Pytorch Version: {}'.format(torch_ver)) + if torch_ver == '0.4': + from lib.extensions.inplace_abn.bn import InPlaceABNSync + return InPlaceABNSync(num_features, **kwargs) + elif torch_ver in ('1.0', '1.1'): + from lib.extensions.inplace_abn_1.bn import InPlaceABNSync + return InPlaceABNSync(num_features, **kwargs) + elif torch_ver == '1.2': + from inplace_abn import InPlaceABNSync + return InPlaceABNSync(num_features, **kwargs) + + else: + Log.error('Not support BN type: {}.'.format(bn_type)) + exit(1) + + @staticmethod + def BatchNorm2d(bn_type='torch', ret_cls=False): + if bn_type == 'torchbn': + return nn.BatchNorm2d + + elif bn_type == 'torchsyncbn': + return nn.SyncBatchNorm + + elif bn_type == 'syncbn': + from lib.extensions.syncbn.module import BatchNorm2d + return BatchNorm2d + + elif bn_type == 'sn': + from lib.extensions.switchablenorms.switchable_norm import SwitchNorm2d + return SwitchNorm2d + + elif bn_type == 'gn': + return functools.partial(nn.GroupNorm, num_groups=32) + + elif bn_type == 'inplace_abn': + torch_ver = torch.__version__[:3] + if torch_ver == '0.4': + from lib.extensions.inplace_abn.bn import InPlaceABNSync + if ret_cls: + return InPlaceABNSync + + return functools.partial(InPlaceABNSync, activation='none') + + elif torch_ver in ('1.0', '1.1'): + from lib.extensions.inplace_abn_1.bn import InPlaceABNSync + if ret_cls: + return InPlaceABNSync + + return functools.partial(InPlaceABNSync, activation='none') + + elif torch_ver == '1.2': + from inplace_abn import InPlaceABNSync + if ret_cls: + return InPlaceABNSync + + return functools.partial(InPlaceABNSync, activation='identity') + + else: + Log.error('Not support BN type: {}.'.format(bn_type)) + exit(1) + + @staticmethod + def load_model(model, pretrained=None, all_match=True, network='resnet101'): + if pretrained is None: + return model + + if all_match: + Log.info('Loading pretrained model:{}'.format(pretrained)) + pretrained_dict = 
torch.load(pretrained, map_location=lambda storage, loc: storage) + model_dict = model.state_dict() + load_dict = dict() + for k, v in pretrained_dict.items(): + if 'resinit.{}'.format(k) in model_dict: + load_dict['resinit.{}'.format(k)] = v + else: + load_dict[k] = v + model.load_state_dict(load_dict) + + else: + Log.info('Loading pretrained model:{}'.format(pretrained)) + pretrained_dict = torch.load(pretrained, map_location=lambda storage, loc: storage) + + # settings for "wide_resnet38" or network == "resnet152" + if network == "wide_resnet": + pretrained_dict = pretrained_dict['state_dict'] + + model_dict = model.state_dict() + + if network == "hrnet_plus": + # pretrained_dict['conv1_full_res.weight'] = pretrained_dict['conv1.weight'] + # pretrained_dict['conv2_full_res.weight'] = pretrained_dict['conv2.weight'] + load_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} + + elif network == 'pvt': + pretrained_dict = {k: v for k, v in pretrained_dict.items() if + k in model_dict.keys()} + pretrained_dict['pos_embed1'] = \ + interpolate(pretrained_dict['pos_embed1'].unsqueeze(dim=0), size=[16384, 64])[0] + pretrained_dict['pos_embed2'] = \ + interpolate(pretrained_dict['pos_embed2'].unsqueeze(dim=0), size=[4096, 128])[0] + pretrained_dict['pos_embed3'] = \ + interpolate(pretrained_dict['pos_embed3'].unsqueeze(dim=0), size=[1024, 320])[0] + pretrained_dict['pos_embed4'] = \ + interpolate(pretrained_dict['pos_embed4'].unsqueeze(dim=0), size=[256, 512])[0] + pretrained_dict['pos_embed7'] = \ + interpolate(pretrained_dict['pos_embed1'].unsqueeze(dim=0), size=[16384, 64])[0] + pretrained_dict['pos_embed6'] = \ + interpolate(pretrained_dict['pos_embed2'].unsqueeze(dim=0), size=[4096, 128])[0] + pretrained_dict['pos_embed5'] = \ + interpolate(pretrained_dict['pos_embed3'].unsqueeze(dim=0), size=[1024, 320])[0] + load_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} + + elif network == 'pcpvt' or network == 'svt': + load_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} + Log.info('Missing keys: {}'.format(list(set(model_dict) - set(load_dict)))) + + elif network == 'transunet_swin': + pretrained_dict = {k: v for k, v in pretrained_dict.items() if + k in model_dict.keys()} + for item in list(pretrained_dict.keys()): + if item.startswith('layers.0') and not item.startswith('layers.0.downsample'): + pretrained_dict['dec_layers.2' + item[15:]] = pretrained_dict[item] + if item.startswith('layers.1') and not item.startswith('layers.1.downsample'): + pretrained_dict['dec_layers.1' + item[15:]] = pretrained_dict[item] + if item.startswith('layers.2') and not item.startswith('layers.2.downsample'): + pretrained_dict['dec_layers.0' + item[15:]] = pretrained_dict[item] + + for item in list(pretrained_dict.keys()): + if 'relative_position_index' in item: + pretrained_dict[item] = \ + interpolate(pretrained_dict[item].unsqueeze(dim=0).unsqueeze(dim=0).float(), + size=[256, 256])[0][0] + if 'relative_position_bias_table' in item: + pretrained_dict[item] = \ + interpolate(pretrained_dict[item].unsqueeze(dim=0).unsqueeze(dim=0).float(), + size=[961, pretrained_dict[item].size(1)])[0][0] + if 'attn_mask' in item: + pretrained_dict[item] = \ + interpolate(pretrained_dict[item].unsqueeze(dim=0).unsqueeze(dim=0).float(), + size=[pretrained_dict[item].size(0), 256, 256])[0][0] + + elif network == "hrnet" or network == "xception" or network == 'resnest': + load_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict.keys()} + 
Log.info('Missing keys: {}'.format(list(set(model_dict) - set(load_dict)))) + + elif network == "dcnet" or network == "resnext": + load_dict = dict() + for k, v in pretrained_dict.items(): + if 'resinit.{}'.format(k) in model_dict: + load_dict['resinit.{}'.format(k)] = v + else: + if k in model_dict: + load_dict[k] = v + else: + pass + + elif network == "wide_resnet": + load_dict = {'.'.join(k.split('.')[1:]): v \ + for k, v in pretrained_dict.items() \ + if '.'.join(k.split('.')[1:]) in model_dict} + else: + load_dict = {'.'.join(k.split('.')[1:]): v \ + for k, v in pretrained_dict.items() \ + if '.'.join(k.split('.')[1:]) in model_dict} + + # used to debug + if int(os.environ.get("debug_load_model", 0)): + Log.info('Matched Keys List:') + for key in load_dict.keys(): + Log.info('{}'.format(key)) + model_dict.update(load_dict) + model.load_state_dict(model_dict) + + return model + + @staticmethod + def load_url(url, map_location=None): + model_dir = os.path.join('~', '.PyTorchCV', 'models') + if not os.path.exists(model_dir): + os.makedirs(model_dir) + + filename = url.split('/')[-1] + cached_file = os.path.join(model_dir, filename) + if not os.path.exists(cached_file): + Log.info('Downloading: "{}" to {}\n'.format(url, cached_file)) + urlretrieve(url, cached_file) + + Log.info('Loading pretrained model:{}'.format(cached_file)) + return torch.load(cached_file, map_location=map_location) + + @staticmethod + def constant_init(module, val, bias=0): + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + @staticmethod + def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + @staticmethod + def normal_init(module, mean=0, std=1, bias=0): + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + @staticmethod + def uniform_init(module, a=0, b=1, bias=0): + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + @staticmethod + def kaiming_init(module, + mode='fan_in', + nonlinearity='leaky_relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/__init__.py @@ -0,0 +1 @@ + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/distributed.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/distributed.py new file mode 100644 index 0000000..988d22f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/distributed.py @@ -0,0 +1,79 @@ +import torch +import torch.nn as nn +import subprocess +import sys +import os +from 
models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +def is_distributed(): + return torch.distributed.is_initialized() + +def get_world_size(): + if not torch.distributed.is_initialized(): + return 1 + return torch.distributed.get_world_size() + +def get_rank(): + if not torch.distributed.is_initialized(): + return 0 + return torch.distributed.get_rank() + +def all_reduce_numpy(array): + tensor = torch.from_numpy(array).cuda() + torch.distributed.all_reduce(tensor) + return tensor.cpu().numpy() + +def handle_distributed(args, main_file): + if not args.distributed: + os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, args.gpu)) + return + + if args.local_rank >= 0: + _setup_process_group(args) + return + + current_env = os.environ.copy() + if current_env.get('CUDA_VISIBLE_DEVICES') is None: + current_env['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, args.gpu)) + world_size = len(args.gpu) + else: + world_size = len(current_env['CUDA_VISIBLE_DEVICES'].split(',')) + + current_env['WORLD_SIZE'] = str(world_size) + + print('World size:', world_size) + # Logic for spawner + python_exec = sys.executable + command_args = sys.argv + Log.info('{}'.format(command_args)) + try: + main_index = command_args.index('main_contrastive.py') + except: + main_index = command_args.index('main.py') + + command_args = command_args[main_index+1:] + print(command_args) + command_args = [ + python_exec, '-u', + '-m', 'torch.distributed.launch', + '--nproc_per_node', str(world_size), + '--master_port', str(29961), + main_file, + ] + command_args + process = subprocess.Popen(command_args, env=current_env) + process.wait() + if process.returncode != 0: + raise subprocess.CalledProcessError(returncode=process.returncode, + cmd=command_args) + sys.exit(process.returncode) + +def _setup_process_group(args): + local_rank = args.local_rank + + torch.cuda.set_device(local_rank) + torch.distributed.init_process_group( + 'nccl', + init_method='env://', + # rank=local_rank + ) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/dc_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/dc_helper.py new file mode 100644 index 0000000..04fcaae --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/dc_helper.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from lib.extensions.parallel.data_container import DataContainer + + +class DCHelper(object): + + @staticmethod + def tolist(dc): + return list(itertools.chain(*dc.data)) + + @staticmethod + def todc(data_list, gpu_list, cpu_only=False): + assert len(data_list) % len(gpu_list) == 0 + samples_per_gpu = len(data_list) // len(gpu_list) + stacked = [] + for i in range(0, len(data_list), samples_per_gpu): + stacked.append(data_list[i:i + samples_per_gpu]) + + return DataContainer(stacked, cpu_only=cpu_only) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/file_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/file_helper.py new file mode 100644 index 0000000..eb7b10f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/file_helper.py 
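The distributed helpers above degrade gracefully to single-process use: get_world_size()/get_rank() fall back to 1/0 when torch.distributed is not initialized, and all_reduce_numpy sums a NumPy array across ranks via a CUDA tensor. A hedged usage sketch for averaging a scalar metric across ranks (assumes one GPU per process and that init_process_group has already been called, e.g. through handle_distributed):

import numpy as np
import torch

def average_across_ranks(value: float) -> float:
    # Single-process fallback, mirroring get_world_size()/get_rank() above.
    if not torch.distributed.is_initialized():
        return value
    tensor = torch.from_numpy(np.array([value], dtype=np.float64)).cuda()
    torch.distributed.all_reduce(tensor)  # sums the value over all ranks
    return float(tensor.cpu().numpy()[0]) / torch.distributed.get_world_size()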
@@ -0,0 +1,68 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) +# Repackage some file operations. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + + +class FileHelper(object): + + @staticmethod + def make_dirs(dir_path, is_file=False): + dir_path = os.path.expanduser(dir_path) + dir_name = FileHelper.dir_name(dir_path) if is_file else dir_path + if not os.path.exists(dir_name): + os.makedirs(dir_name) + + @staticmethod + def dir_name(file_path): + return os.path.dirname(file_path) + + @staticmethod + def abs_path(file_path): + return os.path.abspath(file_path) + + @staticmethod + def shotname(file_name): + shotname, extension = os.path.splitext(file_name) + return shotname + + @staticmethod + def scandir(dir_path, suffix=None): + for entry in os.scandir(dir_path): + if not entry.is_file(): + continue + filename = entry.name + if suffix is None: + yield filename + elif filename.endswith(suffix): + yield filename + + @staticmethod + def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not os.path.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + @staticmethod + def list_dir(dir_name, prefix=''): + filename_list = list() + items = os.listdir(os.path.join(dir_name, prefix)) + for item in items: + fi_d = os.path.join(dir_name, prefix, item) + if os.path.isdir(fi_d): + prefix_temp = '{}/{}'.format(prefix, item).lstrip('/') + filename_list += FileHelper.list_dir(dir_name, prefix_temp) + else: + filename_list.append('{}/{}'.format(prefix, item).lstrip('/')) + + return filename_list + + +if __name__ == "__main__": + print (FileHelper.list_dir('/home/donny/Projects')) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/image_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/image_helper.py new file mode 100644 index 0000000..71be6cc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/image_helper.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) +# Repackage some image operations. 
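A minimal usage sketch for the FileHelper class above (the directory path is hypothetical and the import path assumes the repackaged protoseg_core layout):

from models.protoseg_core.lib.utils.helpers.file_helper import FileHelper

FileHelper.make_dirs('./tmp_test/frames')                             # create nested directories if missing
pngs = list(FileHelper.scandir('./tmp_test/frames', suffix='.png'))   # non-recursive, files only
all_files = FileHelper.list_dir('./tmp_test/frames')                  # recursive, paths relative to the root
print(len(pngs), len(all_files))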
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import cv2 +import numpy as np +from PIL import Image + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +PIL_INTER_DICT = { + 'nearest': Image.NEAREST, + 'linear': Image.BILINEAR, + 'cubic': Image.CUBIC +} + +CV2_INTER_DICT = { + 'nearest': cv2.INTER_NEAREST, + 'linear': cv2.INTER_LINEAR, + 'cubic': cv2.INTER_CUBIC +} + + +class ImageHelper(object): + + @staticmethod + def read_image(image_path, tool='pil', mode='RGB'): + if tool == 'pil': + return ImageHelper.pil_read_image(image_path, mode=mode) + elif tool == 'cv2': + return ImageHelper.cv2_read_image(image_path, mode=mode) + else: + Log.error('Not support mode {}'.format(mode)) + exit(1) + + @staticmethod + def cv2_read_image(image_path, mode='RGB'): + img_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR) + if mode == 'RGB': + return ImageHelper.bgr2rgb(img_bgr) + + elif mode == 'BGR': + return img_bgr + + elif mode == 'P': + return ImageHelper.img2np(Image.open(image_path).convert('P')) + + else: + Log.error('Not support mode {}'.format(mode)) + exit(1) + + @staticmethod + def pil_read_image(image_path, mode='RGB'): + with open(image_path, 'rb') as f: + img = Image.open(f) + + if mode == 'RGB': + return img.convert('RGB') + + elif mode == 'BGR': + img = img.convert('RGB') + cv_img = ImageHelper.rgb2bgr(np.array(img)) + return Image.fromarray(cv_img) + + elif mode == 'P': + return img.convert('P') + + else: + Log.error('Not support mode {}'.format(mode)) + exit(1) + + @staticmethod + def rgb2bgr(img_rgb): + if isinstance(img_rgb, Image.Image): + img_bgr = ImageHelper.rgb2bgr(ImageHelper.img2np(img_rgb)) + return ImageHelper.np2img(img_bgr) + + img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR) + return img_bgr + + @staticmethod + def bgr2rgb(img_bgr): + if isinstance(img_bgr, Image.Image): + img_rgb = ImageHelper.bgr2rgb(ImageHelper.img2np(img_bgr)) + return ImageHelper.np2img(img_rgb) + + img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) + return img_rgb + + @staticmethod + def bgr2gray(img, keepdim=False): + """Convert a BGR image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + @staticmethod + def gray2bgr(img): + """Convert a grayscale image to BGR image. + + Args: + img (ndarray or str): The input image. + + Returns: + ndarray: The converted BGR image. 
+ """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + return out_img + + @staticmethod + def get_cv2_bgr(img, mode='RGB'): + if isinstance(img, Image.Image): + img = ImageHelper.img2np(img) + + if mode == 'RGB': + img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + return img_bgr + + return img + + @staticmethod + def imshow(win_name, img, time=0): + if isinstance(img, Image.Image): + img = ImageHelper.rgb2bgr(ImageHelper.img2np(img)) + + cv2.imshow(win_name, img) + cv2.waitKey(time) + + @staticmethod + def np2img(arr): + if len(arr.shape) == 2: + mode = 'P' + else: + mode = 'RGB' + + return Image.fromarray(arr, mode=mode) + + @staticmethod + def img2np(img): + return np.array(img) + + @staticmethod + def tonp(img): + if isinstance(img, Image.Image): + img = ImageHelper.img2np(img) + + return img.astype(np.uint8) + + @staticmethod + def get_size(img): + if isinstance(img, Image.Image): + return img.size + + elif isinstance(img, np.ndarray): + height, width = img.shape[:2] + return [width, height] + + else: + Log.error('Image type is invalid.') + exit(1) + + @staticmethod + def resize(img, target_size, interpolation=None): + assert isinstance(target_size, (list, tuple)) + assert isinstance(interpolation, str) + + target_size = tuple(target_size) + if isinstance(img, Image.Image): + return ImageHelper.pil_resize(img, target_size, interpolation=PIL_INTER_DICT[interpolation]) + + elif isinstance(img, np.ndarray): + return ImageHelper.cv2_resize(img, target_size, interpolation=CV2_INTER_DICT[interpolation]) + + else: + Log.error('Image type is invalid.') + exit(1) + + @staticmethod + def pil_resize(img, target_size, interpolation): + assert isinstance(target_size, (list, tuple)) + + target_size = tuple(target_size) + + if isinstance(img, Image.Image): + return img.resize(target_size, interpolation) + + elif isinstance(img, np.ndarray): + pil_img = ImageHelper.np2img(img) + return ImageHelper.img2np(pil_img.resize(target_size, interpolation)) + + else: + Log.error('Image type is invalid.') + exit(1) + + @staticmethod + def cv2_resize(img, target_size, interpolation): + assert isinstance(target_size, (list, tuple)) + + target_size = tuple(target_size) + + if isinstance(img, Image.Image): + img = ImageHelper.img2np(img) + target_img = cv2.resize(img, target_size, interpolation=interpolation) + return ImageHelper.np2img(target_img) + + elif isinstance(img, np.ndarray): + return cv2.resize(img, target_size, interpolation=interpolation) + + else: + Log.error('Image type is invalid.') + exit(1) + + @staticmethod + def save(img, save_path): + if isinstance(img, Image.Image): + img.save(save_path) + + elif isinstance(img, np.ndarray): + cv2.imwrite(save_path, img) + + else: + Log.error('Image type is invalid.') + exit(1) + + @staticmethod + def fig2img(fig): + """ + @brief Convert a Matplotlib figure to a PIL Image in RGBA format and return it + @param fig a matplotlib figure + @return a Python Imaging Library ( PIL ) image + """ + # put the figure pixmap into a numpy array + buf = ImageHelper.fig2data(fig) + h, w, d = buf.shape + return Image.frombytes("RGBA", (w, h), buf.tostring()) + + @staticmethod + def fig2np(fig): + fig.canvas.draw() + data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='') + data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) + return data + + @staticmethod + def fig2data(fig): + """ + @brief Convert a Matplotlib figure to a 4D numpy array with RGBA channels and return it + @param fig a matplotlib 
figure + @return a numpy 3D array of RGBA values + """ + # draw the renderer + fig.canvas.draw() + + # Get the RGBA buffer from the figure + w, h = fig.canvas.get_width_height() + buf = np.fromstring(fig.canvas.tostring_argb(), dtype=np.uint8) + buf.shape = (w, h, 4) + + # canvas.tostring_argb give pixmap in ARGB mode. Roll the ALPHA channel to have it in RGBA mode + buf = np.roll(buf, 3, axis=2) + return buf.reshape(h, w, 4) + + @staticmethod + def imfrombytes(content, flag='color'): + """Read an image from bytes. + + Args: + content (bytes): Image bytes got from files or other streams. + flag (str): Same as :func:`imread`. + + Returns: + ndarray: Loaded image array. + """ + imread_flags = { + 'color': cv2.IMREAD_COLOR, + 'grayscale': cv2.IMREAD_GRAYSCALE, + 'unchanged': cv2.IMREAD_UNCHANGED + } + img_np = np.fromstring(content, np.uint8) + flag = imread_flags[flag] if isinstance(flag, str) else flag + img = cv2.imdecode(img_np, flag) + return img + + @staticmethod + def is_img(img_name): + IMG_EXTENSIONS = [ + '.jpg', '.JPG', '.jpeg', '.JPEG', + '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', + ] + return any(img_name.endswith(extension) for extension in IMG_EXTENSIONS) + + +if __name__ == "__main__": + target_size = (368, 368) + image_path = '/home/donny/Projects/PyTorchCV/val/samples/pose/coco/ski.jpg' + pil_img = ImageHelper.cv2_read_image(image_path) + pil_img = ImageHelper.np2img(pil_img) + cv2_img = ImageHelper.cv2_read_image(image_path) + ImageHelper.imshow('main', np.array(pil_img) - cv2_img) + + pil_img = ImageHelper.cv2_resize(pil_img, target_size, interpolation=cv2.INTER_CUBIC) + cv2_img = ImageHelper.cv2_resize(cv2_img, target_size, interpolation=cv2.INTER_CUBIC) + # cv2_img = ImageHelper.bgr2rgb(cv2_img) + ImageHelper.imshow('main', np.array(pil_img) - cv2_img) + ImageHelper.imshow('main', pil_img) + ImageHelper.imshow('main', cv2_img) + + # resize_pil_img.show() + print(np.unique(np.array(pil_img) - np.array(cv2_img))) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/json_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/json_helper.py new file mode 100644 index 0000000..0d1bf54 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/json_helper.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) +# Repackage some json operations. 
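A short, hedged usage sketch for the ImageHelper class above ('demo.jpg' is a placeholder path): read_image returns a NumPy array with tool='cv2' and a PIL.Image with tool='pil', and resize dispatches on the input type, so the same call works for both.

from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper

img_np = ImageHelper.read_image('demo.jpg', tool='cv2', mode='RGB')   # ndarray, H x W x 3
img_pil = ImageHelper.read_image('demo.jpg', tool='pil', mode='RGB')  # PIL.Image
print(ImageHelper.get_size(img_np), ImageHelper.get_size(img_pil))    # (width, height) for both input types
small = ImageHelper.resize(img_np, (368, 368), interpolation='cubic')
# Note: save() uses cv2.imwrite for ndarrays, which expects BGR channel order.
ImageHelper.save(ImageHelper.rgb2bgr(small), 'demo_368.jpg')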
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os + +from lib.utils.tools.logger import Logger as Log + + +class JsonHelper(object): + + @staticmethod + def load_file(json_file): + if not os.path.exists(json_file): + Log.error('Json file: {} not exists.'.format(json_file)) + exit(1) + + with open(json_file, 'r') as read_stream: + json_dict = json.load(read_stream) + + return json_dict + + @staticmethod + def save_file(json_dict, json_file): + dir_name = os.path.dirname(json_file) + if not os.path.exists(dir_name): + Log.info('Json Dir: {} not exists.'.format(dir_name)) + os.makedirs(dir_name) + + with open(json_file, 'w') as write_stream: + write_stream.write(json.dumps(json_dict)) + + @staticmethod + def json2xml(json_file, xml_file): + if not os.path.exists(json_file): + Log.error('Json file: {} not exists.'.format(json_file)) + exit(1) + + xml_dir_name = os.path.dirname(xml_file) + if not os.path.exists(xml_dir_name): + Log.info('Xml Dir: {} not exists.'.format(xml_dir_name)) + os.makedirs(xml_dir_name) + + @staticmethod + def xml2json(xml_file, json_file): + if not os.path.exists(xml_file): + Log.error('Xml file: {} not exists.'.format(xml_file)) + exit(1) + + json_dir_name = os.path.dirname(json_file) + if not os.path.exists(json_dir_name): + Log.info('Json Dir: {} not exists.'.format(json_dir_name)) + os.makedirs(json_dir_name) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/mask_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/mask_helper.py new file mode 100644 index 0000000..40cb08a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/mask_helper.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) +# Repackage some mask operations. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import pycocotools.mask as mask_util + + +class MaskHelper(object): + + @staticmethod + def polys2mask(polygons, width, height): + rles = mask_util.frPyObjects(polygons, height, width) + rle = mask_util.merge(rles) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + return mask + + @staticmethod + def rle2mask(rle, width, height): + if 'counts' in rle and type(rle['counts']) == list: + rle = mask_util.frPyObjects([rle], height, width) + + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + @staticmethod + def polys2mask_wrt_box(polygons, box, target_size): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed in the given box and rasterized to an M x M + mask. The resulting mask is therefore of shape (M, M). 
+ """ + w = box[2] - box[0] + h = box[3] - box[1] + + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + polygons_norm = [] + for poly in polygons: + p = np.array(poly, dtype=np.float32) + p[0::2] = (p[0::2] - box[0]) * target_size[0] / w + p[1::2] = (p[1::2] - box[1]) * target_size[1] / h + polygons_norm.append(p) + + rle = mask_util.frPyObjects(polygons_norm, target_size[1], target_size[0]) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + @staticmethod + def rle_mask_voting(top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'): + """Returns new masks (in correspondence with `top_masks`) by combining + multiple overlapping masks coming from the pool of `all_masks`. Two methods + for combining masks are supported: 'AVG' uses a weighted average of + overlapping mask pixels; 'UNION' takes the union of all mask pixels. + """ + if len(top_masks) == 0: + return + + all_not_crowd = [False] * len(all_masks) + top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd) + decoded_all_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks + ] + decoded_top_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks + ] + all_boxes = all_dets[:, :4].astype(np.int32) + all_scores = all_dets[:, 4] + + # Fill box support with weights + mask_shape = decoded_all_masks[0].shape + mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1])) + for k in range(len(all_masks)): + ref_box = all_boxes[k] + x_0 = max(ref_box[0], 0) + x_1 = min(ref_box[2] + 1, mask_shape[1]) + y_0 = max(ref_box[1], 0) + y_1 = min(ref_box[3] + 1, mask_shape[0]) + mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k] + mask_weights = np.maximum(mask_weights, 1e-5) + + top_segms_out = [] + for k in range(len(top_masks)): + # Corner case of empty mask + if decoded_top_masks[k].sum() == 0: + top_segms_out.append(top_masks[k]) + continue + + inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0] + # Only matches itself + if len(inds_to_vote) == 1: + top_segms_out.append(top_masks[k]) + continue + + masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote] + if method == 'AVG': + ws = mask_weights[inds_to_vote] + soft_mask = np.average(masks_to_vote, axis=0, weights=ws) + mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8) + elif method == 'UNION': + # Any pixel that's on joins the mask + soft_mask = np.sum(masks_to_vote, axis=0) + mask = np.array(soft_mask > 1e-5, dtype=np.uint8) + else: + raise NotImplementedError('Method {} is unknown'.format(method)) + rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0] + top_segms_out.append(rle) + + return top_segms_out + + @staticmethod + def rle_mask_nms(masks, dets, thresh, mode='IOU'): + """Performs greedy non-maximum suppression based on an overlap measurement + between masks. The type of measurement is determined by `mode` and can be + either 'IOU' (standard intersection over union) or 'IOMA' (intersection over + mininum area). 
+ """ + if len(masks) == 0: + return [] + if len(masks) == 1: + return [0] + + if mode == 'IOU': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2)) + all_not_crowds = [False] * len(masks) + ious = mask_util.iou(masks, masks, all_not_crowds) + elif mode == 'IOMA': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2)) + all_crowds = [True] * len(masks) + # ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + ious = mask_util.iou(masks, masks, all_crowds) + # ... = max(area(intersect(m1, m2)) / area(m2), + # area(intersect(m2, m1)) / area(m1)) + ious = np.maximum(ious, ious.transpose()) + elif mode == 'CONTAINMENT': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + # Which measures how much m2 is contained inside m1 + all_crowds = [True] * len(masks) + ious = mask_util.iou(masks, masks, all_crowds) + else: + raise NotImplementedError('Mode {} is unknown'.format(mode)) + + scores = dets[:, 4] + order = np.argsort(-scores) + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = ious[i, order[1:]] + inds_to_keep = np.where(ovr <= thresh)[0] + order = order[inds_to_keep + 1] + + return keep \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/offset_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/offset_helper.py new file mode 100644 index 0000000..b52ebbb --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/offset_helper.py @@ -0,0 +1,530 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: JingyiXie +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import torch +import numpy as np +import torch.nn.functional as F +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +ori_scales = { + 4: 1, + 8: 1, + 16: 2, + 32: 4, +} + + +class DTOffsetConfig: + # energy configurations + energy_level_step = int(os.environ.get('dt_energy_level_step', 5)) + assert energy_level_step > 0 + + max_distance = int(os.environ.get('dt_max_distance', 5)) + min_distance = int(os.environ.get('dt_min_distance', 0)) + + num_energy_levels = max_distance // energy_level_step + 1 + + offset_min_level = int(os.environ.get('dt_offset_min_level', 0)) + offset_max_level = int(os.environ.get('dt_offset_max_level', 5)) + # assert 0 <= offset_min_level < num_energy_levels - 1 + # assert 0 < offset_max_level <= num_energy_levels + + # direction configurations + num_classes = int(os.environ.get('dt_num_classes', 8)) + assert num_classes in (4, 8, 16, 32,) + + # offset scale configurations + scale = int(os.environ.get('dt_scale', ori_scales[num_classes])) + assert scale % ori_scales[num_classes] == 0 + scale //= ori_scales[num_classes] + + c4_align_axis = os.environ.get('c4_align_axis') is not None + + Log.info( + 'engery/max-distance: {} engery/min-distance: {}'.format( + max_distance, + min_distance + ) + ) + + Log.info( + 'direction/num_classes: {} scale: {}'.format( + num_classes, + scale + ) + ) + + Log.info( + 'c4 align axis: {}'.format(c4_align_axis) + ) + + +label_to_vector_mapping = { + 4: [ + [-1, -1], [-1, 1], [1, 1], [1, -1] + ] if not DTOffsetConfig.c4_align_axis else [ + [0, -1], [-1, 0], [0, 1], [1, 0] + ], + 8: [ + [0, -1], [-1, -1], [-1, 0], [-1, 
1], + [0, 1], [1, 1], [1, 0], [1, -1] + ], + 16: [ + [0, -2], [-1, -2], [-2, -2], [-2, -1], + [-2, 0], [-2, 1], [-2, 2], [-1, 2], + [0, 2], [1, 2], [2, 2], [2, 1], + [2, 0], [2, -1], [2, -2], [1, -2] + ], + 32: [ + [0, -4], [-1, -4], [-2, -4], [-3, -4], [-4, -4], [-4, -3], [-4, -2], [-4, -1], + [-4, 0], [-4, 1], [-4, 2], [-4, 3], [-4, 4], [-3, 4], [-2, 4], [-1, 4], + [0, 4], [1, 4], [2, 4], [3, 4], [4, 4], [4, 3], [4, 2], [4, 1], + [4, 0], [4, -1], [4, -2], [4, -3], [4, -4], [3, -4], [2, -4], [1, -4], + ] +} + +vector_to_label_mapping = { + 8: list(range(8)), + 16: list(range(16)), +} + + +class Sobel: + + _caches = {} + ksize = 11 + + @staticmethod + def _generate_sobel_kernel(shape, axis): + """ + shape must be odd: eg. (5,5) + axis is the direction, with 0 to positive x and 1 to positive y + """ + k = np.zeros(shape, dtype=np.float32) + p = [ + (j, i) + for j in range(shape[0]) + for i in range(shape[1]) + if not (i == (shape[1] - 1) / 2.0 and j == (shape[0] - 1) / 2.0) + ] + + for j, i in p: + j_ = int(j - (shape[0] - 1) / 2.0) + i_ = int(i - (shape[1] - 1) / 2.0) + k[j, i] = (i_ if axis == 0 else j_) / float(i_ * i_ + j_ * j_) + return torch.from_numpy(k).unsqueeze(0) + + @classmethod + def kernel(cls, ksize=None): + if ksize is None: + ksize = cls.ksize + if ksize in cls._caches: + return cls._caches[ksize] + + sobel_x, sobel_y = (cls._generate_sobel_kernel((ksize, ksize), i) for i in (0, 1)) + sobel_ker = torch.cat([sobel_y, sobel_x], dim=0).view(2, 1, ksize, ksize) + cls._caches[ksize] = sobel_ker + return sobel_ker + + +class DTOffsetHelper: + + @staticmethod + def encode_multi_labels(dir_labels): + """ + Only accept ndarray of shape H x W (uint8). + """ + assert isinstance(dir_labels, np.ndarray) + + output = np.zeros((*dir_labels.shape, 8), dtype=np.int) + for i in range(8): + output[..., i] = (dir_labels & (1 << i) != 0).astype(np.int) + + return output + + @staticmethod + def edge_mask_to_vector(edge_mask, kernel_size=Sobel.ksize, normalized=True): + """ + `edge_mask` -> 1 indicates edge. + """ + edge_mask = torch.clamp(edge_mask, min=0, max=1) + edge_mask = 1 - edge_mask + + sobel_kernel = Sobel.kernel(ksize=kernel_size).to(edge_mask.device) + direction = F.conv2d( + edge_mask, + sobel_kernel, + padding=kernel_size // 2 + ) + + if normalized: + direction = F.normalize(direction, dim=1) + + return direction + + @staticmethod + def binary_mask_map_to_offset(bmap): + """ + refer to: https://stackoverflow.com/questions/9567882/sobel-filter-kernel-of-large-size/41065243#41065243 + apply sobel on the binary edge map to estimate the offset directions for the edge pixels. 
+ """ + from scipy.ndimage.morphology import distance_transform_edt + + depths = [] + _, h, w = bmap.size() + for bmap_i in (1 - bmap).cpu().numpy(): + depth_i = distance_transform_edt(bmap_i) + depths.append(torch.from_numpy(depth_i).view(1, 1, h, w)) + + depths = torch.cat(depths, dim=0).to(bmap.device) + offsets = F.conv2d(depths, Sobel.kernel().to(bmap.device), padding=Sobel.ksize // 2) + angles = torch.atan2(offsets[:, 0], offsets[:, 1]) / np.pi * 180 + offset = DTOffsetHelper.angle_to_offset(angles, return_tensor=True) + offset[(bmap == 1).unsqueeze(-1).repeat(1, 1, 1, 2)] = 0 + return offset + + @staticmethod + def distance_to_energy_label(distance_map, + seg_label_map, + return_tensor=False): + if return_tensor: + assert isinstance(distance_map, torch.Tensor) + assert isinstance(seg_label_map, torch.Tensor) + else: + assert isinstance(distance_map, np.ndarray) + assert isinstance(seg_label_map, np.ndarray) + + if return_tensor: + energy_label_map = torch.zeros_like(seg_label_map).long().to(distance_map.device) + else: + energy_label_map = np.zeros(seg_label_map.shape, dtype=np.int) + + keep_mask = seg_label_map != -1 + energy_level_step = DTOffsetConfig.energy_level_step + + for i in range(DTOffsetConfig.num_energy_levels - 1): + energy_label_map[keep_mask & ( + distance_map >= i * energy_level_step) & (distance_map < (i + 1) * energy_level_step)] = i + + energy_label_map[keep_mask & ( + distance_map >= DTOffsetConfig.max_distance)] = DTOffsetConfig.num_energy_levels - 1 + + energy_label_map[~keep_mask] = -1 + + return energy_label_map + + @staticmethod + def logits_to_vector(dir_map): + dir_map = F.softmax(dir_map, dim=1) + + n, _, h, w = dir_map.shape + offsets = DTOffsetHelper.label_to_vector( + torch.arange(DTOffsetConfig.num_classes).view(DTOffsetConfig.num_classes, 1, 1).cuda() + ).float().unsqueeze(0) # 1 x 8 x 2 x 1 x 1 + offsets_h = offsets[:, :, 0].repeat(n, 1, h, w) # n x 8 x h x w + offsets_w = offsets[:, :, 1].repeat(n, 1, h, w) # n x 8 x h x w + offsets = torch.stack([ + (offsets_h * dir_map).sum(dim=1), + (offsets_w * dir_map).sum(dim=1), + ], dim=1) + offsets = F.normalize(offsets, p=2, dim=1) + + return offsets + + @staticmethod + def get_opposite_angle(angle_map): + new_angle_map = angle_map + 180 + mask = (new_angle_map >= 180) & (new_angle_map <= 360) + new_angle_map[mask] = new_angle_map[mask] - 360 + return new_angle_map + + @staticmethod + def label_to_vector(labelmap, + num_classes=DTOffsetConfig.num_classes): + + assert isinstance(labelmap, torch.Tensor) + + mapping = label_to_vector_mapping[num_classes] + offset_h = torch.zeros_like(labelmap).long() + offset_w = torch.zeros_like(labelmap).long() + + for idx, (hdir, wdir) in enumerate(mapping): + mask = labelmap == idx + offset_h[mask] = hdir + offset_w[mask] = wdir + + return torch.stack([offset_h, offset_w], dim=-1).permute(0, 3, 1, 2).to(labelmap.device) + + @staticmethod + def distance_to_mask_label(distance_map, + seg_label_map, + return_tensor=False): + + if return_tensor: + assert isinstance(distance_map, torch.Tensor) + assert isinstance(seg_label_map, torch.Tensor) + else: + assert isinstance(distance_map, np.ndarray) + assert isinstance(seg_label_map, np.ndarray) + + if return_tensor: + mask_label_map = torch.zeros_like(seg_label_map).long().to(distance_map.device) + else: + mask_label_map = np.zeros(seg_label_map.shape, dtype=np.int) + + keep_mask = (distance_map <= DTOffsetConfig.max_distance) & (distance_map >= DTOffsetConfig.min_distance) + mask_label_map[keep_mask] = 1 + 
mask_label_map[seg_label_map == -1] = -1 + + return mask_label_map + + @staticmethod + def align_angle_c4(angle_map, return_tensor=False): + """ + [-180, -90) -> 0 + [-90, 0) -> 1 + [0, 90) -> 2 + [90, 180) -> 3 + """ + + if return_tensor: + assert isinstance(angle_map, torch.Tensor) + else: + angle_map = torch.from_numpy(angle_map) + + angle_index_map = torch.trunc((angle_map + 180) / 90).long() + angle_index_map = torch.clamp(angle_index_map, min=0, max=3) + + new_angle_map = (angle_index_map * 90 - 135).float() + + if not return_tensor: + new_angle_map = new_angle_map.numpy() + angle_index_map = angle_index_map.numpy() + + return new_angle_map, angle_index_map + + @staticmethod + def align_angle(angle_map, + num_classes=DTOffsetConfig.num_classes, + return_tensor=False): + + if num_classes == 4 and not DTOffsetConfig.c4_align_axis: + return DTOffsetHelper.align_angle_c4(angle_map, return_tensor=return_tensor) + + if return_tensor: + assert isinstance(angle_map, torch.Tensor) + else: + assert isinstance(angle_map, np.ndarray) + + step = 360 / num_classes + if return_tensor: + new_angle_map = torch.zeros(angle_map.shape).float().to(angle_map.device) + angle_index_map = torch.zeros(angle_map.shape).long().to(angle_map.device) + else: + new_angle_map = np.zeros(angle_map.shape, dtype=np.float) + angle_index_map = np.zeros(angle_map.shape, dtype=np.int) + mask = (angle_map <= (-180 + step/2)) | (angle_map > (180 - step/2)) + new_angle_map[mask] = -180 + angle_index_map[mask] = 0 + + for i in range(1, num_classes): + middle = -180 + step * i + mask = (angle_map > (middle - step / 2)) & (angle_map <= (middle + step / 2)) + new_angle_map[mask] = middle + angle_index_map[mask] = i + + return new_angle_map, angle_index_map + + + @staticmethod + def angle_to_offset(angle_map, + distance_map=None, + num_classes=DTOffsetConfig.num_classes, + return_tensor=False, + use_scale=False): + + if return_tensor: + assert isinstance(distance_map, torch.Tensor) or distance_map is None + assert isinstance(angle_map, torch.Tensor) + else: + assert isinstance(distance_map, np.ndarray) or distance_map is None + assert isinstance(angle_map, np.ndarray) + + _, angle_index_map = DTOffsetHelper.align_angle( + angle_map, num_classes=num_classes, return_tensor=return_tensor) + mapping = label_to_vector_mapping[num_classes] + + if use_scale: + scale = DTOffsetConfig.scale + else: + scale = 1 + + if distance_map is not None: + no_offset_mask = ( + (distance_map > DTOffsetConfig.max_distance) | + (distance_map < DTOffsetConfig.min_distance) + ) + else: + no_offset_mask = torch.zeros(angle_map.shape, dtype=torch.uint8).to(angle_map.device) + + if return_tensor: + offset_h = torch.zeros(angle_map.shape).long().to(angle_map.device) + offset_w = torch.zeros(angle_map.shape).long().to(angle_map.device) + else: + offset_h = np.zeros(angle_map.shape, dtype=np.int) + offset_w = np.zeros(angle_map.shape, dtype=np.int) + + for i in range(num_classes): + mask = (angle_index_map == i) & ~no_offset_mask + offset_h[mask] = mapping[i][0] * scale + offset_w[mask] = mapping[i][1] * scale + + if return_tensor: + return torch.stack([offset_h, offset_w], dim=-1) + else: + return np.stack([offset_h, offset_w], axis=-1) + + + @staticmethod + def _vis_offset(_offset, + image_name=None, + image=None, + color=(0, 0, 255), + only_points=False): + import cv2 + import random + import os.path as osp + if image is None: + color = 255 + image = np.zeros_like(_offset[:, :, 0], dtype=np.uint8) + + if only_points: + image[(_offset[:, :, 0] != 0) | 
(_offset[:, :, 1] != 0)] = 255 + else: + step = 6 + coord_map = torch.stack(torch.meshgrid([torch.arange( + length) for length in _offset.shape[:-1]]), dim=-1).numpy().astype(np.int) + offset = (_offset * 10 + coord_map).astype(np.int) + for i in range(step//2, offset.shape[0], step): + for j in range(step//2, offset.shape[1], step): + if (_offset[i, j] == 0).all(): + continue + cv2.arrowedLine(img=image, pt1=tuple( + coord_map[i, j][::-1]), pt2=tuple(offset[i, j][::-1]), color=color, thickness=1) + if image_name is None: + image_name = '{}.png'.format(random.random()) + cv2.imwrite('/msravcshare/v-jinxi/vis/{}.png'.format(image_name), image) + + @staticmethod + def angle_to_vector(angle_map, + num_classes=DTOffsetConfig.num_classes, + return_tensor=False): + + if return_tensor: + assert isinstance(angle_map, torch.Tensor) + else: + assert isinstance(angle_map, np.ndarray) + + if return_tensor: + lib = torch + vector_map = torch.zeros((*angle_map.shape, 2), dtype=torch.float).to(angle_map.device) + deg2rad = lambda x: np.pi / 180.0 * x + else: + lib = np + vector_map = np.zeros((*angle_map.shape, 2), dtype=np.float) + deg2rad = np.deg2rad + + if num_classes is not None: + angle_map, _ = DTOffsetHelper.align_angle(angle_map, num_classes=num_classes, return_tensor=return_tensor) + + angle_map = deg2rad(angle_map) + + vector_map[..., 0] = lib.sin(angle_map) + vector_map[..., 1] = lib.cos(angle_map) + + return vector_map + + @staticmethod + def angle_to_direction_label(angle_map, + seg_label_map=None, + distance_map=None, + num_classes=DTOffsetConfig.num_classes, + extra_ignore_mask=None, + return_tensor=False): + + if return_tensor: + assert isinstance(angle_map, torch.Tensor) + assert isinstance(seg_label_map, torch.Tensor) or seg_label_map is None + else: + assert isinstance(angle_map, np.ndarray) + assert isinstance(seg_label_map, np.ndarray) or seg_label_map is None + + _, label_map = DTOffsetHelper.align_angle(angle_map, + num_classes=num_classes, + return_tensor=return_tensor) + if distance_map is not None: + label_map[distance_map > DTOffsetConfig.max_distance] = num_classes + if seg_label_map is None: + if return_tensor: + ignore_mask = torch.zeros(angle_map.shape, dtype=torch.uint8).to(angle_map.device) + else: + ignore_mask = np.zeros(angle_map.shape, dtype=np.bool) + else: + ignore_mask = seg_label_map == -1 + + if extra_ignore_mask is not None: + ignore_mask = ignore_mask | extra_ignore_mask + label_map[ignore_mask] = -1 + + return label_map + + @staticmethod + def vector_to_label(vector_map, + num_classes=DTOffsetConfig.num_classes, + return_tensor=False): + + if return_tensor: + assert isinstance(vector_map, torch.Tensor) + else: + assert isinstance(vector_map, np.ndarray) + + if return_tensor: + rad2deg = lambda x: x * 180. 
/ np.pi + else: + rad2deg = np.rad2deg + + angle_map = np.arctan2(vector_map[..., 0], vector_map[..., 1]) + angle_map = rad2deg(angle_map) + + return DTOffsetHelper.angle_to_direction_label(angle_map, + return_tensor=return_tensor, + num_classes=num_classes) + +if __name__ == '__main__': + angle = torch.tensor([[0., 45., 90., 180., -180.]]) + print(DTOffsetHelper.align_angle(angle, num_classes=4, return_tensor=True)) + raise RuntimeError + distance_map = torch.tensor([[1., 2., 3., 255., 4.]]) + seg_map = torch.tensor([[-1, 0, 0, 0, 0]]) + print(angle) + print(DTOffsetHelper.angle_to_direction_label(angle, return_tensor=True, distance_map=distance_map, seg_label_map=seg_map)) + print(DTOffsetHelper.angle_to_offset(angle, return_tensor=True, distance_map=distance_map)) + print(DTOffsetHelper.distance_to_mask_label(distance_map, seg_map, return_tensor=True)) + vector = DTOffsetHelper.angle_to_vector(angle, return_tensor=True) + print(vector) + print(DTOffsetHelper.vector_to_label(vector, return_tensor=True)) + angle = np.array([0., 45., 90., 180., -180.]) + distance_map = np.array([1., 2., 3., 255., 4.]) + print(angle) + print(DTOffsetHelper.angle_to_direction_label(angle, return_tensor=False, distance_map=distance_map)) + vector = (DTOffsetHelper.angle_to_vector(angle, return_tensor=False)) + print(vector) + print(DTOffsetHelper.vector_to_label(vector, return_tensor=False)) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/video_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/video_helper.py new file mode 100644 index 0000000..2b6ea10 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/helpers/video_helper.py @@ -0,0 +1,471 @@ +import os +import os.path as osp +import subprocess +import tempfile +from collections import OrderedDict + +import cv2 +from cv2 import (CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT, CAP_PROP_FPS, + CAP_PROP_FRAME_COUNT, CAP_PROP_FOURCC, + CAP_PROP_POS_FRAMES) +from utils.helpers.file_helper import FileHelper + +from lib.utils.tools.progressbar import track_progress + + +class Cache(object): + + def __init__(self, capacity): + self._cache = OrderedDict() + self._capacity = int(capacity) + if capacity <= 0: + raise ValueError('capacity must be a positive integer') + + @property + def capacity(self): + return self._capacity + + @property + def size(self): + return len(self._cache) + + def put(self, key, val): + if key in self._cache: + return + if len(self._cache) >= self.capacity: + self._cache.popitem(last=False) + self._cache[key] = val + + def get(self, key, default=None): + val = self._cache[key] if key in self._cache else default + return val + + +class VideoReader(object): + """Video class with similar usage to a list object. + + This video warpper class provides convenient apis to access frames. + There exists an issue of OpenCV's VideoCapture class that jumping to a + certain frame may be inaccurate. It is fixed in this class by checking + the position after jumping each time. + Cache is used when decoding videos. So if the same frame is visited for + the second time, there is no need to decode again if it is stored in the + cache. 
+ + :Example: + + >>> v = VideoReader('sample.mp4') + >>> len(v) # get the total frame number with `len()` + 120 + >>> for img in v: # v is iterable + >>> cv2.imshow(img) + >>> v[5] # get the 6th frame + """ + + def __init__(self, filename, cache_capacity=10): + FileHelper.check_file_exist(filename, 'Video file not found: ' + filename) + self._vcap = cv2.VideoCapture(filename) + assert cache_capacity > 0 + self._cache = Cache(cache_capacity) + self._position = 0 + # get basic info + self._width = int(self._vcap.get(CAP_PROP_FRAME_WIDTH)) + self._height = int(self._vcap.get(CAP_PROP_FRAME_HEIGHT)) + self._fps = int(round(self._vcap.get(CAP_PROP_FPS))) + self._frame_cnt = int(self._vcap.get(CAP_PROP_FRAME_COUNT)) + self._fourcc = self._vcap.get(CAP_PROP_FOURCC) + + @property + def vcap(self): + """:obj:`cv2.VideoCapture`: The raw VideoCapture object.""" + return self._vcap + + @property + def opened(self): + """bool: Indicate whether the video is opened.""" + return self._vcap.isOpened() + + @property + def width(self): + """int: Width of video frames.""" + return self._width + + @property + def height(self): + """int: Height of video frames.""" + return self._height + + @property + def resolution(self): + """tuple: Video resolution (width, height).""" + return (self._width, self._height) + + @property + def fps(self): + """int: FPS of the video.""" + return self._fps + + @property + def frame_cnt(self): + """int: Total frames of the video.""" + return self._frame_cnt + + @property + def fourcc(self): + """str: "Four character code" of the video.""" + return self._fourcc + + @property + def position(self): + """int: Current cursor position, indicating frame decoded.""" + return self._position + + def _get_real_position(self): + return int(round(self._vcap.get(CAP_PROP_POS_FRAMES))) + + def _set_real_position(self, frame_id): + self._vcap.set(CAP_PROP_POS_FRAMES, frame_id) + pos = self._get_real_position() + for _ in range(frame_id - pos): + self._vcap.read() + self._position = frame_id + + def read(self): + """Read the next frame. + + If the next frame have been decoded before and in the cache, then + return it directly, otherwise decode, cache and return it. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + # pos = self._position + if self._cache: + img = self._cache.get(self._position) + if img is not None: + ret = True + else: + if self._position != self._get_real_position(): + self._set_real_position(self._position) + ret, img = self._vcap.read() + if ret: + self._cache.put(self._position, img) + else: + ret, img = self._vcap.read() + if ret: + self._position += 1 + return img + + def get_frame(self, frame_id): + """Get frame by index. + + Args: + frame_id (int): Index of the expected frame, 0-based. + + Returns: + ndarray or None: Return the frame if successful, otherwise None. + """ + if frame_id < 0 or frame_id >= self._frame_cnt: + raise IndexError( + '"frame_id" must be between 0 and {}'.format(self._frame_cnt - + 1)) + if frame_id == self._position: + return self.read() + if self._cache: + img = self._cache.get(frame_id) + if img is not None: + self._position = frame_id + 1 + return img + self._set_real_position(frame_id) + ret, img = self._vcap.read() + if ret: + if self._cache: + self._cache.put(self._position, img) + self._position += 1 + return img + + def current_frame(self): + """Get the current frame (frame that is just visited). + + Returns: + ndarray or None: If the video is fresh, return None, otherwise + return the frame. 
+ """ + if self._position == 0: + return None + return self._cache.get(self._position - 1) + + def cvt2frames(self, + frame_dir, + file_start=0, + filename_tmpl='{:06d}.jpg', + start=0, + max_num=0, + show_progress=True): + """Convert a video to frame images + + Args: + frame_dir (str): Output directory to store all the frame images. + file_start (int): Filenames will start from the specified number. + filename_tmpl (str): Filename template with the index as the + placeholder. + start (int): The starting frame index. + max_num (int): Maximum number of frames to be written. + show_progress (bool): Whether to show a progress bar. + """ + FileHelper.make_dirs(frame_dir) + if max_num == 0: + task_num = self.frame_cnt - start + else: + task_num = min(self.frame_cnt - start, max_num) + if task_num <= 0: + raise ValueError('start must be less than total frame number') + if start > 0: + self._set_real_position(start) + + def write_frame(file_idx): + img = self.read() + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + cv2.imwrite(filename, img) + + if show_progress: + track_progress(write_frame, range(file_start, file_start + task_num)) + + else: + for i in range(task_num): + img = self.read() + if img is None: + break + filename = osp.join(frame_dir, + filename_tmpl.format(i + file_start)) + cv2.imwrite(filename, img) + + def __len__(self): + return self.frame_cnt + + def __getitem__(self, index): + if isinstance(index, slice): + return [ + self.get_frame(i) + for i in range(*index.indices(self.frame_cnt)) + ] + # support negative indexing + if index < 0: + index += self.frame_cnt + if index < 0: + raise IndexError('index out of range') + return self.get_frame(index) + + def __iter__(self): + self._set_real_position(0) + return self + + def __next__(self): + img = self.read() + if img is not None: + return img + else: + raise StopIteration + + next = __next__ + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._vcap.release() + + + + +class VideoHelper(object): + + @staticmethod + def convert_video(in_file, out_file, print_cmd=False, pre_options='', **kwargs): + """Convert a video with ffmpeg. + + This provides a general api to ffmpeg, the executed command is:: + + `ffmpeg -y -i ` + + Options(kwargs) are mapped to ffmpeg commands with the following rules: + + - key=val: "-key val" + - key=True: "-key" + - key=False: "" + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + pre_options (str): Options appears before "-i ". + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = [] + for k, v in kwargs.items(): + if isinstance(v, bool): + if v: + options.append('-{}'.format(k)) + elif k == 'log_level': + assert v in [ + 'quiet', 'panic', 'fatal', 'error', 'warning', 'info', + 'verbose', 'debug', 'trace' + ] + options.append('-loglevel {}'.format(v)) + else: + options.append('-{} {}'.format(k, v)) + cmd = 'ffmpeg -y {} -i {} {} {}'.format(pre_options, in_file, + ' '.join(options), out_file) + if print_cmd: + print(cmd) + subprocess.call(cmd, shell=True) + + @staticmethod + def resize_video(in_file, + out_file, + size=None, + ratio=None, + keep_ar=False, + log_level='info', + print_cmd=False, + **kwargs): + """Resize a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + size (tuple): Expected size (w, h), eg, (320, 240) or (320, -1). + ratio (tuple or float): Expected resize ratio, (2, 0.5) means + (w*2, h*0.5). 
+ keep_ar (bool): Whether to keep original aspect ratio. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + if size is None and ratio is None: + raise ValueError('expected size or ratio must be specified') + elif size is not None and ratio is not None: + raise ValueError('size and ratio cannot be specified at the same time') + options = {'log_level': log_level} + if size: + if not keep_ar: + options['vf'] = 'scale={}:{}'.format(size[0], size[1]) + else: + options['vf'] = ('scale=w={}:h={}:force_original_aspect_ratio' + '=decrease'.format(size[0], size[1])) + else: + if not isinstance(ratio, tuple): + ratio = (ratio, ratio) + options['vf'] = 'scale="trunc(iw*{}):trunc(ih*{})"'.format(ratio[0], ratio[1]) + + VideoHelper.convert_video(in_file, out_file, print_cmd, **options) + + @staticmethod + def cut_video(in_file, + out_file, + start=None, + end=None, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False, + **kwargs): + """Cut a clip from a video. + + Args: + in_file (str): Input video filename. + out_file (str): Output video filename. + start (None or float): Start time (in seconds). + end (None or float): End time (in seconds). + vcodec (None or str): Output video codec, None for unchanged. + acodec (None or str): Output audio codec, None for unchanged. + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + if start: + options['ss'] = start + else: + start = 0 + if end: + options['t'] = end - start + + VideoHelper.convert_video(in_file, out_file, print_cmd, **options) + + @staticmethod + def concat_video(video_list, + out_file, + vcodec=None, + acodec=None, + log_level='info', + print_cmd=False, + **kwargs): + """Concatenate multiple videos into a single one. + + Args: + video_list (list): A list of video filenames + out_file (str): Output video filename + vcodec (None or str): Output video codec, None for unchanged + acodec (None or str): Output audio codec, None for unchanged + log_level (str): Logging level of ffmpeg. + print_cmd (bool): Whether to print the final ffmpeg command. + """ + _, tmp_filename = tempfile.mkstemp(suffix='.txt', text=True) + with open(tmp_filename, 'w') as f: + for filename in video_list: + f.write('file {}\n'.format(osp.abspath(filename))) + options = {'log_level': log_level} + if vcodec is None: + options['vcodec'] = 'copy' + if acodec is None: + options['acodec'] = 'copy' + + VideoHelper.convert_video(tmp_filename, out_file, print_cmd, pre_options='-f concat -safe 0', **options) + os.remove(tmp_filename) + + @staticmethod + def frames2video(frame_dir, + video_file, + fps=30, + fourcc='XVID', + filename_tmpl='{:06d}.jpg', + start=0, + end=0, + show_progress=True): + """Read the frame images from a directory and join them as a video + + Args: + frame_dir (str): The directory containing video frames. + video_file (str): Output filename. + fps (int): FPS of the output video. + fourcc (str): Fourcc of the output video, this should be compatible + with the output file type. + filename_tmpl (str): Filename template with the index as the variable. + start (int): Starting frame index. + end (int): Ending frame index. + show_progress (bool): Whether to show a progress bar. 
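Taken together, VideoReader and the frames2video helper below support a simple decode/re-encode round trip. A hedged usage sketch ('sample.mp4' and the frame directory are placeholders, and the import path assumes the repackaged layout; the module's own relative imports may need adjusting):

from models.protoseg_core.lib.utils.helpers.video_helper import VideoReader, VideoHelper

v = VideoReader('sample.mp4')
v.cvt2frames('tmp/frames', show_progress=False)        # writes 000000.jpg, 000001.jpg, ...
VideoHelper.frames2video('tmp/frames', 'rebuilt.avi',
                         fps=v.fps, fourcc='XVID',
                         start=0, end=v.frame_cnt, show_progress=False)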
+ """ + + first_file = osp.join(frame_dir, filename_tmpl.format(start)) + img = cv2.imread(first_file) + height, width = img.shape[:2] + resolution = (width, height) + vwriter = cv2.VideoWriter(video_file, cv2.VideoWriter_fourcc(*fourcc), fps, + resolution) + + def write_frame(file_idx): + filename = osp.join(frame_dir, filename_tmpl.format(file_idx)) + img = cv2.imread(filename) + vwriter.write(img) + + if show_progress: + track_progress(write_frame, range(start, end)) + else: + for i in range(start, end): + filename = osp.join(frame_dir, filename_tmpl.format(i)) + img = cv2.imread(filename) + vwriter.write(img) + vwriter.release() + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/average_meter.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/average_meter.py new file mode 100644 index 0000000..c3a6e3c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/average_meter.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) +# Utils to store the average and current value. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +class AverageMeter(object): + """ Computes ans stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0. + self.avg = 0. + self.sum = 0. + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/configer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/configer.py new file mode 100644 index 0000000..282bfaa --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/configer.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Configer class for all hyper parameters. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import json +import os +import sys + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from ast import literal_eval + +import torch.backends.cudnn as cudnn + +def str2bool(v): + """ Usage: + parser.add_argument('--pretrained', type=str2bool, nargs='?', const=True, + dest='pretrained', help='Whether to use pretrained models.') + """ + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Unsupported value encountered.') + + +parser = argparse.ArgumentParser() + +# ProtoSeg params +parser.add_argument('--configs', default="models/protoseg_core/configs/cityscapes/H_48_D_4_proto.json", type=str,dest='configs', help='The file of the hyper parameters.') +parser.add_argument('--phase', default='test', type=str,dest='phase', help='The phase of module.') +parser.add_argument('--gpu', default=[0], nargs='+', type=int,dest='gpu', help='The gpu list used.') + +# *********** Params for data. 
********** +parser.add_argument('--data_dir', default='./models/protoseg_core/Cityscapes', type=str, nargs='+',dest='data:data_dir', help='The Directory of the data.') +parser.add_argument('--include_val', type=str2bool, nargs='?', default=False,dest='data:include_val', help='Include validation set for training.') +# include-coarse is only provided for Cityscapes. +parser.add_argument('--include_coarse', type=str2bool, nargs='?', default=False,dest='data:include_coarse', help='Include coarse-labeled set for training.') +parser.add_argument('--only_coarse', type=str2bool, nargs='?', default=False,dest='data:only_coarse', help='Only include coarse-labeled set for training.') +parser.add_argument('--only_mapillary', type=str2bool, nargs='?', default=False,dest='data:only_mapillary', help='Only include mapillary set for training.') +parser.add_argument('--only_small', type=str2bool, nargs='?', default=False,dest='data:only_small', help='Only include small val set for testing.') +# include-atr is used to choose ATR as extra training set for LIP dataset. +parser.add_argument('--include_atr', type=str2bool, nargs='?', default=False,dest='data:include_atr', help='Include atr set for LIP training.') +parser.add_argument('--include_cihp', type=str2bool, nargs='?', default=False,dest='data:include_cihp', help='Include cihp set for LIP training.') +parser.add_argument('--drop_last', type=str2bool, nargs='?', default='y',dest='data:drop_last', help='Fix bug for syncbn.') +parser.add_argument('--train_batch_size', default=None, type=int,dest='train:batch_size', help='The batch size of training.') +parser.add_argument('--val_batch_size', default=None, type=int,dest='val:batch_size', help='The batch size of validation.') + +# *********** Params for checkpoint. ********** +parser.add_argument('--checkpoints_root', default=None, type=str,dest='checkpoints:checkpoints_root', help='The root dir of model save path.') +parser.add_argument('--checkpoints_name', default='hrnet_w48_proto_lr1x_1', type=str,dest='checkpoints:checkpoints_name', help='The name of checkpoint model.') +parser.add_argument('--save_iters', default=None, type=int,dest='checkpoints:save_iters', help='The saving iters of checkpoint model.') +parser.add_argument('--save_epoch', default=None, type=int,dest='checkpoints:save_epoch', help='The saving epoch of checkpoint model.') + +# *********** Params for model. 
********** +parser.add_argument('--model_name', default="hrnet_w48_proto", type=str,dest='network:model_name', help='The name of model.') +parser.add_argument('--backbone', default="hrnet48", type=str,dest='network:backbone', help='The base network of model.') +parser.add_argument('--bn_type', default=None, type=str,dest='network:bn_type', help='The BN type of the network.') +parser.add_argument('--multi_grid', default=None, nargs='+', type=int,dest='network:multi_grid', help='The multi_grid for resnet backbone.') +parser.add_argument('--pretrained', type=str, default=None,dest='network:pretrained', help='The path to pretrained model.') +parser.add_argument('--resume', default="./models/protoseg_core/checkpoints/hrnet_w48_proto_lr1x_hrnet_proto_80k_latest.pth", type=str,dest='network:resume', help='The path of checkpoints.') +parser.add_argument('--resume_strict', type=str2bool, nargs='?', default=True,dest='network:resume_strict', help='Fully match keys or not.') +parser.add_argument('--resume_continue', type=str2bool, nargs='?', default=False,dest='network:resume_continue', help='Whether to continue training.') +parser.add_argument('--resume_eval_train', type=str2bool, nargs='?', default=True,dest='network:resume_train', help='Whether to validate the training set during resume.') +parser.add_argument('--resume_eval_val', type=str2bool, nargs='?', default=True,dest='network:resume_val', help='Whether to validate the val set during resume.') +parser.add_argument('--gathered', type=str2bool, nargs='?', default=True,dest='network:gathered', help='Whether to gather the output of model.') +parser.add_argument('--loss_balance', type=str2bool, nargs='?', default=False,dest='network:loss_balance', help='Whether to balance GPU usage.') + +# *********** Params for solver. ********** +parser.add_argument('--optim_method', default=None, type=str,dest='optim:optim_method', help='The optim method that used.') +parser.add_argument('--group_method', default=None, type=str,dest='optim:group_method', help='The group method that used.') +parser.add_argument('--base_lr', default=None, type=float,dest='lr:base_lr', help='The learning rate.') +parser.add_argument('--nbb_mult', default=1.0, type=float,dest='lr:nbb_mult', help='The not backbone mult ratio of learning rate.') +parser.add_argument('--lr_policy', default=None, type=str,dest='lr:lr_policy', help='The policy of lr during training.') +parser.add_argument('--loss_type', default="pixel_prototype_ce_loss", type=str,dest='loss:loss_type', help='The loss type of the network.') +parser.add_argument('--is_warm', type=str2bool, nargs='?', default=False,dest='lr:is_warm', help='Whether to warm training.') + +# *********** Params for display. ********** +parser.add_argument('--max_epoch', default=None, type=int,dest='solver:max_epoch', help='The max epoch of training.') +parser.add_argument('--max_iters', default=None, type=int,dest='solver:max_iters', help='The max iters of training.') +parser.add_argument('--display_iter', default=None, type=int,dest='solver:display_iter', help='The display iteration of train logs.') +parser.add_argument('--test_interval', default=None, type=int,dest='solver:test_interval', help='The test interval of validation.') + +# *********** Params for logging. 
********** +parser.add_argument('--logfile_level', default=None, type=str,dest='logging:logfile_level', help='To set the log level to files.') +parser.add_argument('--stdout_level', default=None, type=str,dest='logging:stdout_level', help='To set the level to print to screen.') +parser.add_argument('--log_file', default=None, type=str,dest='logging:log_file', help='The path of log files.') +parser.add_argument('--rewrite', type=str2bool, nargs='?', default=True,dest='logging:rewrite', help='Whether to rewrite files.') +parser.add_argument('--log_to_file', type=str2bool, nargs='?', default=True,dest='logging:log_to_file', help='Whether to write logging into files.') + +# *********** Params for test or submission. ********** +parser.add_argument('--test_img', default=None, type=str,dest='test:test_img', help='The test path of image.') +parser.add_argument('--test_dir', default='./Cityscapes', type=str,dest='test:test_dir', help='The test directory of images.') +parser.add_argument('--out_dir', default='./result/hrnet_w48_proto_lr1x_1_val_ms', type=str,dest='test:out_dir', help='The test out directory of images.') +parser.add_argument('--save_prob', type=str2bool, nargs='?', default=False, dest='test:save_prob', help='Save the logits map during testing.') + +# *********** Params for env. ********** +parser.add_argument('--seed', default=304, type=int, help='manual seed') +parser.add_argument('--cudnn', type=str2bool, nargs='?', default=True, help='Use CUDNN.') + +# *********** Params for distributed training. ********** +parser.add_argument('--local_rank', type=int, default=-1, dest='local_rank', help='local rank of current process') +parser.add_argument('--distributed', action='store_true', dest='distributed', help='Use multi-processing training.') +parser.add_argument('--use_ground_truth', action='store_true', dest='use_ground_truth', help='Use ground truth for training.') + +parser.add_argument('REMAIN', nargs='*') + +args_parser = parser.parse_args() + +from models.protoseg_core.lib.utils.distributed import handle_distributed +handle_distributed(args_parser, os.path.expanduser(os.path.abspath(__file__))) + +cudnn.enabled = True +cudnn.benchmark = args_parser.cudnn + +# configer = Configer(args_parser=args_parser) + +class Configer(object): + + def __init__(self, args_parser=args_parser, configs=None, config_dict=None): + if config_dict is not None: + self.params_root = config_dict + + elif configs is not None: + if not os.path.exists(configs): + Log.error('Json Path:{} not exists!'.format(configs)) + exit(0) + + json_stream = open(configs, 'r') + self.params_root = json.load(json_stream) + json_stream.close() + + elif args_parser is not None: + self.args_dict = args_parser.__dict__ + self.params_root = None + + if not os.path.exists(args_parser.configs): + print('Json Path:{} not exists!'.format(args_parser.configs)) + exit(1) + + json_stream = open(args_parser.configs, 'r') + self.params_root = json.load(json_stream) + json_stream.close() + + for key, value in self.args_dict.items(): + if not self.exists(*key.split(':')): + self.add(key.split(':'), value) + elif value is not None: + self.update(key.split(':'), value) + + self._handle_remaining_args(args_parser.REMAIN) + + self.conditions = _ConditionHelper(self) + + + def _handle_remaining_args(self, remain): + + def _parse_value(x: str): + """ + We first try to parse `x` as python literal object. + If failed, we regard x as string. 
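+            Illustrative examples (not part of the original docstring):
+                '0.01'    -> 0.01      (parsed as a float literal)
+                '[1, 2]'  -> [1, 2]    (parsed as a list literal)
+                'hrnet48' -> 'hrnet48' (literal_eval fails, so the raw string is kept)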
+ """ + try: + return literal_eval(x) + except ValueError: + return x + + def _set_value(key, value): + """ + We directly operate on `params_root`. + """ + remained_parts = key.split('.') + consumed_parts = [] + + parent_dict = self.params_root + while len(remained_parts) > 1: + cur_key = remained_parts.pop(0) + consumed_parts.append(cur_key) + + if cur_key not in parent_dict: + parent_dict[cur_key] = dict() + Log.info('{} not exists, set as `dict()`.'.format('.'.join(consumed_parts))) + elif not isinstance(parent_dict[cur_key], dict): + Log.error( + 'Cannot set {child_name} on {root_name}, as {root_name} is `{root_type}`.'.format( + root_name='.'.join(consumed_parts), + child_name='.'.join(remained_parts), + root_type=type(parent_dict[cur_key]) + ) + ) + sys.exit(1) + + parent_dict = parent_dict[cur_key] + + cur_key = remained_parts.pop(0) + consumed_parts.append(cur_key) + + if cur_key.endswith('+'): + cur_key = cur_key[:-1] + target = parent_dict.get(cur_key) + + if not isinstance(target, list): + Log.error( + 'Cannot append to {key}, as its type is {target_type}.' + .format( + key=key[:-1], + target_type=type(target) + ) + ) + sys.exit(1) + + target.append(value) + Log.info( + 'Append {value} to {key}. Current: {target}.' + .format( + key=key[:-1], + value=value, + target=target, + ) + ) + return + + existing_value = parent_dict.get(cur_key) + if existing_value is not None: + Log.warn( + 'Override {key} using {value}. Previous value: {old_value}.' + .format( + key=key, + value=value, + old_value=existing_value + ) + ) + else: + Log.info( + 'Set {key} as {value}.'.format(key=key, value=value) + ) + parent_dict[cur_key] = value + + assert len(remain) % 2 == 0, remain + args = {} + for i in range(len(remain) // 2): + key, value = remain[2 * i: 2 * i + 2] + _set_value(key, _parse_value(value)) + + def clone(self): + from copy import deepcopy + return Configer(config_dict=deepcopy(self.params_root)) + + def _get_caller(self): + filename = os.path.basename(sys._getframe().f_back.f_back.f_code.co_filename) + lineno = sys._getframe().f_back.f_back.f_lineno + prefix = '{}, {}'.format(filename, lineno) + return prefix + + def get(self, *key): + if len(key) == 0: + return self.params_root + + elif len(key) == 1: + if key[0] in self.params_root: + return self.params_root[key[0]] + else: + Log.error('{} KeyError: {}.'.format(self._get_caller(), key)) + exit(1) + + elif len(key) == 2: + if key[0] in self.params_root and key[1] in self.params_root[key[0]]: + return self.params_root[key[0]][key[1]] + else: + Log.error('{} KeyError: {}.'.format(self._get_caller(), key)) + exit(1) + + else: + Log.error('{} KeyError: {}.'.format(self._get_caller(), key)) + exit(1) + + def exists(self, *key): + if len(key) == 1 and key[0] in self.params_root: + return True + + if len(key) == 2 and (key[0] in self.params_root and key[1] in self.params_root[key[0]]): + return True + + return False + + def add(self, key_tuple, value): + if self.exists(*key_tuple): + Log.error('{} Key: {} existed!!!'.format(self._get_caller(), key_tuple)) + exit(1) + + if len(key_tuple) == 1: + self.params_root[key_tuple[0]] = value + + elif len(key_tuple) == 2: + if key_tuple[0] not in self.params_root: + self.params_root[key_tuple[0]] = dict() + + self.params_root[key_tuple[0]][key_tuple[1]] = value + + else: + Log.error('{} KeyError: {}.'.format(self._get_caller(), key_tuple)) + exit(1) + + def update(self, key_tuple, value): + if not self.exists(*key_tuple): + Log.error('{} Key: {} not existed!!!'.format(self._get_caller(), key_tuple)) + 
exit(1) + + if len(key_tuple) == 1 and not isinstance(self.params_root[key_tuple[0]], dict): + self.params_root[key_tuple[0]] = value + + elif len(key_tuple) == 2: + self.params_root[key_tuple[0]][key_tuple[1]] = value + + else: + Log.error('{} Key: {} not existed!!!'.format(self._get_caller(), key_tuple)) + exit(1) + + def resume(self, config_dict): + self.params_root = config_dict + + def plus_one(self, *key): + if not self.exists(*key): + Log.error('{} Key: {} not existed!!!'.format(self._get_caller(), key)) + exit(1) + + if len(key) == 1 and not isinstance(self.params_root[key[0]], dict): + self.params_root[key[0]] += 1 + + elif len(key) == 2: + self.params_root[key[0]][key[1]] += 1 + + else: + Log.error('{} KeyError: {} !!!'.format(self._get_caller(), key)) + exit(1) + + def to_dict(self): + return self.params_root + + +class _ConditionHelper: + """Handy helper""" + + def __init__(self, configer): + self.configer = configer + + @property + def use_multi_dataset(self): + root_dirs = self.configer.get('data', 'data_dir') + return isinstance(root_dirs, (tuple, list)) and len(root_dirs) > 1 + + @property + def pred_sw_offset(self): + return self.configer.exists('data', 'pred_sw_offset') + + @property + def pred_dt_offset(self): + return self.configer.exists('data', 'pred_dt_offset') + + @property + def use_sw_offset(self): + return self.configer.exists('data', 'use_sw_offset') + + @property + def use_dt_offset(self): + return self.configer.exists('data', 'use_dt_offset') + + @property + def use_ground_truth(self): + return self.config_equals(('use_ground_truth',), True) + + @property + def pred_ml_dt_offset(self): + return self.configer.exists('data', 'pred_ml_dt_offset') + + def loss_contains(self, name): + return name in self.configer.get('loss', 'loss_type') + + def model_contains(self, name): + return name in self.configer.get('network', 'model_name') + + def config_equals(self, key, value): + if not self.configer.exists(*key): + return False + + return self.configer.get(*key) == value + + def config_exists(self, key): + return self.configer.exists(*key) + + def environ_exists(self, key): + return os.environ.get(key) is not None + + @property + def diverse_size(self): + return self.configer.get('val', 'data_transformer')['size_mode'] == 'diverse_size' + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--configs', default='../../configs/cls/flower/fc_vgg19_flower_cls.json', type=str, + dest='configs', help='The file of the hyper parameters.') + parser.add_argument('--phase', default='train', type=str, + dest='phase', help='The phase of Pose Estimator.') + parser.add_argument('--gpu', default=[0, 1, 2, 3], nargs='+', type=int, + dest='gpu', help='The gpu used.') + parser.add_argument('--resume', default=None, type=str, + dest='network:resume', help='The path of pretrained model.') + parser.add_argument('--train_dir', default=None, type=str, + dest='data:train_dir', help='The path of train data.') + + args_parser = parser.parse_args() + + configer = Configer(args_parser=args_parser) + + configer.add(('project_dir',), 'root') + configer.update(('project_dir',), 'root1') + + print (configer.get('project_dir')) + print (configer.get('network', 'resume')) + print (configer.get('logging', 'log_file')) + print(configer.get('data', 'train_dir')) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/logger.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/logger.py new file mode 100644 index 
0000000..7f0c8a4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/logger.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Logging tool implemented with the python Package logging. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import logging +import os +import sys + + +DEFAULT_LOGFILE_LEVEL = 'debug' +DEFAULT_STDOUT_LEVEL = 'info' # info +DEFAULT_LOG_FILE = './default.log' +DEFAULT_LOG_FORMAT = '%(asctime)s %(levelname)-7s %(message)s' + +LOG_LEVEL_DICT = { + 'debug': logging.DEBUG, + 'info': logging.INFO, + 'warning': logging.WARNING, + 'error': logging.ERROR, + 'critical': logging.CRITICAL +} + + +class Logger(object): + """ + Args: + Log level: CRITICAL>ERROR>WARNING>INFO>DEBUG. + Log file: The file that stores the logging info. + rewrite: Clear the log file. + log format: The format of log messages. + stdout level: The log level to print on the screen. + """ + logfile_level = None + log_file = None + log_format = None + rewrite = None + stdout_level = None + logger = None + + _caches = {} + + @staticmethod + def init(logfile_level=DEFAULT_LOGFILE_LEVEL, + log_file=DEFAULT_LOG_FILE, + log_format=DEFAULT_LOG_FORMAT, + rewrite=False, + stdout_level=None): + Logger.logfile_level = logfile_level + Logger.log_file = log_file + Logger.log_format = log_format + Logger.rewrite = rewrite + Logger.stdout_level = stdout_level + + Logger.logger = logging.getLogger() + Logger.logger.handlers = [] + fmt = logging.Formatter(Logger.log_format) + + if Logger.logfile_level is not None: + filemode = 'w' + if not Logger.rewrite: + filemode = 'a' + + dir_name = os.path.dirname(os.path.abspath(Logger.log_file)) + if not os.path.exists(dir_name): + os.makedirs(dir_name) + + if Logger.logfile_level not in LOG_LEVEL_DICT: + print('Invalid logging level: {}'.format(Logger.logfile_level)) + Logger.logfile_level = DEFAULT_LOGFILE_LEVEL + + Logger.logger.setLevel(LOG_LEVEL_DICT[Logger.logfile_level]) + + fh = logging.FileHandler(Logger.log_file, mode=filemode) + fh.setFormatter(fmt) + fh.setLevel(LOG_LEVEL_DICT[Logger.logfile_level]) + + Logger.logger.addHandler(fh) + + if stdout_level is not None: + if Logger.logfile_level is None: + Logger.logger.setLevel(LOG_LEVEL_DICT[Logger.stdout_level]) + + console = logging.StreamHandler() + if Logger.stdout_level not in LOG_LEVEL_DICT: + print('Invalid logging level: {}'.format(Logger.stdout_level)) + return + + console.setLevel(LOG_LEVEL_DICT[Logger.stdout_level]) + console.setFormatter(fmt) + Logger.logger.addHandler(console) + + @staticmethod + def set_log_file(file_path): + Logger.log_file = file_path + Logger.init(log_file=file_path) + + @staticmethod + def set_logfile_level(log_level): + if log_level not in LOG_LEVEL_DICT: + print('Invalid logging level: {}'.format(log_level)) + return + + Logger.init(logfile_level=log_level) + + @staticmethod + def clear_log_file(): + Logger.rewrite = True + Logger.init(rewrite=True) + + @staticmethod + def check_logger(): + if Logger.logger is None: + Logger.init(logfile_level=None, stdout_level=DEFAULT_STDOUT_LEVEL) + + @staticmethod + def set_stdout_level(log_level): + if log_level not in LOG_LEVEL_DICT: + print('Invalid logging level: {}'.format(log_level)) + return + + Logger.init(stdout_level=log_level) + + @staticmethod + def debug(message): + Logger.check_logger() + filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) + lineno = 
sys._getframe().f_back.f_lineno + prefix = '[{}, {}]'.format(filename,lineno) + Logger.logger.debug('{} {}'.format(prefix, message)) + + @staticmethod + def info(message): + Logger.check_logger() + filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) + lineno = sys._getframe().f_back.f_lineno + prefix = '[{}, {}]'.format(filename,lineno) + Logger.logger.info('{} {}'.format(prefix, message)) + + @staticmethod + def info_once(message): + Logger.check_logger() + filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) + lineno = sys._getframe().f_back.f_lineno + prefix = '[{}, {}]'.format(filename, lineno) + + if Logger._caches.get((prefix, message)) is not None: + return + + Logger.logger.info('{} {}'.format(prefix, message)) + Logger._caches[(prefix, message)] = True + + @staticmethod + def warn(message): + Logger.check_logger() + filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) + lineno = sys._getframe().f_back.f_lineno + prefix = '[{}, {}]'.format(filename,lineno) + Logger.logger.warn('{} {}'.format(prefix, message)) + + @staticmethod + def error(message): + Logger.check_logger() + filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) + lineno = sys._getframe().f_back.f_lineno + prefix = '[{}, {}]'.format(filename,lineno) + Logger.logger.error('{} {}'.format(prefix, message)) + + @staticmethod + def critical(message): + Logger.check_logger() + filename = os.path.basename(sys._getframe().f_back.f_code.co_filename) + lineno = sys._getframe().f_back.f_lineno + prefix = '[{}, {}]'.format(filename,lineno) + Logger.logger.critical('{} {}'.format(prefix, message)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--logfile_level', default="debug", type=str, + dest='logfile_level', help='To set the log level to files.') + parser.add_argument('--stdout_level', default=None, type=str, + dest='stdout_level', help='To set the level to print to screen.') + parser.add_argument('--log_file', default="./default.log", type=str, + dest='log_file', help='The path of log files.') + parser.add_argument('--log_format', default="%(asctime)s %(levelname)-7s %(message)s", + type=str, dest='log_format', help='The format of log messages.') + parser.add_argument('--rewrite', default=False, type=bool, + dest='rewrite', help='Clear the log files existed.') + + args = parser.parse_args() + Logger.init(logfile_level=args.logfile_level, stdout_level=args.stdout_level, + log_file=args.log_file, log_format=args.log_format, rewrite=args.rewrite) + + Logger.info("info test.") + Logger.debug("debug test.") + Logger.warn("warn test.") + Logger.error("error test.") + Logger.debug("debug test.") \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/timer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/timer.py new file mode 100644 index 0000000..ce4d8b8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/utils/tools/timer.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import time + + +class Timer(object): + """A simple timer.""" + def __init__(self): + self.reset() + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def 
toc(self, average=True): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + if average: + return self.average_time + else: + return self.diff + + def reset(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/attention_visualizer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/attention_visualizer.py new file mode 100644 index 0000000..14a2ca8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/attention_visualizer.py @@ -0,0 +1,279 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Modified from: https://github.com/AlexHex7/Non-local_pytorch +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +import matplotlib +matplotlib.use('Agg') + +import torch +import os +import sys +import pdb +import cv2 +import numpy as np +from torch import nn +from torch.nn import functional as F +import functools + +import matplotlib.pyplot as plt +from sklearn import svm, datasets +from sklearn.model_selection import train_test_split +from sklearn.metrics import confusion_matrix +from PIL import Image as PILImage + + +torch_ver = torch.__version__[:3] + +ignore_label = 255 +id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label, + 3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label, + 7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4, + 14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5, + 18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14, + 28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18} + +class_name_dict = {0:'road', 1:'sidewalk', 2:'building', 3:'wall', 4:'fence', 5:'pole', + 6:'trafficlight', 7:'trafficsign', 8:'vegetation', 9:'terrian', 10:'sky', + 11:'person', 12:'rider', 13:'car', 14:'truck', 15:'bus', 16:'train', + 17:'motorcycle', 18:'bicycle', 255: 'none'} + + +def get_palette(num_cls): + """ Returns the color map for visualizing the segmentation mask. 
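+    The result is a flat list [r0, g0, b0, r1, g1, b1, ...] that can be passed
+    directly to PIL's Image.putpalette (as done for img_label below).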
+ Args: + num_cls: Number of classes + Returns: + The color map + """ + + palette = [0] * (num_cls * 3) + palette[0:3] = (128, 64, 128) # 0: 'road' + palette[3:6] = (244, 35,232) # 1 'sidewalk' + palette[6:9] = (70, 70, 70) # 2''building' + palette[9:12] = (102,102,156) # 3 wall + palette[12:15] = (190,153,153) # 4 fence + palette[15:18] = (153,153,153) # 5 pole + palette[18:21] = (250,170, 30) # 6 'traffic light' + palette[21:24] = (220,220, 0) # 7 'traffic sign' + palette[24:27] = (107,142, 35) # 8 'vegetation' + palette[27:30] = (152,251,152) # 9 'terrain' + palette[30:33] = ( 70,130,180) # 10 sky + palette[33:36] = (220, 20, 60) # 11 person + palette[36:39] = (255, 0, 0) # 12 rider + palette[39:42] = (0, 0, 142) # 13 car + palette[42:45] = (0, 0, 70) # 14 truck + palette[45:48] = (0, 60,100) # 15 bus + palette[48:51] = (0, 80,100) # 16 train + palette[51:54] = (0, 0,230) # 17 'motorcycle' + palette[54:57] = (119, 11, 32) # 18 'bicycle' + palette[57:60] = (105, 105, 105) + return palette + +palette = get_palette(20) + +def id2trainId(label, id_to_trainid, reverse=False): + label_copy = label.copy() + if reverse: + for v, k in id_to_trainid.items(): + label_copy[label == k] = v + else: + for k, v in id_to_trainid.items(): + label_copy[label == k] = v + return label_copy + +def down_sample_target(target, scale): + row, col = target.shape + step = scale + r_target = target[0:row:step, :] + c_target = r_target[:, 0:col:step] + return c_target + + +def visualize_map(atten, shape, out_path): + atten_np = atten.cpu().data.numpy() # c x hw + (h, w) = shape + for row in range(2): + for col in range(9): + # plt.subplot(5,8,9+row*8+col) + # pdb.set_trace() + cm = atten_np[row*8+col] + cm = np.reshape(cm, (h, w)) + plt.tight_layout() + plt.imshow(cm, cmap='Blues', interpolation='nearest') + plt.axis('off') + plt.savefig(out_path+'regionmap_'+str(row*8+col)+'png', bbox_inches='tight', pad_inches = 0) + pdb.set_trace() + + +def Vis_A2_Atten(img_path, + label_path, + image, + label, + atten, + shape, + cmap=plt.cm.Blues, + index=1, + choice=1, + maps_count=32): + """ + This function prints and plots the attention weight matrix. + Input: + choice: 1 represents plotting the histogram of the weights' distribution + 2 represents plotting the attention weights' map + """ + atten_np = atten.cpu().data.numpy() # c x hw + (h, w) = shape + + if choice == 1: + # read image/ label from the given paths + image = cv2.imread(img_path[index], cv2.IMREAD_COLOR) #1024x2048x3 + image = image[:, :, -1] + image = cv2.resize(image, dsize=(h, w),interpolation=cv2.INTER_CUBIC) + label = cv2.imread(label_path[index], cv2.IMREAD_GRAYSCALE) #1024x2048 + label = id2trainId(label, id_to_trainid) + label = down_sample_target(label, 8) + + else: + # use the image crop directly. 
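+        # The crop is assumed to be a mean-subtracted CHW array (see the
+        # per-channel means below); it is moved to HWC and the means are added
+        # back before the uint8 conversion so it can be displayed.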
+ image = image.astype(np.float)[index] #3x1024x2048 + image = np.transpose(image, (1,2,0)) + mean = (102.9801, 115.9465, 122.7717) + image += mean + image = image.astype(np.uint8) + image = cv2.resize(image, dsize=(w, h),interpolation=cv2.INTER_CUBIC) + label = label.cpu().numpy().astype(np.uint8)[index] + label = down_sample_target(label, 8) + + img_label = PILImage.fromarray(label) + img_label.putpalette(palette) + + plt.tight_layout() + plt.figure(figsize=(48, 24)) + plt.axis('off') + + plt.subplot(5,8,1) + plt.imshow(image) + plt.axis('off') + + plt.subplot(5,8,2) + plt.imshow(img_label) + plt.axis('off') + + for row in range(4): + for col in range(8): + plt.subplot(5,8,9+row*8+col) + cm = atten_np[row*8+col] + cm = np.reshape(cm, (h, w)) + plt.imshow(cm, cmap='Blues', interpolation='nearest') + plt.axis('off') + plt.gca().set_title("Attention Map %d" %(row*8+col)) + + # plt.subplot(3,7,1) + # plt.imshow(image) + # plt.axis('off') + + # plt.subplot(3,7,2) + # plt.imshow(img_label) + # plt.axis('off') + + # for row in range(3): + # for col in range(7): + # if (row*7+col) == 0 or (row*7+col) == 1: + # continue + # plt.subplot(3,7,row*7+col+1) + # cm = atten_np[row*7+col-2] + # cm = np.reshape(cm, (h, w)) + # plt.imshow(cm, cmap='Blues', interpolation='nearest') + # plt.axis('off') + # plt.gca().set_title("Attention Map %d" %(row*7+col-2)) + + plt.show() + outpath='./object_context_vis/a2map_32/' + plt.savefig(outpath+'a2map_'+str(img_path[0][0:-3].split('/')[-1])+'png', bbox_inches='tight', pad_inches = 0) + print("image id: {}".format(img_path[0][0:-3].split('/')[-1])) + + +def Vis_FastOC_Atten(img_path, + label_path, + image, + label, + atten, + shape, + cmap=plt.cm.Blues, + index=1, + choice=1, + subplot=False): + """ + This function prints and plots the attention weight matrix. + Input: + choice: 1 represents plotting the histogram of the weights' distribution + 2 represents plotting the attention weights' map + """ + atten_np = atten.cpu().data.numpy() # c x hw + (h, w) = shape + + if choice == 1: + # read image/ label from the given paths + image = cv2.imread(img_path[index], cv2.IMREAD_COLOR) #1024x2048x3 + image = image[:, :, -1] + image = cv2.resize(image, dsize=(h, w),interpolation=cv2.INTER_CUBIC) + label = cv2.imread(label_path[index], cv2.IMREAD_GRAYSCALE) #1024x2048 + label = id2trainId(label, id_to_trainid) + label = down_sample_target(label, 8) + + else: + # use the image crop directly. 
+ image = image.astype(np.float)[index] #3x1024x2048 + image = np.transpose(image, (1,2,0)) + mean = (102.9801, 115.9465, 122.7717) + image += mean + image = image.astype(np.uint8) + image = cv2.resize(image, dsize=(w, h),interpolation=cv2.INTER_CUBIC) + label = label.cpu().numpy().astype(np.uint8)[index] + label = down_sample_target(label, 8) + + img_label = PILImage.fromarray(label) + img_label.putpalette(palette) + + plt.tight_layout() + plt.figure(figsize=(48, 24)) + plt.axis('off') + + if subplot: + plt.subplot(3,7,1) + plt.imshow(image) + plt.axis('off') + + plt.subplot(3,7,2) + plt.imshow(img_label) + plt.axis('off') + + for row in range(3): + for col in range(7): + if (row*7+col) == 0 or (row*7+col) == 1: + continue + if subplot: + plt.subplot(3,7,row*7+col+1) + cm = atten_np[row*7+col-2] + cm = np.reshape(cm, (h, w)) + plt.imshow(cm, cmap='Blues', interpolation='nearest') + plt.axis('off') + if not subplot: + plt.show() + outpath='./object_context_vis/fast_baseoc_map/' + plt.savefig(outpath+'fast_baseoc_map_'+str(img_path[0][0:-3].split('/')[-1])+'_'+str(row*7+col-2)+'.png', bbox_inches='tight', pad_inches = 0) + else: + plt.gca().set_title("Attention Map %d" %(row*7+col-2)) + + if subplot: + plt.show() + outpath='./object_context_vis/fast_baseoc_map/' + plt.savefig(outpath+'fast_baseoc_map_'+str(img_path[0][0:-3].split('/')[-1])+'png', bbox_inches='tight', pad_inches = 0) + print("image id: {}".format(img_path[0][0:-3].split('/')[-1])) + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/color150.mat b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/color150.mat new file mode 100644 index 0000000..c518b64 Binary files /dev/null and b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/color150.mat differ diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/color60.mat b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/color60.mat new file mode 100644 index 0000000..b328b5f Binary files /dev/null and b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/color60.mat differ diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/log_visualizer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/log_visualizer.py new file mode 100644 index 0000000..ebe584d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/log_visualizer.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Visualize the log files. 
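+# The regular expressions in vis_loss()/vis_acc() below assume log lines of
+# roughly this (illustrative) shape:
+#     ... Iteration:1000 Learning rate = 0.01 ...
+#     ... TrainLoss = 0.8321
+#     ... TestLoss = 0.9154
+#     ... Accuracy = 0.7612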
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re +import numpy as np +import matplotlib.pyplot as plt + + +class LogVisualizer(object): + + def vis_loss(self, log_file): + with open(log_file, 'r') as file_stream: + train_ax = list() + train_ay = list() + test_ax = list() + test_ay = list() + test_mark = 0 + + for line in file_stream.readlines(): + if 'Iteration' in line: + m = re.match(r'.*Iteration:(.*)Learning.*', line) + iter = int(m.group(1)) + train_ax.append(iter) + test_mark = iter + + elif 'TrainLoss' in line: + m = re.match(r'.*TrainLoss = (.*)', line) + loss = float(m.group(1)) + train_ay.append(loss) + + elif 'TestLoss' in line: + m = re.match(r'.*TestLoss = (.*)', line) + loss = float(m.group(1)) + test_ax.append(test_mark) + test_ay.append(loss) + + else: + continue + + train_ax = np.array(train_ax) + train_ay = np.array(train_ay) + test_ax = np.array(test_ax) + test_ay = np.array(test_ay) + plt.plot(train_ax, train_ay, label='Train Loss') + plt.plot(test_ax, test_ay, label='Test Loss') + plt.legend() + plt.show() + + def vis_acc(self, log_file): + with open(log_file, 'r') as file_stream: + acc_ax = list() + acc_ay = list() + test_mark = 0 + + for line in file_stream.readlines(): + if 'Iteration' in line and 'Train' in line: + m = re.match(r'.*Iteration:(.*)Learning.*', line) + iter = int(m.group(1)) + test_mark = iter + + if 'Accuracy' in line: + m = re.match(r'.*Accuracy = (.*)', line) + loss = float(m.group(1)) + acc_ax.append(test_mark) + acc_ay.append(loss) + + else: + continue + + plt.plot(acc_ax, acc_ay, label='Acc') + plt.legend() + plt.show() + + +if __name__ == "__main__": + #if len(sys.argv) != 2: + # print >> sys.stderr, "Need one args: log_file" + # exit(0) + + log_visualizer = LogVisualizer() + log_visualizer.vis_loss('../../log/cls/fc_flower_cls.log') diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/palette.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/palette.py new file mode 100644 index 0000000..c05ea1c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/palette.py @@ -0,0 +1,217 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import sys +import cv2 +import pdb +import numpy as np +import scipy.io as sio + + +def get_autonue21_colors(): + """ + https://github.com/AutoNUE/public-code/blob/master/helpers/anue_labels.py + """ + num_cls = 26 + colors = [0] * (num_cls * 3) + colors[0:3] = (128, 64, 128) + colors[3:6] = (250, 170, 160) + colors[6:9] = (244, 35, 232) + colors[9:12] = (230, 150, 140) + colors[12:15] = (220, 20, 60) + colors[15:18] = (255, 0, 0) + colors[18:21] = (0, 0, 230) + colors[21:24] = (119, 11, 32) + colors[24:27] = (255, 204, 54) + colors[27:30] = (0, 0, 142) + colors[30:33] = (0, 0, 70) + colors[33:36] = (0, 60, 100) + colors[36:39] = (0, 0, 90) + colors[39:42] = (220, 190, 40) + colors[42:45] = (102, 102, 156) + colors[45:48] = (190, 153, 153) + colors[48:51] = (190, 153, 153) + colors[51:54] = (180, 165, 180) + colors[54:57] = (174, 64, 67) + colors[57:60] = (220, 220, 0) + colors[60:63] = (250, 170, 30) + colors[63:66] = (153, 153, 153) + colors[66:69] = (169, 187, 214) + 
colors[69:72] = (70, 70, 70) + colors[72:75] = (150, 100, 100) + colors[75:78] = (107, 142, 35) + colors[78:81] = (70, 130, 180) + return colors + + +# Sky = [128,128,128] +# Building = [128,0,0] +# Pole = [192,192,128] +# Road = [128,64,128] +# Pavement = [60,40,222] +# Tree = [128,128,0] +# SignSymbol = [192,128,128] +# Fence = [64,64,128] +# Car = [64,0,128] +# Pedestrian = [64,64,0] +# Bicyclist = [0,128,192] +# Unlabelled = [0,0,0] +def get_camvid_colors(): + """ Returns the color map for visualizing the segmentation mask. + Args: + num_cls: Number of classes + Returns: + The color map + """ + num_cls = 12 + colors = [0] * (num_cls * 3) + colors[0:3] = (128, 128, 128) + colors[3:6] = (128, 0, 0) + colors[6:9] = (192, 192, 128) + colors[9:12] = (128, 64, 128) + colors[12:15] = (60, 40, 222) + colors[15:18] = (128, 128, 0) + colors[18:21] = (192, 128, 128) + colors[21:24] = (64, 64, 128) + colors[24:27] = (64, 0, 128) + colors[27:30] = (64, 64, 0) + colors[30:33] = (0, 128, 192) + colors[33:36] = (0, 0, 0) + return colors + + +def get_cityscapes_colors(): + """ Returns the color map for visualizing the segmentation mask. + Args: + num_cls: Number of classes + Returns: + The color map + """ + num_cls = 20 + colors = [0] * (num_cls * 3) + colors[0:3] = (128, 64, 128) # 0: 'road' + colors[3:6] = (244, 35, 232) # 1 'sidewalk' + colors[6:9] = (70, 70, 70) # 2''building' + colors[9:12] = (102, 102, 156) # 3 wall + colors[12:15] = (190, 153, 153) # 4 fence + colors[15:18] = (153, 153, 153) # 5 pole + colors[18:21] = (250, 170, 30) # 6 'traffic light' + colors[21:24] = (220, 220, 0) # 7 'traffic sign' + colors[24:27] = (107, 142, 35) # 8 'vegetation' + colors[27:30] = (152, 251, 152) # 9 'terrain' + colors[30:33] = (70, 130, 180) # 10 sky + colors[33:36] = (220, 20, 60) # 11 person + colors[36:39] = (255, 0, 0) # 12 rider + colors[39:42] = (0, 0, 142) # 13 car + colors[42:45] = (0, 0, 70) # 14 truck + colors[45:48] = (0, 60, 100) # 15 bus + colors[48:51] = (0, 80, 100) # 16 train + colors[51:54] = (0, 0, 230) # 17 'motorcycle' + colors[54:57] = (119, 11, 32) # 18 'bicycle' + colors[57:60] = (105, 105, 105) + return colors + + +def get_ade_colors(): + colors = sio.loadmat(os.path.dirname(os.path.abspath(__file__)) + '/color150.mat')['colors'] + colors = colors[:, ::-1, ] + colors = np.array(colors).astype(int).tolist() + colors.insert(0, [0, 0, 0]) + colors = sum(colors, []) + return colors + + +def get_pascal_context_colors(): + colors = sio.loadmat(os.path.dirname(os.path.abspath(__file__)) + '/color60.mat')['color60'] + colors = colors[:, ::-1, ] + colors = np.array(colors).astype(int).tolist() + colors = sum(colors, []) + return colors + + +def get_lip_colors(): + """ Returns the color map for visualizing the segmentation mask. + Args: + num_cls: Number of classes + Returns: + The color map + """ + n = 20 + colors = [0] * (n * 3) + for j in range(0, n): + lab = j + colors[j * 3 + 0] = 0 + colors[j * 3 + 1] = 0 + colors[j * 3 + 2] = 0 + i = 0 + while lab: + colors[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) + colors[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) + colors[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) + i += 1 + lab >>= 3 + return colors + + +def get_cocostuff_colors(): + """ Returns the color map for visualizing the segmentation mask. 
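+    Colors follow the PASCAL-VOC-style bitwise colormap: the bits of each label
+    index are spread over the high bits of the R, G and B channels (the same
+    construction as get_lip_colors above).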
+ Args: + num_cls: Number of classes + Returns: + The color map + """ + n = 171 + colors = [0] * (n * 3) + for j in range(0, n): + lab = j + colors[j * 3 + 0] = 0 + colors[j * 3 + 1] = 0 + colors[j * 3 + 2] = 0 + i = 0 + while lab: + colors[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) + colors[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) + colors[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) + i += 1 + lab >>= 3 + return colors + + +def get_pascal_voc_colors(): + """Load the mapping that associates pascal classes with label colors + Returns: + np.ndarray with dimensions (21, 3) + """ + return np.asarray( + [ + [0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128], + ] + ) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/seg_parser.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/seg_parser.py new file mode 100644 index 0000000..6365784 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/seg_parser.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Parse label file of segmentation. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os + +import cv2 +import numpy as np +from PIL import Image + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.utils.tools.configer import Configer + + +class SegParser(object): + def __init__(self, configer): + self.configer = configer + + def parse_img_seg(self, image_file, label_file): + if image_file is None or not os.path.exists(image_file): + Log.error('Image file: {} not existed.'.format(image_file)) + return + + if label_file is None or not os.path.exists(label_file): + Log.error('Label file: {} not existed.'.format(label_file)) + return + + image_canvas = cv2.imread(image_file) # B, G, R order. + + mask_canvas = self.colorize(np.array(Image.open(label_file).convert('P'))) + image_canvas = cv2.addWeighted(image_canvas, 0.6, mask_canvas, 0.4, 0) + + cv2.imshow('main', image_canvas) + cv2.waitKey() + + def parse_dir_seg(self, image_dir, label_dir): + if image_dir is None or not os.path.exists(image_dir): + Log.error('Image Dir: {} not existed.'.format(image_dir)) + return + + if label_dir is None or not os.path.exists(label_dir): + Log.error('Label Dir: {} not existed.'.format(label_dir)) + return + + for image_file in os.listdir(image_dir): + shotname, extension = os.path.splitext(image_file) + Log.info(image_file) + image_canvas = cv2.imread(os.path.join(image_dir, image_file)) # B, G, R order. 
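+            # The label is looked up by the image's basename with a .png
+            # extension, so image and label directories must share file stems.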
+ label_file = os.path.join(label_dir, '{}.png'.format(shotname)) + mask_canvas = self.colorize(np.array(Image.open(label_file).convert('P'))) + image_canvas = cv2.addWeighted(image_canvas, 0.6, mask_canvas, 0.4, 0) + + cv2.imshow('main', image_canvas) + cv2.waitKey() + + def colorize(self, label_map, image_canvas=None): + height, width = label_map.shape + color_dst = np.zeros((height, width, 3), dtype=np.uint8) + color_list = self.configer.get('details', 'color_list') + for i in range(self.configer.get('data', 'num_classes')): + color_dst[label_map == i] = color_list[i % len(color_list)] + + color_img_rgb = np.array(color_dst, dtype=np.uint8) + color_img_bgr = cv2.cvtColor(color_img_rgb, cv2.COLOR_RGB2BGR) + + if image_canvas is not None: + image_canvas = cv2.addWeighted(image_canvas, 0.6, color_img_bgr, 0.4, 0) + return image_canvas + + else: + return color_img_bgr + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--configs', default=None, type=str, + dest='configs', help='The file of the hyper parameters.') + parser.add_argument('--image_file', default=None, type=str, + dest='image_file', help='The image file of Seg Parser.') + parser.add_argument('--label_file', default=None, type=str, + dest='label_file', help='The label file of Seg Parser.') + parser.add_argument('--image_dir', default=None, type=str, + dest='image_dir', help='The image directory of Seg Parser.') + parser.add_argument('--label_dir', default=None, type=str, + dest='label_dir', help='The label directory of Seg Parser.') + + args_parser = parser.parse_args() + + seg_parser = SegParser(Configer(configs=args_parser.configs)) + seg_parser.parse_img_seg(args_parser.image_file, args_parser.label_file) + seg_parser.parse_dir_seg(args_parser.image_dir, args_parser.label_dir) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/seg_visualizer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/seg_visualizer.py new file mode 100644 index 0000000..609538d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/seg_visualizer.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Visualizer for segmentation. 
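+# A minimal usage sketch (assumes a configer providing 'project_dir',
+# 'data'/'num_classes', 'details'/'color_list' and the 'normalize' entries):
+#
+#     visualizer = SegVisualizer(configer)
+#     # preds / targets: (N, H, W) class-index maps; ori_img_in: (N, 3, H, W) tensor
+#     visualizer.vis_fn(preds, targets, ori_img_in=images, name='val', sub_dir='fn')
+#     visualizer.vis_fp(preds, targets, ori_img_in=images, name='val', sub_dir='fp')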
+ + +import os + +import cv2 +import numpy as np + +from models.protoseg_core.lib.datasets.tools.transforms import DeNormalize +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +SEG_DIR = 'vis/results/seg' + + +class SegVisualizer(object): + + def __init__(self, configer=None): + self.configer = configer + + def vis_fn(self, preds, targets, ori_img_in=None, name='default', sub_dir='fn'): + base_dir = os.path.join(self.configer.get('project_dir'), SEG_DIR, sub_dir) + if not os.path.exists(base_dir): + Log.error('Dir:{} not exists!'.format(base_dir)) + os.makedirs(base_dir) + + if not isinstance(preds, np.ndarray): + if len(preds.size()) > 3: + Log.error('Preds size is not valid.') + exit(1) + + if len(preds.size()) == 3: + preds = preds.clone().data.cpu().numpy() + + if len(preds.size()) == 2: + preds = preds.unsqueeze(0).data.cpu().numpy() + + else: + if len(preds.shape) > 3: + Log.error('Preds size is not valid.') + exit(1) + + if len(preds.shape) == 2: + preds = preds.unsqueeze(0) + + if not isinstance(targets, np.ndarray): + + if len(targets.size()) == 3: + targets = targets.clone().data.cpu().numpy() + + if len(targets.size()) == 2: + targets = targets.unsqueeze(0).data.cpu().numpy() + + else: + if len(targets.shape) == 2: + targets = targets.unsqueeze(0) + + if ori_img_in is not None: + if not isinstance(ori_img_in, np.ndarray): + if len(ori_img_in.size()) < 3: + Log.error('Image size is not valid.') + exit(1) + + if len(ori_img_in.size()) == 4: + ori_img_in = ori_img_in.data.cpu() + + if len(ori_img_in.size()) == 3: + ori_img_in = ori_img_in.unsqueeze(0).data.cpu() + + ori_img = ori_img_in.clone() + for i in range(ori_img_in.size(0)): + ori_img[i] = DeNormalize(div_value=self.configer.get('normalize', 'div_value'), + mean=self.configer.get('normalize', 'mean'), + std=self.configer.get('normalize', 'std'))(ori_img_in.clone()) + + ori_img = ori_img.numpy().transpose(2, 3, 1).astype(np.uint8) + + else: + if len(ori_img_in.shape) == 3: + ori_img_in = ori_img_in.unsqueeze(0) + + ori_img = ori_img_in.copy() + + for img_id in range(preds.shape[0]): + label = targets[img_id] + pred = preds[img_id] + result = np.zeros(shape=(pred.shape[0], pred.shape[1], 3), dtype=np.uint8) + + for i in range(self.configer.get('data', 'num_classes')): + mask0 = np.zeros_like(label, dtype=np.uint8) + mask1 = np.zeros_like(label, dtype=np.uint8) + mask0[label[:] == i] += 1 + mask0[pred[:] == i] += 1 + mask1[pred[:] == i] += 1 + result[mask0[:] == 1] = self.configer.get('details', 'color_list')[i] + result[mask1[:] == 1] = (0, 0, 0) + + image_result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) + if ori_img_in is not None: + image_result = cv2.addWeighted(ori_img[i], 0.6, image_result, 0.4, 0) + + cv2.imwrite(os.path.join(base_dir, '{}_{}.jpg'.format(name, img_id)), image_result) + + def vis_fp(self, preds, targets, ori_img_in=None, name='default', sub_dir='fp'): + base_dir = os.path.join(self.configer.get('project_dir'), SEG_DIR, sub_dir) + if not os.path.exists(base_dir): + Log.error('Dir:{} not exists!'.format(base_dir)) + os.makedirs(base_dir) + + if not isinstance(preds, np.ndarray): + if len(preds.size()) > 3: + Log.error('Preds size is not valid.') + exit(1) + + if len(preds.size()) == 3: + preds = preds.clone().data.cpu().numpy() + + if len(preds.size()) == 2: + preds = preds.unsqueeze(0).data.cpu().numpy() + + else: + if len(preds.shape) > 3: + Log.error('Preds size is not valid.') + exit(1) + + if len(preds.shape) == 2: + preds = preds.unsqueeze(0) + + if not isinstance(targets, 
np.ndarray): + + if len(targets.size()) == 3: + targets = targets.clone().data.cpu().numpy() + + if len(targets.size()) == 2: + targets = targets.unsqueeze(0).data.cpu().numpy() + + else: + if len(targets.shape) == 2: + targets = targets.unsqueeze(0) + + if ori_img_in is not None: + if not isinstance(ori_img_in, np.ndarray): + if len(ori_img_in.size()) < 3: + Log.error('Image size is not valid.') + exit(1) + + if len(ori_img_in.size()) == 4: + ori_img_in = ori_img_in.data.cpu() + + if len(ori_img_in.size()) == 3: + ori_img_in = ori_img_in.unsqueeze(0).data.cpu() + + ori_img = ori_img_in.clone() + for i in range(ori_img_in.size(0)): + ori_img[i] = DeNormalize(div_value=self.configer.get('normalize', 'div_value'), + mean=self.configer.get('normalize', 'mean'), + std=self.configer.get('normalize', 'std'))(ori_img_in.clone()) + + ori_img = ori_img.numpy().transpose(2, 3, 1).astype(np.uint8) + + else: + if len(ori_img_in.shape) == 3: + ori_img_in = ori_img_in.unsqueeze(0) + + ori_img = ori_img_in.copy() + + for img_id in range(preds.shape[0]): + label = targets[img_id] + pred = preds[img_id] + result = np.zeros(shape=(pred.shape[0], pred.shape[1], 3), dtype=np.uint8) + + for i in range(self.configer.get('data', 'num_classes')): + mask0 = np.zeros_like(label, dtype=np.uint8) + mask1 = np.zeros_like(label, dtype=np.uint8) + mask0[label[:] == i] += 1 + mask0[pred[:] == i] += 1 + mask1[label[:] == i] += 1 + result[mask0[:] == 1] = self.configer.get('details', 'color_list')[i] + result[mask1[:] == 1] = (0, 0, 0) + + image_result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) + if ori_img_in is not None: + image_result = cv2.addWeighted(ori_img[i], 0.6, image_result, 0.4, 0) + + cv2.imwrite(os.path.join(base_dir, '{}_{}.jpg'.format(name, img_id)), image_result) + + def error_map(self, im, pred, gt): + canvas = im.copy() + canvas[np.where((gt - pred != [0, 0, 0]).all(axis=2))] = [0, 0, 0] + pred[np.where((gt - pred == [0, 0, 0]).all(axis=2))] = [0, 0, 0] + canvas = cv2.addWeighted(canvas, 1.0, pred, 1.0, 0) + # canvas = cv2.addWeighted(im, 0.3, canvas, 0.7, 0) + canvas[np.where((gt == [0, 0, 0]).all(axis=2))] = [0, 0, 0] + return canvas diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/tensor_visualizer.py b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/tensor_visualizer.py new file mode 100644 index 0000000..02d1680 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/lib/vis/tensor_visualizer.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Visualize the tensor of the computer vision. 
+ + +import os + +import cv2 +import numpy as np + +from models.protoseg_core.lib.datasets.tools.transforms import DeNormalize +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + +TENSOR_DIR = 'vis/results/tensor' + + +class TensorVisualizer(object): + + def __init__(self, configer): + self.configer = configer + + def vis_tensor(self, tensor, name='default', sub_dir=''): + base_dir = os.path.join(self.configer.get('project_dir'), TENSOR_DIR, sub_dir) + + if not isinstance(tensor, np.ndarray): + if len(tensor.size()) != 3: + Log.error('Tensor size is not valid.') + exit(1) + + tensor = tensor.data.cpu().numpy().transpose(1, 2, 0) + + if not os.path.exists(base_dir): + Log.error('Dir:{} not exists!'.format(base_dir)) + os.makedirs(base_dir) + + tensor_img = cv2.resize(tensor, tuple(self.configer.get('data', 'input_size'))) + cv2.imwrite(tensor_img, os.path.join(base_dir, '{}.jpg'.format(name))) + + def vis_img(self, image_in, name='default', sub_dir='images'): + base_dir = os.path.join(self.configer.get('project_dir'), TENSOR_DIR, sub_dir) + + if not isinstance(image_in, np.ndarray): + if len(image_in.size()) != 3: + Log.error('Image size is not valid.') + exit(1) + + image = DeNormalize(div_value=self.configer.get('normalize', 'div_value'), + mean=self.configer.get('normalize', 'mean'), + std=self.configer.get('normalize', 'std'))(image_in.clone()) + image = image.data.cpu().numpy().transpose(1, 2, 0) + else: + image = image_in.copy() + + if not os.path.exists(base_dir): + Log.error('Dir:{} not exists!'.format(base_dir)) + os.makedirs(base_dir) + + img = cv2.resize(image, tuple(self.configer.get('data', 'input_size'))) + cv2.imwrite(img, os.path.join(base_dir, '{}.jpg'.format(name))) + + +if __name__ == "__main__": + # Test the visualizer. 
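+    # For reference: vis_tensor() expects a 3-channel CHW torch tensor (or an
+    # already-HWC numpy array), while vis_img() additionally de-normalizes the
+    # tensor with the configured 'normalize' mean/std/div_value; both resize to
+    # 'data'/'input_size' before saving.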
+ pass \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train.sh new file mode 100644 index 0000000..c1486bd --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash + +# $1 code path +# $2 dataset path +# $3 train or test +# $4 log_suffix + +PYTHON="/opt/conda/bin/python" +${PYTHON} -c "import torch; print(torch.__version__)" + +${PYTHON} -m pip install yacs + +export PYTHONPATH=$1:$PYTHONPATH + +DATA_DIR="$2/face_parse/CelebAMask-HQ" +SAVE_DIR="$2/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_baseline_"$4 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=200000 + +if [ "$3"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$3"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$3"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$3"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir 
${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$3"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_200k.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_200k.sh new file mode 100644 index 0000000..1eb7c63 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_200k.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash + +# $1 code path +# $2 dataset path +# $3 train or test +# $4 log_suffix + +PYTHON="/opt/conda/bin/python" +${PYTHON} -c "import torch; print(torch.__version__)" + +${PYTHON} -m pip install yacs + +export PYTHONPATH=$1:$PYTHONPATH + +DATA_DIR="$2/face_parse/CelebAMask-HQ" +SAVE_DIR="$2/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$4 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=150000 + +if [ "$3"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --max_iters 200000 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$3"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$3"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$3"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + 
--test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$3"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr1e2.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr1e2.sh new file mode 100644 index 0000000..b523c35 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr1e2.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +# $1 code path +# $2 dataset path +# $3 train or test +# $4 log_suffix + +PYTHON="/opt/conda/bin/python" +${PYTHON} -c "import torch; print(torch.__version__)" + +${PYTHON} -m pip install yacs + +export PYTHONPATH=$1:$PYTHONPATH + +DATA_DIR="$2/face_parse/CelebAMask-HQ" +SAVE_DIR="$2/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_lr1e2_"$4 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=200000 + +if [ "$3"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --base_lr 0.01 \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$3"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$3"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$3"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume 
./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$3"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr1e3.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr1e3.sh new file mode 100644 index 0000000..4542269 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr1e3.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +# $1 code path +# $2 dataset path +# $3 train or test +# $4 log_suffix + +PYTHON="/opt/conda/bin/python" +${PYTHON} -c "import torch; print(torch.__version__)" + +${PYTHON} -m pip install yacs + +export PYTHONPATH=$1:$PYTHONPATH + +DATA_DIR="$2/face_parse/CelebAMask-HQ" +SAVE_DIR="$2/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_lr1e3_"$4 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=200000 + +if [ "$3"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --base_lr 0.001 \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$3"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$3"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$3"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase 
test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$3"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr2e2.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr2e2.sh new file mode 100644 index 0000000..2107d04 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr2e2.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +# $1 code path +# $2 dataset path +# $3 train or test +# $4 log_suffix + +PYTHON="/opt/conda/bin/python" +${PYTHON} -c "import torch; print(torch.__version__)" + +${PYTHON} -m pip install yacs + +export PYTHONPATH=$1:$PYTHONPATH + +DATA_DIR="$2/face_parse/CelebAMask-HQ" +SAVE_DIR="$2/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_lr2e2_"$4 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=200000 + +if [ "$3"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --base_lr 0.02 \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$3"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$3"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$3"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name 
${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$3"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr5e3.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr5e3.sh new file mode 100644 index 0000000..0f057e8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/aml_run_h_48_d_4_ocr_train_lr5e3.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +# $1 code path +# $2 dataset path +# $3 train or test +# $4 log_suffix + +PYTHON="/opt/conda/bin/python" +${PYTHON} -c "import torch; print(torch.__version__)" + +${PYTHON} -m pip install yacs + +export PYTHONPATH=$1:$PYTHONPATH + +DATA_DIR="$2/face_parse/CelebAMask-HQ" +SAVE_DIR="$2/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_lr5e3_"$4 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=200000 + +if [ "$3"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --base_lr 0.005 \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$3"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$3"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$3"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name 
${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$3"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/run_h_48_d_4_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/run_h_48_d_4_ocr_train.sh new file mode 100644 index 0000000..0f02d9a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/run_h_48_d_4_ocr_train.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/face_parse/CelebAMask-HQ" +SAVE_DIR="${DATA_ROOT}/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=150000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone 
${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/run_h_48_d_4_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/run_h_48_d_4_train.sh new file mode 100644 index 0000000..faf1123 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/celeba/run_h_48_d_4_train.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/face_parse/CelebAMask-HQ" +SAVE_DIR="${DATA_ROOT}/seg_result/celeba/" +BACKBONE="hrnet48" + +CONFIGS="configs/celeba/H_48_D_4.json" +CONFIGS_TEST="configs/celeba/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/celeba/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=150000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone 
${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/job_run_r_101_d_8_deeplabv3.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/job_run_r_101_d_8_deeplabv3.sh new file mode 100644 index 0000000..d001049 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/job_run_r_101_d_8_deeplabv3.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 24:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=GeForceRTX2080Ti]" +#BSUB -J "deeplab_v3" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + + +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes/ +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/resnet101-imagenet.pth ${TMPDIR}/resnet101-imagenet.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_r_101_d_8_deeplabv3_train.sh train 'deeplab_v3' ${TMPDIR} ${SCRATCH_DIR} 'ss' diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/job_run_r_101_d_8_deeplabv3_contrast.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/job_run_r_101_d_8_deeplabv3_contrast.sh new file mode 100644 index 0000000..030e6a3 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/job_run_r_101_d_8_deeplabv3_contrast.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 24:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=GeForceRTX2080Ti]" +#BSUB -J "deeplab_v3_contrast" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + + +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes/ +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/resnet101-imagenet.pth ${TMPDIR}/resnet101-imagenet.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_r_101_d_8_deeplabv3_contrast_train.sh train 'deeplab_v3_contrast' ${TMPDIR} ${SCRATCH_DIR} 'ss' diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/run_r_101_d_8_deeplabv3_contrast_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/run_r_101_d_8_deeplabv3_contrast_train.sh new file mode 100644 index 0000000..bfe4df8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/run_r_101_d_8_deeplabv3_contrast_train.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/seg_results/cityscapes" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="deeplab_v3_contrast" +LOSS_TYPE="contrast_auxce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes/" 
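+# Positional arguments, as consumed by this script ($1 phase, $2 suffix, $3/$4 roots,
+# $5 test mode); see job_run_r_101_d_8_deeplabv3_contrast.sh for a concrete invocation:
+#   $1 phase: train | resume | val | segfix | test
+#   $2 suffix appended to the checkpoint/log name
+#   $3 data root, $4 scratch root, $5 'ss' or 'ms' when testing
+# Example: sh run_r_101_d_8_deeplabv3_contrast_train.sh train 'deeplab_v3_contrast' ${TMPDIR} ${SCRATCH_DIR} 'ss'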
+CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/resnet101-imagenet.pth" +MAX_ITERS=40000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + --train_batch_size ${BATCH_SIZE} \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered y \ + --loss_balance n \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val --data_dir ${DATA_DIR} + + + cd lib/metrics + python -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." 
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/run_r_101_d_8_deeplabv3_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/run_r_101_d_8_deeplabv3_train.sh new file mode 100644 index 0000000..47d8164 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/deeplab/run_r_101_d_8_deeplabv3_train.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/seg_results/cityscapes" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="deeplab_v3" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes/" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/resnet101-imagenet.pth" +MAX_ITERS=40000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + --train_batch_size ${BATCH_SIZE} \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered y \ + --loss_balance n \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val --data_dir ${DATA_DIR} + + + cd lib/metrics + python -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py --configs ${CONFIGS} --drop_last y 
--data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/fcn/run_r_101_d_8_fcn_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/fcn/run_r_101_d_8_fcn_train.sh new file mode 100644 index 0000000..8529dd9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/fcn/run_r_101_d_8_fcn_train.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="fcnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; 
then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/fcn/run_r_101_d_8_fcn_wo_dsn_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/fcn/run_r_101_d_8_fcn_wo_dsn_train.sh new file mode 100644 index 0000000..49b00bc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/fcn/run_r_101_d_8_fcn_wo_dsn_train.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="fcnet_wo_dsn" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir 
${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4.sh new file mode 100644 index 0000000..d410ccc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 24:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=5000]" +#BSUB -R "select[gpu_mtotal0>=10230]" +#BSUB -J "hrnet_ce_80k" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +# activate env +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/hrnetv2_w48_imagenet_pretrained.pth ${TMPDIR}/hrnetv2_w48_imagenet_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4.sh train 'hrnet_ce_80k' ${TMPDIR} ${SCRATCH_DIR} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_contrast.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_contrast.sh new file mode 100644 index 0000000..e49087d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_contrast.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 24:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=GeForceRTX2080Ti]" +#BSUB -J "hrnet_contrast_dim512" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +# activate env +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/hrnetv2_w48_imagenet_pretrained.pth ${TMPDIR}/hrnetv2_w48_imagenet_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4_contrast.sh train 'hrnet_contrast_dim512' ${TMPDIR} ${SCRATCH_DIR} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_contrast_mem.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_contrast_mem.sh new file mode 100644 index 0000000..0d72d99 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_contrast_mem.sh @@ -0,0 +1,28 @@ +#!/bin/bash +#BSUB -n 16 
+#BSUB -W 72:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=TITANRTX]" +#BSUB -J "citymemimagenet" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +## activate env +#source /cluster/home/tiazhou/miniconda3/etc/profile.d/conda.sh +#conda activate pytorch-1.7.1 + +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/hrnetv2_w48_imagenet_pretrained.pth ${TMPDIR}/hrnetv2_w48_imagenet_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4_contrast_mem.sh train 'hrnet_contrast_mem' ${TMPDIR} ${SCRATCH_DIR} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_ocr.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_ocr.sh new file mode 100644 index 0000000..be043c2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_ocr.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 72:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=GeForceRTX2080Ti]" +#BSUB -J "ocr_40k" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/hrnetv2_w48_imagenet_pretrained.pth ${TMPDIR}/hrnetv2_w48_imagenet_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4_ocr.sh train 'ocr_40k' ${TMPDIR} ${SCRATCH_DIR} 'ss' diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_ocr_contrast.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_ocr_contrast.sh new file mode 100644 index 0000000..fdcb647 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_ocr_contrast.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -n 8 +#BSUB -W 24:00 +#BSUB -R "rusage[mem=2000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=TITANRTX]" +#BSUB -J "ocr_contrast_40k" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/hrnetv2_w48_imagenet_pretrained.pth ${TMPDIR}/hrnetv2_w48_imagenet_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4_ocr_contrast.sh train 'ocr_contrast_40k' ${TMPDIR} ${SCRATCH_DIR} 'ss' diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_proto.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_proto.sh new file mode 100644 index 0000000..aa05fe7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/job_run_h_48_d_4_proto.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 48:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=5000]" +#BSUB -R "select[gpu_mtotal0>=10230]" 
+#BSUB -J "hrnet_proto_80k" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +# activate env +source ../../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/CityscapesZIP/openseg.tar ${TMPDIR}/ +mkdir ${TMPDIR}/Cityscapes +tar -xf ${TMPDIR}/openseg.tar -C ${TMPDIR}/Cityscapes + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/hrnetv2_w48_imagenet_pretrained.pth ${TMPDIR}/hrnetv2_w48_imagenet_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4_proto.sh train 'hrnet_proto_80k' ${TMPDIR} ${SCRATCH_DIR} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4.sh new file mode 100644 index 0000000..742cfaf --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/Cityscapes/seg_results/" +BACKBONE="hrnet48" + +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes" +CHECKPOINTS_NAME="${MODEL_NAME}_lr1x_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=80000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --resume_continue y \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + python -m lib.metrics.cityscapes_evaluator --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + 
DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_contrast.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_contrast.sh new file mode 100644 index 0000000..8667f60 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_contrast.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/Cityscapes/seg_results/" +BACKBONE="hrnet48" + +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_contrast" +LOSS_TYPE="contrast_ce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes" +CHECKPOINTS_NAME="${MODEL_NAME}_lr1x_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=40000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --checkpoints_root 
${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --resume_continue y \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + python -m lib.metrics.cityscapes_evaluator --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." 
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_contrast_mem.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_contrast_mem.sh new file mode 100644 index 0000000..ca95516 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_contrast_mem.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/seg_results/cityscapes" +BACKBONE="hrnet48" + +CONFIGS="configs/cityscapes/H_48_D_4_MEM.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_mem" +LOSS_TYPE="mem_contrast_ce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes/" +CHECKPOINTS_NAME="${MODEL_NAME}_paddle_lr2x_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=40000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --train_batch_size ${BATCH_SIZE} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main_contrastive.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + python -m lib.metrics.cityscapes_evaluator --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + python -u 
main_contrastive.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR}\ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main_contrastive.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR}\ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr.sh new file mode 100644 index 0000000..0862ad4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/Cityscapes/seg_results/" +BACKBONE="hrnet48" + +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes/" +CHECKPOINTS_NAME="${MODEL_NAME}_paddle_lr2x_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=40000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + python -m 
lib.metrics.cityscapes_evaluator --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$5"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b.sh new file mode 100644 index 0000000..ffb1ec2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="hrnet48" + +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr_b" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=80000 +BATCHSIZE=8 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --include_val y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." 
+fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_coarse_ohem.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_coarse_ohem.sh new file mode 100644 index 0000000..15e9b0f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_coarse_ohem.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MAX_ITERS=50000 +BATCH_SIZE=16 + +MODEL_NAME="hrnet_w48_ocr_b" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${BN_TYPE}_${BATCH_SIZE}_${MAX_ITERS}_trainval_coarse_mapillary_pretrain_freeze_bn_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +# PRETRAINED_MODEL="./pretrained_model/hrnet_w48_ocr_b_mapillary_bs16_500000_1024x1024_lr0.01_1_latest.pth" +PRETRAINED_MODEL="./checkpoints/cityscapes/hrnet_w48_ocr_b_hrnet48__8_120000_trainval_ohem_mapillary_miou_508_1_latest.pth" # miou=83.63 on test. + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --train_batch_size ${BATCH_SIZE} \ + --only_coarse y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ${PRETRAINED_MODEL} \ + --resume_strict False \ + --resume_eval_train False \ + --resume_eval_val False \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --base_lr 0.0001 \ + --test_interval 2000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} --only_coarse y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.0001 \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 
0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms_6x_depth + fi + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_coarse_trainval_ohem.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_coarse_trainval_ohem.sh new file mode 100644 index 0000000..3944959 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_coarse_trainval_ohem.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MAX_ITERS=20000 +BATCH_SIZE=8 + +MODEL_NAME="hrnet_w48_ocr_b" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${BATCH_SIZE}_${MAX_ITERS}_trainval_coarse_trainval_mapillary_pretrain_freeze_bn_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +# PRETRAINED_MODEL="./pretrained_model/hrnet_w48_ocr_b_mapillary_bs16_500000_1024x1024_lr0.01_1_latest.pth" +# PRETRAINED_MODEL="./checkpoints/cityscapes/hrnet_w48_ocr_b_hrnet48__8_120000_trainval_ohem_mapillary_miou_508_1_latest.pth" # miou=83.63 on test. 
+PRETRAINED_MODEL="./checkpoints/cityscapes/hrnet_w48_ocr_b_hrnet48__16_50000_trainval_coarse_mapillary_pretrain_1_latest.pth" + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --train_batch_size ${BATCH_SIZE} \ + --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ${PRETRAINED_MODEL} \ + --resume_strict False \ + --resume_eval_train False \ + --resume_eval_val False \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --base_lr 0.0001 \ + --test_interval 2000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.0001 \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms_6x_depth + fi + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_ohem.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_ohem.sh new file mode 100644 index 0000000..2d0a14e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_b_mapillary_trainval_ohem.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the enviroment info +nvidia-smi${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MAX_ITERS=120000 +BATCH_SIZE=8 + +MODEL_NAME="hrnet_w48_ocr_b" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${BN_TYPE}_${BATCH_SIZE}_${MAX_ITERS}_trainval_ohem_mapillary_miou_508_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnet_w48_ocr_b_mapillary_bs16_500000_1024x1024_lr0.01_1_latest.pth" + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --train_batch_size ${BATCH_SIZE} \ + --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ${PRETRAINED_MODEL} \ + --resume_strict False \ + --resume_eval_train False \ + --resume_eval_val False \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --base_lr 0.001 \ + --test_interval 2000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.001 \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms_6x_depth + fi + +else + echo "$1"x" is invalid..." 
+fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_contrast.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_contrast.sh new file mode 100644 index 0000000..b09b8a7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_contrast.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/Cityscapes/seg_results/" +BACKBONE="hrnet48" + +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr_contrast" +LOSS_TYPE="contrast_auxce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes/" +CHECKPOINTS_NAME="${MODEL_NAME}_lr1x_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=40000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + python -m lib.metrics.cityscapes_evaluator --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$5"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py 
--configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_ohem.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_ohem.sh new file mode 100644 index 0000000..c652db5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_ohem.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_48_D_4.json" + +MAX_ITERS=80000 +BATCH_SIZE=8 + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${BN_TYPE}_${BATCH_SIZE}_${MAX_ITERS}_OHEM09_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE}\ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth --resume_val y \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir 
${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label >> "../../"${LOG_FILE} 2>&1 + +elif [ "$1"x == "test"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n --out_dir test 2>&1 | tee -a ${LOG_FILE} + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_trainval.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_trainval.sh new file mode 100644 index 0000000..de5dd87 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_ocr_trainval.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_48_D_4.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MAX_ITERS=100000 +BATCH_SIZE=8 + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${BN_TYPE}_${BATCH_SIZE}_${MAX_ITERS}_val_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --train_batch_size ${BATCH_SIZE} --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ 
+ --gt_dir ${DATA_DIR}/val/label_edge_void_20 + +elif [ "$1"x == "test"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n --out_dir test 2>&1 | tee -a ${LOG_FILE} + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_proto.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_proto.sh new file mode 100644 index 0000000..dd5e547 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/hrnet/run_h_48_d_4_proto.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/Cityscapes" +SAVE_DIR="${SCRATCH_ROOT}/Cityscapes/seg_results/" +BACKBONE="hrnet48" + +CONFIGS="configs/cityscapes/H_48_D_4_proto.json" +CONFIGS_TEST="configs/cityscapes/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_proto" +LOSS_TYPE="pixel_prototype_ce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/Cityscapes" +CHECKPOINTS_NAME="${MODEL_NAME}_lr1x_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/Cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=80000 +BATCH_SIZE=8 +BASE_LR=0.01 + +if [ "$1"x == "train"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --base_lr ${BASE_LR} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --resume_continue y \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + python -m lib.metrics.cityscapes_evaluator --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input 
$DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/isa/run_r_101_d_8_isa_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/isa/run_r_101_d_8_isa_train.sh new file mode 100644 index 0000000..34de9d0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/isa/run_r_101_d_8_isa_train.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="isanet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + # 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." 
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_aspoc_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_aspoc_train.sh new file mode 100644 index 0000000..3892981 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_aspoc_train.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="asp_ocnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir 
${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_baseoc_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_baseoc_train.sh new file mode 100644 index 0000000..b937117 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_baseoc_train.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="base_ocnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} 
--model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_pyramidoc_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_pyramidoc_train.sh new file mode 100644 index 0000000..2db613a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocnet/run_r_101_d_8_pyramidoc_train.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="pyramid_ocnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py 
--configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_distribute_ocrnet.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_distribute_ocrnet.sh new file mode 100644 index 0000000..92c1cf9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_distribute_ocrnet.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="ideal_distribute_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + # ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + # --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + # --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + # --test_dir ${DATA_DIR}/val/image \ + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv5_val + # --out_dir 
${SAVE_DIR}${CHECKPOINTS_NAME}_crf_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv3_val/label \ + --gt_dir ${DATA_DIR}/val/label + + + +elif [ "$1"x == "test"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n --out_dir test 2>&1 | tee -a ${LOG_FILE} + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_gather_ocrnet.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_gather_ocrnet.sh new file mode 100644 index 0000000..63f799e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_gather_ocrnet.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="ideal_gather_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + # ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + # --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + # --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + # --test_dir ${DATA_DIR}/val/image \ + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv5_val + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crf_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv3_val/label \ + --gt_dir ${DATA_DIR}/val/label + + + +elif [ 
"$1"x == "test"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n --out_dir test 2>&1 | tee -a ${LOG_FILE} + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet.sh new file mode 100644 index 0000000..39d8905 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_test.json" + +MODEL_NAME="ideal_spatial_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + # ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + # --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + # --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + # --test_dir ${DATA_DIR}/val/image \ + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv5_val + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crf_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv3_val/label \ + --gt_dir ${DATA_DIR}/val/label + + + +elif [ "$1"x == "test"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 
--resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n --out_dir test 2>&1 | tee -a ${LOG_FILE} + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_b.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_b.sh new file mode 100644 index 0000000..ea4ea47 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_b.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="ideal_spatial_ocrnet_b" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + # ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + # --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + # --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + # --test_dir ${DATA_DIR}/val/image \ + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv5_val + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crf_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv3_val/label \ + --gt_dir ${DATA_DIR}/val/label + + + +elif [ "$1"x == "test"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n --out_dir test 2>&1 | tee -a ${LOG_FILE} + +else + echo "$1"x" is invalid..." 
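# Editor's note (not part of the original script; added as a hedged usage sketch):
# these launcher scripts are driven by positional arguments: $1 selects the action
# handled by the if/elif chain above (train / resume / debug / val / test), $2 is a
# free-form tag appended to CHECKPOINTS_NAME, and, in the scripts that distinguish
# single- from multi-scale testing, $3 chooses "ss" versus anything else.
# Hypothetical invocations (the tag "1" is only an example value):
#   bash scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_b.sh train 1
#   bash scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_b.sh val 1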
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_c.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_c.sh new file mode 100644 index 0000000..b22b716 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_c.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="ideal_spatial_ocrnet_c" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + # ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + # --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + # --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + # --test_dir ${DATA_DIR}/val/image \ + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv5_val + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crf_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv3_val/label \ + --gt_dir ${DATA_DIR}/val/label + + + +elif [ "$1"x == "test"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n --out_dir test 2>&1 | tee -a ${LOG_FILE} + +else + echo "$1"x" is invalid..." 
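# Editor's note (assumption, inferred from the --use_ground_truth flag passed in the
# train/resume branches above, not stated in the original script): the "ideal_*"
# variants appear to be oracle runs that build object-contextual regions from
# ground-truth labels rather than predictions, i.e. an upper-bound study. Note also
# that the val-phase inference command is commented out here, so `val` only
# re-scores predictions that already exist under
# ${SAVE_DIR}${CHECKPOINTS_NAME}_crfv3_val/label. A hypothetical oracle run:
#   bash scripts/cityscapes/ocrnet/run_ideal_spatial_ocrnet_c.sh train oracle_gt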
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_asp_ocrnet_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_asp_ocrnet_train.sh new file mode 100644 index 0000000..5fcb973 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_asp_ocrnet_train.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="spatial_asp_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name 
${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_train.sh new file mode 100644 index 0000000..646feda --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_train.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=40000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for 
$DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_trainval.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_trainval.sh new file mode 100644 index 0000000..a472759 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_trainval.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the environment info +nvidia-smi +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=120000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --include_val y \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --include_val y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." 
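# Editor's note (workflow sketch inferred from the flags above, not part of the
# original script): this variant trains spatial_ocrnet on train+val
# (--include_val y) with the OHEM auxiliary loss for 120000 iterations, so only
# the `test` phase is meaningful afterwards; the `val` branch would score images
# the model has already trained on. A hypothetical end-to-end sequence:
#   bash scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_trainval.sh train 1
#   bash scripts/cityscapes/ocrnet/run_r_101_d_8_ocrnet_trainval.sh test 1 ss
# (any third argument other than "ss" selects the multi-scale + flip test path).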
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_coarse.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_coarse.sh new file mode 100644 index 0000000..c910954 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_coarse.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_trainval_12w_coarse_10w_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + + +MAX_ITERS=100000 +PRETRAINED_MODEL="./checkpoints/cityscapes/spatial_ocrnet_deepbase_resnet101_dilated8_trainval_1_latest.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --only_coarse y \ + --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ${PRETRAINED_MODEL} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --only_coarse y --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label >> "../../"${LOG_FILE} 2>&1 + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ 
+ --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_coarse_trainval.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_coarse_trainval.sh new file mode 100644 index 0000000..05c8cd6 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_coarse_trainval.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}_ohem.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxohemce_loss" + +MAX_ITERS=10000 +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_trainval_12w_coarse_10w_trainval_1w_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +# PRETRAINED_MODEL="./checkpoints/cityscapes/spatial_ocrnet_deepbase_resnet101_dilated8_trainval_coarse_1_latest.pth" +PRETRAINED_MODEL="./checkpoints/cityscapes/spatial_ocrnet_deepbase_resnet101_dilated8_trainval_12w_coarse_10w_1_latest.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --include_val y \ + --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ${PRETRAINED_MODEL} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --include_val y --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label >> "../../"${LOG_FILE} 2>&1 + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name 
${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_mapillary.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_mapillary.sh new file mode 100644 index 0000000..1ec05dc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_mapillary.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}_ohem.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_trainval_mapillary_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + + +MAX_ITERS=50000 +PRETRAINED_MODEL="./checkpoints/cityscapes/spatial_ocrnet_deepbase_resnet101_dilated8_trainval_1_latest.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --only_mapillary y \ + --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ${PRETRAINED_MODEL} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --only_mapillary y --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label >> 
"../../"${LOG_FILE} 2>&1 + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_mapillary_coarse.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_mapillary_coarse.sh new file mode 100644 index 0000000..988253e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/ocrnet/run_spatial_ocrnet_trainval_mapillary_coarse.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnet101_dilated8" +CONFIGS="configs/cityscapes/${BACKBONE}_ohem.json" +CONFIGS_TEST="configs/cityscapes/${BACKBONE}_test.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_trainval_mapillary_coarse_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + + +MAX_ITERS=50000 +PRETRAINED_MODEL="./checkpoints/cityscapes/spatial_ocrnet_deepbase_resnet101_dilated8_trainval_mapillary_1_latest.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --only_coarse y \ + --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --resume ${PRETRAINED_MODEL} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --only_coarse y --base_lr 0.0001 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + + +elif [ "$1"x == "debug"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --phase debug --gpu 0 --log_to_file n 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 --resume 
./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n --out_dir val 2>&1 | tee -a ${LOG_FILE} + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ../../results/cityscapes/test_dir/${CHECKPOINTS_NAME}/val/label \ + --gt_dir ${DATA_DIR}/val/label >> "../../"${LOG_FILE} 2>&1 + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/resnest/run_r_101_d_8_fcn.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/resnest/run_r_101_d_8_fcn.sh new file mode 100644 index 0000000..28bc88b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/resnest/run_r_101_d_8_fcn.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" +BACKBONE="deepbase_resnest101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="fcnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnest101-966fb78c.pth" +MAX_ITERS=40000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + # ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + # --backbone ${BACKBONE} --model_name ${MODEL_NAME} 
--checkpoints_name ${CHECKPOINTS_NAME} \ + # --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + # --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + # --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + # evaluate the mIoU + # ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + # --gt_dir ${DATA_DIR}/val/label + + # evaluate the boundary F-score + ${PYTHON} -u cityscapes_fscore_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix.py b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix.py new file mode 100644 index 0000000..cc538ee --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from PIL import Image +import scipy.io as io +import subprocess +import multiprocessing.pool as mpp + +DATA_ROOT = subprocess.check_output( + ['bash', '-c', "source config.profile; echo $DATA_ROOT"] +).decode().strip() + +import os +import sys +import argparse +import os.path as osp + +script_path = osp.abspath(osp.join(osp.dirname(__file__))) +os.chdir(osp.join(script_path, '..', '..')) +sys.path.insert(0, os.getcwd()) +os.environ['PYTHONPATH'] = os.getcwd() + ':' + os.environ.get('PYTHONPATH', '') + +class LabelTransformer: + + label_list = [7, 8, 11, 12, 13, 17, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33] + + @staticmethod + def encode(labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones( + shape=(shape[0], shape[1]), dtype=np.int) * 255 + for i in range(len(LabelTransformer.label_list)): + class_id = LabelTransformer.label_list[i] + encoded_labelmap[labelmap == class_id] = i + + return encoded_labelmap + + @staticmethod + def decode(labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones( + shape=(shape[0], shape[1]), dtype=np.uint8) * 255 + for i in range(len(LabelTransformer.label_list)): + class_id = i + encoded_labelmap[labelmap == + class_id] = LabelTransformer.label_list[i] + + return encoded_labelmap + + +def gen_coord_map(H, W): + coord_vecs = [torch.arange(length, dtype=torch.float) for length in (H, W)] + coord_h, coord_w = torch.meshgrid(coord_vecs) + return coord_h, coord_w + +def shift(x, offset): + """ + x: h x w + offset: 2 x h x w + """ + h, w = x.shape + x = torch.from_numpy(x).unsqueeze(0) + offset = 
torch.from_numpy(offset).unsqueeze(0) + coord_map = gen_coord_map(h, w) + norm_factor = torch.FloatTensor([(w-1)/2, (h-1)/2]) + grid_h = offset[:, 0]+coord_map[0] + grid_w = offset[:, 1]+coord_map[1] + grid = torch.stack([grid_w, grid_h], dim=-1) / norm_factor - 1 + x = F.grid_sample(x.unsqueeze(1).float(), grid, padding_mode='border', mode='bilinear').squeeze().numpy() + x = np.round(x) + return x.astype(np.uint8) + +def get_offset(basename): + return io.loadmat(osp.join(offset_dir, basename+'.mat'))['mat']\ + .astype(np.float32).transpose(2, 0, 1) * args.scale + +def process(basename): + infile = osp.join(in_label_dir, basename + '.png') + outfile = osp.join(out_label_dir, basename + '.png') + + input_label_map = np.array(Image.open(infile).convert('P')) + input_label_map = LabelTransformer.encode(input_label_map) + + offset_map = get_offset(basename) + output_label_map = shift(input_label_map, offset_map) + output_label_map = LabelTransformer.decode(output_label_map) + Image.fromarray(output_label_map).save(outfile) + print('Writing', outfile) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--input') + parser.add_argument('--offset') + parser.add_argument('--out') + parser.add_argument('--split', choices=['val', 'test'], default='val') + parser.add_argument('--scale', type=float, default=2) + args = parser.parse_args() + + if args.offset is None: + if args.split == 'val': + offset_dir = osp.join(DATA_ROOT, 'cityscapes', 'val', 'offset_pred', 'semantic', 'offset_hrnext') + else: + offset_dir = osp.join(DATA_ROOT, 'cityscapes', 'test_offset', 'semantic', 'offset_hrnext') + else: + offset_dir = args.offset + + in_label_dir = args.input + if args.out is None: + if '/label' in in_label_dir: + out_label_dir = in_label_dir.replace('/label', '/label_w_segfix') + else: + out_label_dir = osp.join(in_label_dir, 'label_w_segfix') + else: + out_label_dir = args.out + print('Saving to', out_label_dir) + + os.makedirs(out_label_dir, exist_ok=True) + input_args = [fn.rpartition('.')[0] for fn in os.listdir(in_label_dir)] + print(len(input_args), 'files in total.') + mpp.Pool().map(process, input_args) + + if args.split == 'val': + os.system('{} lib/metrics/cityscapes_evaluator.py --gt_dir {}/cityscapes/val/label --pred_dir {}'.format(sys.executable, DATA_ROOT, out_label_dir)) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix.sh new file mode 100644 index 0000000..747fa46 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +if [ -z $dt_max_distance ]; then + export dt_max_distance=5 +fi + +echo dt_max_distance: $dt_max_distance + +export dt_num_classes=8 + +echo dt_num_classes: $dt_num_classes + +if [ -z $offset_dir ]; then + offset_dir="offset_gt/dt_offset" +fi + +export offset_dir=$offset_dir +echo offset_dir: $offset_dir + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_SEGFIX.json" + +MODEL_NAME="segfix_hrnet" +LOSS_TYPE="segfix_loss" +MAX_ITERS=80000 + +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${LOSS_TYPE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.04 \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "segfix_pred_val"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop True + +elif [ "$1"x == "segfix_pred_test"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop True + +elif [ "$1"x == "segfix_simple_pred_val"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u 
main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop False + +elif [ "$1"x == "segfix_simple_pred_test"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop False + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix_inst.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix_inst.sh new file mode 100644 index 0000000..c46ac53 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix_inst.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +export sscrop=1 + +if [ -z $dt_max_distance ]; then + export dt_max_distance=5 +fi + +echo dt_max_distance: $dt_max_distance + +export dt_num_classes=8 + +echo dt_num_classes: $dt_num_classes + +if [ -z $offset_dir ]; then + offset_dir="offset_gt/dt_offset_inst_w_stuff" +fi + +export offset_dir=$offset_dir +echo offset_dir: $offset_dir + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_SEGFIX.json" + +MODEL_NAME="segfix_hrnet" +LOSS_TYPE="segfix_loss" +MAX_ITERS=20000 + +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${LOSS_TYPE}_inst_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.04 \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 1000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name 
${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "segfix_pred_val"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop True + +elif [ "$1"x == "segfix_pred_test"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/test/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop True + +elif [ "$1"x == "segfix_simple_pred_val"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop False + +elif [ "$1"x == "segfix_simple_pred_test"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/test/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop False + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix_trainval.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix_trainval.sh new file mode 100644 index 0000000..c051a9a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_h_48_d_4_segfix_trainval.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +if [ -z $dt_max_distance ]; then + export dt_max_distance=5 +fi + +echo dt_max_distance: $dt_max_distance + +export dt_num_classes=8 + +echo dt_num_classes: $dt_num_classes + +if [ -z $offset_dir ]; then + offset_dir="offset_gt/dt_offset" +fi + +export offset_dir=$offset_dir +echo offset_dir: $offset_dir + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +BACKBONE="hrnet48" +CONFIGS="configs/cityscapes/H_SEGFIX.json" + +MODEL_NAME="segfix_hrnet" +LOSS_TYPE="segfix_loss" +MAX_ITERS=80000 + +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${LOSS_TYPE}_trainval_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.04 \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "segfix_pred_val"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop True + +elif [ "$1"x == "segfix_pred_test"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop True + +elif [ "$1"x == "segfix_simple_pred_val"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + 
CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop False + +elif [ "$1"x == "segfix_simple_pred_test"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop False + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix.sh new file mode 100644 index 0000000..0bc6b17 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +export sscrop=1 + +if [ -z $dt_max_distance ]; then + export dt_max_distance=5 +fi + +echo dt_max_distance: $dt_max_distance + +export dt_num_classes=8 + +echo dt_num_classes: $dt_num_classes + +if [ -z $offset_dir ]; then + offset_dir="offset_gt/dt_offset" +fi + +export offset_dir=$offset_dir +echo offset_dir: $offset_dir + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +BACKBONE="hrnet2x20" +CONFIGS="configs/cityscapes/H_SEGFIX.json" + +MODEL_NAME="segfix_hrnet" +LOSS_TYPE="segfix_loss" +MAX_ITERS=80000 + +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${LOSS_TYPE}_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hr_rnet_bt_w20_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.04 \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume 
./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "segfix_pred_val"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop True + +elif [ "$1"x == "segfix_pred_test"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop True + +elif [ "$1"x == "segfix_simple_pred_val"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop False + +elif [ "$1"x == "segfix_simple_pred_test"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop False + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix_inst.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix_inst.sh new file mode 100644 index 0000000..88bf2d5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix_inst.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +export sscrop=1 + +if [ -z $dt_max_distance ]; then + export dt_max_distance=5 +fi + +echo dt_max_distance: $dt_max_distance + +export dt_num_classes=8 + +echo dt_num_classes: $dt_num_classes + +if [ -z $offset_dir ]; then + offset_dir="offset_gt/dt_offset_inst_w_stuff" +fi + +export offset_dir=$offset_dir +echo offset_dir: $offset_dir + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +BACKBONE="hrnet2x20" +CONFIGS="configs/cityscapes/H_SEGFIX.json" + +MODEL_NAME="segfix_hrnet" +LOSS_TYPE="segfix_loss" +MAX_ITERS=20000 + +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${LOSS_TYPE}_inst_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hr_rnet_bt_w20_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.04 \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 1000 \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "segfix_pred_val"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop True + +elif [ "$1"x == "segfix_pred_test"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/test/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop True + +elif [ "$1"x == "segfix_simple_pred_val"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + 
CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop False + +elif [ "$1"x == "segfix_simple_pred_test"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/test/instance/offset_${BACKBONE}/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop False + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix_trainval.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix_trainval.sh new file mode 100644 index 0000000..b731581 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix/run_hx_20_d_2_segfix_trainval.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +export sscrop=1 + +if [ -z $dt_max_distance ]; then + export dt_max_distance=5 +fi + +echo dt_max_distance: $dt_max_distance + +export dt_num_classes=8 + +echo dt_num_classes: $dt_num_classes + +if [ -z $offset_dir ]; then + offset_dir="offset_gt/dt_offset" +fi + +export offset_dir=$offset_dir +echo offset_dir: $offset_dir + +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +BACKBONE="hrnet2x20" +CONFIGS="configs/cityscapes/H_SEGFIX.json" + +MODEL_NAME="segfix_hrnet" +LOSS_TYPE="segfix_loss" +MAX_ITERS=120000 + +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${LOSS_TYPE}_trainval_"$2 +LOG_FILE="./log/cityscapes/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hr_rnet_bt_w20_imagenet_pretrained.pth" + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} --loss_type ${LOSS_TYPE} --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + --base_lr 0.04 \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --test_interval 2000 \ + --include_val y \ + --phase train --gathered n --loss_balance y --log_to_file n \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} 
--loss_type ${LOSS_TYPE} --gpu 0 1 2 3 \ + --resume_continue y --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "segfix_pred_val"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}_trainval/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop True + +elif [ "$1"x == "segfix_pred_test"x ]; then + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}_trainval/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop True + +elif [ "$1"x == "segfix_simple_pred_val"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}_trainval/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set val \ + test.sscrop False + +elif [ "$1"x == "segfix_simple_pred_test"x ]; then + export batch_size=4 + OUT_DIR=$PWD/segfix_pred/cityscapes/test/semantic/offset_${BACKBONE}_trainval/ + mkdir -p ${OUT_DIR} + + if [ -z "$3" ]; then + CKPT=./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume $CKPT \ + --log_to_file n --loss_type $LOSS_TYPE \ + --out_dir ${OUT_DIR} \ + test.eval_set test \ + test.sscrop False + +else + echo "$1"x" is invalid..." 
+fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix_ade20k.py b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix_ade20k.py new file mode 100644 index 0000000..a66fac1 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix_ade20k.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from PIL import Image +import scipy.io as io +import subprocess +import multiprocessing.pool as mpp + +DATA_ROOT = subprocess.check_output( + ['bash', '-c', "source config.profile; echo $DATA_ROOT"] +).decode().strip() + +import os +import sys +import argparse +import os.path as osp + +script_path = osp.abspath(osp.join(osp.dirname(__file__))) +os.chdir(osp.join(script_path, '..', '..')) +sys.path.insert(0, os.getcwd()) +os.environ['PYTHONPATH'] = os.getcwd() + ':' + os.environ.get('PYTHONPATH', '') + +class LabelTransformer: + + label_list = list(range(1, 151)) + + @staticmethod + def encode(labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones( + shape=(shape[0], shape[1]), dtype=np.int) * 255 + for i in range(len(LabelTransformer.label_list)): + class_id = LabelTransformer.label_list[i] + encoded_labelmap[labelmap == class_id] = i + + return encoded_labelmap + + @staticmethod + def decode(labelmap): + labelmap = np.array(labelmap) + + shape = labelmap.shape + encoded_labelmap = np.ones( + shape=(shape[0], shape[1]), dtype=np.uint8) * 255 + for i in range(len(LabelTransformer.label_list)): + class_id = i + encoded_labelmap[labelmap == + class_id] = LabelTransformer.label_list[i] + + return encoded_labelmap + + +def gen_coord_map(H, W): + coord_vecs = [torch.arange(length, dtype=torch.float) for length in (H, W)] + coord_h, coord_w = torch.meshgrid(coord_vecs) + return coord_h, coord_w + +def shift(x, offset): + """ + x: h x w + offset: 2 x h x w + """ + h, w = x.shape + x = torch.from_numpy(x).unsqueeze(0) + offset = torch.from_numpy(offset).unsqueeze(0) + coord_map = gen_coord_map(h, w) + norm_factor = torch.FloatTensor([(w-1)/2, (h-1)/2]) + grid_h = offset[:, 0]+coord_map[0] + grid_w = offset[:, 1]+coord_map[1] + grid = torch.stack([grid_w, grid_h], dim=-1) / norm_factor - 1 + x = F.grid_sample(x.unsqueeze(1).float(), grid, padding_mode='border', mode='bilinear').squeeze().numpy() + x = np.round(x) + return x.astype(np.uint8) + +def get_offset(basename): + return io.loadmat(osp.join(offset_dir, basename+'.mat'))['mat']\ + .astype(np.float32).transpose(2, 0, 1) * args.scale + +def process(basename): + infile = osp.join(in_label_dir, basename + '.png') + outfile = osp.join(out_label_dir, basename + '.png') + + input_label_map = np.array(Image.open(infile).convert('P')) + input_label_map = LabelTransformer.encode(input_label_map) + + offset_map = get_offset(basename) + output_label_map = shift(input_label_map, offset_map) + output_label_map = LabelTransformer.decode(output_label_map) + Image.fromarray(output_label_map).save(outfile) + print('Writing', outfile) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--input') + parser.add_argument('--offset') + parser.add_argument('--out') + parser.add_argument('--split', choices=['val'], default='val') + parser.add_argument('--scale', type=float, default=2) + args = parser.parse_args() + + if args.offset is None: + offset_dir = osp.join(DATA_ROOT, 'ade20k', 'val', 'offset_pred', 'semantic', 
'offset_hrnext') + else: + offset_dir = args.offset + + in_label_dir = args.input + if args.out is None: + if '/label' in in_label_dir: + out_label_dir = in_label_dir.replace('/label', '/label_w_segfix') + else: + out_label_dir = osp.join(in_label_dir, 'label_w_segfix') + else: + out_label_dir = args.out + print('Saving to', out_label_dir) + + os.makedirs(out_label_dir, exist_ok=True) + input_args = [fn.rpartition('.')[0] for fn in os.listdir(in_label_dir)] + print(len(input_args), 'files in total.') + mpp.Pool().map(process, input_args) + + if args.split == 'val': + os.system('{} lib/metrics/ade20k_evaluator.py --gt_dir {}/ade20k/val/label --pred_dir {}'.format(sys.executable, DATA_ROOT, out_label_dir)) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix_instance.py b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix_instance.py new file mode 100644 index 0000000..3852b84 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/cityscapes/segfix_instance.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Author: Jingyi Xie (hsfzxjy@gmail.com) +""" + +import multiprocessing.pool as mpp +import scipy.io as io +import numpy as np +from PIL import Image +import torch +import torch.nn.functional as F +import torch.nn as nn + +import os +import glob +import shutil +import os.path as osp +import argparse +import sys +import time +import cv2 + +script_path = osp.abspath(osp.join(osp.dirname(__file__))) +os.chdir(osp.join(script_path, '..', '..')) +sys.path.insert(0, os.getcwd()) +os.environ['PYTHONPATH'] = os.getcwd() + ':' + os.environ.get('PYTHONPATH', '') + +def gen_coord_map(H, W): + coord_vecs = [torch.arange(length, dtype=torch.float) for length in (H, W)] + coord_h, coord_w = torch.meshgrid(coord_vecs) + return coord_h, coord_w + + +def shift(x, offset): + """ + x: c x h x w + offset: 2 x h x w + """ + + def do_shift(x, offset): + grid_h = offset[:, 0] + coord_map[0] + grid_w = offset[:, 1] + coord_map[1] + grid = torch.stack([grid_w, grid_h], dim=-1) / norm_factor - 1 + + x = F.grid_sample( + x, grid, padding_mode='border', mode='bilinear') + + return x + + c, h, w = x.shape + coord_map = gen_coord_map(h, w) + norm_factor = torch.FloatTensor([(w-1)/2, (h-1)/2]) + + x = torch.from_numpy(x).unsqueeze(0).float() + + offset = torch.from_numpy(offset).unsqueeze( + 0).clone() * args.scale + x = do_shift(x, offset) + + return (x.squeeze(0).numpy() > 0.5).astype(np.uint8) + + +def get_offset(basename): + return io.loadmat(osp.join(offset_dir, basename+'.mat'))['mat'].transpose(2, 0, 1).astype(np.float32) + +def process(filename): + infile = osp.join(in_dir, filename) + print('Processing', infile) + + names = [] + masks = [] + with open(infile) as f: + for line in f: + name = line.strip().split()[0] + names.append(name) + mask = np.array(Image.open(osp.join(in_dir, name)).convert('P')) + masks.append(mask) + + # Not that an image may have no instance prediction at all. + if masks: + masks = np.stack(masks, axis=0) + masks = (masks > 0).astype(np.uint8) + + offset_map = get_offset(filename.replace('_pred.txt', '')) + output_masks = shift(masks, offset_map) + else: + output_masks = [] + + shutil.copy(infile, out_dir) + for name, mask in zip(names, output_masks): + out_name = osp.join(out_dir, name) + Image.fromarray( + mask * 255 + ).save(out_name) + + +def ensure_cityscapes_scripts(): + """ + Ensure that library `cityscapesscripts` is properly installed. 
+ + Note that the original implementation from https://github.com/mcordts/cityscapesScripts will + raise encoding error during installation. We then fork a copy for self-use. + """ + try: + import cityscapesscripts + except ModuleNotFoundError: + os.system( + '{} -m pip install git+https://github.com/hsfzxjy/cityscapesScripts.git'.format(sys.executable)) + + +def evaluation(pred_dir): + """ + See https://github.com/facebookresearch/detectron2/blob/d250fcc1b66d5a3686c15144480441b7abe31dec/detectron2/evaluation/cityscapes_evaluation.py#L80 + """ + os.environ["CITYSCAPES_DATASET"] = args.dataset_dir + ensure_cityscapes_scripts() + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval + cityscapes_eval.args.predictionPath = pred_dir + cityscapes_eval.args.predictionWalk = None + cityscapes_eval.args.JSONOutput = False + cityscapes_eval.args.colorized = False + cityscapes_eval.args.gtInstancesFile = os.path.join( + pred_dir, "gtInstances.json") + + groundTruthImgList = glob.glob(cityscapes_eval.args.groundTruthSearch) + assert len( + groundTruthImgList + ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( + cityscapes_eval.args.groundTruthSearch + ) + predictionImgList = [] + for gt in groundTruthImgList: + predictionImgList.append( + cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) + results = cityscapes_eval.evaluateImgLists( + predictionImgList, groundTruthImgList, cityscapes_eval.args + )["averages"] + + +def copy_gt(): + """ + Copy ground-truth information to output directory. + + The original file may use another dataset base dir, so we replace the keys + with currently used one. + """ + if not osp.isfile(osp.join(in_dir, 'gtInstances.json')): + return + + import json + import re + with open(osp.join(in_dir, 'gtInstances.json')) as f: + content = json.load(f) + new_content = {} + target = osp.join(args.dataset_dir, 'gtFine') + if target.endswith('/'): + target = target[:-1] + for key, value in content.items(): + key = re.sub(r'^.*?gtFine', target, key, 1) + new_content[key] = value + with open(osp.join(out_dir, 'gtInstances.json'), 'w') as f: + content = json.dump(new_content, f) + + +if __name__ == '__main__': + print( +'''======================= NOTE ======================= +To use this script, the name of your instance index +file should EXACTLY follow the scheme: + + ___leftImg8bit_pred.txt + +e.g. + + frankfurt_000001_042098_leftImg8bit_pred.txt + +. Otherwise the script may not function correctly. 
+==================================================== +''' +) + parser = argparse.ArgumentParser() + parser.add_argument('--input') + parser.add_argument('--offset') + parser.add_argument('--split', choices=['val', 'test'], default='val') + parser.add_argument('--scale', type=float, default=1) + parser.add_argument( + '--dataset_dir', default='/msravcshare/dataset/original_cityscapes/') + parser.add_argument('--out') + parser.add_argument('--eval_only', action='store_true') + args = parser.parse_args() + + in_dir = args.input + if args.offset is None: + if args.split == 'val': + offset_dir = osp.join(DATA_ROOT, 'cityscapes', 'val', 'offset_pred', 'instance', 'offset_hrnext') + else: + offset_dir = osp.join(DATA_ROOT, 'cityscapes', 'test_offset', 'instance', 'offset_hrnext') + else: + offset_dir = args.offset + if args.out is not None: + out_dir = args.out + else: + out_dir = osp.join(in_dir, 'label_w_segfix') + + os.makedirs(out_dir, exist_ok=True) + input_args = [fn for fn in os.listdir(in_dir) if fn.endswith('pred.txt')] + print(len(input_args), 'files in total.') + copy_gt() + if not args.eval_only: + mpp.Pool(processes=None).map(process, input_args) + if args.split == 'val': + evaluation(out_dir) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/job_run_h_48_d_4_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/job_run_h_48_d_4_paddle.sh new file mode 100644 index 0000000..dde2f07 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/job_run_h_48_d_4_paddle.sh @@ -0,0 +1,27 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 72:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=TITANRTX]" +#BSUB -J "hrnet_contrast_paddle" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +## activate env +#source /cluster/home/tiazhou/miniconda3/etc/profile.d/conda.sh +#conda activate pytorch-1.7.1 + +source ../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/cocostuff.tar ${TMPDIR}/ +tar -xf ${TMPDIR}/cocostuff.tar -C ${TMPDIR}/ + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/HRNet_W48_C_ssld_pretrained.pth ${TMPDIR}/HRNet_W48_C_ssld_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4_paddle.sh val 'hrnet_paddle' ${TMPDIR} ${SCRATCH_DIR} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_isa_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_isa_train.sh new file mode 100644 index 0000000..09b14de --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_isa_train.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="hrnet48" + +CONFIGS="configs/coco_stuff/H_48_D_4.json" +CONFIGS_TEST="configs/coco_stuff/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet48_isa" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 + + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." 
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train.sh new file mode 100644 index 0000000..d30f384 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="hrnet48" + +CONFIGS="configs/coco_stuff/H_48_D_4.json" +CONFIGS_TEST="configs/coco_stuff/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_ohem_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 +BATCH_SIZE=16 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 4 5 6 7 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --test_interval ${MAX_ITERS} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u cocostuff_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir 
${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train_paddle.sh new file mode 100644 index 0000000..7c10e5c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train_paddle.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +#PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" +#DATA_ROOT="/home/yuhui/teamdrive/dataset" +# check the enviroment info + +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="hrnet48" + +CONFIGS="configs/coco_stuff/H_48_D_4.json" +CONFIGS_TEST="configs/coco_stuff/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxohemce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_ohem_paddle_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=60000 +BATCH_SIZE=16 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 4 5 6 7 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --test_interval ${MAX_ITERS} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + if [ "$3"x == "ss"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss + + cd lib/metrics + ${PYTHON} -u cocostuff_evaluator.py --configs ../../${CONFIGS} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss/label \ + --gt_dir ${DATA_DIR}/val/label + else + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone 
${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 4 5 6 7 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u cocostuff_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train_rmi_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train_rmi_paddle.sh new file mode 100644 index 0000000..00f1fe5 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_ohem_train_rmi_paddle.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +# PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" +# DATA_ROOT="/home/yuhui/teamdrive/dataset" +# DATA_ROOT=$3 + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="hrnet48" + +CONFIGS="configs/coco_stuff/H_48_D_4_RMI.json" +CONFIGS_TEST="configs/coco_stuff/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_aux_rmi_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_ohem_rmi_paddle_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=60000 +BATCH_SIZE=16 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 4 5 6 7 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + --test_interval 10000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + if [ "$3"x == "ss"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss + + cd lib/metrics + ${PYTHON} -u cocostuff_evaluator.py --configs ../../${CONFIGS} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss/label \ + --gt_dir ${DATA_DIR}/val/label + else + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 4 5 6 7 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u cocostuff_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + fi + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir 
${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_train.sh new file mode 100644 index 0000000..7d5a2f8 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_ocr_train.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="hrnet48" + +CONFIGS="configs/coco_stuff/H_48_D_4.json" +CONFIGS_TEST="configs/coco_stuff/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume 
./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_paddle.sh new file mode 100644 index 0000000..5dbb82e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_paddle.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/cocostuff" +SAVE_DIR="${SCRATCH_ROOT}/seg_results/coco_stuff/" +BACKBONE="hrnet48" + +CONFIGS="configs/coco_stuff/H_48_D_4.json" +CONFIGS_TEST="configs/coco_stuff/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/coco_stuff/" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=60000 +BATCH_SIZE=1 + + + +if [ "$1"x == "train"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --distributed \ + --pretrained ${PRETRAINED_MODEL} \ + --test_interval 10000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 \ + --val_batch_size ${BATCH_SIZE} \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss + +# cd lib/metrics +# ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ +# --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ +# --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs 
${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_train.sh new file mode 100644 index 0000000..527d958 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_h_48_d_4_train.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="hrnet48" + +CONFIGS="configs/coco_stuff/H_48_D_4.json" +CONFIGS_TEST="configs/coco_stuff/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 + + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} 
-u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_gt_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_gt_ocr_train.sh new file mode 100644 index 0000000..5c6f096 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_gt_ocr_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/coco_stuff/R_101_D_8.json" +CONFIGS_TEST="configs/coco_stuff/R_101_D_8_TEST.json" + +MODEL_NAME="ideal_spatial_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=60000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir 
${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_ocr_train.sh new file mode 100644 index 0000000..3993067 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_ocr_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/coco_stuff/R_101_D_8.json" +CONFIGS_TEST="configs/coco_stuff/R_101_D_8_TEST.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=60000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + 
--log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_train.sh new file mode 100644 index 0000000..49746d9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/coco_stuff/run_r_101_d_8_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/coco_stuff_10k" +SAVE_DIR="${DATA_ROOT}/seg_result/coco_stuff/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/coco_stuff/R_101_D_8.json" +CONFIGS_TEST="configs/coco_stuff/R_101_D_8_TEST.json" + +MODEL_NAME="fcnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/coco_stuff/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=60000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name 
${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/coco_stuff/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_isa_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_isa_train.sh new file mode 100644 index 0000000..4406e77 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_isa_train.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="hrnet48" + +CONFIGS="configs/lip/H_48_D_4.json" +CONFIGS_TEST="configs/lip/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_isa" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name 
${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_b_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_b_train.sh new file mode 100644 index 0000000..1018cd7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_b_train.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="hrnet48" + +CONFIGS="configs/lip/H_48_D_4.json" +CONFIGS_TEST="configs/lip/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr_b" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} 
--model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train.sh new file mode 100644 index 0000000..ddb8a43 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="hrnet48" + +CONFIGS="configs/lip/H_48_D_4.json" +CONFIGS_TEST="configs/lip/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} 
--model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train_paddle.sh new file mode 100644 index 0000000..574b0b7 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train_paddle.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="hrnet48" + +CONFIGS="configs/lip/H_48_D_4.json" +CONFIGS_TEST="configs/lip/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_paddle_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 4 5 6 7 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs 
${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train_rmi_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train_rmi_paddle.sh new file mode 100644 index 0000000..79d0af3 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_ocr_train_rmi_paddle.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +DATA_ROOT=$3 +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="hrnet48" + +CONFIGS="configs/lip/H_48_D_4_RMI.json" +CONFIGS_TEST="configs/lip/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_rmi_paddle_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 4 5 6 7 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_train.sh new file mode 100644 index 0000000..300bf75 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_h_48_d_4_train.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +# check the enviroment info +nvidia-smi +${PYTHON} -m pip install yacs + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="hrnet48" + +CONFIGS="configs/lip/H_48_D_4.json" +CONFIGS_TEST="configs/lip/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_r_101_d_16_ce2p_gt_ocrnet_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_r_101_d_16_ce2p_gt_ocrnet_train.sh new file mode 100644 index 0000000..490fecd --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_r_101_d_16_ce2p_gt_ocrnet_train.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +# check the enviroment info +nvidia-smi + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="deepbase_resnet101_dilated16" + +CONFIGS="configs/lip/R_101_D_16.json" + +MODEL_NAME="ce2p_ideal_ocrnet" +LOSS_TYPE="ce2p_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --use_ground_truth \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_r_101_d_16_ce2p_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_r_101_d_16_ce2p_ocr_train.sh new file mode 100644 index 0000000..d3f49bf --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/lip/run_r_101_d_16_ce2p_ocr_train.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +# check the enviroment info +nvidia-smi + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/lip" +SAVE_DIR="${DATA_ROOT}/seg_result/lip/" +BACKBONE="deepbase_resnet101_dilated16" + +CONFIGS="configs/lip/R_101_D_16.json" + +MODEL_NAME="ce2p_ocrnet" +LOSS_TYPE="ce2p_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/lip/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=100000 +BATCHSIZE=32 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val + + cd lib/metrics + ${PYTHON} -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/lip/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/mapillary/run_h_48_d_4_ocr_b.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/mapillary/run_h_48_d_4_ocr_b.sh new file mode 100644 index 0000000..0959fdd --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/mapillary/run_h_48_d_4_ocr_b.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../../ +. 
config.profile +# check the environment info +nvidia-smi +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/mapillary-vista-v1.1" +SAVE_DIR="${DATA_ROOT}/seg_result/mapillary/" +BACKBONE="hrnet48" + +CONFIGS="configs/mapillary/H_48_D_4_1024x1024.json" + +MODEL_NAME="hrnet_w48_ocr_b" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_mapillary_"$2 +LOG_FILE="./log/mapillary/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=500000 +BATCH_SIZE=16 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --base_lr 0.02 \ + --test_interval 10000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --include_val y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --train_batch_size ${BATCH_SIZE} \ + 2>&1 | tee -a ${LOG_FILE} \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/job_run_h_48_d_4_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/job_run_h_48_d_4_paddle.sh new file mode 100644 index 0000000..e87912d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/job_run_h_48_d_4_paddle.sh @@ -0,0 +1,27 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 24:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=TITANRTX]" +#BSUB -J "hrnet_paddle" +#BSUB -B +#BSUB -N +#BSUB -oo logs/ + +## activate env +#source /cluster/home/tiazhou/miniconda3/etc/profile.d/conda.sh +#conda activate pytorch-1.7.1 + +source ../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/pascalcontext.tar ${TMPDIR}/ +tar -xf ${TMPDIR}/pascalcontext.tar -C ${TMPDIR}/ + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/HRNet_W48_C_ssld_pretrained.pth ${TMPDIR}/HRNet_W48_C_ssld_pretrained.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_h_48_d_4_paddle.sh val 'hrnet_paddle' ${TMPDIR} ${SCRATCH_DIR} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/job_run_r_101_d_8_deeplabv3.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/job_run_r_101_d_8_deeplabv3.sh new file mode 100644 index 0000000..339fbff --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/job_run_r_101_d_8_deeplabv3.sh @@ -0,0 +1,27 @@ +#!/bin/bash +#BSUB -n 16 +#BSUB -W 72:00 +#BSUB -R "rusage[mem=4000,ngpus_excl_p=4,scratch=10000]" +#BSUB -R "select[gpu_model0=TITANRTX]" +#BSUB -J "deeplab_v3" +#BSUB -B +#BSUB -N
+#BSUB -oo logs/ + +# activate env +#source /cluster/home/tiazhou/miniconda3/etc/profile.d/conda.sh +#conda activate pytorch-1.7.1 + +source ../../../pytorch-1.7.1/bin/activate + +# copy data +rsync -aP /cluster/work/cvl/tiazhou/data/pascalcontext.tar ${TMPDIR}/ +tar -xf ${TMPDIR}/pascalcontext.tar -C ${TMPDIR}/ + +# copy assets +rsync -aP /cluster/work/cvl/tiazhou/assets/openseg/resnet101-imagenet.pth ${TMPDIR}/resnet101-imagenet.pth + +# define scratch dir +SCRATCH_DIR="/cluster/scratch/tiazhou/Openseg" + +sh run_r_101_d_8_deeplabv3_train.sh train 'deeplab_v3' ${TMPDIR} ${SCRATCH_DIR} 'ss' diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_isa_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_isa_train.sh new file mode 100644 index 0000000..e285118 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_isa_train.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="hrnet48" + +CONFIGS="configs/pascal_context/H_48_D_4.json" +CONFIGS_TEST="configs/pascal_context/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_isa" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + 
${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_b_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_b_train.sh new file mode 100644 index 0000000..ccd534c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_b_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="hrnet48" + +CONFIGS="configs/pascal_context/H_48_D_4.json" +CONFIGS_TEST="configs/pascal_context/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr_b" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs 
../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train.sh new file mode 100644 index 0000000..4c36ddb --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="hrnet48" + +CONFIGS="configs/pascal_context/H_48_D_4.json" +CONFIGS_TEST="configs/pascal_context/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume 
./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train_paddle.sh new file mode 100644 index 0000000..019260c --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train_paddle.sh @@ -0,0 +1,135 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" +# DATA_ROOT="/home/yuhui/teamdrive/dataset" + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="hrnet48" + +CONFIGS="configs/pascal_context/H_48_D_4.json" +CONFIGS_TEST="configs/pascal_context/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_paddle_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=60000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 4 5 6 7 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + --test_interval ${MAX_ITERS} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume 
./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + if [ "$3"x == "ss"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 4 5 6 7 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss/label \ + --gt_dir ${DATA_DIR}/val/label + else + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 4 5 6 7 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + fi + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train_rmi_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train_rmi_paddle.sh new file mode 100644 index 0000000..656f031 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_ocr_train_rmi_paddle.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +# PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" +# DATA_ROOT=$3 + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="hrnet48" + +CONFIGS="configs/pascal_context/H_48_D_4_RMI.json" +CONFIGS_TEST="configs/pascal_context/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48_ocr" +LOSS_TYPE="fs_aux_rmi_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_rmi_paddle_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=60000 + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 4 5 6 7 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --distributed \ + --test_interval 10000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + if [ "$3"x == "ss"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 4 5 6 7 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ss/label \ + --gt_dir ${DATA_DIR}/val/label + else + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 4 5 6 7 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + fi + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo 
"[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_paddle.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_paddle.sh new file mode 100644 index 0000000..6d1394b --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_paddle.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/pascalcontext" +SAVE_DIR="${SCRATCH_ROOT}/seg_results/pascal_context/" +BACKBONE="hrnet48" + +CONFIGS="configs/pascal_context/H_48_D_4.json" +CONFIGS_TEST="configs/pascal_context/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48" +LOSS_TYPE="fs_ce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/pascal_context/" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/HRNet_W48_C_ssld_pretrained.pth" +MAX_ITERS=60000 + + +if [ "$1"x == "train"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --distributed \ + --pretrained ${PRETRAINED_MODEL} \ + --test_interval 10000 \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + python -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ${CHECKPOINTS_ROOT}/checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + +# cd lib/metrics +# ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ +# --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ +# --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} 
\ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_train.sh new file mode 100644 index 0000000..7c567e2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_h_48_d_4_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="hrnet48" + +CONFIGS="configs/pascal_context/H_48_D_4.json" +CONFIGS_TEST="configs/pascal_context/H_48_D_4_TEST.json" + +MODEL_NAME="hrnet_w48" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hrnetv2_w48_imagenet_pretrained.pth" +MAX_ITERS=60000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; 
then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_aspocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_aspocr_train.sh new file mode 100644 index 0000000..ba7b476 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_aspocr_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/pascal_context/R_101_D_8.json" +CONFIGS_TEST="configs/pascal_context/R_101_D_8_TEST.json" + +MODEL_NAME="spatial_asp_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=30000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd 
lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_baseoc_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_baseoc_train.sh new file mode 100644 index 0000000..dba4009 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_baseoc_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/pascal_context/R_101_D_8.json" +CONFIGS_TEST="configs/pascal_context/R_101_D_8_TEST.json" + +MODEL_NAME="base_ocnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=30000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 
--phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_deeplabv3_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_deeplabv3_train.sh new file mode 100644 index 0000000..ede09a0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_deeplabv3_train.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/pascalcontext" +SAVE_DIR="${SCRATCH_ROOT}/seg_results/pascal_context/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/cityscapes/R_101_D_8.json" +CONFIGS_TEST="configs/cityscapes/R_101_D_8_TEST.json" + +MODEL_NAME="deeplab_v3" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_ROOT="${SCRATCH_ROOT}/pascal_context/" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="${SCRATCH_ROOT}/logs/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="${ASSET_ROOT}/resnet101-imagenet.pth" +MAX_ITERS=60000 +BATCH_SIZE=16 + +if [ "$1"x == "train"x ]; then + python -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_root ${CHECKPOINTS_ROOT} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --train_batch_size ${BATCH_SIZE} \ + --distributed \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + python -u main_contrastive.py --configs ${CONFIGS} \ + --drop_last y \ + --phase train \ + --gathered y \ + --loss_balance n \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "val"x ]; then + python -u 
main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --loss_type ${LOSS_TYPE} --test_dir ${DATA_DIR}/val/image \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val --data_dir ${DATA_DIR} + + + cd lib/metrics + python -u cityscapes_evaluator.py --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val/label \ + --gt_dir ${DATA_DIR}/val/label + +elif [ "$1"x == "segfix"x ]; then + if [ "$3"x == "test"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split test \ + --offset ${DATA_ROOT}/cityscapes/test_offset/semantic/offset_hrnext/ + elif [ "$3"x == "val"x ]; then + DIR=${SAVE_DIR}${CHECKPOINTS_NAME}_val/label + echo "Applying SegFix for $DIR" + ${PYTHON} scripts/cityscapes/segfix.py \ + --input $DIR \ + --split val \ + --offset ${DATA_ROOT}/cityscapes/val/offset_pred/semantic/offset_hrnext/ + fi + +elif [ "$1"x == "test"x ]; then + if [ "$5"x == "ss"x ]; then + echo "[single scale] test" + python -u main.py --configs ${CONFIGS} --drop_last y --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ${CHECKPOINTS_ROOT}/checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + python -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/cityscapes/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_gt_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_gt_ocr_train.sh new file mode 100644 index 0000000..087fa05 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_gt_ocr_train.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. 
config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/pascal_context/R_101_D_8.json" +CONFIGS_TEST="configs/pascal_context/R_101_D_8_TEST.json" + +MODEL_NAME="ideal_spatial_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=30000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --use_ground_truth \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --use_ground_truth \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." 
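+    # Usage reminder (descriptive comment, inferred from the argument handling above):
+    # $1 selects the phase (train | resume | val | test), $2 tags the checkpoint and
+    # log names, and for the test phase $3 picks "ss" (single scale) versus the
+    # default multi-scale + flip setting.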
+fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_ocr_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_ocr_train.sh new file mode 100644 index 0000000..406bcdb --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_ocr_train.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile + +# check the enviroment info +nvidia-smi + +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git + +export PYTHONPATH="$PWD":$PYTHONPATH + +DATA_DIR="${DATA_ROOT}/pascal_context" +SAVE_DIR="${DATA_ROOT}/seg_result/pascal_context/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/pascal_context/R_101_D_8.json" +CONFIGS_TEST="configs/pascal_context/R_101_D_8_TEST.json" + +MODEL_NAME="spatial_ocrnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=30000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 
--resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_train.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_train.sh new file mode 100644 index 0000000..8e27bd0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/pascal_context/run_r_101_d_8_train.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ + + +DATA_ROOT=$3 +SCRATCH_ROOT=$4 +ASSET_ROOT=${DATA_ROOT} + +DATA_DIR="${DATA_ROOT}/pascalcontext" +SAVE_DIR="${SCRATCH_ROOT}/seg_results/pascal_context/" +BACKBONE="deepbase_resnet101_dilated8" + +CONFIGS="configs/pascal_context/R_101_D_8.json" +CONFIGS_TEST="configs/pascal_context/R_101_D_8_TEST.json" + +MODEL_NAME="fcnet" +LOSS_TYPE="fs_auxce_loss" +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_"$2 +LOG_FILE="./log/pascal_context/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/resnet101-imagenet.pth" +MAX_ITERS=30000 + + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --nbb_mult 10 \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + 2>&1 | tee -a ${LOG_FILE} + + +elif [ "$1"x == "val"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} \ + --data_dir ${DATA_DIR} \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test \ + --gpu 0 1 2 3 \ + --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/val/image \ + --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms + + cd lib/metrics + ${PYTHON} -u ade20k_evaluator.py --configs ../../${CONFIGS_TEST} \ + --pred_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_val_ms/label \ + --gt_dir ${DATA_DIR}/val/label + + +elif [ "$1"x == "test"x ]; then + if [ "$3"x == "ss"x ]; then + echo "[single scale] test" + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ss + else + echo "[multiple scale + flip] test" + ${PYTHON} -u main.py --configs ${CONFIGS_TEST} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test --gpu 0 
1 2 3 --resume ./checkpoints/pascal_context/${CHECKPOINTS_NAME}_latest.pth \ + --test_dir ${DATA_DIR}/test --log_to_file n \ + --out_dir ${SAVE_DIR}${CHECKPOINTS_NAME}_test_ms + fi + + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/scripts/segfix/run_hx_20_d_2_cityscapes_ade20k.sh b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/segfix/run_hx_20_d_2_cityscapes_ade20k.sh new file mode 100644 index 0000000..08e623a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/scripts/segfix/run_hx_20_d_2_cityscapes_ade20k.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cd $SCRIPTPATH +cd ../../ +. config.profile +# check the enviroment info +nvidia-smi +export PYTHONPATH="$PWD":$PYTHONPATH +${PYTHON} -m pip install yacs +${PYTHON} -m pip install torchcontrib +${PYTHON} -m pip install git+https://github.com/lucasb-eyer/pydensecrf.git +DATA_DIR="${DATA_ROOT}/cityscapes" +SAVE_DIR="${DATA_ROOT}/seg_result/cityscapes/" + +export sscrop=1 + +if [ -z $dt_max_distance ]; then + export dt_max_distance=5 +fi + +echo dt_max_distance: $dt_max_distance + +export dt_num_classes=8 + +echo dt_num_classes: $dt_num_classes + +if [ -z $offset_dir ]; then + offset_dir="offset_gt/dt_offset" +fi + +export offset_dir=$offset_dir +echo offset_dir: $offset_dir + +BACKBONE="hrnet2x20" +CONFIGS="configs/segfix/H_SEGFIX.json" + +MODEL_NAME="segfix_hrnet" +LOSS_TYPE="segfix_loss" +MAX_ITERS=100000 +LR=0.04 +BATCH_SIZE=16 + +CHECKPOINTS_NAME="${MODEL_NAME}_${BACKBONE}_${LOSS_TYPE}_"$2 +LOG_FILE="./log/segfix/${CHECKPOINTS_NAME}.log" +echo "Logging to $LOG_FILE" +mkdir -p `dirname $LOG_FILE` + +PRETRAINED_MODEL="./pretrained_model/hr_rnet_bt_w20_imagenet_pretrained.pth" + +DATA_DIR="${DATA_ROOT}/cityscapes ${DATA_ROOT}/ade20k" +CHILD_CONFIGS="['configs/cityscapes/H_SEGFIX.json', 'configs/ade20k/H_SEGFIX.json']" + +if [ "$1"x == "train"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --base_lr $LR \ + --train_batch_size $BATCH_SIZE \ + --val_batch_size $BATCH_SIZE \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --gpu 0 1 2 3 \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --max_iters ${MAX_ITERS} \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --pretrained ${PRETRAINED_MODEL} \ + --test_interval 1000 \ + \ + child_config_files "${CHILD_CONFIGS}" \ + use_adaptive_transform True \ + 2>&1 | tee ${LOG_FILE} + + +elif [ "$1"x == "resume"x ]; then + ${PYTHON} -u main.py --configs ${CONFIGS} \ + --drop_last y \ + --train_batch_size $BATCH_SIZE \ + --val_batch_size $BATCH_SIZE \ + --phase train \ + --gathered n \ + --loss_balance y \ + --log_to_file n \ + --backbone ${BACKBONE} \ + --model_name ${MODEL_NAME} \ + --max_iters ${MAX_ITERS} \ + --data_dir ${DATA_DIR} \ + --loss_type ${LOSS_TYPE} \ + --gpu 0 1 2 3 \ + --resume_continue y \ + --resume ./checkpoints/segfix/${CHECKPOINTS_NAME}_latest.pth \ + --checkpoints_name ${CHECKPOINTS_NAME} \ + --test_interval 1000 \ + \ + child_config_files "${CHILD_CONFIGS}" \ + use_adaptive_transform True \ + 2>&1 | tee -a ${LOG_FILE} + +elif [ "$1"x == "test_offset"x ]; then + if [ -z "$3" ]; then + CKPT=./checkpoints/segfix/${CHECKPOINTS_NAME}_latest.pth + else + CKPT=$3 + fi + + OUT_DIR=$PWD/segfix_pred/cityscapes/semantic/offset_${BACKBONE}_joint/ + mkdir -p ${OUT_DIR} + DATA_DIR="/msravcshare/dataset/cityscapes" + CONFIGS="configs/cityscapes/H_48_D_4_DT_OFFSET.json" 
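+    # The two commands below export boundary-offset predictions from the jointly
+    # trained SegFix model: first for the Cityscapes val split (test.sscrop True),
+    # then for ADE20K val (diverse-size inputs, test.sscrop False). Outputs are
+    # written under segfix_pred/<dataset>/semantic/offset_${BACKBONE}_joint/.
+    # Note: the Cityscapes command resumes from ".${CKPT}" (leading dot); with the
+    # default CKPT=./checkpoints/... this expands to ../checkpoints/..., which looks
+    # unintended -- the ADE20K command below uses plain ${CKPT}.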
+ + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume .${CKPT} \ + --log_to_file n --out_dir ${OUT_DIR} \ + --loss_type $LOSS_TYPE --data_dir ${DATA_DIR} \ + test.eval_set val \ + test.sscrop True + + #################################################################### + + OUT_DIR=$PWD/segfix_pred/ade20k/semantic/offset_${BACKBONE}_joint/ + mkdir -p ${OUT_DIR} + DATA_DIR="/msravcshare/dataset/ade20k" + CONFIGS="configs/ade20k/H_48_D_4_DT_OFFSET.json" + + ${PYTHON} -u main.py --configs ${CONFIGS} --drop_last y \ + --backbone ${BACKBONE} --model_name ${MODEL_NAME} --checkpoints_name ${CHECKPOINTS_NAME} \ + --phase test_offset --gpu 0 1 2 3 --resume ${CKPT} \ + --log_to_file n --out_dir ${OUT_DIR} \ + --data_dir ${DATA_DIR} \ + --loss_type $LOSS_TYPE \ + val.data_transformer.size_mode diverse_size \ + test.eval_set val \ + test.sscrop False + +else + echo "$1"x" is invalid..." +fi diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tester.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tester.py new file mode 100644 index 0000000..d1e7785 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tester.py @@ -0,0 +1,1283 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret, LayneH, Donny You +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import timeit +import pdb +import cv2 +import scipy +import collections + +import torch +import numpy as np +import torch.nn as nn +import torch.nn.functional as F + +from models.protoseg_core.lib.utils.helpers.file_helper import FileHelper +from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper +from models.protoseg_core.lib.utils.tools.average_meter import AverageMeter +from models.protoseg_core.lib.datasets.data_loader import DataLoader +from models.protoseg_core.lib.loss.loss_manager import LossManager +from models.protoseg_core.lib.models.model_manager import ModelManager +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.metrics.running_score import RunningScore +from models.protoseg_core.lib.vis.seg_visualizer import SegVisualizer +from models.protoseg_core.lib.vis.palette import get_cityscapes_colors, get_ade_colors, get_lip_colors, get_camvid_colors +from models.protoseg_core.lib.vis.palette import get_pascal_context_colors, get_cocostuff_colors, get_pascal_voc_colors, get_autonue21_colors +from models.protoseg_core.segmentor.tools.module_runner import ModuleRunner +from models.protoseg_core.segmentor.tools.optim_scheduler import OptimScheduler +from scipy import ndimage +from PIL import Image +from math import ceil + + +class Tester_inference(object): + """ + The class for Pose Estimation. Include train, val, val & predict. 
+ """ + + def __init__(self, configer): + self.configer = configer + self.batch_time = AverageMeter() + self.data_time = AverageMeter() + self.seg_visualizer = SegVisualizer(configer) + self.loss_manager = LossManager(configer) + self.module_runner = ModuleRunner(configer) + self.model_manager = ModelManager(configer) + self.optim_scheduler = OptimScheduler(configer) + self.seg_data_loader = DataLoader(configer) + self.save_dir = self.configer.get('test', 'out_dir') + self.seg_net = None + self.test_loader = None + self.test_size = None + self.infer_time = 0 + self.infer_cnt = 0 + self._init_model() + + def _init_model(self): + self.seg_net = self.model_manager.semantic_segmentor() + self.seg_net = self.module_runner.load_net(self.seg_net) + + if 'test' in self.save_dir: + self.test_loader = self.seg_data_loader.get_testloader() + self.test_size = len(self.test_loader) * self.configer.get('test', 'batch_size') + else: + self.test_loader = self.seg_data_loader.get_valloader() + self.test_size = len(self.test_loader) * self.configer.get('val', 'batch_size') + + self.seg_net.eval() + + def __relabel(self, label_map): + height, width = label_map.shape + label_dst = np.zeros((height, width), dtype=np.uint8) + for i in range(self.configer.get('data', 'num_classes')): + label_dst[label_map == i] = self.configer.get('data', 'label_list')[i] + + label_dst = np.array(label_dst, dtype=np.uint8) + + return label_dst + + def test_deep_exemplar(self, I_list=None): + """ + Validation function during the train phase. + """ + self.seg_net.eval() + start_time = time.time() + image_id = 0 + + Log.info('save dir {}'.format(self.save_dir)) + FileHelper.make_dirs(self.save_dir, is_file=False) + + if self.configer.get('dataset') in ['cityscapes', 'gta5', 'woodscape']: + colors = get_cityscapes_colors() + elif self.configer.get('dataset') == 'ade20k': + colors = get_ade_colors() + elif self.configer.get('dataset') == 'lip': + colors = get_lip_colors() + elif self.configer.get('dataset') == 'pascal_context': + colors = get_pascal_context_colors() + elif self.configer.get('dataset') == 'pascal_voc': + colors = get_pascal_voc_colors() + elif self.configer.get('dataset') == 'coco_stuff': + colors = get_cocostuff_colors() + elif self.configer.get('dataset') == 'camvid': + colors = get_camvid_colors() + elif self.configer.get('dataset') == 'autonue21': + colors = get_autonue21_colors() + else: + raise RuntimeError("Unsupport colors") + + save_prob = False + if self.configer.get('test', 'save_prob'): + save_prob = self.configer.get('test', 'save_prob') + + def softmax(X, axis=0): + max_prob = np.max(X, axis=axis, keepdims=True) + X -= max_prob + X = np.exp(X) + sum_prob = np.sum(X, axis=axis, keepdims=True) + X /= sum_prob + return X + n = I_list.shape[0] + + inputs = I_list[:, :,:,:] + names = range(n) + with torch.no_grad(): + outputs = self.ss_test(inputs) + return outputs + # print(outputs.shape);assert 1==0 + + # for j, input in enumerate(I_list): + for j in range(n): + # inputs = data_dict['img'] + # names = data_dict['name'] + # metas = data_dict['meta'] + + input = I_list[j, :,:,:].unsqueeze(0) + names = range(n) + + # print(inputs.shape) + # print(torch.max(inputs), torch.min(inputs)) + # assert 1==0 + + # if 'subfolder' in data_dict: + # subfolder = data_dict['subfolder'] + + # if '/val/' in self.save_dir: #and os.environ.get('save_gt_label'): + # labels = data_dict['labelmap'] + + with torch.no_grad(): + # Forward pass. 
+ # if self.configer.exists('data', 'use_offset') and self.configer.get('data', 'use_offset') == 'offline': + # offset_h_maps = data_dict['offsetmap_h'] + # offset_w_maps = data_dict['offsetmap_w'] + # outputs = self.offset_test(inputs, offset_h_maps, offset_w_maps) + # elif self.configer.get('test', 'mode') == 'ss_test': + # outputs = self.ss_test(inputs) + # elif self.configer.get('test', 'mode') == 'ms_test': + # outputs = self.ms_test(inputs) + # elif self.configer.get('test', 'mode') == 'ms_test_depth': + # outputs = self.ms_test_depth(inputs, names) + # elif self.configer.get('test', 'mode') == 'sscrop_test': + # crop_size = self.configer.get('test', 'crop_size') + # outputs = self.sscrop_test(inputs, crop_size) + # elif self.configer.get('test', 'mode') == 'mscrop_test': + # crop_size = self.configer.get('test', 'crop_size') + # outputs = self.mscrop_test(inputs, crop_size) + # elif self.configer.get('test', 'mode') == 'crf_ss_test': + # import pydensecrf.densecrf as dcrf + # import pydensecrf.utils as dcrf_utils + # outputs = self.ss_test(inputs) + # outputs = self.dense_crf_process(inputs, outputs) + + outputs = self.ss_test(input) + print(outputs.shape);assert 1==0 + if isinstance(outputs, torch.Tensor): + outputs = outputs.permute(0, 2, 3, 1).cpu().numpy() + n = outputs.shape[0] + else: + outputs = [output.permute(0, 2, 3, 1).cpu().numpy().squeeze() for output in outputs] + n = len(outputs) + + for k in range(n): + image_id += 1 + # ori_img_size = metas[k]['ori_img_size'] + # border_size = metas[k]['border_size'] + # logits = cv2.resize(outputs[k][:border_size[1], :border_size[0]], + # tuple(ori_img_size), interpolation=cv2.INTER_CUBIC) + + logits = outputs[k] + + # save the logits map + if self.configer.get('test', 'save_prob'): + prob_path = os.path.join(self.save_dir, "prob/", '{}.npy'.format(names[k])) + FileHelper.make_dirs(prob_path, is_file=True) + np.save(prob_path, softmax(logits, axis=-1)) + + label_img = np.asarray(np.argmax(logits, axis=-1), dtype=np.uint8) + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', + 'reduce_zero_label'): + label_img = label_img + 1 + label_img = label_img.astype(np.uint8) + if self.configer.exists('data', 'label_list'): + label_img_ = self.__relabel(label_img) + else: + label_img_ = label_img + label_img_ = Image.fromarray(label_img_, 'P') + Log.info('{:4d}/{:4d} label map generated'.format(image_id, self.test_size)) + if 'subfolder' not in data_dict or len(subfolder[k]) == 0: + label_path = os.path.join(self.save_dir, "label/", '{}.png'.format(names[k])) + else: + label_path = os.path.join(self.save_dir, "label/", '{}/{}.png'.format(subfolder[k], names[k])) + + FileHelper.make_dirs(label_path, is_file=True) + ImageHelper.save(label_img_, label_path) + + # colorize the label-map + if os.environ.get('save_gt_label'): + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data','reduce_zero_label'): + label_img = labels[k] + label_img = np.asarray(label_img, dtype=np.uint8) + color_img_ = Image.fromarray(label_img) + color_img_.putpalette(colors) + vis_path = os.path.join(self.save_dir, "gt_vis/", '{}.png'.format(names[k])) + FileHelper.make_dirs(vis_path, is_file=True) + ImageHelper.save(color_img_, save_path=vis_path) + else: + color_img_ = Image.fromarray(label_img) + color_img_.putpalette(colors) + vis_path = os.path.join(self.save_dir, "vis/", '{}.png'.format(names[k])) + FileHelper.make_dirs(vis_path, is_file=True) + ImageHelper.save(color_img_, save_path=vis_path) + + 
self.batch_time.update(time.time() - start_time) + start_time = time.time() + + # Print the log info & reset the states. + Log.info('Test Time {batch_time.sum:.3f}s'.format(batch_time=self.batch_time)) + + + + def test(self, data_loader=None): + """ + Validation function during the train phase. + """ + self.seg_net.eval() + start_time = time.time() + image_id = 0 + + Log.info('save dir {}'.format(self.save_dir)) + FileHelper.make_dirs(self.save_dir, is_file=False) + + if self.configer.get('dataset') in ['cityscapes', 'gta5', 'woodscape']: + colors = get_cityscapes_colors() + elif self.configer.get('dataset') == 'ade20k': + colors = get_ade_colors() + elif self.configer.get('dataset') == 'lip': + colors = get_lip_colors() + elif self.configer.get('dataset') == 'pascal_context': + colors = get_pascal_context_colors() + elif self.configer.get('dataset') == 'pascal_voc': + colors = get_pascal_voc_colors() + elif self.configer.get('dataset') == 'coco_stuff': + colors = get_cocostuff_colors() + elif self.configer.get('dataset') == 'camvid': + colors = get_camvid_colors() + elif self.configer.get('dataset') == 'autonue21': + colors = get_autonue21_colors() + else: + raise RuntimeError("Unsupport colors") + + save_prob = False + if self.configer.get('test', 'save_prob'): + save_prob = self.configer.get('test', 'save_prob') + + def softmax(X, axis=0): + max_prob = np.max(X, axis=axis, keepdims=True) + X -= max_prob + X = np.exp(X) + sum_prob = np.sum(X, axis=axis, keepdims=True) + X /= sum_prob + return X + + for j, data_dict in enumerate(self.test_loader): + inputs = data_dict['img'] + names = data_dict['name'] + metas = data_dict['meta'] + + # print(inputs.shape) + # print(torch.max(inputs), torch.min(inputs)) + # assert 1==0 + + if 'subfolder' in data_dict: + subfolder = data_dict['subfolder'] + + if '/val/' in self.save_dir: #and os.environ.get('save_gt_label'): + labels = data_dict['labelmap'] + + with torch.no_grad(): + # Forward pass. 
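+                # Dispatch on the configured test mode: offline offset maps take
+                # priority when data.use_offset == 'offline'; otherwise choose
+                # single-scale (ss_test), multi-scale (ms_test / ms_test_depth),
+                # sliding-crop (sscrop_test / mscrop_test), or DenseCRF-refined
+                # single-scale inference (crf_ss_test).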
+ if self.configer.exists('data', 'use_offset') and self.configer.get('data', 'use_offset') == 'offline': + offset_h_maps = data_dict['offsetmap_h'] + offset_w_maps = data_dict['offsetmap_w'] + outputs = self.offset_test(inputs, offset_h_maps, offset_w_maps) + elif self.configer.get('test', 'mode') == 'ss_test': + outputs = self.ss_test(inputs) + elif self.configer.get('test', 'mode') == 'ms_test': + outputs = self.ms_test(inputs) + elif self.configer.get('test', 'mode') == 'ms_test_depth': + outputs = self.ms_test_depth(inputs, names) + elif self.configer.get('test', 'mode') == 'sscrop_test': + crop_size = self.configer.get('test', 'crop_size') + outputs = self.sscrop_test(inputs, crop_size) + elif self.configer.get('test', 'mode') == 'mscrop_test': + crop_size = self.configer.get('test', 'crop_size') + outputs = self.mscrop_test(inputs, crop_size) + elif self.configer.get('test', 'mode') == 'crf_ss_test': + import pydensecrf.densecrf as dcrf + import pydensecrf.utils as dcrf_utils + outputs = self.ss_test(inputs) + outputs = self.dense_crf_process(inputs, outputs) + + if isinstance(outputs, torch.Tensor): + outputs = outputs.permute(0, 2, 3, 1).cpu().numpy() + n = outputs.shape[0] + else: + outputs = [output.permute(0, 2, 3, 1).cpu().numpy().squeeze() for output in outputs] + n = len(outputs) + + for k in range(n): + image_id += 1 + ori_img_size = metas[k]['ori_img_size'] + border_size = metas[k]['border_size'] + logits = cv2.resize(outputs[k][:border_size[1], :border_size[0]], + tuple(ori_img_size), interpolation=cv2.INTER_CUBIC) + + # save the logits map + if self.configer.get('test', 'save_prob'): + prob_path = os.path.join(self.save_dir, "prob/", '{}.npy'.format(names[k])) + FileHelper.make_dirs(prob_path, is_file=True) + np.save(prob_path, softmax(logits, axis=-1)) + + label_img = np.asarray(np.argmax(logits, axis=-1), dtype=np.uint8) + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', + 'reduce_zero_label'): + label_img = label_img + 1 + label_img = label_img.astype(np.uint8) + if self.configer.exists('data', 'label_list'): + label_img_ = self.__relabel(label_img) + else: + label_img_ = label_img + label_img_ = Image.fromarray(label_img_, 'P') + Log.info('{:4d}/{:4d} label map generated'.format(image_id, self.test_size)) + if 'subfolder' not in data_dict or len(subfolder[k]) == 0: + label_path = os.path.join(self.save_dir, "label/", '{}.png'.format(names[k])) + else: + label_path = os.path.join(self.save_dir, "label/", '{}/{}.png'.format(subfolder[k], names[k])) + + FileHelper.make_dirs(label_path, is_file=True) + ImageHelper.save(label_img_, label_path) + + # colorize the label-map + if os.environ.get('save_gt_label'): + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data','reduce_zero_label'): + label_img = labels[k] + label_img = np.asarray(label_img, dtype=np.uint8) + color_img_ = Image.fromarray(label_img) + color_img_.putpalette(colors) + vis_path = os.path.join(self.save_dir, "gt_vis/", '{}.png'.format(names[k])) + FileHelper.make_dirs(vis_path, is_file=True) + ImageHelper.save(color_img_, save_path=vis_path) + else: + color_img_ = Image.fromarray(label_img) + color_img_.putpalette(colors) + vis_path = os.path.join(self.save_dir, "vis/", '{}.png'.format(names[k])) + FileHelper.make_dirs(vis_path, is_file=True) + ImageHelper.save(color_img_, save_path=vis_path) + + # # visualize + # from lib.datasets.tools.transforms import DeNormalize + # mean = self.configer.get('normalize', 'mean') + # std = 
self.configer.get('normalize', 'std') + # div_value = self.configer.get('normalize', 'div_value') + # org_img = DeNormalize(div_value, mean, std)(inputs[k]) + # org_img = org_img.permute(1, 2, 0).cpu().numpy().astype(np.uint8) + # org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB) + + + # # colorize the label-map + # if os.environ.get('save_gt_label'): + # if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', + # 'reduce_zero_label'): + # label_img = labels[k] + 1 + # label_img = np.asarray(label_img, dtype=np.uint8) + # + # label_img = cv2.resize(label_img, (org_img.shape[1], org_img.shape[0]), + # interpolation=cv2.INTER_NEAREST) + # color_img_ = Image.fromarray(label_img) + # color_img_.putpalette(colors) + # color_img_ = np.asarray(color_img_.convert('RGB'), np.uint8) + # + # sys_img_part = cv2.addWeighted(org_img, 0.5, color_img_, 0.5, 0.0) + # sys_img_part = cv2.cvtColor(sys_img_part, cv2.COLOR_RGB2BGR) + # + # for i in range(0, 200): + # mask = np.zeros_like(label_img) + # mask[label_img == i] = 1 + # + # contours = cv2.findContours(mask.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2] + # cv2.drawContours(sys_img_part, contours, -1, (255, 255, 255), + # 1, cv2.LINE_AA) + # + # vis_path = os.path.join(self.save_dir, "gt_vis_overlay/", '{}.png'.format(names[k])) + # FileHelper.make_dirs(vis_path, is_file=True) + # ImageHelper.save(sys_img_part, save_path=vis_path) + # + # else: + # label_img = cv2.resize(label_img, (org_img.shape[1], org_img.shape[0]), interpolation=cv2.INTER_NEAREST) + # color_img_ = Image.fromarray(label_img) + # color_img_.putpalette(colors) + # color_img_ = np.asarray(color_img_.convert('RGB'), np.uint8) + # + # sys_img_part = cv2.addWeighted(org_img, 0.5, color_img_, 0.5, 0.0) + # + # sys_img_part = cv2.cvtColor(sys_img_part, cv2.COLOR_RGB2BGR) + # + # for i in range(0, 200): + # mask = np.zeros_like(label_img) + # mask[label_img == i] = 1 + # + # contours = cv2.findContours(mask.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2] + # cv2.drawContours(sys_img_part, contours, -1, (255, 255, 255), + # 1, cv2.LINE_AA) + # + # vis_path = os.path.join(self.save_dir, "vis_overlay/", '{}.png'.format(names[k])) + # FileHelper.make_dirs(vis_path, is_file=True) + # ImageHelper.save(sys_img_part, save_path=vis_path) + + self.batch_time.update(time.time() - start_time) + start_time = time.time() + + # Print the log info & reset the states. 
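+ # batch_time accumulates per-batch wall-clock time (data loading, forward pass and result
+ # writing), so the summed value reported below is the total time spent on the whole split.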
+ Log.info('Test Time {batch_time.sum:.3f}s'.format(batch_time=self.batch_time)) + + + def offset_test(self, inputs, offset_h_maps, offset_w_maps, scale=1): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + start = timeit.default_timer() + outputs = self.seg_net.forward(inputs, offset_h_maps, offset_w_maps) + torch.cuda.synchronize() + end = timeit.default_timer() + + if (self.configer.get('loss', 'loss_type') == "fs_auxce_loss") or ( + self.configer.get('loss', 'loss_type') == "triple_auxce_loss"): + outputs = outputs[-1] + elif self.configer.get('loss', 'loss_type') == "pyramid_auxce_loss": + outputs = outputs[1] + outputs[2] + outputs[3] + outputs[4] + + outputs = F.interpolate(outputs, size=(h, w), mode='bilinear', align_corners=True) + return outputs + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def ss_test(self, inputs, scale=1): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + scaled_inputs = F.interpolate(inputs, size=(int(h * scale), int(w * scale)), mode="bilinear", + align_corners=True) + start = timeit.default_timer() + outputs = self.seg_net.forward(scaled_inputs) + torch.cuda.synchronize() + end = timeit.default_timer() + + if isinstance(outputs, list): + outputs = outputs[-1] + elif isinstance(outputs, dict): + outputs = outputs['seg'] + elif isinstance(outputs, tuple): + outputs = outputs[-1] + outputs = F.interpolate(outputs, size=(h, w), mode='bilinear', align_corners=True) + return outputs + elif isinstance(inputs, collections.Sequence): + device_ids = self.configer.get('gpu') + replicas = nn.parallel.replicate(self.seg_net.module, device_ids) + scaled_inputs, ori_size, outputs = [], [], [] + for i, d in zip(inputs, device_ids): + h, w = i.size(1), i.size(2) + ori_size.append((h, w)) + i = F.interpolate(i.unsqueeze(0), size=(int(h * scale), int(w * scale)), mode="bilinear", + align_corners=True) + scaled_inputs.append(i.cuda(d, non_blocking=True)) + scaled_outputs = nn.parallel.parallel_apply(replicas[:len(scaled_inputs)], scaled_inputs) + for i, output in enumerate(scaled_outputs): + outputs.append(F.interpolate(output[-1], size=ori_size[i], mode='bilinear', align_corners=True)) + return outputs + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def flip(self, x, dim): + indices = [slice(None)] * x.dim() + indices[dim] = torch.arange(x.size(dim) - 1, -1, -1, + dtype=torch.long, device=x.device) + return x[tuple(indices)] + + def sscrop_test(self, inputs, crop_size, scale=1): + ''' + Currently, sscrop_test does not support diverse_size testing + ''' + n, c, ori_h, ori_w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + scaled_inputs = F.interpolate(inputs, size=(int(ori_h * scale), int(ori_w * scale)), mode="bilinear", + align_corners=True) + n, c, h, w = scaled_inputs.size(0), scaled_inputs.size(1), scaled_inputs.size(2), scaled_inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + count_predictions = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + + crop_counter = 0 + + height_starts = self._decide_intersection(h, crop_size[0]) + width_starts = self._decide_intersection(w, crop_size[1]) + + for height in height_starts: + for width in width_starts: + crop_inputs = scaled_inputs[:, :, height:height + crop_size[0], width:width + crop_size[1]] + prediction = 
self.ss_test(crop_inputs) + count_predictions[:, :, height:height + crop_size[0], width:width + crop_size[1]] += 1 + full_probs[:, :, height:height + crop_size[0], width:width + crop_size[1]] += prediction + crop_counter += 1 + Log.info('predicting {:d}-th crop'.format(crop_counter)) + + full_probs /= count_predictions + full_probs = F.interpolate(full_probs, size=(ori_h, ori_w), mode='bilinear', align_corners=True) + return full_probs + + def ms_test(self, inputs): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + if self.configer.exists('test', 'scale_weights'): + for scale, weight in zip(self.configer.get('test', 'scale_search'), + self.configer.get('test', 'scale_weights')): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += weight * probs + return full_probs + else: + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += probs + return full_probs + + elif isinstance(inputs, collections.Sequence): + device_ids = self.configer.get('gpu') + full_probs = [torch.zeros(1, self.configer.get('data', 'num_classes'), + i.size(1), i.size(2)).cuda(device_ids[index], non_blocking=True) + for index, i in enumerate(inputs)] + flip_inputs = [self.flip(i, 2) for i in inputs] + + if self.configer.exists('test', 'scale_weights'): + for scale, weight in zip(self.configer.get('test', 'scale_search'), + self.configer.get('test', 'scale_weights')): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(flip_inputs, scale) + for i in range(len(inputs)): + full_probs[i] += weight * (probs[i] + self.flip(flip_probs[i], 3)) + return full_probs + else: + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(flip_inputs, scale) + for i in range(len(inputs)): + full_probs[i] += (probs[i] + self.flip(flip_probs[i], 3)) + return full_probs + + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def ms_test_depth(self, inputs, names): + prob_list = [] + scale_list = [] + + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + prob_list.append(probs) + scale_list.append(scale) + + full_probs = self.fuse_with_depth(prob_list, scale_list, names) + return full_probs + + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def fuse_with_depth(self, probs, scales, names): + MAX_DEPTH = 63 + POWER_BASE = 0.8 + if 'test' in self.save_dir: + stereo_path = "/msravcshare/dataset/cityscapes/stereo/test/" + else: + stereo_path = "/msravcshare/dataset/cityscapes/stereo/val/" + + n, c, h, w = probs[0].size(0), probs[0].size(1), probs[0].size(2), probs[0].size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + + for index, name in enumerate(names): + stereo_map = 
cv2.imread(stereo_path + name + '.png', -1) + depth_map = stereo_map / 256.0 + depth_map = 0.5 / depth_map + depth_map = 500 * depth_map + + depth_map = np.clip(depth_map, 0, MAX_DEPTH) + depth_map = depth_map // (MAX_DEPTH // len(scales)) + + for prob, scale in zip(probs, scales): + scale_index = self._locate_scale_index(scale, scales) + weight_map = np.abs(depth_map - scale_index) + weight_map = np.power(POWER_BASE, weight_map) + weight_map = cv2.resize(weight_map, (w, h)) + full_probs[index, :, :, :] += torch.from_numpy(np.expand_dims(weight_map, axis=0)).type( + torch.cuda.FloatTensor) * prob[index, :, :, :] + + return full_probs + + @staticmethod + def _locate_scale_index(scale, scales): + for idx, s in enumerate(scales): + if scale == s: + return idx + return 0 + + def ms_test_wo_flip(self, inputs): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + full_probs += probs + return full_probs + elif isinstance(inputs, collections.Sequence): + device_ids = self.configer.get('gpu') + full_probs = [torch.zeros(1, self.configer.get('data', 'num_classes'), + i.size(1), i.size(2)).cuda(device_ids[index], non_blocking=True) + for index, i, in enumerate(inputs)] + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + for i in range(len(inputs)): + full_probs[i] += probs[i] + return full_probs + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def mscrop_test(self, inputs, crop_size): + ''' + Currently, mscrop_test does not support diverse_size testing + ''' + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + for scale in self.configer.get('test', 'scale_search'): + Log.info('Scale {0:.2f} prediction'.format(scale)) + if scale < 1: + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += probs + else: + probs = self.sscrop_test(inputs, crop_size, scale) + flip_probs = self.sscrop_test(self.flip(inputs, 3), crop_size, scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += probs + return full_probs + + def _decide_intersection(self, total_length, crop_length): + stride = crop_length + times = (total_length - crop_length) // stride + 1 + cropped_starting = [] + for i in range(times): + cropped_starting.append(stride * i) + if total_length - cropped_starting[-1] > crop_length: + cropped_starting.append(total_length - crop_length) # must cover the total image + return cropped_starting + + def dense_crf_process(self, images, outputs): + ''' + Reference: https://github.com/kazuto1011/deeplab-pytorch/blob/master/libs/utils/crf.py + ''' + # hyperparameters of the dense crf + # baseline = 79.5 + # bi_xy_std = 67, 79.1 + # bi_xy_std = 20, 79.6 + # bi_xy_std = 10, 79.7 + # bi_xy_std = 10, iter_max = 20, v4 79.7 + # bi_xy_std = 10, iter_max = 5, v5 79.7 + # bi_xy_std = 5, v3 79.7 + iter_max = 10 + pos_w = 3 + pos_xy_std = 1 + bi_w = 4 + bi_xy_std = 10 + bi_rgb_std = 3 + + b = images.size(0) + mean_vector = np.expand_dims(np.expand_dims(np.transpose(np.array([102.9801, 115.9465, 122.7717])), axis=1), + axis=2) + outputs = F.softmax(outputs, dim=1) + for 
i in range(b): + unary = outputs[i].data.cpu().numpy() + C, H, W = unary.shape + unary = dcrf_utils.unary_from_softmax(unary) + unary = np.ascontiguousarray(unary) + + image = np.ascontiguousarray(images[i]) + mean_vector + image = image.astype(np.ubyte) + image = np.ascontiguousarray(image.transpose(1, 2, 0)) + + d = dcrf.DenseCRF2D(W, H, C) + d.setUnaryEnergy(unary) + d.addPairwiseGaussian(sxy=pos_xy_std, compat=pos_w) + d.addPairwiseBilateral(sxy=bi_xy_std, srgb=bi_rgb_std, rgbim=image, compat=bi_w) + out_crf = np.array(d.inference(iter_max)) + outputs[i] = torch.from_numpy(out_crf).cuda().view(C, H, W) + + return outputs + + + +class Tester(object): + """ + The class for Pose Estimation. Include train, val, val & predict. + """ + + def __init__(self, configer): + self.configer = configer + self.batch_time = AverageMeter() + self.data_time = AverageMeter() + self.seg_visualizer = SegVisualizer(configer) + self.loss_manager = LossManager(configer) + self.module_runner = ModuleRunner(configer) + self.model_manager = ModelManager(configer) + self.optim_scheduler = OptimScheduler(configer) + self.seg_data_loader = DataLoader(configer) + self.save_dir = self.configer.get('test', 'out_dir') + self.seg_net = None + self.test_loader = None + self.test_size = None + self.infer_time = 0 + self.infer_cnt = 0 + self._init_model() + + def _init_model(self): + self.seg_net = self.model_manager.semantic_segmentor() + self.seg_net = self.module_runner.load_net(self.seg_net) + + if 'test' in self.save_dir: + self.test_loader = self.seg_data_loader.get_testloader() + self.test_size = len(self.test_loader) * self.configer.get('test', 'batch_size') + else: + self.test_loader = self.seg_data_loader.get_valloader() + self.test_size = len(self.test_loader) * self.configer.get('val', 'batch_size') + + self.seg_net.eval() + + def __relabel(self, label_map): + height, width = label_map.shape + label_dst = np.zeros((height, width), dtype=np.uint8) + for i in range(self.configer.get('data', 'num_classes')): + label_dst[label_map == i] = self.configer.get('data', 'label_list')[i] + + label_dst = np.array(label_dst, dtype=np.uint8) + + return label_dst + + def test(self, data_loader=None): + """ + Validation function during the train phase. 
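+ Runs the configured test/val split through the segmentation network and writes label maps,
+ color visualizations and (optionally) softmax probability maps under the output directory.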
+ """ + self.seg_net.eval() + start_time = time.time() + image_id = 0 + + Log.info('save dir {}'.format(self.save_dir)) + FileHelper.make_dirs(self.save_dir, is_file=False) + + if self.configer.get('dataset') in ['cityscapes', 'gta5', 'woodscape']: + colors = get_cityscapes_colors() + elif self.configer.get('dataset') == 'ade20k': + colors = get_ade_colors() + elif self.configer.get('dataset') == 'lip': + colors = get_lip_colors() + elif self.configer.get('dataset') == 'pascal_context': + colors = get_pascal_context_colors() + elif self.configer.get('dataset') == 'pascal_voc': + colors = get_pascal_voc_colors() + elif self.configer.get('dataset') == 'coco_stuff': + colors = get_cocostuff_colors() + elif self.configer.get('dataset') == 'camvid': + colors = get_camvid_colors() + elif self.configer.get('dataset') == 'autonue21': + colors = get_autonue21_colors() + else: + raise RuntimeError("Unsupport colors") + + save_prob = False + if self.configer.get('test', 'save_prob'): + save_prob = self.configer.get('test', 'save_prob') + + def softmax(X, axis=0): + max_prob = np.max(X, axis=axis, keepdims=True) + X -= max_prob + X = np.exp(X) + sum_prob = np.sum(X, axis=axis, keepdims=True) + X /= sum_prob + return X + + for j, data_dict in enumerate(self.test_loader): + inputs = data_dict['img'] + names = data_dict['name'] + metas = data_dict['meta'] + + # print(inputs.shape) + # print(torch.max(inputs), torch.min(inputs)) + # assert 1==0 + + if 'subfolder' in data_dict: + subfolder = data_dict['subfolder'] + + if '/val/' in self.save_dir: #and os.environ.get('save_gt_label'): + labels = data_dict['labelmap'] + + with torch.no_grad(): + # Forward pass. + if self.configer.exists('data', 'use_offset') and self.configer.get('data', 'use_offset') == 'offline': + offset_h_maps = data_dict['offsetmap_h'] + offset_w_maps = data_dict['offsetmap_w'] + outputs = self.offset_test(inputs, offset_h_maps, offset_w_maps) + elif self.configer.get('test', 'mode') == 'ss_test': + outputs = self.ss_test(inputs) + elif self.configer.get('test', 'mode') == 'ms_test': + outputs = self.ms_test(inputs) + elif self.configer.get('test', 'mode') == 'ms_test_depth': + outputs = self.ms_test_depth(inputs, names) + elif self.configer.get('test', 'mode') == 'sscrop_test': + crop_size = self.configer.get('test', 'crop_size') + outputs = self.sscrop_test(inputs, crop_size) + elif self.configer.get('test', 'mode') == 'mscrop_test': + crop_size = self.configer.get('test', 'crop_size') + outputs = self.mscrop_test(inputs, crop_size) + elif self.configer.get('test', 'mode') == 'crf_ss_test': + import pydensecrf.densecrf as dcrf + import pydensecrf.utils as dcrf_utils + outputs = self.ss_test(inputs) + outputs = self.dense_crf_process(inputs, outputs) + + if isinstance(outputs, torch.Tensor): + outputs = outputs.permute(0, 2, 3, 1).cpu().numpy() + n = outputs.shape[0] + else: + outputs = [output.permute(0, 2, 3, 1).cpu().numpy().squeeze() for output in outputs] + n = len(outputs) + + for k in range(n): + image_id += 1 + ori_img_size = metas[k]['ori_img_size'] + border_size = metas[k]['border_size'] + logits = cv2.resize(outputs[k][:border_size[1], :border_size[0]], + tuple(ori_img_size), interpolation=cv2.INTER_CUBIC) + + # save the logits map + if self.configer.get('test', 'save_prob'): + prob_path = os.path.join(self.save_dir, "prob/", '{}.npy'.format(names[k])) + FileHelper.make_dirs(prob_path, is_file=True) + np.save(prob_path, softmax(logits, axis=-1)) + + label_img = np.asarray(np.argmax(logits, axis=-1), dtype=np.uint8) + if 
self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', + 'reduce_zero_label'): + label_img = label_img + 1 + label_img = label_img.astype(np.uint8) + if self.configer.exists('data', 'label_list'): + label_img_ = self.__relabel(label_img) + else: + label_img_ = label_img + label_img_ = Image.fromarray(label_img_, 'P') + Log.info('{:4d}/{:4d} label map generated'.format(image_id, self.test_size)) + if 'subfolder' not in data_dict or len(subfolder[k]) == 0: + label_path = os.path.join(self.save_dir, "label/", '{}.png'.format(names[k])) + else: + label_path = os.path.join(self.save_dir, "label/", '{}/{}.png'.format(subfolder[k], names[k])) + + FileHelper.make_dirs(label_path, is_file=True) + ImageHelper.save(label_img_, label_path) + + # colorize the label-map + if os.environ.get('save_gt_label'): + if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data','reduce_zero_label'): + label_img = labels[k] + label_img = np.asarray(label_img, dtype=np.uint8) + color_img_ = Image.fromarray(label_img) + color_img_.putpalette(colors) + vis_path = os.path.join(self.save_dir, "gt_vis/", '{}.png'.format(names[k])) + FileHelper.make_dirs(vis_path, is_file=True) + ImageHelper.save(color_img_, save_path=vis_path) + else: + color_img_ = Image.fromarray(label_img) + color_img_.putpalette(colors) + vis_path = os.path.join(self.save_dir, "vis/", '{}.png'.format(names[k])) + FileHelper.make_dirs(vis_path, is_file=True) + ImageHelper.save(color_img_, save_path=vis_path) + + # # visualize + # from lib.datasets.tools.transforms import DeNormalize + # mean = self.configer.get('normalize', 'mean') + # std = self.configer.get('normalize', 'std') + # div_value = self.configer.get('normalize', 'div_value') + # org_img = DeNormalize(div_value, mean, std)(inputs[k]) + # org_img = org_img.permute(1, 2, 0).cpu().numpy().astype(np.uint8) + # org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB) + + + # # colorize the label-map + # if os.environ.get('save_gt_label'): + # if self.configer.exists('data', 'reduce_zero_label') and self.configer.get('data', + # 'reduce_zero_label'): + # label_img = labels[k] + 1 + # label_img = np.asarray(label_img, dtype=np.uint8) + # + # label_img = cv2.resize(label_img, (org_img.shape[1], org_img.shape[0]), + # interpolation=cv2.INTER_NEAREST) + # color_img_ = Image.fromarray(label_img) + # color_img_.putpalette(colors) + # color_img_ = np.asarray(color_img_.convert('RGB'), np.uint8) + # + # sys_img_part = cv2.addWeighted(org_img, 0.5, color_img_, 0.5, 0.0) + # sys_img_part = cv2.cvtColor(sys_img_part, cv2.COLOR_RGB2BGR) + # + # for i in range(0, 200): + # mask = np.zeros_like(label_img) + # mask[label_img == i] = 1 + # + # contours = cv2.findContours(mask.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2] + # cv2.drawContours(sys_img_part, contours, -1, (255, 255, 255), + # 1, cv2.LINE_AA) + # + # vis_path = os.path.join(self.save_dir, "gt_vis_overlay/", '{}.png'.format(names[k])) + # FileHelper.make_dirs(vis_path, is_file=True) + # ImageHelper.save(sys_img_part, save_path=vis_path) + # + # else: + # label_img = cv2.resize(label_img, (org_img.shape[1], org_img.shape[0]), interpolation=cv2.INTER_NEAREST) + # color_img_ = Image.fromarray(label_img) + # color_img_.putpalette(colors) + # color_img_ = np.asarray(color_img_.convert('RGB'), np.uint8) + # + # sys_img_part = cv2.addWeighted(org_img, 0.5, color_img_, 0.5, 0.0) + # + # sys_img_part = cv2.cvtColor(sys_img_part, cv2.COLOR_RGB2BGR) + # + # for i in range(0, 200): + # mask = 
np.zeros_like(label_img) + # mask[label_img == i] = 1 + # + # contours = cv2.findContours(mask.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2] + # cv2.drawContours(sys_img_part, contours, -1, (255, 255, 255), + # 1, cv2.LINE_AA) + # + # vis_path = os.path.join(self.save_dir, "vis_overlay/", '{}.png'.format(names[k])) + # FileHelper.make_dirs(vis_path, is_file=True) + # ImageHelper.save(sys_img_part, save_path=vis_path) + + self.batch_time.update(time.time() - start_time) + start_time = time.time() + + # Print the log info & reset the states. + Log.info('Test Time {batch_time.sum:.3f}s'.format(batch_time=self.batch_time)) + + def offset_test(self, inputs, offset_h_maps, offset_w_maps, scale=1): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + start = timeit.default_timer() + outputs = self.seg_net.forward(inputs, offset_h_maps, offset_w_maps) + torch.cuda.synchronize() + end = timeit.default_timer() + + if (self.configer.get('loss', 'loss_type') == "fs_auxce_loss") or ( + self.configer.get('loss', 'loss_type') == "triple_auxce_loss"): + outputs = outputs[-1] + elif self.configer.get('loss', 'loss_type') == "pyramid_auxce_loss": + outputs = outputs[1] + outputs[2] + outputs[3] + outputs[4] + + outputs = F.interpolate(outputs, size=(h, w), mode='bilinear', align_corners=True) + return outputs + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def ss_test(self, inputs, scale=1): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + scaled_inputs = F.interpolate(inputs, size=(int(h * scale), int(w * scale)), mode="bilinear", + align_corners=True) + start = timeit.default_timer() + outputs = self.seg_net.forward(scaled_inputs) + torch.cuda.synchronize() + end = timeit.default_timer() + + if isinstance(outputs, list): + outputs = outputs[-1] + elif isinstance(outputs, dict): + outputs = outputs['seg'] + elif isinstance(outputs, tuple): + outputs = outputs[-1] + outputs = F.interpolate(outputs, size=(h, w), mode='bilinear', align_corners=True) + return outputs + elif isinstance(inputs, collections.Sequence): + device_ids = self.configer.get('gpu') + replicas = nn.parallel.replicate(self.seg_net.module, device_ids) + scaled_inputs, ori_size, outputs = [], [], [] + for i, d in zip(inputs, device_ids): + h, w = i.size(1), i.size(2) + ori_size.append((h, w)) + i = F.interpolate(i.unsqueeze(0), size=(int(h * scale), int(w * scale)), mode="bilinear", + align_corners=True) + scaled_inputs.append(i.cuda(d, non_blocking=True)) + scaled_outputs = nn.parallel.parallel_apply(replicas[:len(scaled_inputs)], scaled_inputs) + for i, output in enumerate(scaled_outputs): + outputs.append(F.interpolate(output[-1], size=ori_size[i], mode='bilinear', align_corners=True)) + return outputs + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def flip(self, x, dim): + indices = [slice(None)] * x.dim() + indices[dim] = torch.arange(x.size(dim) - 1, -1, -1, + dtype=torch.long, device=x.device) + return x[tuple(indices)] + + def sscrop_test(self, inputs, crop_size, scale=1): + ''' + Currently, sscrop_test does not support diverse_size testing + ''' + n, c, ori_h, ori_w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + scaled_inputs = F.interpolate(inputs, size=(int(ori_h * scale), int(ori_w * scale)), mode="bilinear", + align_corners=True) + n, c, h, w = scaled_inputs.size(0), scaled_inputs.size(1), 
scaled_inputs.size(2), scaled_inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + count_predictions = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + + crop_counter = 0 + + height_starts = self._decide_intersection(h, crop_size[0]) + width_starts = self._decide_intersection(w, crop_size[1]) + + for height in height_starts: + for width in width_starts: + crop_inputs = scaled_inputs[:, :, height:height + crop_size[0], width:width + crop_size[1]] + prediction = self.ss_test(crop_inputs) + count_predictions[:, :, height:height + crop_size[0], width:width + crop_size[1]] += 1 + full_probs[:, :, height:height + crop_size[0], width:width + crop_size[1]] += prediction + crop_counter += 1 + Log.info('predicting {:d}-th crop'.format(crop_counter)) + + full_probs /= count_predictions + full_probs = F.interpolate(full_probs, size=(ori_h, ori_w), mode='bilinear', align_corners=True) + return full_probs + + def ms_test(self, inputs): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + if self.configer.exists('test', 'scale_weights'): + for scale, weight in zip(self.configer.get('test', 'scale_search'), + self.configer.get('test', 'scale_weights')): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += weight * probs + return full_probs + else: + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += probs + return full_probs + + elif isinstance(inputs, collections.Sequence): + device_ids = self.configer.get('gpu') + full_probs = [torch.zeros(1, self.configer.get('data', 'num_classes'), + i.size(1), i.size(2)).cuda(device_ids[index], non_blocking=True) + for index, i in enumerate(inputs)] + flip_inputs = [self.flip(i, 2) for i in inputs] + + if self.configer.exists('test', 'scale_weights'): + for scale, weight in zip(self.configer.get('test', 'scale_search'), + self.configer.get('test', 'scale_weights')): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(flip_inputs, scale) + for i in range(len(inputs)): + full_probs[i] += weight * (probs[i] + self.flip(flip_probs[i], 3)) + return full_probs + else: + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(flip_inputs, scale) + for i in range(len(inputs)): + full_probs[i] += (probs[i] + self.flip(flip_probs[i], 3)) + return full_probs + + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def ms_test_depth(self, inputs, names): + prob_list = [] + scale_list = [] + + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + prob_list.append(probs) + scale_list.append(scale) + + full_probs = self.fuse_with_depth(prob_list, scale_list, names) + return full_probs + + else: + raise 
RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def fuse_with_depth(self, probs, scales, names): + MAX_DEPTH = 63 + POWER_BASE = 0.8 + if 'test' in self.save_dir: + stereo_path = "/msravcshare/dataset/cityscapes/stereo/test/" + else: + stereo_path = "/msravcshare/dataset/cityscapes/stereo/val/" + + n, c, h, w = probs[0].size(0), probs[0].size(1), probs[0].size(2), probs[0].size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + + for index, name in enumerate(names): + stereo_map = cv2.imread(stereo_path + name + '.png', -1) + depth_map = stereo_map / 256.0 + depth_map = 0.5 / depth_map + depth_map = 500 * depth_map + + depth_map = np.clip(depth_map, 0, MAX_DEPTH) + depth_map = depth_map // (MAX_DEPTH // len(scales)) + + for prob, scale in zip(probs, scales): + scale_index = self._locate_scale_index(scale, scales) + weight_map = np.abs(depth_map - scale_index) + weight_map = np.power(POWER_BASE, weight_map) + weight_map = cv2.resize(weight_map, (w, h)) + full_probs[index, :, :, :] += torch.from_numpy(np.expand_dims(weight_map, axis=0)).type( + torch.cuda.FloatTensor) * prob[index, :, :, :] + + return full_probs + + @staticmethod + def _locate_scale_index(scale, scales): + for idx, s in enumerate(scales): + if scale == s: + return idx + return 0 + + def ms_test_wo_flip(self, inputs): + if isinstance(inputs, torch.Tensor): + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + full_probs += probs + return full_probs + elif isinstance(inputs, collections.Sequence): + device_ids = self.configer.get('gpu') + full_probs = [torch.zeros(1, self.configer.get('data', 'num_classes'), + i.size(1), i.size(2)).cuda(device_ids[index], non_blocking=True) + for index, i, in enumerate(inputs)] + for scale in self.configer.get('test', 'scale_search'): + probs = self.ss_test(inputs, scale) + for i in range(len(inputs)): + full_probs[i] += probs[i] + return full_probs + else: + raise RuntimeError("Unsupport data type: {}".format(type(inputs))) + + def mscrop_test(self, inputs, crop_size): + ''' + Currently, mscrop_test does not support diverse_size testing + ''' + n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3) + full_probs = torch.cuda.FloatTensor(n, self.configer.get('data', 'num_classes'), h, w).fill_(0) + for scale in self.configer.get('test', 'scale_search'): + Log.info('Scale {0:.2f} prediction'.format(scale)) + if scale < 1: + probs = self.ss_test(inputs, scale) + flip_probs = self.ss_test(self.flip(inputs, 3), scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += probs + else: + probs = self.sscrop_test(inputs, crop_size, scale) + flip_probs = self.sscrop_test(self.flip(inputs, 3), crop_size, scale) + probs = probs + self.flip(flip_probs, 3) + full_probs += probs + return full_probs + + def _decide_intersection(self, total_length, crop_length): + stride = crop_length + times = (total_length - crop_length) // stride + 1 + cropped_starting = [] + for i in range(times): + cropped_starting.append(stride * i) + if total_length - cropped_starting[-1] > crop_length: + cropped_starting.append(total_length - crop_length) # must cover the total image + return cropped_starting + + def dense_crf_process(self, images, outputs): + ''' + Reference: 
https://github.com/kazuto1011/deeplab-pytorch/blob/master/libs/utils/crf.py + ''' + # hyperparameters of the dense crf + # baseline = 79.5 + # bi_xy_std = 67, 79.1 + # bi_xy_std = 20, 79.6 + # bi_xy_std = 10, 79.7 + # bi_xy_std = 10, iter_max = 20, v4 79.7 + # bi_xy_std = 10, iter_max = 5, v5 79.7 + # bi_xy_std = 5, v3 79.7 + iter_max = 10 + pos_w = 3 + pos_xy_std = 1 + bi_w = 4 + bi_xy_std = 10 + bi_rgb_std = 3 + + b = images.size(0) + mean_vector = np.expand_dims(np.expand_dims(np.transpose(np.array([102.9801, 115.9465, 122.7717])), axis=1), + axis=2) + outputs = F.softmax(outputs, dim=1) + for i in range(b): + unary = outputs[i].data.cpu().numpy() + C, H, W = unary.shape + unary = dcrf_utils.unary_from_softmax(unary) + unary = np.ascontiguousarray(unary) + + image = np.ascontiguousarray(images[i]) + mean_vector + image = image.astype(np.ubyte) + image = np.ascontiguousarray(image.transpose(1, 2, 0)) + + d = dcrf.DenseCRF2D(W, H, C) + d.setUnaryEnergy(unary) + d.addPairwiseGaussian(sxy=pos_xy_std, compat=pos_w) + d.addPairwiseBilateral(sxy=bi_xy_std, srgb=bi_rgb_std, rgbim=image, compat=bi_w) + out_crf = np.array(d.inference(iter_max)) + outputs[i] = torch.from_numpy(out_crf).cuda().view(C, H, W) + + return outputs + + +if __name__ == "__main__": + pass diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tester_offset.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tester_offset.py new file mode 100644 index 0000000..939e239 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tester_offset.py @@ -0,0 +1,701 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: JingyiXie +## Microsoft Research +## hsfzxjy@gmail.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import timeit +import pdb +import cv2 +import scipy +import pprint +import collections + +import torch +import numpy as np +import torch.nn as nn +import torch.nn.functional as F + +from PIL import Image +from models.protoseg_core.lib.utils.helpers.file_helper import FileHelper +from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper +from models.protoseg_core.lib.utils.tools.average_meter import AverageMeter +from models.protoseg_core.lib.datasets.data_loader import DataLoader +from models.protoseg_core.lib.loss.loss_manager import LossManager +from models.protoseg_core.lib.models.model_manager import ModelManager +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.metrics.running_score import RunningScore +from models.protoseg_core.segmentor.tools.module_runner import ModuleRunner +from scipy import ndimage +from PIL import Image +from math import ceil + +from models.protoseg_core.lib.utils.helpers.offset_helper import DTOffsetConfig, DTOffsetHelper + + +class Tester_inference(object): + def __init__(self, configer): + self.crop_size = configer.get('train', + 'data_transformer')['input_size'][::-1] + val_trans_seq = [ + x for x in configer.get('val_trans', 'trans_seq') + if 'random' not in x + ] + configer.update(('val_trans', 'trans_seq'), val_trans_seq) + configer.get('val', 'data_transformer')['input_size'] = configer.get( + 'test', 
'data_transformer').get('input_size', None) + configer.update(('train', 'data_transformer'), + configer.get('val', 'data_transformer')) + configer.update(('val', 'batch_size'), + int(os.environ.get('batch_size', 16))) + configer.update(('test', 'batch_size'), + int(os.environ.get('batch_size', 16))) + + # print(self.crop_size, ); assert 1==0 + + self.save_dir = configer.get('test', 'out_dir') + self.dataset_name = configer.get('test', 'eval_set') + self.sscrop = configer.get('test', 'sscrop') + + self.configer = configer + self.batch_time = AverageMeter() + self.data_time = AverageMeter() + self.loss_manager = LossManager(configer) + self.module_runner = ModuleRunner(configer) + self.model_manager = ModelManager(configer) + self.seg_data_loader = DataLoader(configer) + self.seg_net = None + self.test_loader = None + self.test_size = None + self.infer_time = 0 + self.infer_cnt = 0 + self._init_model() + + pprint.pprint(configer.params_root) + + def _init_model(self): + self.seg_net = self.model_manager.semantic_segmentor() + self.seg_net = self.module_runner.load_net(self.seg_net) + + assert self.dataset_name in ('train', 'val', + 'test'), 'Cannot infer dataset name' + + self.size_mode = self.configer.get(self.dataset_name, + 'data_transformer')['size_mode'] + + if self.dataset_name != 'test': + self.test_loader = self.seg_data_loader.get_valloader( + self.dataset_name) + else: + self.test_loader = self.seg_data_loader.get_testloader( + self.dataset_name) + self.test_size = len(self.test_loader) * self.configer.get( + 'val', 'batch_size') + + def test(self, data_loader=None): + """ + Validation function during the train phase. + """ + self.seg_net.eval() + start_time = time.time() + image_id = 0 + + Log.info('save dir {}'.format(self.save_dir)) + FileHelper.make_dirs(self.save_dir, is_file=False) + + print('Total batches', len(self.test_loader)) + for j, data_dict in enumerate(self.test_loader): + inputs = [data_dict['img']] + names = data_dict['name'] + metas = data_dict['meta'] + + dest_dir = self.save_dir + + with torch.no_grad(): + offsets, logits = self.extract_offset(inputs) + print([x.shape for x in logits]) + for k in range(len(inputs[0])): + image_id += 1 + ori_img_size = metas[k]['ori_img_size'] + border_size = metas[k]['border_size'] + offset = offsets[k].squeeze().cpu().numpy() + offset = cv2.resize( + offset[:border_size[1], :border_size[0]], + tuple(ori_img_size), + interpolation=cv2.INTER_NEAREST) + print(image_id) + + os.makedirs(dest_dir, exist_ok=True) + + if names[k].rpartition('.')[0]: + dest_name = names[k].rpartition('.')[0] + '.mat' + else: + dest_name = names[k] + '.mat' + dest_name = os.path.join(dest_dir, dest_name) + print('Shape:', offset.shape, 'Saving to', dest_name) + + data_dict = {'mat': offset} + + scipy.io.savemat(dest_name, data_dict, do_compression=True) + try: + scipy.io.loadmat(dest_name) + except Exception as e: + print(e) + scipy.io.savemat(dest_name, + data_dict, + do_compression=False) + + self.batch_time.update(time.time() - start_time) + start_time = time.time() + + Log.info('Test Time {batch_time.sum:.3f}s'.format( + batch_time=self.batch_time)) + + def extract_offset(self, inputs): + if self.sscrop: + outputs = self.sscrop_test(inputs, self.crop_size) + elif self.configer.get('test', 'mode') == 'ss_test': + outputs = self.ss_test(inputs) + + offsets = [] + logits = [] + + for mask_logits, dir_logits, img in zip(*outputs[:2], inputs[0]): + h, w = img.shape[1:] + + mask_logits = F.interpolate(mask_logits.unsqueeze(0), + size=(h, w), + 
mode='bilinear', + align_corners=True) + dir_logits = F.interpolate(dir_logits.unsqueeze(0), + size=(h, w), + mode='bilinear', + align_corners=True) + + logit = torch.softmax(dir_logits, dim=1) + zero_mask = mask_logits.argmax(dim=1, keepdim=True) == 0 + logits.append(mask_logits[:, 1]) + + offset = self._get_offset(mask_logits, dir_logits) + offsets.append(offset) + print([x.shape for x in offsets]) + return offsets, logits + + def _get_offset(self, mask_logits, dir_logits): + + edge_mask = mask_logits[:, 1] > 0.5 + dir_logits = torch.softmax(dir_logits, dim=1) + n, _, h, w = dir_logits.shape + + keep_mask = edge_mask + + dir_label = torch.argmax(dir_logits, dim=1).float() + offset = DTOffsetHelper.label_to_vector(dir_label) + offset = offset.permute(0, 2, 3, 1) + offset[~keep_mask, :] = 0 + return offset + + def _flip(self, x, dim=-1): + indices = [slice(None)] * x.dim() + indices[dim] = torch.arange(x.size(dim) - 1, + -1, + -1, + dtype=torch.long, + device=x.device) + return x[tuple(indices)] + + def _flip_offset(self, x): + x = self._flip(x, dim=-1) + if len(x.shape) == 4: + return x[:, DTOffsetHelper.flipping_indices()] + else: + return x[DTOffsetHelper.flipping_indices()] + + def _flip_inputs(self, inputs): + + if self.size_mode == 'fix_size': + return [self._flip(x, -1) for x in inputs] + else: + return [[self._flip(x, -1) for x in xs] for xs in inputs] + + def _flip_outputs(self, outputs): + funcs = [self._flip, self._flip_offset] + if self.size_mode == 'fix_size': + return [f(x) for f, x in zip(funcs, outputs)] + else: + return [[f(x) for x in xs] for f, xs in zip(funcs, outputs)] + + def _tuple_sum(self, tup1, tup2, tup2_weight=1): + """ + tup1 / tup2: tuple of tensors or tuple of list of tensors + """ + + if tup1 is None: + if self.size_mode == 'fix_size': + return [y * tup2_weight for y in tup2] + else: + return [[y * tup2_weight for y in ys] for ys in tup2] + else: + if self.size_mode == 'fix_size': + return [x + y * tup2_weight for x, y in zip(tup1, tup2)] + else: + return [[x + y * tup2_weight for x, y in zip(xs, ys)] + for xs, ys in zip(tup1, tup2)] + + def _scale_ss_inputs(self, inputs, scale): + n, c, h, w = inputs[0].shape + size = (int(h * scale), int(w * scale)) + return [ + F.interpolate(inputs[0], + size=size, + mode="bilinear", + align_corners=True), + ], (h, w) + + def sscrop_test(self, inputs, crop_size, scale=1): + ''' + Currently, sscrop_test does not support diverse_size testing + ''' + scaled_inputs = inputs + img = scaled_inputs[0] + n, c, h, w = img.size(0), img.size(1), img.size(2), img.size(3) + ori_h, ori_w = h, w + full_probs = [ + torch.cuda.FloatTensor(n, dim, h, w).fill_(0) for dim in (2, 8) + ] + count_predictions = [ + torch.cuda.FloatTensor(n, dim, h, w).fill_(0) for dim in (2, 8) + ] + + crop_counter = 0 + + height_starts = self._decide_intersection(h, crop_size[0]) + width_starts = self._decide_intersection(w, crop_size[1]) + + for height in height_starts: + for width in width_starts: + crop_inputs = [ + x[..., height:height + crop_size[0], + width:width + crop_size[1]] for x in scaled_inputs + ] + prediction = self.ss_test(crop_inputs) + + for j in range(2): + count_predictions[j][:, :, height:height + crop_size[0], + width:width + crop_size[1]] += 1 + full_probs[j][:, :, height:height + crop_size[0], + width:width + crop_size[1]] += prediction[j] + crop_counter += 1 + Log.info('predicting {:d}-th crop'.format(crop_counter)) + + for j in range(2): + full_probs[j] /= count_predictions[j] + full_probs[j] = F.interpolate(full_probs[j], + 
size=(ori_h, ori_w), + mode='bilinear', + align_corners=True) + return full_probs + + def _scale_ss_outputs(self, outputs, size): + return [ + F.interpolate(x, size=size, mode="bilinear", align_corners=True) + for x in outputs + ] + + def ss_test(self, inputs, scale=1): + if self.size_mode == 'fix_size': + + scaled_inputs, orig_size = self._scale_ss_inputs(inputs, scale) + print([x.shape for x in scaled_inputs]) + + start = timeit.default_timer() + outputs = list(self.seg_net.forward(*scaled_inputs)) + if len(outputs) == 3: + outputs = (outputs[0], outputs[2]) + else: + outputs[0] = F.softmax(outputs[0], dim=1) + torch.cuda.synchronize() + end = timeit.default_timer() + + return self._scale_ss_outputs(outputs, orig_size) + + else: + device_ids = self.configer.get('gpu') + replicas = nn.parallel.replicate(self.seg_net.module, device_ids) + scaled_inputs, ori_sizes, outputs = [], [], [] + + for *i, d in zip(*inputs, device_ids): + scaled_i, ori_size_i = self._scale_ss_inputs( + [x.unsqueeze(0) for x in i], scale) + scaled_inputs.append( + [x.cuda(d, non_blocking=True) for x in scaled_i]) + ori_sizes.append(ori_size_i) + + scaled_outputs = nn.parallel.parallel_apply( + replicas[:len(scaled_inputs)], scaled_inputs) + + for o, ori_size in zip(scaled_outputs, ori_sizes): + o = self._scale_ss_outputs(o, ori_size) + if len(o) == 3: + o = (o[0], o[2]) + outputs.append([x.squeeze(0) for x in o]) + outputs = list(map(list, zip(*outputs))) + return outputs + + def _decide_intersection(self, + total_length, + crop_length, + crop_stride_ratio=1 / 3): + stride = int(crop_length * + crop_stride_ratio) # set the stride as the paper do + times = (total_length - crop_length) // stride + 1 + cropped_starting = [] + for i in range(times): + cropped_starting.append(stride * i) + + if total_length - cropped_starting[-1] > crop_length: + cropped_starting.append(total_length - + crop_length) # must cover the total image + + return cropped_starting + + + + +class Tester(object): + def __init__(self, configer): + self.crop_size = configer.get('train', + 'data_transformer')['input_size'][::-1] + val_trans_seq = [ + x for x in configer.get('val_trans', 'trans_seq') + if 'random' not in x + ] + configer.update(('val_trans', 'trans_seq'), val_trans_seq) + configer.get('val', 'data_transformer')['input_size'] = configer.get( + 'test', 'data_transformer').get('input_size', None) + configer.update(('train', 'data_transformer'), + configer.get('val', 'data_transformer')) + configer.update(('val', 'batch_size'), + int(os.environ.get('batch_size', 16))) + configer.update(('test', 'batch_size'), + int(os.environ.get('batch_size', 16))) + + self.save_dir = configer.get('test', 'out_dir') + self.dataset_name = configer.get('test', 'eval_set') + self.sscrop = configer.get('test', 'sscrop') + + self.configer = configer + self.batch_time = AverageMeter() + self.data_time = AverageMeter() + self.loss_manager = LossManager(configer) + self.module_runner = ModuleRunner(configer) + self.model_manager = ModelManager(configer) + self.seg_data_loader = DataLoader(configer) + self.seg_net = None + self.test_loader = None + self.test_size = None + self.infer_time = 0 + self.infer_cnt = 0 + self._init_model() + + pprint.pprint(configer.params_root) + + def _init_model(self): + self.seg_net = self.model_manager.semantic_segmentor() + self.seg_net = self.module_runner.load_net(self.seg_net) + + assert self.dataset_name in ('train', 'val', + 'test'), 'Cannot infer dataset name' + + self.size_mode = self.configer.get(self.dataset_name, + 
'data_transformer')['size_mode'] + + if self.dataset_name != 'test': + self.test_loader = self.seg_data_loader.get_valloader( + self.dataset_name) + else: + self.test_loader = self.seg_data_loader.get_testloader( + self.dataset_name) + self.test_size = len(self.test_loader) * self.configer.get( + 'val', 'batch_size') + + def test(self, data_loader=None): + """ + Validation function during the train phase. + """ + self.seg_net.eval() + start_time = time.time() + image_id = 0 + + Log.info('save dir {}'.format(self.save_dir)) + FileHelper.make_dirs(self.save_dir, is_file=False) + + print('Total batches', len(self.test_loader)) + for j, data_dict in enumerate(self.test_loader): + inputs = [data_dict['img']] + names = data_dict['name'] + metas = data_dict['meta'] + + dest_dir = self.save_dir + + with torch.no_grad(): + offsets, logits = self.extract_offset(inputs) + print([x.shape for x in logits]) + for k in range(len(inputs[0])): + image_id += 1 + ori_img_size = metas[k]['ori_img_size'] + border_size = metas[k]['border_size'] + offset = offsets[k].squeeze().cpu().numpy() + offset = cv2.resize( + offset[:border_size[1], :border_size[0]], + tuple(ori_img_size), + interpolation=cv2.INTER_NEAREST) + print(image_id) + + os.makedirs(dest_dir, exist_ok=True) + + if names[k].rpartition('.')[0]: + dest_name = names[k].rpartition('.')[0] + '.mat' + else: + dest_name = names[k] + '.mat' + dest_name = os.path.join(dest_dir, dest_name) + print('Shape:', offset.shape, 'Saving to', dest_name) + + data_dict = {'mat': offset} + + scipy.io.savemat(dest_name, data_dict, do_compression=True) + try: + scipy.io.loadmat(dest_name) + except Exception as e: + print(e) + scipy.io.savemat(dest_name, + data_dict, + do_compression=False) + + self.batch_time.update(time.time() - start_time) + start_time = time.time() + + Log.info('Test Time {batch_time.sum:.3f}s'.format( + batch_time=self.batch_time)) + + def extract_offset(self, inputs): + if self.sscrop: + outputs = self.sscrop_test(inputs, self.crop_size) + elif self.configer.get('test', 'mode') == 'ss_test': + outputs = self.ss_test(inputs) + + offsets = [] + logits = [] + + for mask_logits, dir_logits, img in zip(*outputs[:2], inputs[0]): + h, w = img.shape[1:] + + mask_logits = F.interpolate(mask_logits.unsqueeze(0), + size=(h, w), + mode='bilinear', + align_corners=True) + dir_logits = F.interpolate(dir_logits.unsqueeze(0), + size=(h, w), + mode='bilinear', + align_corners=True) + + logit = torch.softmax(dir_logits, dim=1) + zero_mask = mask_logits.argmax(dim=1, keepdim=True) == 0 + logits.append(mask_logits[:, 1]) + + offset = self._get_offset(mask_logits, dir_logits) + offsets.append(offset) + print([x.shape for x in offsets]) + return offsets, logits + + def _get_offset(self, mask_logits, dir_logits): + + edge_mask = mask_logits[:, 1] > 0.5 + dir_logits = torch.softmax(dir_logits, dim=1) + n, _, h, w = dir_logits.shape + + keep_mask = edge_mask + + dir_label = torch.argmax(dir_logits, dim=1).float() + offset = DTOffsetHelper.label_to_vector(dir_label) + offset = offset.permute(0, 2, 3, 1) + offset[~keep_mask, :] = 0 + return offset + + def _flip(self, x, dim=-1): + indices = [slice(None)] * x.dim() + indices[dim] = torch.arange(x.size(dim) - 1, + -1, + -1, + dtype=torch.long, + device=x.device) + return x[tuple(indices)] + + def _flip_offset(self, x): + x = self._flip(x, dim=-1) + if len(x.shape) == 4: + return x[:, DTOffsetHelper.flipping_indices()] + else: + return x[DTOffsetHelper.flipping_indices()] + + def _flip_inputs(self, inputs): + + if self.size_mode 
== 'fix_size': + return [self._flip(x, -1) for x in inputs] + else: + return [[self._flip(x, -1) for x in xs] for xs in inputs] + + def _flip_outputs(self, outputs): + funcs = [self._flip, self._flip_offset] + if self.size_mode == 'fix_size': + return [f(x) for f, x in zip(funcs, outputs)] + else: + return [[f(x) for x in xs] for f, xs in zip(funcs, outputs)] + + def _tuple_sum(self, tup1, tup2, tup2_weight=1): + """ + tup1 / tup2: tuple of tensors or tuple of list of tensors + """ + + if tup1 is None: + if self.size_mode == 'fix_size': + return [y * tup2_weight for y in tup2] + else: + return [[y * tup2_weight for y in ys] for ys in tup2] + else: + if self.size_mode == 'fix_size': + return [x + y * tup2_weight for x, y in zip(tup1, tup2)] + else: + return [[x + y * tup2_weight for x, y in zip(xs, ys)] + for xs, ys in zip(tup1, tup2)] + + def _scale_ss_inputs(self, inputs, scale): + n, c, h, w = inputs[0].shape + size = (int(h * scale), int(w * scale)) + return [ + F.interpolate(inputs[0], + size=size, + mode="bilinear", + align_corners=True), + ], (h, w) + + def sscrop_test(self, inputs, crop_size, scale=1): + ''' + Currently, sscrop_test does not support diverse_size testing + ''' + scaled_inputs = inputs + img = scaled_inputs[0] + n, c, h, w = img.size(0), img.size(1), img.size(2), img.size(3) + ori_h, ori_w = h, w + full_probs = [ + torch.cuda.FloatTensor(n, dim, h, w).fill_(0) for dim in (2, 8) + ] + count_predictions = [ + torch.cuda.FloatTensor(n, dim, h, w).fill_(0) for dim in (2, 8) + ] + + crop_counter = 0 + + height_starts = self._decide_intersection(h, crop_size[0]) + width_starts = self._decide_intersection(w, crop_size[1]) + + for height in height_starts: + for width in width_starts: + crop_inputs = [ + x[..., height:height + crop_size[0], + width:width + crop_size[1]] for x in scaled_inputs + ] + prediction = self.ss_test(crop_inputs) + + for j in range(2): + count_predictions[j][:, :, height:height + crop_size[0], + width:width + crop_size[1]] += 1 + full_probs[j][:, :, height:height + crop_size[0], + width:width + crop_size[1]] += prediction[j] + crop_counter += 1 + Log.info('predicting {:d}-th crop'.format(crop_counter)) + + for j in range(2): + full_probs[j] /= count_predictions[j] + full_probs[j] = F.interpolate(full_probs[j], + size=(ori_h, ori_w), + mode='bilinear', + align_corners=True) + return full_probs + + def _scale_ss_outputs(self, outputs, size): + return [ + F.interpolate(x, size=size, mode="bilinear", align_corners=True) + for x in outputs + ] + + def ss_test(self, inputs, scale=1): + if self.size_mode == 'fix_size': + + scaled_inputs, orig_size = self._scale_ss_inputs(inputs, scale) + print([x.shape for x in scaled_inputs]) + + start = timeit.default_timer() + outputs = list(self.seg_net.forward(*scaled_inputs)) + if len(outputs) == 3: + outputs = (outputs[0], outputs[2]) + else: + outputs[0] = F.softmax(outputs[0], dim=1) + torch.cuda.synchronize() + end = timeit.default_timer() + + return self._scale_ss_outputs(outputs, orig_size) + + else: + device_ids = self.configer.get('gpu') + replicas = nn.parallel.replicate(self.seg_net.module, device_ids) + scaled_inputs, ori_sizes, outputs = [], [], [] + + for *i, d in zip(*inputs, device_ids): + scaled_i, ori_size_i = self._scale_ss_inputs( + [x.unsqueeze(0) for x in i], scale) + scaled_inputs.append( + [x.cuda(d, non_blocking=True) for x in scaled_i]) + ori_sizes.append(ori_size_i) + + scaled_outputs = nn.parallel.parallel_apply( + replicas[:len(scaled_inputs)], scaled_inputs) + + for o, ori_size in 
zip(scaled_outputs, ori_sizes): + o = self._scale_ss_outputs(o, ori_size) + if len(o) == 3: + o = (o[0], o[2]) + outputs.append([x.squeeze(0) for x in o]) + outputs = list(map(list, zip(*outputs))) + return outputs + + def _decide_intersection(self, + total_length, + crop_length, + crop_stride_ratio=1 / 3): + stride = int(crop_length * + crop_stride_ratio) # set the stride as the paper do + times = (total_length - crop_length) // stride + 1 + cropped_starting = [] + for i in range(times): + cropped_starting.append(stride * i) + + if total_length - cropped_starting[-1] > crop_length: + cropped_starting.append(total_length - + crop_length) # must cover the total image + + return cropped_starting + + +if __name__ == "__main__": + pass diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/blob_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/blob_helper.py new file mode 100644 index 0000000..e312aa4 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/blob_helper.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You (youansheng@gmail.com) +# Generate the inputs. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import cv2 +import pdb +import numpy as np +import torch + +from lib.utils.helpers.image_helper import ImageHelper +from lib.datasets.tools.transforms import DeNormalize, ToTensor, Normalize + + +class BlobHelper(object): + def __init__(self, configer): + self.configer = configer + + def make_input_batch(self, image_list, input_size=None, scale=1.0): + input_list = list() + for image in image_list: + input_list.append(self.make_input(image, input_size=input_size, scale=scale)) + + return torch.cat(input_list, 0) + + def make_input(self, image=None, input_size=None, + min_side_length=None, max_side_length=None, scale=None): + if input_size is not None and min_side_length is None and max_side_length is None: + if input_size[0] == -1 and input_size[1] == -1: + in_width, in_height = ImageHelper.get_size(image) + + elif input_size[0] != -1 and input_size[1] != -1: + in_width, in_height = input_size + + elif input_size[0] == -1 and input_size[1] != -1: + width, height = ImageHelper.get_size(image) + scale_ratio = input_size[1] / height + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + in_width, in_height = int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)) + + else: + assert input_size[0] != -1 and input_size[1] == -1 + width, height = ImageHelper.get_size(image) + scale_ratio = input_size[0] / width + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + in_width, in_height = int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)) + + elif input_size is None and min_side_length is not None and max_side_length is None: + width, height = ImageHelper.get_size(image) + scale_ratio = min_side_length / min(width, height) + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + in_width, in_height = int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)) + + elif input_size is None and min_side_length is None and max_side_length is not None: + width, height = ImageHelper.get_size(image) + scale_ratio = max_side_length / max(width, height) + w_scale_ratio, h_scale_ratio = scale_ratio, 
scale_ratio + in_width, in_height = int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)) + + elif input_size is None and min_side_length is not None and max_side_length is not None: + width, height = ImageHelper.get_size(image) + scale_ratio = min_side_length / min(width, height) + bound_scale_ratio = max_side_length / max(width, height) + scale_ratio = min(scale_ratio, bound_scale_ratio) + w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio + in_width, in_height = int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)) + + else: + in_width, in_height = ImageHelper.get_size(image) + + image = ImageHelper.resize(image, (int(in_width * scale), int(in_height * scale)), interpolation='cubic') + img_tensor = ToTensor()(image) + img_tensor = Normalize(div_value=self.configer.get('normalize', 'div_value'), + mean=self.configer.get('normalize', 'mean'), + std=self.configer.get('normalize', 'std'))(img_tensor) + # img_tensor = img_tensor.unsqueeze(0).to(torch.device('cpu' if self.configer.get('gpu') is None else 'cuda')) + img_tensor = img_tensor.unsqueeze(0) + img_tensor = img_tensor.cuda() + return img_tensor + + def tensor2bgr(self, tensor): + assert len(tensor.size()) == 3 + + ori_img = DeNormalize(div_value=self.configer.get('normalize', 'div_value'), + mean=self.configer.get('normalize', 'mean'), + std=self.configer.get('normalize', 'std'))(tensor.cpu()) + ori_img = ori_img.numpy().transpose(1, 2, 0).astype(np.uint8) + + if self.configer.get('data', 'input_mode') == 'BGR': + return ori_img + else: + image_bgr = cv2.cvtColor(ori_img, cv2.COLOR_RGB2BGR) + return image_bgr diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/cost_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/cost_helper.py new file mode 100644 index 0000000..cc489a0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/cost_helper.py @@ -0,0 +1,148 @@ +import math + +def cost_conv(H, W, Cin, Cout, kernel_size, bias=True, stride=1, padding=0, dilation=1): + flatten_ksize = (kernel_size - 1) * dilation + 1 + oh = (H + padding - flatten_ksize//2) // stride + ow = (W + padding - flatten_ksize//2) // stride + bias_cost = 1 if bias else 0 + cost = oh * ow * (Cin * kernel_size**2 * 2 + bias_cost) * Cout + return cost + +def cost_affinity(N, C, num_subsample=None): + if num_subsample is None: + num_subsample = N + return N * num_subsample * C * 2 + + +def cost_ia(H, W, CI, CK, CV, CO, P=8): + dh = dw = P + cw = cost_conv(H, W, CV, CO, 1) + cqkv = cost_conv(H, W, CI, CK, 1) * 2 + cost_conv(H, W, CI, CV, 1) + cw *= 2 # long and short + cqkv *= 2 + oh, ow = math.ceil(H / dh), math.ceil(W / dw) + caffinity = cost_affinity(oh * ow, CK) * dh * dw + cost_affinity(dh * dw, CK) * oh * ow + caggregate = cost_affinity(oh * ow, CV) * dh * dw + cost_affinity(dh * dw, CV) * oh * ow + c_skip = cost_conv(H, W, CI + CO, CO, 1) + c_ia = (cw + cqkv + caffinity + caggregate + c_skip) + return c_ia + +def cost_sa(H, W, CI, CK, CV, CO): + cqkv = cost_conv(H, W, CI, CK, 1) * 2 + cost_conv(H, W, CI, CV, 1) + cw = cost_conv(H, W, CV, CO, 1) + caffinity = cost_affinity(H * W, CK) + caggregate = cost_affinity(H * W, CV) + c_skip = cost_conv(H, W, CI + CO, CO, 1) + c_nl = (cw + cqkv + caffinity + caggregate + c_skip) + return c_nl + + +def cost_rcca(H, W, CI, CInter, CK, CV, CO): + c_conva = cost_conv(H, W, 2048, CInter, 3, padding=1, bias=False) + + cqkv = cost_conv(H, W, CInter, CK, 1) * 2 + cost_conv(H, W, CInter, CV, 1) + caffinity = cost_affinity(H*W, CK, 
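# ---------------------------------------------------------------------------
# Editor's note: a worked example of the FLOP-counting convention used by
# cost_conv above (a rough sketch under the same assumption: one multiply-add
# per weight counted as 2 FLOPs, plus one FLOP per output element for the
# bias). For a 3x3 conv, 2048 -> 512 channels on a 128x128 feature map with
# padding=1 and stride=1, the output stays 128x128 under this formula, so
#   FLOPs ~= 128 * 128 * (2048 * 3**2 * 2 + 1) * 512 ~= 309 GFLOPs,
# which matches the "3x3 conv" figure printed by the __main__ block below.
H = W = 128
Cin, Cout, k = 2048, 512, 3
flops = H * W * (Cin * k ** 2 * 2 + 1) * Cout
print('{:.1f} GFLOPs'.format(flops / 1000 ** 3))   # ~309.2 GFLOPs
# ---------------------------------------------------------------------------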
num_subsample=(H+W-1)) + caggregate = cost_affinity(H*W, CV, num_subsample=(H+W-1)) + + c_convb = cost_conv(H, W, CO, CO, 3, padding=1, bias=False) + + c_bottleneck = cost_conv(H, W, CI + CO, CO, 3, padding=1, bias=False) + + c_cc = ((cqkv + caffinity + caggregate) * 2 + c_conva + c_convb + c_bottleneck) + + return c_cc + + +def cost_double_attention(H, W, CI, factor, global_cnt): + c_gather_dist = (cost_conv(H, W, CI, global_cnt, 1) + cost_conv(H, W, global_cnt, global_cnt, 1)) * 2 + c_down_up = cost_conv(H, W, CI, CI // factor, 1) * 2 + + c_mul_gather = global_cnt * H * W * (CI // factor) + c_mul_dist = global_cnt * H * W * (CI // factor) + + c_skip = cost_conv(H, W, CI + CI, CI, 1) + c_da = (c_gather_dist + c_down_up + c_mul_gather + c_mul_dist + c_skip) + return c_da + + +def cost_ocr(H, W, CI, CK, CV, CO, global_cnt): + c_gather = cost_affinity(H * W, CI, 19) + + c_down_up = cost_conv(H, W, CI, CK, 1) * 2 + + c_pixel = cost_conv(H, W, CI, CK, 1) + c_region = cost_conv(1, global_cnt, CI, CK, 1) + + c_affinity = cost_affinity(H*W, CK, global_cnt) + + c_value = global_cnt * H * W * CK + + c_skip = cost_conv(H, W, CI + CO, CO, 1) + c_ocr = (c_gather + c_down_up + c_pixel + c_region + c_affinity +c_value + c_skip) + return c_ocr + + +def cost_ppm(H, W, CI, factor=4): + c_pool = cost_conv(1, 1, CI, CI // factor, 1) + \ + cost_conv(2, 2, CI, CI // factor, 1) + \ + cost_conv(3, 3, CI, CI // factor, 1) + \ + cost_conv(6, 6, CI, CI // factor, 1) + + c_fuse = cost_conv(H, W, CI * 2, CI // factor, 3, padding=1) + + c_ppm = c_pool + c_fuse + return c_ppm + +def cost_aspp(H, W, CI, factor=8): + c_pool = cost_conv(1, 1, CI, CI // factor, 1) + c_conv1x1 = cost_conv(H, W, CI, CI // factor, 1) + c_conv3x3 = cost_conv(H, W, CI, CI // factor, 3, padding=12, dilation=12) + \ + cost_conv(H, W, CI, CI // factor, 3, padding=24, dilation=24) + \ + cost_conv(H, W, CI, CI // factor, 3, padding=36, dilation=36) + + c_fuse = cost_conv(H, W, 5 * CI // factor, CI // factor, 1) + c_aspp = c_pool + c_conv1x1 + c_conv3x3 + c_fuse + return c_aspp + + +if __name__ == "__main__": + H, W, C = 128, 128, 512 + + ##### Conv 3 x 3 + c_conv = cost_conv(H, W, 2048, 512, 3, padding=1) + print("GFLOPs of 3x3 conv: {:.1f}".format(c_conv / 1000**3)) + + ##### DA + CI, factor, global_cnt = C, 2, 64 + c_da = cost_double_attention(H, W, CI, factor, global_cnt) + cost_conv(H, W, 2048, 512, 3, padding=1) + print("GFLOPs of double-attention: {:.1f}".format(c_da / 1000**3)) + + ##### OCR + CI, CK, CV, CO, global_cnt = C, C // 2, C // 2, C, 19 + c_ocr = cost_ocr(H, W, CI, CK, CV, CO, global_cnt) + cost_conv(H, W, 2048, 512, 3, padding=1) + print("GFLOPs of ocr: {:.1f}".format(c_ocr / 1000**3)) + + ##### ASPP + CI, factor = 2048, 8 + c_aspp = cost_aspp(H, W, CI, factor) + print("GFLOPs of ASPP: {:.1f}".format(c_aspp / 1000**3)) + + ##### PPM + CI, factor = 2048, 4 + c_ppm = cost_ppm(H, W, CI, factor) + print("GFLOPs of PPM: {:.1f}".format(c_ppm / 1000**3)) + + ##### SA + CI, CK, CV, CO = C, C // 2, C//2, C + c_sa = cost_sa(H, W, CI, CK, CV, CO) + cost_conv(H, W, 2048, 512, 3, padding=1) + print("GFLOPs of SA: {:.1f}".format(c_sa / 1000**3)) + + ##### IA + CI, CK, CV, CO = C, C // 2, C, C + c_ia = cost_ia(H, W, CI, CK, CV, CO, P=8) + cost_conv(H, W, 2048, 512, 3, padding=1) + print("GFLOPs of IA: {:.1f}".format(c_ia / 1000**3)) + + ##### CC + CI, CK, CV, CO = 512, 64, 512, 512 + c_cc = cost_rcca(H, W, 2048, CI, CK, CV, CO) + print("GFLOPs of CC: {:.1f}".format(c_cc / 1000**3)) \ No newline at end of file diff --git 
a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/data_helper.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/data_helper.py new file mode 100644 index 0000000..4a8b808 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/data_helper.py @@ -0,0 +1,150 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.backends.cudnn as cudnn + +from models.protoseg_core.lib.utils.distributed import is_distributed +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +def _get_list_from_env(name): + + value = os.environ.get(name) + if value is None: + return None + + return [x.strip() for x in value.split(',')] + + +class DataHelper: + + def __init__(self, configer, trainer): + self.configer = configer + self.trainer = trainer + self.conditions = configer.conditions + + def input_keys(self): + env_value = _get_list_from_env('input_keys') + if env_value is not None: + inputs = env_value + elif self.conditions.use_sw_offset: + inputs = ['img', 'offsetmap_h', 'offsetmap_w'] + elif self.conditions.use_dt_offset: + inputs = ['img', 'distance_map', 'angle_map'] + else: + inputs = ['img'] + + return inputs + + def target_keys(self): + + env_value = _get_list_from_env('target_keys') + if env_value is not None: + return env_value + elif self.conditions.pred_sw_offset: + targets = [ + 'labelmap', + 'offsetmap_h', + 'offsetmap_w', + ] + elif self.conditions.pred_dt_offset: + targets = [ + 'labelmap', + 'distance_map', + 'angle_map', + ] + elif self.conditions.pred_ml_dt_offset: + targets = [ + 'labelmap', + 'distance_map', + 'multi_label_direction_map', + ] + else: + targets = ['labelmap'] + + return targets + + def _reverse_data_dict(self, data_dict): + result = {} + for k, x in data_dict.items(): + + if not isinstance(x, torch.Tensor): + result[k] = x + continue + + new_x = torch.flip(x, [len(x.shape) - 1]) + + # since direction_label_map, direction_multilabel_map will not appear in inputs, we omit the flipping + if k == 'offsetmap_w': + new_x = -new_x + elif k == 'angle_map': + new_x = x.clone() + mask = (x > 0) & (x < 180) + new_x[mask] = 180 - x[mask] + mask = (x < 0) & (x > -180) + new_x[mask] = - (180 + x[mask]) + + result[k] = new_x + + return result + + def _prepare_sequence(self, seq, force_list=False): + + def split_and_cuda(lst: 'List[List[Tensor, len=N]]', device_ids) -> 'List[List[Tensor], len=N]': + results = [] + for *items, d in zip(*lst, device_ids): + if len(items) == 1 and not force_list: + results.append(items[0].unsqueeze(0).cuda(d)) + else: + results.append([ + item.unsqueeze(0).cuda(d) + for item in items + ]) + return results + + if self.conditions.diverse_size and not self.trainer.seg_net.training: + + if is_distributed(): + assert len(seq) == 1 + seq = [x.unsqueeze(0) for x in seq[0]] + return self.trainer.module_runner.to_device(*seq, force_list=force_list) + + device_ids = list(range(len(self.configer.get('gpu')))) + return split_and_cuda(seq, device_ids) + else: + return self.trainer.module_runner.to_device(*seq, force_list=force_list) + + def prepare_data(self, data_dict, want_reverse=False): + + input_keys, target_keys = self.input_keys(), self.target_keys() + + if self.conditions.use_ground_truth: + input_keys += target_keys + + Log.info_once('Input keys: {}'.format(input_keys)) + Log.info_once('Target keys: {}'.format(target_keys)) + + inputs = [data_dict[k] for k in input_keys] + batch_size = len(inputs[0]) + targets = [data_dict[k] for k in target_keys] + + sequences = [ + 
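# ---------------------------------------------------------------------------
# Editor's sketch of the flip semantics handled by _reverse_data_dict above
# (illustrative only, not a drop-in replacement: the original applies the
# value remapping to the angle map while the spatial flip of other tensors is
# handled separately). Under a horizontal mirror, a horizontal offset field
# changes sign, and an angle in (-180, 180] maps to its mirror:
# a -> 180 - a for a in (0, 180), and a -> -(180 + a) for a in (-180, 0).
import torch

def flip_angle_map(angle):
    flipped = torch.flip(angle, dims=[angle.dim() - 1])
    out = flipped.clone()
    pos = (flipped > 0) & (flipped < 180)
    neg = (flipped < 0) & (flipped > -180)
    out[pos] = 180 - flipped[pos]
    out[neg] = -(180 + flipped[neg])
    return out

print(flip_angle_map(torch.tensor([[30., -45., 180.]])))
# tensor([[ 180., -135.,  150.]])
# ---------------------------------------------------------------------------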
self._prepare_sequence(inputs, force_list=True), + self._prepare_sequence(targets, force_list=False) + ] + if want_reverse: + rev_data_dict = self._reverse_data_dict(data_dict) + sequences.extend([ + self._prepare_sequence( + [rev_data_dict[k] for k in input_keys], + force_list=True + ), + self._prepare_sequence( + [rev_data_dict[k] for k in target_keys], + force_list=False + ) + ]) + + return sequences, batch_size diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/__init__.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/__init__.py new file mode 100644 index 0000000..aa65fdc --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/__init__.py @@ -0,0 +1,19 @@ +import os + +from lib.utils.tools.logger import Logger as Log +from . import standard + +evaluators = { + 'standard': standard.StandardEvaluator +} + + +def get_evaluator(configer, trainer, name=None): + name = os.environ.get('evaluator', 'standard') + + if not name in evaluators: + raise RuntimeError('Unknown evaluator name: {}'.format(name)) + klass = evaluators[name] + Log.info('Using evaluator: {}'.format(klass.__name__)) + + return klass(configer, trainer) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/base.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/base.py new file mode 100644 index 0000000..fa9366a --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/base.py @@ -0,0 +1,95 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.backends.cudnn as cudnn + +from lib.utils.tools.logger import Logger as Log +from lib.metrics import running_score as rslib +from lib.metrics import F1_running_score as fscore_rslib +from lib.utils.distributed import get_world_size, get_rank, is_distributed + + +class _BaseEvaluator: + + def __init__(self, configer, trainer): + self.configer = configer + self.trainer = trainer + self._init_running_scores() + self.conditions = configer.conditions + + def use_me(self): + raise NotImplementedError + + def _init_running_scores(self): + raise NotImplementedError + + def update_score(self, *args, **kwargs): + raise NotImplementedError + + def print_scores(self, show_miou=True): + for key, rs in self.running_scores.items(): + Log.info('Result for {}'.format(key)) + if isinstance(rs, fscore_rslib.F1RunningScore): + FScore, FScore_cls = rs.get_scores() + Log.info('Mean FScore: {}'.format(FScore)) + Log.info( + 'Class-wise FScore: {}'.format( + ', '.join( + '{:.3f}'.format(x) + for x in FScore_cls + ) + ) + ) + elif isinstance(rs, rslib.SimpleCounterRunningScore): + Log.info('ACC: {}\n'.format(rs.get_mean_acc())) + else: + if show_miou and hasattr(rs, 'get_mean_iou'): + Log.info('Mean IOU: {}\n'.format(rs.get_mean_iou())) + Log.info('Pixel ACC: {}\n'.format(rs.get_pixel_acc())) + + if hasattr(rs, 'n_classes') and rs.n_classes == 2: + Log.info( + 'F1 Score: {} Precision: {} Recall: {}\n' + .format(*rs.get_F1_score()) + ) + + def prepare_validaton(self): + """ + Replicate models if using diverse size validation. 
+ """ + if is_distributed(): + return + device_ids = list(range(len(self.configer.get('gpu')))) + if self.conditions.diverse_size: + cudnn.benchmark = False + assert self.configer.get('val', 'batch_size') <= len(device_ids) + replicas = nn.parallel.replicate( + self.trainer.seg_net.module, device_ids) + return replicas + + def update_performance(self): + + try: + rs = self.running_scores[self.save_net_main_key] + if self.save_net_metric == 'miou': + perf = rs.get_mean_iou() + elif self.save_net_metric == 'acc': + perf = rs.get_pixel_acc() + + max_perf = self.configer.get('max_performance') + self.configer.update(['performance'], perf) + if perf > max_perf and (not is_distributed() or get_rank() == 0): + Log.info('Performance {} -> {}'.format(max_perf, perf)) + except Exception as e: + Log.warn(e) + + def reset(self): + for rs in self.running_scores.values(): + rs.reset() + + def reduce_scores(self): + for rs in self.running_scores.values(): + if hasattr(rs, 'reduce_scores'): + rs.reduce_scores() diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/standard.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/standard.py new file mode 100644 index 0000000..1e8ab3f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/standard.py @@ -0,0 +1,103 @@ +import os + +import cv2 +import pdb +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.backends.cudnn as cudnn +from collections import Counter + +from lib.utils.tools.logger import Logger as Log +from .base import _BaseEvaluator +from . import tasks + +def _parse_output_spec(spec): + """ + Parse string like "mask, _, dir, ..., seg" into indices mapping + { + "mask": 0, + "dir": 2, + "seg": -1 + } + """ + spec = [x.strip() for x in spec.split(',')] + existing_task_names = set(tasks.task_mapping) + + # `spec` should not have invalid keys other than in `existing_task_names` + assert set(spec) - ({'...', '_'} | existing_task_names) == set() + # `spec` should have at least one key in `existing_task_names` + assert set(spec) & existing_task_names != set() + + counter = Counter(spec) + for task in tasks.task_mapping.values(): + task.validate_output_spec(spec, counter) + assert counter['...'] <= 1 + + length = len(spec) + output_indices = {} + negative_index = False + for idx, name in enumerate(spec): + if name not in ['_', '...']: + index = idx - length if negative_index else idx + output_indices[name] = index + elif name == '...': + negative_index = True + + return output_indices + + +class StandardEvaluator(_BaseEvaluator): + + def _output_spec(self): + if self.configer.conditions.pred_dt_offset: + default_spec = 'mask, dir' + elif self.configer.conditions.pred_ml_dt_offset: + default_spec = 'mask, ml_dir' + else: + default_spec = '..., seg' + + return os.environ.get('output_spec', default_spec) + + def _init_running_scores(self): + self.output_indices = _parse_output_spec(self._output_spec()) + + self.running_scores = {} + for task in tasks.task_mapping.values(): + rss, main_key, metric = task.running_score(self.output_indices, self.configer) + if rss is None: + continue + self.running_scores.update(rss) + self.save_net_main_key = main_key + self.save_net_metric = metric + + def update_score(self, outputs, metas): + if isinstance(outputs, torch.Tensor): + outputs = [outputs] + + for i in range(len(outputs[0])): + + ori_img_size = metas[i]['ori_img_size'] + border_size = metas[i]['border_size'] + + outputs_numpy = {} + for 
name, idx in self.output_indices.items(): + item = outputs[idx].permute(0, 2, 3, 1) + if self.configer.get('dataset') == 'celeba': + # the celeba image is of size 1024x1024 + item = cv2.resize( + item[i, :border_size[1], :border_size[0]].cpu().numpy(), + tuple(x // 2 for x in ori_img_size), interpolation=cv2.INTER_CUBIC + ) + else: + item = cv2.resize( + item[i, :border_size[1], :border_size[0]].cpu().numpy(), + tuple(ori_img_size), interpolation=cv2.INTER_CUBIC + ) + outputs_numpy[name] = item + + for name in outputs_numpy: + tasks.task_mapping[name].eval( + outputs_numpy, metas[i], self.running_scores + ) diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/tasks.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/tasks.py new file mode 100644 index 0000000..636289d --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/evaluator/tasks.py @@ -0,0 +1,246 @@ +import os +import pdb +import cv2 +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.backends.cudnn as cudnn +from collections import Counter + +from lib.metrics import running_score as rslib +from lib.metrics import F1_running_score as fscore_rslib +from lib.utils.tools.logger import Logger as Log +from lib.utils.helpers.offset_helper import DTOffsetConfig, DTOffsetHelper +from .base import _BaseEvaluator + + +def _sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +class SegTask: + name = 'seg' + + @staticmethod + def validate_output_spec(spec, spec_counter): + assert spec_counter['seg'] <= 1 + + @staticmethod + def running_score(spec, configer): + if 'seg' not in spec: + return (None, None, None) + + return ( + {'seg': rslib.RunningScore(configer)}, + 'seg', + 'miou' + ) + + @staticmethod + def eval(outputs, meta, running_scores): + ori_target = meta['ori_target'] + labelmap = np.argmax(outputs['seg'], axis=-1) + running_scores['seg'].update(labelmap[None], ori_target[None]) + + +class MaskTask: + name = 'mask' + + @staticmethod + def validate_output_spec(spec, spec_counter): + assert spec_counter['mask'] <= 1 + + @staticmethod + def running_score(spec, configer): + if 'mask' not in spec: + return (None, None, None) + + return ( + { + 'mask': rslib.RunningScore( + configer, num_classes=2, ignore_index=-1 + ) + }, + 'mask', + 'acc' + ) + + @staticmethod + def get_mask_pred(x): + if x.ndim == 2: + pred = _sigmoid(x) > 0.5 + else: + pred = np.argmax(x, axis=-1) + + return pred.astype(np.int) + + @staticmethod + def eval(outputs, meta, running_scores): + distance_map = meta['ori_distance_map'] + seg_label_map = meta['ori_target'] + gt_mask_label = DTOffsetHelper.distance_to_mask_label( + distance_map, + seg_label_map, + return_tensor=False + ) + mask_pred = MaskTask.get_mask_pred(outputs['mask']) + running_scores['mask'].update( + (mask_pred == 1).astype(np.int)[None], + gt_mask_label[None] + ) + + +class DirectionTask: + name = 'dir' + + @staticmethod + def validate_output_spec(spec, spec_counter): + assert spec_counter['dir'] == 0 or ( + spec_counter['dir'] == 1 and spec_counter['mask'] == 1 + ) + + @staticmethod + def running_score(spec, configer): + if 'dir' not in spec: + return (None, None, None) + + return ( + { + 'dir (mask)': rslib.RunningScore(configer, num_classes=DTOffsetConfig.num_classes, ignore_index=-1), + 'dir (GT)': rslib.RunningScore(configer, num_classes=DTOffsetConfig.num_classes + 1, ignore_index=-1), + }, + 'dir (GT)', + 'acc' + ) + + @staticmethod + def eval(outputs, meta, running_scores): + 
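# ---------------------------------------------------------------------------
# Editor's sketch of the two mask-decoding paths used by MaskTask.get_mask_pred
# above (illustrative only): a single-channel logit map is thresholded through
# a sigmoid at 0.5 (equivalent to logit > 0), while a multi-channel score map
# is decoded with an argmax over the last axis.
import numpy as np

def decode_mask(logits):
    if logits.ndim == 2:                                   # (H, W) logits
        return (1.0 / (1.0 + np.exp(-logits)) > 0.5).astype(np.int64)
    return np.argmax(logits, axis=-1).astype(np.int64)     # (H, W, C) scores

single = np.array([[-2.0, 0.3], [1.5, -0.1]])
multi = np.stack([-single, single], axis=-1)
print(decode_mask(single))   # [[0 1] [1 0]]
print(decode_mask(multi))    # [[0 1] [1 0]]
# ---------------------------------------------------------------------------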
distance_map = meta['ori_distance_map'] + angle_map = meta['ori_angle_map'] + seg_label_map = meta['ori_target'] + + mask_pred = MaskTask.get_mask_pred(outputs['mask']) + dir_pred = np.argmax(outputs['dir'], axis=-1) + + gt_mask_label = DTOffsetHelper.distance_to_mask_label( + distance_map, + seg_label_map, + return_tensor=False + ) + gt_dir_label = DTOffsetHelper.angle_to_direction_label( + angle_map, + seg_label_map=seg_label_map, + extra_ignore_mask=mask_pred != 1 + ) + + running_scores['dir (mask)'].update( + dir_pred[None], gt_dir_label[None] + ) + + dir_pred[mask_pred != 1] = DTOffsetConfig.num_classes + gt_dir_label = DTOffsetHelper.angle_to_direction_label( + angle_map, + seg_label_map=seg_label_map, + extra_ignore_mask=(gt_mask_label == 0), + ) + running_scores['dir (GT)'].update( + dir_pred[None], gt_dir_label[None] + ) + + +class MLDirectionTask: + name = 'ml_dir' + + @staticmethod + def validate_output_spec(spec, spec_counter): + assert spec_counter['ml_dir'] == 0 or ( + spec_counter['ml_dir'] == 1 and spec_counter['mask'] == 1 + ) + + @staticmethod + def running_score(spec, configer): + if 'ml_dir' not in spec: + return (None, None, None) + + return ( + { + 'ML dir (mask)': rslib.MultiLabelRunningScore(), + 'ML dir (GT)': rslib.MultiLabelRunningScore(), + }, + 'ML dir (GT)', + 'acc' + ) + + @staticmethod + def _get_multilabel_prediction(dir_logits, no_offset_mask=None, topk=8): + h, w, _ = dir_logits.shape + dir_logits = torch.from_numpy( + dir_logits + ).unsqueeze(0).permute(0, 3, 1, 2) + offsets = [] + if topk == dir_logits.shape[1]: + for i in range(topk): + offset_i = DTOffsetHelper.label_to_vector( + torch.tensor([i]).view(1, 1, 1) + ).repeat(1, 1, h, w) + offset_i = offset_i.float() * dir_logits[:, i:i+1, :, :] + offsets.append(offset_i) + else: + dir_logits, dir_pred = torch.topk(dir_logits, topk, dim=1) + for i in range(topk): + dir_pred_i = dir_pred[:, i, :, :] + offset_i = DTOffsetHelper.label_to_vector(dir_pred_i) + offset_i = offset_i.float() * dir_logits[:, i:i+1, :, :] + offsets.append(offset_i) + + offset = sum(offsets) + dir_pred = DTOffsetHelper.vector_to_label( + offset.permute(0, 2, 3, 1), + num_classes=8, + return_tensor=True + ) + + dir_pred = dir_pred.squeeze(0).numpy() + + if no_offset_mask is not None: + dir_pred[no_offset_mask] = 8 + + return dir_pred + + @staticmethod + def eval(outputs, meta, running_scores): + distance_map = meta['ori_distance_map'] + seg_label_map = meta['ori_target'] + dir_label_map = meta['ori_multi_label_direction_map'] + dir_label_map = DTOffsetHelper.encode_multi_labels(dir_label_map) + dir_label_map[seg_label_map == -1, :] = -1 + gt_mask_label = DTOffsetHelper.distance_to_mask_label( + distance_map, + seg_label_map, + return_tensor=False + ) + + mask_pred = MaskTask.get_mask_pred(outputs['mask']) + dir_pred = MLDirectionTask._get_multilabel_prediction( + outputs['ml_dir'], + no_offset_mask=mask_pred == 0, + topk=8 + ) + + running_scores['ML dir (mask)'].update( + dir_pred, dir_label_map, + (mask_pred == 1) & (seg_label_map != -1) + ) + running_scores['ML dir (GT)'].update( + dir_pred, dir_label_map, + gt_mask_label == 1 + ) + + +task_mapping = {task.name: task for task in [ + MaskTask, + SegTask, + DirectionTask, + MLDirectionTask, +]} diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/module_runner.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/module_runner.py new file mode 100644 index 0000000..ff5d6ea --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/module_runner.py 
@@ -0,0 +1,290 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com) +# Some methods used by main methods. + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +from collections import OrderedDict + +import torch +import torch.nn as nn +from torch.nn.parallel.scatter_gather import gather as torch_gather +from torch.nn.functional import interpolate + +from models.protoseg_core.lib.extensions.parallel.data_parallel import DataParallelModel +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.utils.distributed import get_rank, is_distributed + + +class ModuleRunner(object): + + def __init__(self, configer): + self.configer = configer + self._init() + + def _init(self): + self.configer.add(['iters'], 0) + self.configer.add(['last_iters'], 0) + self.configer.add(['epoch'], 0) + self.configer.add(['last_epoch'], 0) + self.configer.add(['max_performance'], 0.0) + self.configer.add(['performance'], 0.0) + self.configer.add(['min_val_loss'], 9999.0) + self.configer.add(['val_loss'], 9999.0) + if not self.configer.exists('network', 'bn_type'): + self.configer.add(['network', 'bn_type'], 'torchbn') + + # if self.configer.get('phase') == 'train': + # assert len(self.configer.get('gpu')) > 1 or self.configer.get('network', 'bn_type') == 'torchbn' + + Log.info('BN Type is {}.'.format(self.configer.get('network', 'bn_type'))) + + def to_device(self, *params, force_list=False): + if is_distributed(): + device = torch.device('cuda:{}'.format(get_rank())) + else: + device = torch.device('cpu' if self.configer.get('gpu') is None else 'cuda') + return_list = list() + for i in range(len(params)): + return_list.append(params[i].to(device)) + + if force_list: + return return_list + else: + return return_list[0] if len(params) == 1 else return_list + + def _make_parallel(self, net): + if is_distributed(): + local_rank = get_rank() + + return torch.nn.parallel.DistributedDataParallel( + net, + device_ids=[local_rank], + output_device=local_rank, + find_unused_parameters=True + ) + + if len(self.configer.get('gpu')) == 1: + self.configer.update(['network', 'gathered'], True) + + return DataParallelModel(net, gather_=self.configer.get('network', 'gathered')) + + def load_net(self, net): + net = self.to_device(net) + net = self._make_parallel(net) + + if not is_distributed(): + net = net.to(torch.device('cpu' if self.configer.get('gpu') is None else 'cuda')) + + net.float() + if self.configer.get('network', 'resume') is not None: + Log.info('Loading checkpoint from {}...'.format(self.configer.get('network', 'resume'))) + resume_dict = torch.load(self.configer.get('network', 'resume'), map_location=lambda storage, loc: storage) + if 'state_dict' in resume_dict: + checkpoint_dict = resume_dict['state_dict'] + + elif 'model' in resume_dict: + checkpoint_dict = resume_dict['model'] + + elif isinstance(resume_dict, OrderedDict): + checkpoint_dict = resume_dict + + else: + raise RuntimeError( + 'No state_dict found in checkpoint file {}'.format(self.configer.get('network', 'resume'))) + + if list(checkpoint_dict.keys())[0].startswith('module.'): + checkpoint_dict = {k[7:]: v for k, v in checkpoint_dict.items()} + + # load state_dict + if hasattr(net, 'module'): + self.load_state_dict(net.module, checkpoint_dict, self.configer.get('network', 'resume_strict')) + else: + self.load_state_dict(net, checkpoint_dict, self.configer.get('network', 
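# ---------------------------------------------------------------------------
# Editor's sketch of the checkpoint-key normalisation performed by load_net
# above (illustrative only). Weights saved from a DataParallel or
# DistributedDataParallel wrapper carry a 'module.' prefix on every key;
# stripping it lets the same file be loaded into the bare network.
from collections import OrderedDict

def strip_module_prefix(state_dict):
    if not state_dict or not next(iter(state_dict)).startswith('module.'):
        return state_dict
    return OrderedDict((k[len('module.'):], v) for k, v in state_dict.items())

ckpt = OrderedDict([('module.backbone.conv1.weight', 0), ('module.head.bias', 1)])
print(list(strip_module_prefix(ckpt)))
# ['backbone.conv1.weight', 'head.bias']
# ---------------------------------------------------------------------------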
'resume_strict')) + + data_dir = self.configer.get('data', 'data_dir') + resume_dict['config_dict']['data']['data_dir'] = data_dir + + if self.configer.get('network', 'resume_continue'): + self.configer.resume(resume_dict['config_dict']) + + Log.info(resume_dict['config_dict']) + + return net + + @staticmethod + def load_state_dict(module, state_dict, strict=False): + """Load state_dict to a module. + This method is modified from :meth:`torch.nn.Module.load_state_dict`. + Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + """ + unexpected_keys = [] + own_state = module.state_dict() + for name, param in state_dict.items(): + if name not in own_state: + unexpected_keys.append(name) + continue + if isinstance(param, torch.nn.Parameter): + # backwards compatibility for serialized parameters + param = param.data + + try: + own_state[name].copy_(param) + except Exception: + Log.warn('While copying the parameter named {}, ' + 'whose dimensions in the model are {} and ' + 'whose dimensions in the checkpoint are {}.' + .format(name, own_state[name].size(), + param.size())) + + missing_keys = set(own_state.keys()) - set(state_dict.keys()) + + err_msg = [] + if unexpected_keys: + err_msg.append('unexpected key in source state_dict: {}\n'.format(', '.join(unexpected_keys))) + if missing_keys: + # we comment this to fine-tune the models with some missing keys. + err_msg.append('missing keys in source state_dict: {}\n'.format(', '.join(missing_keys))) + err_msg = '\n'.join(err_msg) + if err_msg: + if strict: + raise RuntimeError(err_msg) + else: + Log.warn(err_msg) + + def save_net(self, net, save_mode='iters', experiment=None): + if is_distributed() and get_rank() != 0: + return + + state = { + 'config_dict': self.configer.to_dict(), + 'state_dict': net.state_dict(), + } + if self.configer.get('checkpoints', 'checkpoints_root') is None: + checkpoints_dir = os.path.join(self.configer.get('project_dir'), + self.configer.get('checkpoints', 'checkpoints_dir')) + else: + checkpoints_dir = os.path.join(self.configer.get('checkpoints', 'checkpoints_root'), + self.configer.get('checkpoints', 'checkpoints_dir')) + + if not os.path.exists(checkpoints_dir): + os.makedirs(checkpoints_dir) + + latest_name = '{}_latest.pth'.format(self.configer.get('checkpoints', 'checkpoints_name')) + torch.save(state, os.path.join(checkpoints_dir, latest_name)) + if save_mode == 'performance': + if self.configer.get('performance') > self.configer.get('max_performance'): + latest_name = '{}_max_performance.pth'.format(self.configer.get('checkpoints', 'checkpoints_name')) + torch.save(state, os.path.join(checkpoints_dir, latest_name)) + self.configer.update(['max_performance'], self.configer.get('performance')) + + elif save_mode == 'val_loss': + if self.configer.get('val_loss') < self.configer.get('min_val_loss'): + latest_name = '{}_min_loss.pth'.format(self.configer.get('checkpoints', 'checkpoints_name')) + torch.save(state, os.path.join(checkpoints_dir, latest_name)) + self.configer.update(['min_val_loss'], self.configer.get('val_loss')) + + elif save_mode == 'iters': + if self.configer.get('iters') - self.configer.get('last_iters') >= \ + 
self.configer.get('checkpoints', 'save_iters'): + latest_name = '{}_iters{}.pth'.format(self.configer.get('checkpoints', 'checkpoints_name'), + self.configer.get('iters')) + torch.save(state, os.path.join(checkpoints_dir, latest_name)) + self.configer.update(['last_iters'], self.configer.get('iters')) + + elif save_mode == 'epoch': + if self.configer.get('epoch') - self.configer.get('last_epoch') >= \ + self.configer.get('checkpoints', 'save_epoch'): + latest_name = '{}_epoch{}.pth'.format(self.configer.get('checkpoints', 'checkpoints_name'), + self.configer.get('epoch')) + torch.save(state, os.path.join(checkpoints_dir, latest_name)) + self.configer.update(['last_epoch'], self.configer.get('epoch')) + + else: + Log.error('Metric: {} is invalid.'.format(save_mode)) + exit(1) + + if experiment is not None: + experiment.checkpoint( + path=os.path.join(checkpoints_dir, latest_name), + step=self.configer.get('iters'), + metrics={'mIoU': self.configer.get('performance'), 'loss': self.configer.get('val_loss')}, + primary_metric=("mIoU", "maximize") + ) + + def freeze_bn(self, net, syncbn=False): + for m in net.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + m.eval() + + if syncbn: + from lib.extensions import BatchNorm2d, BatchNorm1d + if isinstance(m, BatchNorm2d) or isinstance(m, BatchNorm1d): + m.eval() + + def clip_grad(self, model, max_grad=10.): + """Computes a gradient clipping coefficient based on gradient norm.""" + total_norm = 0 + for p in model.parameters(): + if p.requires_grad: + modulenorm = p.grad.data.norm() + total_norm += modulenorm ** 2 + + total_norm = math.sqrt(total_norm) + + norm = max_grad / max(total_norm, max_grad) + for p in model.parameters(): + if p.requires_grad: + p.grad.mul_(norm) + + def gather(self, outputs, target_device=None, dim=0): + r""" + Gathers tensors from different GPUs on a specified device + (-1 means the CPU). + """ + if not self.configer.get('network', 'gathered'): + if target_device is None: + target_device = list(range(torch.cuda.device_count()))[0] + + return torch_gather(outputs, target_device, dim=dim) + + else: + return outputs + + def get_lr(self, optimizer): + + return [param_group['lr'] for param_group in optimizer.param_groups] + + def warm_lr(self, iters, scheduler, optimizer, backbone_list=(0, )): + """Sets the learning rate + # Adapted from PyTorch Imagenet example: + # https://github.com/pytorch/examples/blob/master/imagenet/main.py + """ + if not self.configer.exists('lr', 'is_warm') or not self.configer.get('lr', 'is_warm'): + return + + warm_iters = self.configer.get('lr', 'warm')['warm_iters'] + if iters < warm_iters: + if self.configer.get('lr', 'warm')['freeze_backbone']: + for backbone_index in backbone_list: + optimizer.param_groups[backbone_index]['lr'] = 0.0 + + else: + lr_ratio = (self.configer.get('iters') + 1) / warm_iters + base_lr_list = scheduler.get_lr() + for backbone_index in backbone_list: + optimizer.param_groups[backbone_index]['lr'] = base_lr_list[backbone_index] * (lr_ratio ** 4) + diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/optim_scheduler.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/optim_scheduler.py new file mode 100644 index 0000000..b2743e9 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/tools/optim_scheduler.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +# -*- coding:utf-8 -*- +# Author: Donny You(youansheng@gmail.com), Lang Huang, Rainbowsecret +# Some methods used by main methods. 
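# ---------------------------------------------------------------------------
# Editor's sketch of the global-norm gradient clipping in ModuleRunner.clip_grad
# above (illustrative only; equivalent in spirit to torch.nn.utils.clip_grad_norm_).
# The total L2 norm over all gradients is computed, and every gradient is scaled
# by max_grad / max(total_norm, max_grad), i.e. left untouched when the norm is
# already below the threshold.
import math
import torch

def clip_grad_by_global_norm(parameters, max_grad=10.0):
    grads = [p.grad for p in parameters if p.requires_grad and p.grad is not None]
    total_norm = math.sqrt(sum(float(g.norm()) ** 2 for g in grads))
    scale = max_grad / max(total_norm, max_grad)
    for g in grads:
        g.mul_(scale)
    return total_norm

w = torch.nn.Parameter(torch.ones(3))
w.grad = torch.full((3,), 20.0)            # norm ~= 34.6 > 10, so it gets rescaled
print(clip_grad_by_global_norm([w]))       # pre-clipping norm (~34.64)
print(w.grad.norm())                       # ~10.0 after clipping
# ---------------------------------------------------------------------------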
+ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import math + +import torchcontrib +from torch.optim import SGD, Adam, AdamW, lr_scheduler +from torch.optim.lr_scheduler import LambdaLR + +from models.protoseg_core.lib.utils.tools.logger import Logger as Log + + +class WarmupCosineSchedule(LambdaLR): + """ Linear warmup and then cosine decay. + Linearly increases learning rate from 0 to 1 over `warmup_steps` training steps. + Decreases learning rate from 1. to 0. over remaining `t_total - warmup_steps` steps following a cosine curve. + If `cycles` (default=0.5) is different from default, learning rate follows cosine function after warmup. + """ + + def __init__(self, optimizer, warmup_steps, t_total, cycles=.5, last_epoch=-1): + self.warmup_steps = warmup_steps + self.t_total = t_total + self.cycles = cycles + super(WarmupCosineSchedule, self).__init__(optimizer, self.lr_lambda, last_epoch=last_epoch) + + def lr_lambda(self, step): + if step < self.warmup_steps: + return float(step) / float(max(1.0, self.warmup_steps)) + # progress after warmup + progress = float(step - self.warmup_steps) / float(max(1, self.t_total - self.warmup_steps)) + return max(0.0, 0.5 * (1. + math.cos(math.pi * float(self.cycles) * 2.0 * progress))) + + +class OptimScheduler(object): + def __init__(self, configer): + self.configer = configer + + def init_optimizer(self, net_params): + optimizer = None + if self.configer.get('optim', 'optim_method') == 'sgd': + optimizer = SGD(net_params, + lr=self.configer.get('lr', 'base_lr'), + momentum=self.configer.get('optim', 'sgd')['momentum'], + weight_decay=self.configer.get('optim', 'sgd')['weight_decay'], + nesterov=self.configer.get('optim', 'sgd')['nesterov']) + + elif self.configer.get('optim', 'optim_method') == 'adam': + optimizer = Adam(net_params, + lr=self.configer.get('lr', 'base_lr'), + betas=self.configer.get('optim', 'adam')['betas'], + eps=self.configer.get('optim', 'adam')['eps'], + weight_decay=self.configer.get('optim', 'adam')['weight_decay']) + elif self.configer.get('optim', 'optim_method') == 'adamw': + optimizer = AdamW(net_params, + lr=self.configer.get('lr', 'base_lr'), + betas=self.configer.get('optim', 'adamw')['betas'], + eps=self.configer.get('optim', 'adamw')['eps'], + weight_decay=self.configer.get('optim', 'adamw')['weight_decay']) + + else: + Log.error('Optimizer {} is not valid.'.format(self.configer.get('optim', 'optim_method'))) + exit(1) + + policy = self.configer.get('lr', 'lr_policy') + + scheduler = None + if policy == 'step': + scheduler = lr_scheduler.StepLR(optimizer, + self.configer.get('lr', 'step')['step_size'], + gamma=self.configer.get('lr', 'step')['gamma']) + + elif policy == 'multistep': + scheduler = lr_scheduler.MultiStepLR(optimizer, + self.configer.get('lr', 'multistep')['stepvalue'], + gamma=self.configer.get('lr', 'multistep')['gamma']) + + elif policy == 'lambda_poly': + if os.environ.get('lambda_poly_power'): + _lambda_poly_power = float(os.environ.get('lambda_poly_power')) + Log.info('Use lambda_poly policy with power {}'.format(_lambda_poly_power)) + lambda_poly = lambda iters: pow((1.0 - iters / self.configer.get('solver', 'max_iters')), + _lambda_poly_power) + elif self.configer.exists('lr', 'lambda_poly'): + Log.info('Use lambda_poly policy with power {}'.format(self.configer.get('lr', 'lambda_poly')['power'])) + lambda_poly = lambda iters: pow((1.0 - iters / self.configer.get('solver', 'max_iters')), + self.configer.get('lr', 
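# ---------------------------------------------------------------------------
# Editor's sketch of the "poly" learning-rate policy built with LambdaLR above
# (illustrative only; max_iters, power and the toy optimizer are assumptions).
# The multiplier is (1 - iters / max_iters) ** power, so the LR decays from
# base_lr towards 0 over max_iters steps; the trainer calls scheduler.step(iters)
# with the global iteration counter.
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR

max_iters, power = 100, 0.9
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = SGD(params, lr=0.01)
scheduler = LambdaLR(optimizer, lr_lambda=lambda it: (1.0 - it / max_iters) ** power)

for it in [0, 50, 90]:
    scheduler.step(it)
    print(it, optimizer.param_groups[0]['lr'])
# 0 -> 0.01, 50 -> ~0.0054, 90 -> ~0.0013
# ---------------------------------------------------------------------------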
'lambda_poly')['power']) + else: + Log.info('Use lambda_poly policy with default power 0.9') + lambda_poly = lambda iters: pow((1.0 - iters / self.configer.get('solver', 'max_iters')), 0.9) + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_poly) + + elif policy == 'lambda_cosine': + lambda_cosine = lambda iters: (math.cos(math.pi * iters / self.configer.get('solver', 'max_iters')) + + 1.0) / 2 + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_cosine) + + elif policy == 'plateau': + scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, + mode=self.configer.get('lr', 'plateau')['mode'], + factor=self.configer.get('lr', 'plateau')['factor'], + patience=self.configer.get('lr', 'plateau')['patience'], + threshold=self.configer.get('lr', 'plateau')['threshold'], + threshold_mode=self.configer.get('lr', 'plateau')['thre_mode'], + cooldown=self.configer.get('lr', 'plateau')['cooldown'], + min_lr=self.configer.get('lr', 'plateau')['min_lr'], + eps=self.configer.get('lr', 'plateau')['eps']) + + elif policy == 'swa_lambda_poly': + optimizer = torchcontrib.optim.SWA(optimizer) + normal_max_iters = int(self.configer.get('solver', 'max_iters') * 0.75) + swa_step_max_iters = (self.configer.get('solver', + 'max_iters') - normal_max_iters) // 5 + 1 # we use 5 ensembles here + + def swa_lambda_poly(iters): + if iters < normal_max_iters: + return pow(1.0 - iters / normal_max_iters, 0.9) + else: # set lr to half of initial lr and start swa + return 0.5 * pow(1.0 - ((iters - normal_max_iters) % swa_step_max_iters) / swa_step_max_iters, 0.9) + + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=swa_lambda_poly) + + elif policy == 'swa_lambda_cosine': + optimizer = torchcontrib.optim.SWA(optimizer) + normal_max_iters = int(self.configer.get('solver', 'max_iters') * 0.75) + swa_step_max_iters = (self.configer.get('solver', + 'max_iters') - normal_max_iters) // 5 + 1 # we use 5 ensembles here + + def swa_lambda_cosine(iters): + if iters < normal_max_iters: + return (math.cos(math.pi * iters / normal_max_iters) + 1.0) / 2 + else: # set lr to half of initial lr and start swa + return 0.5 * (math.cos( + math.pi * ((iters - normal_max_iters) % swa_step_max_iters) / swa_step_max_iters) + 1.0) / 2 + + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=swa_lambda_cosine) + + elif policy == 'warmup_cosine': + scheduler = WarmupCosineSchedule(optimizer, warmup_steps=1000, + t_total=self.configer.get('solver', 'max_iters')) + + else: + Log.error('Policy:{} is not valid.'.format(policy)) + exit(1) + + return optimizer, scheduler + + def update_optimizer(self, net, optim_method, lr_policy): + self.configer.update(('optim', 'optim_method'), optim_method) + self.configer.update(('lr', 'lr_policy'), lr_policy) + optimizer, scheduler = self.init_optimizer(net) + return optimizer, scheduler diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/trainer.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/trainer.py new file mode 100644 index 0000000..3a8fa25 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/trainer.py @@ -0,0 +1,393 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: RainbowSecret, JingyiXie, LangHuang +## Microsoft Research +## yuyua@microsoft.com +## Copyright (c) 2019 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +from __future__ import 
absolute_import +from __future__ import division +from __future__ import print_function + +import time + +import os +import cv2 +import pdb +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.distributed as dist +import torch.backends.cudnn as cudnn + +from lib.utils.tools.average_meter import AverageMeter +from lib.datasets.data_loader import DataLoader +from lib.loss.loss_manager import LossManager +from lib.models.model_manager import ModelManager +from lib.utils.tools.logger import Logger as Log +from lib.vis.seg_visualizer import SegVisualizer +from segmentor.tools.module_runner import ModuleRunner +from segmentor.tools.optim_scheduler import OptimScheduler +from segmentor.tools.data_helper import DataHelper +from segmentor.tools.evaluator import get_evaluator +from lib.utils.distributed import get_world_size, get_rank, is_distributed +# from mmcv.cnn import get_model_complexity_info + + +class Trainer(object): + """ + The class for Pose Estimation. Include train, val, val & predict. + """ + + def __init__(self, configer): + self.configer = configer + self.batch_time = AverageMeter() + self.foward_time = AverageMeter() + self.backward_time = AverageMeter() + self.loss_time = AverageMeter() + self.data_time = AverageMeter() + self.train_losses = AverageMeter() + # self.val_losses = AverageMeter() + self.seg_visualizer = SegVisualizer(configer) + self.loss_manager = LossManager(configer) + self.module_runner = ModuleRunner(configer) + self.model_manager = ModelManager(configer) + self.data_loader = DataLoader(configer) + self.optim_scheduler = OptimScheduler(configer) + self.data_helper = DataHelper(configer, self) + self.evaluator = get_evaluator(configer, self) + + self.seg_net = None + self.train_loader = None + self.val_loader = None + self.optimizer = None + self.scheduler = None + self.running_score = None + + self._init_model() + + def _init_model(self): + self.seg_net = self.model_manager.semantic_segmentor() + + try: + flops, params = get_model_complexity_info(self.seg_net, (3, 512, 512)) + split_line = '=' * 30 + print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format( + split_line, (3, 512, 512), flops, params)) + print('!!!Please be cautious if you use the results in papers. 
' + 'You may need to check if all ops are supported and verify that the ' + 'flops computation is correct.') + except: + pass + + self.seg_net = self.module_runner.load_net(self.seg_net) + + Log.info('Params Group Method: {}'.format(self.configer.get('optim', 'group_method'))) + if self.configer.get('optim', 'group_method') == 'decay': + params_group = self.group_weight(self.seg_net) + else: + assert self.configer.get('optim', 'group_method') is None + params_group = self._get_parameters() + + self.optimizer, self.scheduler = self.optim_scheduler.init_optimizer(params_group) + + self.train_loader = self.data_loader.get_trainloader() + self.val_loader = self.data_loader.get_valloader() + self.pixel_loss = self.loss_manager.get_seg_loss() + if is_distributed(): + self.pixel_loss = self.module_runner.to_device(self.pixel_loss) + + self.with_proto = True if self.configer.exists("protoseg") else False + + + @staticmethod + def group_weight(module): + group_decay = [] + group_no_decay = [] + for m in module.modules(): + if isinstance(m, nn.Linear): + group_decay.append(m.weight) + if m.bias is not None: + group_no_decay.append(m.bias) + elif isinstance(m, nn.modules.conv._ConvNd): + group_decay.append(m.weight) + if m.bias is not None: + group_no_decay.append(m.bias) + else: + if hasattr(m, 'weight'): + group_no_decay.append(m.weight) + if hasattr(m, 'bias'): + group_no_decay.append(m.bias) + + assert len(list(module.parameters())) == len(group_decay) + len(group_no_decay) + groups = [dict(params=group_decay), dict(params=group_no_decay, weight_decay=.0)] + return groups + + def _get_parameters(self): + bb_lr = [] + nbb_lr = [] + fcn_lr = [] + params_dict = dict(self.seg_net.named_parameters()) + for key, value in params_dict.items(): + if 'backbone' in key: + bb_lr.append(value) + elif 'aux_layer' in key or 'upsample_proj' in key: + fcn_lr.append(value) + else: + nbb_lr.append(value) + + params = [{'params': bb_lr, 'lr': self.configer.get('lr', 'base_lr')}, + {'params': fcn_lr, 'lr': self.configer.get('lr', 'base_lr') * 10}, + {'params': nbb_lr, 'lr': self.configer.get('lr', 'base_lr') * self.configer.get('lr', 'nbb_mult')}] + return params + + def __train(self): + """ + Train function of every epoch during train phase. + """ + self.seg_net.train() + self.pixel_loss.train() + start_time = time.time() + scaler = torch.cuda.amp.GradScaler() + + if "swa" in self.configer.get('lr', 'lr_policy'): + normal_max_iters = int(self.configer.get('solver', 'max_iters') * 0.75) + swa_step_max_iters = (self.configer.get('solver', 'max_iters') - normal_max_iters) // 5 + 1 + + if hasattr(self.train_loader.sampler, 'set_epoch'): + self.train_loader.sampler.set_epoch(self.configer.get('epoch')) + + for i, data_dict in enumerate(self.train_loader): + self.optimizer.zero_grad() + if self.configer.get('lr', 'metric') == 'iters': + self.scheduler.step(self.configer.get('iters')) + else: + self.scheduler.step(self.configer.get('epoch')) + + if self.configer.get('lr', 'is_warm'): + self.module_runner.warm_lr( + self.configer.get('iters'), + self.scheduler, self.optimizer, backbone_list=[0, ] + ) + + (inputs, targets), batch_size = self.data_helper.prepare_data(data_dict) + self.data_time.update(time.time() - start_time) + + foward_start_time = time.time() + with torch.cuda.amp.autocast(): + if not self.with_proto: + outputs = self.seg_net(*inputs) + else: + pretrain_prototype = True if self.configer.get('iters') < self. 
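# ---------------------------------------------------------------------------
# Editor's sketch of the per-group learning rates built by _get_parameters above
# (illustrative only; the toy model and values are assumptions). Backbone weights
# keep the base LR, auxiliary/upsample heads get 10x, and the remaining
# parameters get base_lr * nbb_mult.
import torch
import torch.nn as nn
from torch.optim import SGD

model = nn.ModuleDict({
    'backbone': nn.Linear(8, 8),
    'aux_layer': nn.Linear(8, 2),
    'head': nn.Linear(8, 2),
})
base_lr, nbb_mult = 0.01, 1.0
bb, fcn, nbb = [], [], []
for name, p in model.named_parameters():
    if 'backbone' in name:
        bb.append(p)
    elif 'aux_layer' in name or 'upsample_proj' in name:
        fcn.append(p)
    else:
        nbb.append(p)
optimizer = SGD([
    {'params': bb, 'lr': base_lr},
    {'params': fcn, 'lr': base_lr * 10},
    {'params': nbb, 'lr': base_lr * nbb_mult},
], lr=base_lr, momentum=0.9)
print([g['lr'] for g in optimizer.param_groups])   # [0.01, 0.1, 0.01]
# ---------------------------------------------------------------------------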
configer.get('protoseg', 'warmup_iters') else False + outputs = self.seg_net(*inputs, gt_semantic_seg=targets[:, None, ...], + pretrain_prototype=pretrain_prototype) + self.foward_time.update(time.time() - foward_start_time) + + loss_start_time = time.time() + if is_distributed(): + import torch.distributed as dist + def reduce_tensor(inp): + """ + Reduce the loss from all processes so that + process with rank 0 has the averaged results. + """ + world_size = get_world_size() + if world_size < 2: + return inp + with torch.no_grad(): + reduced_inp = inp + dist.reduce(reduced_inp, dst=0) + return reduced_inp + + with torch.cuda.amp.autocast(): + loss = self.pixel_loss(outputs, targets) + backward_loss = loss + display_loss = reduce_tensor(backward_loss) / get_world_size() + else: + backward_loss = display_loss = self.pixel_loss(outputs, targets) + + self.train_losses.update(display_loss.item(), batch_size) + self.loss_time.update(time.time() - loss_start_time) + + backward_start_time = time.time() + + # backward_loss.backward() + # self.optimizer.step() + scaler.scale(backward_loss).backward() + scaler.step(self.optimizer) + scaler.update() + + self.backward_time.update(time.time() - backward_start_time) + + # Update the vars of the train phase. + self.batch_time.update(time.time() - start_time) + start_time = time.time() + self.configer.plus_one('iters') + + # Print the log info & reset the states. + if self.configer.get('iters') % self.configer.get('solver', 'display_iter') == 0 and \ + (not is_distributed() or get_rank() == 0): + Log.info('Train Epoch: {0}\tTrain Iteration: {1}\t' + 'Time {batch_time.sum:.3f}s / {2}iters, ({batch_time.avg:.3f})\t' + 'Forward Time {foward_time.sum:.3f}s / {2}iters, ({foward_time.avg:.3f})\t' + 'Backward Time {backward_time.sum:.3f}s / {2}iters, ({backward_time.avg:.3f})\t' + 'Loss Time {loss_time.sum:.3f}s / {2}iters, ({loss_time.avg:.3f})\t' + 'Data load {data_time.sum:.3f}s / {2}iters, ({data_time.avg:3f})\n' + 'Learning rate = {3}\tLoss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( + self.configer.get('epoch'), self.configer.get('iters'), + self.configer.get('solver', 'display_iter'), + self.module_runner.get_lr(self.optimizer), batch_time=self.batch_time, + foward_time=self.foward_time, backward_time=self.backward_time, loss_time=self.loss_time, + data_time=self.data_time, loss=self.train_losses)) + self.batch_time.reset() + self.foward_time.reset() + self.backward_time.reset() + self.loss_time.reset() + self.data_time.reset() + self.train_losses.reset() + + # save checkpoints for swa + if 'swa' in self.configer.get('lr', 'lr_policy') and \ + self.configer.get('iters') > normal_max_iters and \ + ((self.configer.get('iters') - normal_max_iters) % swa_step_max_iters == 0 or \ + self.configer.get('iters') == self.configer.get('solver', 'max_iters')): + self.optimizer.update_swa() + + if self.configer.get('iters') == self.configer.get('solver', 'max_iters'): + break + + # Check to val the current model. + # if self.configer.get('epoch') % self.configer.get('solver', 'test_interval') == 0: + if self.configer.get('iters') % self.configer.get('solver', 'test_interval') == 0: + self.__val() + + self.configer.plus_one('epoch') + + def __val(self, data_loader=None): + """ + Validation function during the train phase. 
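# ---------------------------------------------------------------------------
# Editor's sketch of the mixed-precision update used in __train above
# (illustrative only; the toy model, loss and data are assumptions). The forward
# pass runs under autocast, the loss is scaled before backward so small fp16
# gradients survive, and the scaler unscales before the optimizer step.
import torch
import torch.nn as nn

use_cuda = torch.cuda.is_available()
model = nn.Linear(4, 2).cuda() if use_cuda else nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)

x = torch.randn(8, 4, device=next(model.parameters()).device)
y = torch.randint(0, 2, (8,), device=x.device)

optimizer.zero_grad()
with torch.cuda.amp.autocast(enabled=use_cuda):
    loss = nn.functional.cross_entropy(model(x), y)
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
print(float(loss))
# ---------------------------------------------------------------------------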
+ """ + self.seg_net.eval() + self.pixel_loss.eval() + start_time = time.time() + replicas = self.evaluator.prepare_validaton() + + data_loader = self.val_loader if data_loader is None else data_loader + for j, data_dict in enumerate(data_loader): + if j % 10 == 0: + if is_distributed(): dist.barrier() # Synchronize all processes + Log.info('{} images processed\n'.format(j)) + + if self.configer.get('dataset') == 'lip': + (inputs, targets, inputs_rev, targets_rev), batch_size = self.data_helper.prepare_data(data_dict, + want_reverse=True) + else: + (inputs, targets), batch_size = self.data_helper.prepare_data(data_dict) + + with torch.no_grad(): + if self.configer.get('dataset') == 'lip': + inputs = torch.cat([inputs[0], inputs_rev[0]], dim=0) + outputs = self.seg_net(inputs) + if not is_distributed(): + outputs_ = self.module_runner.gather(outputs) + else: + outputs_ = outputs + if isinstance(outputs_, (list, tuple)): + outputs_ = outputs_[-1] + outputs = outputs_[0:int(outputs_.size(0) / 2), :, :, :].clone() + outputs_rev = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), :, :, :].clone() + if outputs_rev.shape[1] == 20: + outputs_rev[:, 14, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 15, :, :] + outputs_rev[:, 15, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 14, :, :] + outputs_rev[:, 16, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 17, :, :] + outputs_rev[:, 17, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 16, :, :] + outputs_rev[:, 18, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 19, :, :] + outputs_rev[:, 19, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 18, :, :] + outputs_rev = torch.flip(outputs_rev, [3]) + outputs = (outputs + outputs_rev) / 2. + self.evaluator.update_score(outputs, data_dict['meta']) + + elif self.data_helper.conditions.diverse_size: + if is_distributed(): + outputs = [self.seg_net(inputs[i]) for i in range(len(inputs))] + else: + outputs = nn.parallel.parallel_apply(replicas[:len(inputs)], inputs) + + for i in range(len(outputs)): + loss = self.pixel_loss(outputs[i], targets[i].unsqueeze(0)) + # self.val_losses.update(loss.item(), 1) + outputs_i = outputs[i] + if isinstance(outputs_i, torch.Tensor): + outputs_i = [outputs_i] + self.evaluator.update_score(outputs_i, data_dict['meta'][i:i + 1]) + + else: + outputs = self.seg_net(*inputs) + + if not is_distributed(): + outputs = self.module_runner.gather(outputs) + if isinstance(outputs, dict): + outputs = outputs['seg'] + self.evaluator.update_score(outputs, data_dict['meta']) + + self.batch_time.update(time.time() - start_time) + start_time = time.time() + + self.evaluator.update_performance() + + self.module_runner.save_net(self.seg_net, save_mode='performance') + cudnn.benchmark = True + + # Print the log info & reset the states. 
+ self.evaluator.reduce_scores() + if not is_distributed() or get_rank() == 0: + self.evaluator.print_scores() + + self.batch_time.reset() + self.evaluator.reset() + self.seg_net.train() + self.pixel_loss.train() + + def train(self): + # cudnn.benchmark = True + # self.__val() + if self.configer.get('network', 'resume') is not None: + if self.configer.get('network', 'resume_val'): + self.__val(data_loader=self.data_loader.get_valloader(dataset='val')) + return + elif self.configer.get('network', 'resume_train'): + self.__val(data_loader=self.data_loader.get_valloader(dataset='train')) + return + # return + + # if self.configer.get('network', 'resume') is not None and self.configer.get('network', 'resume_val'): + # self.__val(data_loader=self.data_loader.get_valloader(dataset='val')) + # return + + while self.configer.get('iters') < self.configer.get('solver', 'max_iters'): + self.__train() + + # use swa to average the model + if 'swa' in self.configer.get('lr', 'lr_policy'): + self.optimizer.swap_swa_sgd() + self.optimizer.bn_update(self.train_loader, self.seg_net) + + self.__val(data_loader=self.data_loader.get_valloader(dataset='val')) + + def summary(self): + from lib.utils.summary import get_model_summary + import torch.nn.functional as F + self.seg_net.eval() + + for j, data_dict in enumerate(self.train_loader): + print(get_model_summary(self.seg_net, data_dict['img'][0:1])) + return + + +if __name__ == "__main__": + pass diff --git a/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/trainer_contrastive.py b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/trainer_contrastive.py new file mode 100644 index 0000000..35fb855 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/protoseg_core/segmentor/trainer_contrastive.py @@ -0,0 +1,439 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import time + +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn + +from lib.datasets.data_loader import DataLoader +from lib.loss.loss_manager import LossManager +from lib.models.model_manager import ModelManager +from lib.utils.distributed import get_world_size, get_rank, is_distributed +from lib.utils.tools.average_meter import AverageMeter +from lib.utils.tools.logger import Logger as Log +from lib.vis.seg_visualizer import SegVisualizer +from segmentor.tools.data_helper import DataHelper +from segmentor.tools.evaluator import get_evaluator +from segmentor.tools.module_runner import ModuleRunner +from segmentor.tools.optim_scheduler import OptimScheduler + + +class Trainer(object): + def __init__(self, configer): + self.configer = configer + self.batch_time = AverageMeter() + self.foward_time = AverageMeter() + self.backward_time = AverageMeter() + self.loss_time = AverageMeter() + self.data_time = AverageMeter() + self.train_losses = AverageMeter() + self.val_losses = AverageMeter() + self.seg_visualizer = SegVisualizer(configer) + self.loss_manager = LossManager(configer) + self.module_runner = ModuleRunner(configer) + self.model_manager = ModelManager(configer) + self.data_loader = DataLoader(configer) + self.optim_scheduler = OptimScheduler(configer) + self.data_helper = DataHelper(configer, self) + self.evaluator = get_evaluator(configer, self) + + self.seg_net = None + self.train_loader = None + self.val_loader = None + self.optimizer = None + self.scheduler = None + self.running_score = None + + self._init_model() + + def _init_model(self): + self.seg_net = self.model_manager.semantic_segmentor() + 
self.seg_net = self.module_runner.load_net(self.seg_net) + + Log.info('Params Group Method: {}'.format(self.configer.get('optim', 'group_method'))) + if self.configer.get('optim', 'group_method') == 'decay': + params_group = self.group_weight(self.seg_net) + else: + assert self.configer.get('optim', 'group_method') is None + params_group = self._get_parameters() + + self.optimizer, self.scheduler = self.optim_scheduler.init_optimizer(params_group) + + self.train_loader = self.data_loader.get_trainloader() + self.val_loader = self.data_loader.get_valloader() + self.pixel_loss = self.loss_manager.get_seg_loss() + if is_distributed(): + self.pixel_loss = self.module_runner.to_device(self.pixel_loss) + + self.with_contrast = True if self.configer.exists("contrast") else False + if self.configer.exists("contrast", "warmup_iters"): + self.contrast_warmup_iters = self.configer.get("contrast", "warmup_iters") + else: + self.contrast_warmup_iters = 0 + + self.with_memory = self.configer.exists('contrast', 'with_memory') + if self.with_memory: + self.memory_size = self.configer.get('contrast', 'memory_size') + self.pixel_update_freq = self.configer.get('contrast', 'pixel_update_freq') + + self.network_stride = self.configer.get('network', 'stride') + + Log.info("with_contrast: {}, warmup_iters: {}, with_memory: {}".format( + self.with_contrast, self.contrast_warmup_iters, self.with_memory)) + + # self.experiment = keepsake.init( + # path='keepsake', + # params={"[HP] learning_rate": self.configer.get('lr', 'base_lr'), + # "[HP] train_bs": self.configer.get('train', 'batch_size'), + # "[NET] loss": self.configer.get('loss', 'loss_type'), + # "[NET] backbone": self.configer.get('network', 'backbone'), + # "[NET] model_name": self.configer.get('network', 'model_name'), + # "[CONTRAST] proj_dim": self.configer.get('contrast', 'proj_dim'), + # "[CONTRAST] temperature": self.configer.get('contrast', 'temperature'), + # "[CONTRAST] max_samples": self.configer.get('contrast', 'max_samples'), + # "[CONTRAST] warmup_iters": self.configer.get('contrast', 'warmup_iters'), + # "[CONTRAST] loss_weight": self.configer.get('contrast', 'loss_weight')} + # ) + + def _dequeue_and_enqueue(self, keys, labels, + segment_queue, segment_queue_ptr, + pixel_queue, pixel_queue_ptr): + batch_size = keys.shape[0] + feat_dim = keys.shape[1] + + labels = labels[:, ::self.network_stride, ::self.network_stride] + + for bs in range(batch_size): + this_feat = keys[bs].contiguous().view(feat_dim, -1) + this_label = labels[bs].contiguous().view(-1) + this_label_ids = torch.unique(this_label) + this_label_ids = [x for x in this_label_ids if x > 0] + + for lb in this_label_ids: + idxs = (this_label == lb).nonzero() + + # segment enqueue and dequeue + feat = torch.mean(this_feat[:, idxs], dim=1).squeeze(1) + ptr = int(segment_queue_ptr[lb]) + segment_queue[lb, ptr, :] = nn.functional.normalize(feat.view(-1), p=2, dim=0) + segment_queue_ptr[lb] = (segment_queue_ptr[lb] + 1) % self.memory_size + + # pixel enqueue and dequeue + num_pixel = idxs.shape[0] + perm = torch.randperm(num_pixel) + K = min(num_pixel, self.pixel_update_freq) + feat = this_feat[:, perm[:K]] + feat = torch.transpose(feat, 0, 1) + ptr = int(pixel_queue_ptr[lb]) + + if ptr + K >= self.memory_size: + pixel_queue[lb, -K:, :] = nn.functional.normalize(feat, p=2, dim=1) + pixel_queue_ptr[lb] = 0 + else: + pixel_queue[lb, ptr:ptr + K, :] = nn.functional.normalize(feat, p=2, dim=1) + pixel_queue_ptr[lb] = (pixel_queue_ptr[lb] + 1) % self.memory_size + + @staticmethod + def 
group_weight(module): + group_decay = [] + group_no_decay = [] + for m in module.modules(): + if isinstance(m, nn.Linear): + group_decay.append(m.weight) + if m.bias is not None: + group_no_decay.append(m.bias) + elif isinstance(m, nn.modules.conv._ConvNd): + group_decay.append(m.weight) + if m.bias is not None: + group_no_decay.append(m.bias) + else: + if hasattr(m, 'weight'): + group_no_decay.append(m.weight) + if hasattr(m, 'bias'): + group_no_decay.append(m.bias) + + assert len(list(module.parameters())) == len(group_decay) + len(group_no_decay) + groups = [dict(params=group_decay), dict(params=group_no_decay, weight_decay=.0)] + return groups + + def _get_parameters(self): + bb_lr = [] + nbb_lr = [] + params_dict = dict(self.seg_net.named_parameters()) + for key, value in params_dict.items(): + if 'backbone' not in key: + nbb_lr.append(value) + else: + bb_lr.append(value) + + params = [{'params': bb_lr, 'lr': self.configer.get('lr', 'base_lr')}, + {'params': nbb_lr, 'lr': self.configer.get('lr', 'base_lr') * self.configer.get('lr', 'nbb_mult')}] + return params + + def __train(self): + """ + Train function of every epoch during train phase. + """ + self.seg_net.train() + self.pixel_loss.train() + start_time = time.time() + + if "swa" in self.configer.get('lr', 'lr_policy'): + normal_max_iters = int(self.configer.get('solver', 'max_iters') * 0.75) + swa_step_max_iters = (self.configer.get('solver', 'max_iters') - normal_max_iters) // 5 + 1 + + if hasattr(self.train_loader.sampler, 'set_epoch'): + self.train_loader.sampler.set_epoch(self.configer.get('epoch')) + + for i, data_dict in enumerate(self.train_loader): + if self.configer.get('lr', 'metric') == 'iters': + self.scheduler.step(self.configer.get('iters')) + else: + self.scheduler.step(self.configer.get('epoch')) + + if self.configer.get('lr', 'is_warm'): + self.module_runner.warm_lr( + self.configer.get('iters'), + self.scheduler, self.optimizer, backbone_list=[0, ] + ) + + (inputs, targets), batch_size = self.data_helper.prepare_data(data_dict) + self.data_time.update(time.time() - start_time) + + foward_start_time = time.time() + + with_embed = True if self.configer.get('iters') >= self.contrast_warmup_iters else False + if self.with_contrast is True: + if self.with_memory is True: + outputs = self.seg_net(*inputs, targets, with_embed=with_embed) + + outputs['pixel_queue'] = self.seg_net.module.pixel_queue + outputs['pixel_queue_ptr'] = self.seg_net.module.pixel_queue_ptr + outputs['segment_queue'] = self.seg_net.module.segment_queue + outputs['segment_queue_ptr'] = self.seg_net.module.segment_queue_ptr + else: + outputs = self.seg_net(*inputs, with_embed=with_embed) + else: + outputs = self.seg_net(*inputs) + + self.foward_time.update(time.time() - foward_start_time) + + loss_start_time = time.time() + if is_distributed(): + import torch.distributed as dist + def reduce_tensor(inp): + """ + Reduce the loss from all processes so that + process with rank 0 has the averaged results. 
+ """ + world_size = get_world_size() + if world_size < 2: + return inp + with torch.no_grad(): + reduced_inp = inp + dist.reduce(reduced_inp, dst=0) + return reduced_inp + + loss = self.pixel_loss(outputs, targets, with_embed=with_embed) + backward_loss = loss + display_loss = reduce_tensor(backward_loss) / get_world_size() + else: + backward_loss = display_loss = self.pixel_loss(outputs, targets) + + if self.with_memory and 'key' in outputs and 'lb_key' in outputs: + self._dequeue_and_enqueue(outputs['key'], outputs['lb_key'], + segment_queue=self.seg_net.module.segment_queue, + segment_queue_ptr=self.seg_net.module.segment_queue_ptr, + pixel_queue=self.seg_net.module.pixel_queue, + pixel_queue_ptr=self.seg_net.module.pixel_queue_ptr) + + self.train_losses.update(display_loss.item(), batch_size) + self.loss_time.update(time.time() - loss_start_time) + + backward_start_time = time.time() + self.optimizer.zero_grad() + backward_loss.backward() + + self.optimizer.step() + self.backward_time.update(time.time() - backward_start_time) + + # Update the vars of the train phase. + self.batch_time.update(time.time() - start_time) + start_time = time.time() + self.configer.plus_one('iters') + + # Print the log info & reset the states. + if self.configer.get('iters') % self.configer.get('solver', 'display_iter') == 0 and \ + (not is_distributed() or get_rank() == 0): + Log.info('Train Epoch: {0}\tTrain Iteration: {1}\t' + 'Time {batch_time.sum:.3f}s / {2}iters, ({batch_time.avg:.3f})\t' + 'Forward Time {foward_time.sum:.3f}s / {2}iters, ({foward_time.avg:.3f})\t' + 'Backward Time {backward_time.sum:.3f}s / {2}iters, ({backward_time.avg:.3f})\t' + 'Loss Time {loss_time.sum:.3f}s / {2}iters, ({loss_time.avg:.3f})\t' + 'Data load {data_time.sum:.3f}s / {2}iters, ({data_time.avg:3f})\n' + 'Learning rate = {3}\tLoss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( + self.configer.get('epoch'), self.configer.get('iters'), + self.configer.get('solver', 'display_iter'), + self.module_runner.get_lr(self.optimizer), batch_time=self.batch_time, + foward_time=self.foward_time, backward_time=self.backward_time, loss_time=self.loss_time, + data_time=self.data_time, loss=self.train_losses)) + self.batch_time.reset() + self.foward_time.reset() + self.backward_time.reset() + self.loss_time.reset() + self.data_time.reset() + self.train_losses.reset() + + # save checkpoints for swa + if 'swa' in self.configer.get('lr', 'lr_policy') and \ + self.configer.get('iters') > normal_max_iters and \ + ((self.configer.get('iters') - normal_max_iters) % swa_step_max_iters == 0 or \ + self.configer.get('iters') == self.configer.get('solver', 'max_iters')): + self.optimizer.update_swa() + + if self.configer.get('iters') == self.configer.get('solver', 'max_iters'): + break + + if self.configer.get('iters') % self.configer.get('solver', 'test_interval') == 0: + self.__val() + + self.configer.plus_one('epoch') + + def __val(self, data_loader=None): + """ + Validation function during the train phase. 
+ """ + self.seg_net.eval() + self.pixel_loss.eval() + start_time = time.time() + replicas = self.evaluator.prepare_validaton() + + data_loader = self.val_loader if data_loader is None else data_loader + for j, data_dict in enumerate(data_loader): + if j % 10 == 0: + Log.info('{} images processed\n'.format(j)) + + if self.configer.get('dataset') == 'lip': + (inputs, targets, inputs_rev, targets_rev), batch_size = self.data_helper.prepare_data(data_dict, + want_reverse=True) + else: + (inputs, targets), batch_size = self.data_helper.prepare_data(data_dict) + + with torch.no_grad(): + if self.configer.get('dataset') == 'lip': + inputs = torch.cat([inputs[0], inputs_rev[0]], dim=0) + + outputs = self.seg_net(inputs) + + outputs_ = self.module_runner.gather(outputs) + if isinstance(outputs_, (list, tuple)): + outputs_ = outputs_[-1] + outputs = outputs_[0:int(outputs_.size(0) / 2), :, :, :].clone() + outputs_rev = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), :, :, :].clone() + if outputs_rev.shape[1] == 20: + outputs_rev[:, 14, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 15, :, :] + outputs_rev[:, 15, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 14, :, :] + outputs_rev[:, 16, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 17, :, :] + outputs_rev[:, 17, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 16, :, :] + outputs_rev[:, 18, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 19, :, :] + outputs_rev[:, 19, :, :] = outputs_[int(outputs_.size(0) / 2):int(outputs_.size(0)), 18, :, :] + outputs_rev = torch.flip(outputs_rev, [3]) + outputs = (outputs + outputs_rev) / 2. + self.evaluator.update_score(outputs, data_dict['meta']) + + elif self.data_helper.conditions.diverse_size: + if is_distributed(): + outputs = [self.seg_net(inputs[i]) for i in range(len(inputs))] + else: + outputs = nn.parallel.parallel_apply(replicas[:len(inputs)], inputs) + + for i in range(len(outputs)): + loss = self.pixel_loss(outputs[i], targets[i].unsqueeze(0)) + self.val_losses.update(loss.item(), 1) + outputs_i = outputs[i]['seg'] + if isinstance(outputs_i, torch.Tensor): + outputs_i = [outputs_i] + self.evaluator.update_score(outputs_i, data_dict['meta'][i:i + 1]) + + else: + outputs = self.seg_net(*inputs, is_eval=True) + + try: + loss = self.pixel_loss(outputs, targets) + except AssertionError as e: + print(len(outputs), len(targets)) + + if not is_distributed(): + outputs = self.module_runner.gather(outputs) + self.val_losses.update(loss.item(), batch_size) + + if isinstance(outputs, dict): + self.evaluator.update_score(outputs['seg'], data_dict['meta']) + else: + self.evaluator.update_score(outputs, data_dict['meta']) + + self.batch_time.update(time.time() - start_time) + start_time = time.time() + + self.evaluator.update_performance() + + self.configer.update(['val_loss'], self.val_losses.avg) + self.module_runner.save_net(self.seg_net, save_mode='performance', experiment=None) + self.module_runner.save_net(self.seg_net, save_mode='val_loss', experiment=None) + cudnn.benchmark = True + + # Print the log info & reset the states. 
+ if not is_distributed() or get_rank() == 0: + Log.info( + 'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t' + 'Loss {loss.avg:.8f}\n'.format( + batch_time=self.batch_time, loss=self.val_losses)) + self.evaluator.print_scores() + + self.batch_time.reset() + self.val_losses.reset() + self.evaluator.reset() + self.seg_net.train() + self.pixel_loss.train() + + def train(self): + # cudnn.benchmark = True + # self.__val() + if self.configer.get('network', 'resume') is not None: + if self.configer.get('network', 'resume_val'): + self.__val(data_loader=self.data_loader.get_valloader(dataset='val')) + return + elif self.configer.get('network', 'resume_train'): + self.__val(data_loader=self.data_loader.get_valloader(dataset='train')) + return + # return + + if self.configer.get('network', 'resume') is not None and self.configer.get('network', 'resume_val'): + self.__val(data_loader=self.data_loader.get_valloader(dataset='val')) + return + + while self.configer.get('iters') < self.configer.get('solver', 'max_iters'): + self.__train() + + # use swa to average the model + if 'swa' in self.configer.get('lr', 'lr_policy'): + self.optimizer.swap_swa_sgd() + self.optimizer.bn_update(self.train_loader, self.seg_net) + + self.__val(data_loader=self.data_loader.get_valloader(dataset='val')) + + def summary(self): + from lib.utils.tools.summary import get_model_summary + self.seg_net.eval() + + for j, data_dict in enumerate(self.train_loader): + print(get_model_summary(self.seg_net, data_dict['img'][0:1])) + return + + +if __name__ == "__main__": + pass diff --git a/BiSTNet-NTIRE2023/models/raft_core/__init__.py b/BiSTNet-NTIRE2023/models/raft_core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/models/raft_core/augmentor.py b/BiSTNet-NTIRE2023/models/raft_core/augmentor.py new file mode 100644 index 0000000..e81c4f2 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/augmentor.py @@ -0,0 +1,246 @@ +import numpy as np +import random +import math +from PIL import Image + +import cv2 +cv2.setNumThreads(0) +cv2.ocl.setUseOpenCL(False) + +import torch +from torchvision.transforms import ColorJitter +import torch.nn.functional as F + + +class FlowAugmentor: + def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True): + + # spatial augmentation params + self.crop_size = crop_size + self.min_scale = min_scale + self.max_scale = max_scale + self.spatial_aug_prob = 0.8 + self.stretch_prob = 0.8 + self.max_stretch = 0.2 + + # flip augmentation params + self.do_flip = do_flip + self.h_flip_prob = 0.5 + self.v_flip_prob = 0.1 + + # photometric augmentation params + self.photo_aug = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5/3.14) + self.asymmetric_color_aug_prob = 0.2 + self.eraser_aug_prob = 0.5 + + def color_transform(self, img1, img2): + """ Photometric augmentation """ + + # asymmetric + if np.random.rand() < self.asymmetric_color_aug_prob: + img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8) + img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8) + + # symmetric + else: + image_stack = np.concatenate([img1, img2], axis=0) + image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8) + img1, img2 = np.split(image_stack, 2, axis=0) + + return img1, img2 + + def eraser_transform(self, img1, img2, bounds=[50, 100]): + """ Occlusion augmentation """ + + ht, wd = img1.shape[:2] + if np.random.rand() < self.eraser_aug_prob: + mean_color = np.mean(img2.reshape(-1, 3), 
axis=0) + for _ in range(np.random.randint(1, 3)): + x0 = np.random.randint(0, wd) + y0 = np.random.randint(0, ht) + dx = np.random.randint(bounds[0], bounds[1]) + dy = np.random.randint(bounds[0], bounds[1]) + img2[y0:y0+dy, x0:x0+dx, :] = mean_color + + return img1, img2 + + def spatial_transform(self, img1, img2, flow): + # randomly sample scale + ht, wd = img1.shape[:2] + min_scale = np.maximum( + (self.crop_size[0] + 8) / float(ht), + (self.crop_size[1] + 8) / float(wd)) + + scale = 2 ** np.random.uniform(self.min_scale, self.max_scale) + scale_x = scale + scale_y = scale + if np.random.rand() < self.stretch_prob: + scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch) + scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch) + + scale_x = np.clip(scale_x, min_scale, None) + scale_y = np.clip(scale_y, min_scale, None) + + if np.random.rand() < self.spatial_aug_prob: + # rescale the images + img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) + img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) + flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) + flow = flow * [scale_x, scale_y] + + if self.do_flip: + if np.random.rand() < self.h_flip_prob: # h-flip + img1 = img1[:, ::-1] + img2 = img2[:, ::-1] + flow = flow[:, ::-1] * [-1.0, 1.0] + + if np.random.rand() < self.v_flip_prob: # v-flip + img1 = img1[::-1, :] + img2 = img2[::-1, :] + flow = flow[::-1, :] * [1.0, -1.0] + + y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0]) + x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1]) + + img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + + return img1, img2, flow + + def __call__(self, img1, img2, flow): + img1, img2 = self.color_transform(img1, img2) + img1, img2 = self.eraser_transform(img1, img2) + img1, img2, flow = self.spatial_transform(img1, img2, flow) + + img1 = np.ascontiguousarray(img1) + img2 = np.ascontiguousarray(img2) + flow = np.ascontiguousarray(flow) + + return img1, img2, flow + +class SparseFlowAugmentor: + def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False): + # spatial augmentation params + self.crop_size = crop_size + self.min_scale = min_scale + self.max_scale = max_scale + self.spatial_aug_prob = 0.8 + self.stretch_prob = 0.8 + self.max_stretch = 0.2 + + # flip augmentation params + self.do_flip = do_flip + self.h_flip_prob = 0.5 + self.v_flip_prob = 0.1 + + # photometric augmentation params + self.photo_aug = ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3/3.14) + self.asymmetric_color_aug_prob = 0.2 + self.eraser_aug_prob = 0.5 + + def color_transform(self, img1, img2): + image_stack = np.concatenate([img1, img2], axis=0) + image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8) + img1, img2 = np.split(image_stack, 2, axis=0) + return img1, img2 + + def eraser_transform(self, img1, img2): + ht, wd = img1.shape[:2] + if np.random.rand() < self.eraser_aug_prob: + mean_color = np.mean(img2.reshape(-1, 3), axis=0) + for _ in range(np.random.randint(1, 3)): + x0 = np.random.randint(0, wd) + y0 = np.random.randint(0, ht) + dx = np.random.randint(50, 100) + dy = np.random.randint(50, 100) + img2[y0:y0+dy, x0:x0+dx, :] = mean_color + + return img1, img2 + + def resize_sparse_flow_map(self, flow, valid, fx=1.0, 
fy=1.0): + ht, wd = flow.shape[:2] + coords = np.meshgrid(np.arange(wd), np.arange(ht)) + coords = np.stack(coords, axis=-1) + + coords = coords.reshape(-1, 2).astype(np.float32) + flow = flow.reshape(-1, 2).astype(np.float32) + valid = valid.reshape(-1).astype(np.float32) + + coords0 = coords[valid>=1] + flow0 = flow[valid>=1] + + ht1 = int(round(ht * fy)) + wd1 = int(round(wd * fx)) + + coords1 = coords0 * [fx, fy] + flow1 = flow0 * [fx, fy] + + xx = np.round(coords1[:,0]).astype(np.int32) + yy = np.round(coords1[:,1]).astype(np.int32) + + v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1) + xx = xx[v] + yy = yy[v] + flow1 = flow1[v] + + flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32) + valid_img = np.zeros([ht1, wd1], dtype=np.int32) + + flow_img[yy, xx] = flow1 + valid_img[yy, xx] = 1 + + return flow_img, valid_img + + def spatial_transform(self, img1, img2, flow, valid): + # randomly sample scale + + ht, wd = img1.shape[:2] + min_scale = np.maximum( + (self.crop_size[0] + 1) / float(ht), + (self.crop_size[1] + 1) / float(wd)) + + scale = 2 ** np.random.uniform(self.min_scale, self.max_scale) + scale_x = np.clip(scale, min_scale, None) + scale_y = np.clip(scale, min_scale, None) + + if np.random.rand() < self.spatial_aug_prob: + # rescale the images + img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) + img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR) + flow, valid = self.resize_sparse_flow_map(flow, valid, fx=scale_x, fy=scale_y) + + if self.do_flip: + if np.random.rand() < 0.5: # h-flip + img1 = img1[:, ::-1] + img2 = img2[:, ::-1] + flow = flow[:, ::-1] * [-1.0, 1.0] + valid = valid[:, ::-1] + + margin_y = 20 + margin_x = 50 + + y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y) + x0 = np.random.randint(-margin_x, img1.shape[1] - self.crop_size[1] + margin_x) + + y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0]) + x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1]) + + img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + valid = valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + return img1, img2, flow, valid + + + def __call__(self, img1, img2, flow, valid): + img1, img2 = self.color_transform(img1, img2) + img1, img2 = self.eraser_transform(img1, img2) + img1, img2, flow, valid = self.spatial_transform(img1, img2, flow, valid) + + img1 = np.ascontiguousarray(img1) + img2 = np.ascontiguousarray(img2) + flow = np.ascontiguousarray(flow) + valid = np.ascontiguousarray(valid) + + return img1, img2, flow, valid diff --git a/BiSTNet-NTIRE2023/models/raft_core/corr.py b/BiSTNet-NTIRE2023/models/raft_core/corr.py new file mode 100644 index 0000000..048bd1f --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/corr.py @@ -0,0 +1,91 @@ +import torch +import torch.nn.functional as F +from models.raft_core.utils import bilinear_sampler, coords_grid + +try: + import alt_cuda_corr +except: + # alt_cuda_corr is not compiled + pass + + +class CorrBlock: + def __init__(self, fmap1, fmap2, num_levels=4, radius=4): + self.num_levels = num_levels + self.radius = radius + self.corr_pyramid = [] + + # all pairs correlation + corr = CorrBlock.corr(fmap1, fmap2) + + batch, h1, w1, dim, h2, w2 = corr.shape + corr = corr.reshape(batch*h1*w1, dim, h2, w2) + + self.corr_pyramid.append(corr) + for i in range(self.num_levels-1): + corr = F.avg_pool2d(corr, 2, 
stride=2) + self.corr_pyramid.append(corr) + + def __call__(self, coords): + r = self.radius + coords = coords.permute(0, 2, 3, 1) + batch, h1, w1, _ = coords.shape + + out_pyramid = [] + for i in range(self.num_levels): + corr = self.corr_pyramid[i] + dx = torch.linspace(-r, r, 2*r+1, device=coords.device) + dy = torch.linspace(-r, r, 2*r+1, device=coords.device) + delta = torch.stack(torch.meshgrid(dy, dx), axis=-1) + + centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i + delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2) + coords_lvl = centroid_lvl + delta_lvl + + corr = bilinear_sampler(corr, coords_lvl) + corr = corr.view(batch, h1, w1, -1) + out_pyramid.append(corr) + + out = torch.cat(out_pyramid, dim=-1) + return out.permute(0, 3, 1, 2).contiguous().float() + + @staticmethod + def corr(fmap1, fmap2): + batch, dim, ht, wd = fmap1.shape + fmap1 = fmap1.view(batch, dim, ht*wd) + fmap2 = fmap2.view(batch, dim, ht*wd) + + corr = torch.matmul(fmap1.transpose(1,2), fmap2) + corr = corr.view(batch, ht, wd, 1, ht, wd) + return corr / torch.sqrt(torch.tensor(dim).float()) + + +class AlternateCorrBlock: + def __init__(self, fmap1, fmap2, num_levels=4, radius=4): + self.num_levels = num_levels + self.radius = radius + + self.pyramid = [(fmap1, fmap2)] + for i in range(self.num_levels): + fmap1 = F.avg_pool2d(fmap1, 2, stride=2) + fmap2 = F.avg_pool2d(fmap2, 2, stride=2) + self.pyramid.append((fmap1, fmap2)) + + def __call__(self, coords): + coords = coords.permute(0, 2, 3, 1) + B, H, W, _ = coords.shape + dim = self.pyramid[0][0].shape[1] + + corr_list = [] + for i in range(self.num_levels): + r = self.radius + fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous() + fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous() + + coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous() + corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r) + corr_list.append(corr.squeeze(1)) + + corr = torch.stack(corr_list, dim=1) + corr = corr.reshape(B, -1, H, W) + return corr / torch.sqrt(torch.tensor(dim).float()) diff --git a/BiSTNet-NTIRE2023/models/raft_core/datasets.py b/BiSTNet-NTIRE2023/models/raft_core/datasets.py new file mode 100644 index 0000000..3411fda --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/datasets.py @@ -0,0 +1,235 @@ +# Data loading based on https://github.com/NVIDIA/flownet2-pytorch + +import numpy as np +import torch +import torch.utils.data as data +import torch.nn.functional as F + +import os +import math +import random +from glob import glob +import os.path as osp + +from utils import frame_utils +from utils.augmentor import FlowAugmentor, SparseFlowAugmentor + + +class FlowDataset(data.Dataset): + def __init__(self, aug_params=None, sparse=False): + self.augmentor = None + self.sparse = sparse + if aug_params is not None: + if sparse: + self.augmentor = SparseFlowAugmentor(**aug_params) + else: + self.augmentor = FlowAugmentor(**aug_params) + + self.is_test = False + self.init_seed = False + self.flow_list = [] + self.image_list = [] + self.extra_info = [] + + def __getitem__(self, index): + + if self.is_test: + img1 = frame_utils.read_gen(self.image_list[index][0]) + img2 = frame_utils.read_gen(self.image_list[index][1]) + img1 = np.array(img1).astype(np.uint8)[..., :3] + img2 = np.array(img2).astype(np.uint8)[..., :3] + img1 = torch.from_numpy(img1).permute(2, 0, 1).float() + img2 = torch.from_numpy(img2).permute(2, 0, 1).float() + return img1, img2, self.extra_info[index] + + if not self.init_seed: + worker_info = 
torch.utils.data.get_worker_info() + if worker_info is not None: + torch.manual_seed(worker_info.id) + np.random.seed(worker_info.id) + random.seed(worker_info.id) + self.init_seed = True + + index = index % len(self.image_list) + valid = None + if self.sparse: + flow, valid = frame_utils.readFlowKITTI(self.flow_list[index]) + else: + flow = frame_utils.read_gen(self.flow_list[index]) + + img1 = frame_utils.read_gen(self.image_list[index][0]) + img2 = frame_utils.read_gen(self.image_list[index][1]) + + flow = np.array(flow).astype(np.float32) + img1 = np.array(img1).astype(np.uint8) + img2 = np.array(img2).astype(np.uint8) + + # grayscale images + if len(img1.shape) == 2: + img1 = np.tile(img1[...,None], (1, 1, 3)) + img2 = np.tile(img2[...,None], (1, 1, 3)) + else: + img1 = img1[..., :3] + img2 = img2[..., :3] + + if self.augmentor is not None: + if self.sparse: + img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid) + else: + img1, img2, flow = self.augmentor(img1, img2, flow) + + img1 = torch.from_numpy(img1).permute(2, 0, 1).float() + img2 = torch.from_numpy(img2).permute(2, 0, 1).float() + flow = torch.from_numpy(flow).permute(2, 0, 1).float() + + if valid is not None: + valid = torch.from_numpy(valid) + else: + valid = (flow[0].abs() < 1000) & (flow[1].abs() < 1000) + + return img1, img2, flow, valid.float() + + + def __rmul__(self, v): + self.flow_list = v * self.flow_list + self.image_list = v * self.image_list + return self + + def __len__(self): + return len(self.image_list) + + +class MpiSintel(FlowDataset): + def __init__(self, aug_params=None, split='training', root='datasets/Sintel', dstype='clean'): + super(MpiSintel, self).__init__(aug_params) + flow_root = osp.join(root, split, 'flow') + image_root = osp.join(root, split, dstype) + + if split == 'test': + self.is_test = True + + for scene in os.listdir(image_root): + image_list = sorted(glob(osp.join(image_root, scene, '*.png'))) + for i in range(len(image_list)-1): + self.image_list += [ [image_list[i], image_list[i+1]] ] + self.extra_info += [ (scene, i) ] # scene and frame_id + + if split != 'test': + self.flow_list += sorted(glob(osp.join(flow_root, scene, '*.flo'))) + + +class FlyingChairs(FlowDataset): + def __init__(self, aug_params=None, split='train', root='datasets/FlyingChairs_release/data'): + super(FlyingChairs, self).__init__(aug_params) + + images = sorted(glob(osp.join(root, '*.ppm'))) + flows = sorted(glob(osp.join(root, '*.flo'))) + assert (len(images)//2 == len(flows)) + + split_list = np.loadtxt('chairs_split.txt', dtype=np.int32) + for i in range(len(flows)): + xid = split_list[i] + if (split=='training' and xid==1) or (split=='validation' and xid==2): + self.flow_list += [ flows[i] ] + self.image_list += [ [images[2*i], images[2*i+1]] ] + + +class FlyingThings3D(FlowDataset): + def __init__(self, aug_params=None, root='datasets/FlyingThings3D', dstype='frames_cleanpass'): + super(FlyingThings3D, self).__init__(aug_params) + + for cam in ['left']: + for direction in ['into_future', 'into_past']: + image_dirs = sorted(glob(osp.join(root, dstype, 'TRAIN/*/*'))) + image_dirs = sorted([osp.join(f, cam) for f in image_dirs]) + + flow_dirs = sorted(glob(osp.join(root, 'optical_flow/TRAIN/*/*'))) + flow_dirs = sorted([osp.join(f, direction, cam) for f in flow_dirs]) + + for idir, fdir in zip(image_dirs, flow_dirs): + images = sorted(glob(osp.join(idir, '*.png')) ) + flows = sorted(glob(osp.join(fdir, '*.pfm')) ) + for i in range(len(flows)-1): + if direction == 'into_future': + self.image_list += 
[ [images[i], images[i+1]] ] + self.flow_list += [ flows[i] ] + elif direction == 'into_past': + self.image_list += [ [images[i+1], images[i]] ] + self.flow_list += [ flows[i+1] ] + + +class KITTI(FlowDataset): + def __init__(self, aug_params=None, split='training', root='datasets/KITTI'): + super(KITTI, self).__init__(aug_params, sparse=True) + if split == 'testing': + self.is_test = True + + root = osp.join(root, split) + images1 = sorted(glob(osp.join(root, 'image_2/*_10.png'))) + images2 = sorted(glob(osp.join(root, 'image_2/*_11.png'))) + + for img1, img2 in zip(images1, images2): + frame_id = img1.split('/')[-1] + self.extra_info += [ [frame_id] ] + self.image_list += [ [img1, img2] ] + + if split == 'training': + self.flow_list = sorted(glob(osp.join(root, 'flow_occ/*_10.png'))) + + +class HD1K(FlowDataset): + def __init__(self, aug_params=None, root='datasets/HD1k'): + super(HD1K, self).__init__(aug_params, sparse=True) + + seq_ix = 0 + while 1: + flows = sorted(glob(os.path.join(root, 'hd1k_flow_gt', 'flow_occ/%06d_*.png' % seq_ix))) + images = sorted(glob(os.path.join(root, 'hd1k_input', 'image_2/%06d_*.png' % seq_ix))) + + if len(flows) == 0: + break + + for i in range(len(flows)-1): + self.flow_list += [flows[i]] + self.image_list += [ [images[i], images[i+1]] ] + + seq_ix += 1 + + +def fetch_dataloader(args, TRAIN_DS='C+T+K+S+H'): + """ Create the data loader for the corresponding trainign set """ + + if args.stage == 'chairs': + aug_params = {'crop_size': args.image_size, 'min_scale': -0.1, 'max_scale': 1.0, 'do_flip': True} + train_dataset = FlyingChairs(aug_params, split='training') + + elif args.stage == 'things': + aug_params = {'crop_size': args.image_size, 'min_scale': -0.4, 'max_scale': 0.8, 'do_flip': True} + clean_dataset = FlyingThings3D(aug_params, dstype='frames_cleanpass') + final_dataset = FlyingThings3D(aug_params, dstype='frames_finalpass') + train_dataset = clean_dataset + final_dataset + + elif args.stage == 'sintel': + aug_params = {'crop_size': args.image_size, 'min_scale': -0.2, 'max_scale': 0.6, 'do_flip': True} + things = FlyingThings3D(aug_params, dstype='frames_cleanpass') + sintel_clean = MpiSintel(aug_params, split='training', dstype='clean') + sintel_final = MpiSintel(aug_params, split='training', dstype='final') + + if TRAIN_DS == 'C+T+K+S+H': + kitti = KITTI({'crop_size': args.image_size, 'min_scale': -0.3, 'max_scale': 0.5, 'do_flip': True}) + hd1k = HD1K({'crop_size': args.image_size, 'min_scale': -0.5, 'max_scale': 0.2, 'do_flip': True}) + train_dataset = 100*sintel_clean + 100*sintel_final + 200*kitti + 5*hd1k + things + + elif TRAIN_DS == 'C+T+K/S': + train_dataset = 100*sintel_clean + 100*sintel_final + things + + elif args.stage == 'kitti': + aug_params = {'crop_size': args.image_size, 'min_scale': -0.2, 'max_scale': 0.4, 'do_flip': False} + train_dataset = KITTI(aug_params, split='training') + + train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, + pin_memory=False, shuffle=True, num_workers=4, drop_last=True) + + print('Training with %d image pairs' % len(train_dataset)) + return train_loader + diff --git a/BiSTNet-NTIRE2023/models/raft_core/extractor.py b/BiSTNet-NTIRE2023/models/raft_core/extractor.py new file mode 100644 index 0000000..9a9c759 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/extractor.py @@ -0,0 +1,267 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ResidualBlock(nn.Module): + def __init__(self, in_planes, planes, norm_fn='group', stride=1): + 
super(ResidualBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1) + self.relu = nn.ReLU(inplace=True) + + num_groups = planes // 8 + + if norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + if not stride == 1: + self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + + elif norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(planes) + self.norm2 = nn.BatchNorm2d(planes) + if not stride == 1: + self.norm3 = nn.BatchNorm2d(planes) + + elif norm_fn == 'instance': + self.norm1 = nn.InstanceNorm2d(planes) + self.norm2 = nn.InstanceNorm2d(planes) + if not stride == 1: + self.norm3 = nn.InstanceNorm2d(planes) + + elif norm_fn == 'none': + self.norm1 = nn.Sequential() + self.norm2 = nn.Sequential() + if not stride == 1: + self.norm3 = nn.Sequential() + + if stride == 1: + self.downsample = None + + else: + self.downsample = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3) + + + def forward(self, x): + y = x + y = self.relu(self.norm1(self.conv1(y))) + y = self.relu(self.norm2(self.conv2(y))) + + if self.downsample is not None: + x = self.downsample(x) + + return self.relu(x+y) + + + +class BottleneckBlock(nn.Module): + def __init__(self, in_planes, planes, norm_fn='group', stride=1): + super(BottleneckBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0) + self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride) + self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0) + self.relu = nn.ReLU(inplace=True) + + num_groups = planes // 8 + + if norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4) + self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4) + self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + if not stride == 1: + self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + + elif norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(planes//4) + self.norm2 = nn.BatchNorm2d(planes//4) + self.norm3 = nn.BatchNorm2d(planes) + if not stride == 1: + self.norm4 = nn.BatchNorm2d(planes) + + elif norm_fn == 'instance': + self.norm1 = nn.InstanceNorm2d(planes//4) + self.norm2 = nn.InstanceNorm2d(planes//4) + self.norm3 = nn.InstanceNorm2d(planes) + if not stride == 1: + self.norm4 = nn.InstanceNorm2d(planes) + + elif norm_fn == 'none': + self.norm1 = nn.Sequential() + self.norm2 = nn.Sequential() + self.norm3 = nn.Sequential() + if not stride == 1: + self.norm4 = nn.Sequential() + + if stride == 1: + self.downsample = None + + else: + self.downsample = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4) + + + def forward(self, x): + y = x + y = self.relu(self.norm1(self.conv1(y))) + y = self.relu(self.norm2(self.conv2(y))) + y = self.relu(self.norm3(self.conv3(y))) + + if self.downsample is not None: + x = self.downsample(x) + + return self.relu(x+y) + +class BasicEncoder(nn.Module): + def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0): + super(BasicEncoder, self).__init__() + self.norm_fn = norm_fn + + if self.norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64) + + elif self.norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(64) + + elif self.norm_fn == 
'instance': + self.norm1 = nn.InstanceNorm2d(64) + + elif self.norm_fn == 'none': + self.norm1 = nn.Sequential() + + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) + self.relu1 = nn.ReLU(inplace=True) + + self.in_planes = 64 + self.layer1 = self._make_layer(64, stride=1) + self.layer2 = self._make_layer(96, stride=2) + self.layer3 = self._make_layer(128, stride=2) + + # output convolution + self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1) + + self.dropout = None + if dropout > 0: + self.dropout = nn.Dropout2d(p=dropout) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): + if m.weight is not None: + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def _make_layer(self, dim, stride=1): + layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride) + layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1) + layers = (layer1, layer2) + + self.in_planes = dim + return nn.Sequential(*layers) + + + def forward(self, x): + + # if input is list, combine batch dimension + is_list = isinstance(x, tuple) or isinstance(x, list) + if is_list: + batch_dim = x[0].shape[0] + x = torch.cat(x, dim=0) + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + + x = self.conv2(x) + + if self.training and self.dropout is not None: + x = self.dropout(x) + + if is_list: + x = torch.split(x, [batch_dim, batch_dim], dim=0) + + return x + + +class SmallEncoder(nn.Module): + def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0): + super(SmallEncoder, self).__init__() + self.norm_fn = norm_fn + + if self.norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=8, num_channels=32) + + elif self.norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(32) + + elif self.norm_fn == 'instance': + self.norm1 = nn.InstanceNorm2d(32) + + elif self.norm_fn == 'none': + self.norm1 = nn.Sequential() + + self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3) + self.relu1 = nn.ReLU(inplace=True) + + self.in_planes = 32 + self.layer1 = self._make_layer(32, stride=1) + self.layer2 = self._make_layer(64, stride=2) + self.layer3 = self._make_layer(96, stride=2) + + self.dropout = None + if dropout > 0: + self.dropout = nn.Dropout2d(p=dropout) + + self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): + if m.weight is not None: + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def _make_layer(self, dim, stride=1): + layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride) + layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1) + layers = (layer1, layer2) + + self.in_planes = dim + return nn.Sequential(*layers) + + + def forward(self, x): + + # if input is list, combine batch dimension + is_list = isinstance(x, tuple) or isinstance(x, list) + if is_list: + batch_dim = x[0].shape[0] + x = torch.cat(x, dim=0) + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.conv2(x) + + if self.training and self.dropout is not None: + x = self.dropout(x) + + if is_list: + x = 
torch.split(x, [batch_dim, batch_dim], dim=0) + + return x diff --git a/BiSTNet-NTIRE2023/models/raft_core/flow_viz.py b/BiSTNet-NTIRE2023/models/raft_core/flow_viz.py new file mode 100644 index 0000000..dcee65e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/flow_viz.py @@ -0,0 +1,132 @@ +# Flow visualization code used from https://github.com/tomrunia/OpticalFlow_Visualization + + +# MIT License +# +# Copyright (c) 2018 Tom Runia +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to conditions. +# +# Author: Tom Runia +# Date Created: 2018-08-03 + +import numpy as np + +def make_colorwheel(): + """ + Generates a color wheel for optical flow visualization as presented in: + Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007) + URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf + + Code follows the original C++ source code of Daniel Scharstein. + Code follows the the Matlab source code of Deqing Sun. + + Returns: + np.ndarray: Color wheel + """ + + RY = 15 + YG = 6 + GC = 4 + CB = 11 + BM = 13 + MR = 6 + + ncols = RY + YG + GC + CB + BM + MR + colorwheel = np.zeros((ncols, 3)) + col = 0 + + # RY + colorwheel[0:RY, 0] = 255 + colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY) + col = col+RY + # YG + colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG) + colorwheel[col:col+YG, 1] = 255 + col = col+YG + # GC + colorwheel[col:col+GC, 1] = 255 + colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC) + col = col+GC + # CB + colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB) + colorwheel[col:col+CB, 2] = 255 + col = col+CB + # BM + colorwheel[col:col+BM, 2] = 255 + colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM) + col = col+BM + # MR + colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR) + colorwheel[col:col+MR, 0] = 255 + return colorwheel + + +def flow_uv_to_colors(u, v, convert_to_bgr=False): + """ + Applies the flow color wheel to (possibly clipped) flow components u and v. + + According to the C++ source code of Daniel Scharstein + According to the Matlab source code of Deqing Sun + + Args: + u (np.ndarray): Input horizontal flow of shape [H,W] + v (np.ndarray): Input vertical flow of shape [H,W] + convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False. 
+ + Returns: + np.ndarray: Flow visualization image of shape [H,W,3] + """ + flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8) + colorwheel = make_colorwheel() # shape [55x3] + ncols = colorwheel.shape[0] + rad = np.sqrt(np.square(u) + np.square(v)) + a = np.arctan2(-v, -u)/np.pi + fk = (a+1) / 2*(ncols-1) + k0 = np.floor(fk).astype(np.int32) + k1 = k0 + 1 + k1[k1 == ncols] = 0 + f = fk - k0 + for i in range(colorwheel.shape[1]): + tmp = colorwheel[:,i] + col0 = tmp[k0] / 255.0 + col1 = tmp[k1] / 255.0 + col = (1-f)*col0 + f*col1 + idx = (rad <= 1) + col[idx] = 1 - rad[idx] * (1-col[idx]) + col[~idx] = col[~idx] * 0.75 # out of range + # Note the 2-i => BGR instead of RGB + ch_idx = 2-i if convert_to_bgr else i + flow_image[:,:,ch_idx] = np.floor(255 * col) + return flow_image + + +def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False): + """ + Expects a two dimensional flow image of shape. + + Args: + flow_uv (np.ndarray): Flow UV image of shape [H,W,2] + clip_flow (float, optional): Clip maximum of flow values. Defaults to None. + convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False. + + Returns: + np.ndarray: Flow visualization image of shape [H,W,3] + """ + assert flow_uv.ndim == 3, 'input flow must have three dimensions' + assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]' + if clip_flow is not None: + flow_uv = np.clip(flow_uv, 0, clip_flow) + u = flow_uv[:,:,0] + v = flow_uv[:,:,1] + rad = np.sqrt(np.square(u) + np.square(v)) + rad_max = np.max(rad) + epsilon = 1e-5 + u = u / (rad_max + epsilon) + v = v / (rad_max + epsilon) + return flow_uv_to_colors(u, v, convert_to_bgr) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/raft_core/frame_utils.py b/BiSTNet-NTIRE2023/models/raft_core/frame_utils.py new file mode 100644 index 0000000..6c49113 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/frame_utils.py @@ -0,0 +1,137 @@ +import numpy as np +from PIL import Image +from os.path import * +import re + +import cv2 +cv2.setNumThreads(0) +cv2.ocl.setUseOpenCL(False) + +TAG_CHAR = np.array([202021.25], np.float32) + +def readFlow(fn): + """ Read .flo file in Middlebury format""" + # Code adapted from: + # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy + + # WARNING: this will work on little-endian architectures (eg Intel x86) only! + # print 'fn = %s'%(fn) + with open(fn, 'rb') as f: + magic = np.fromfile(f, np.float32, count=1) + if 202021.25 != magic: + print('Magic number incorrect. 
Invalid .flo file') + return None + else: + w = np.fromfile(f, np.int32, count=1) + h = np.fromfile(f, np.int32, count=1) + # print 'Reading %d x %d flo file\n' % (w, h) + data = np.fromfile(f, np.float32, count=2*int(w)*int(h)) + # Reshape data into 3D array (columns, rows, bands) + # The reshape here is for visualization, the original code is (w,h,2) + return np.resize(data, (int(h), int(w), 2)) + +def readPFM(file): + file = open(file, 'rb') + + color = None + width = None + height = None + scale = None + endian = None + + header = file.readline().rstrip() + if header == b'PF': + color = True + elif header == b'Pf': + color = False + else: + raise Exception('Not a PFM file.') + + dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline()) + if dim_match: + width, height = map(int, dim_match.groups()) + else: + raise Exception('Malformed PFM header.') + + scale = float(file.readline().rstrip()) + if scale < 0: # little-endian + endian = '<' + scale = -scale + else: + endian = '>' # big-endian + + data = np.fromfile(file, endian + 'f') + shape = (height, width, 3) if color else (height, width) + + data = np.reshape(data, shape) + data = np.flipud(data) + return data + +def writeFlow(filename,uv,v=None): + """ Write optical flow to file. + + If v is None, uv is assumed to contain both u and v channels, + stacked in depth. + Original code by Deqing Sun, adapted from Daniel Scharstein. + """ + nBands = 2 + + if v is None: + assert(uv.ndim == 3) + assert(uv.shape[2] == 2) + u = uv[:,:,0] + v = uv[:,:,1] + else: + u = uv + + assert(u.shape == v.shape) + height,width = u.shape + f = open(filename,'wb') + # write the header + f.write(TAG_CHAR) + np.array(width).astype(np.int32).tofile(f) + np.array(height).astype(np.int32).tofile(f) + # arrange into matrix form + tmp = np.zeros((height, width*nBands)) + tmp[:,np.arange(width)*2] = u + tmp[:,np.arange(width)*2 + 1] = v + tmp.astype(np.float32).tofile(f) + f.close() + + +def readFlowKITTI(filename): + flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR) + flow = flow[:,:,::-1].astype(np.float32) + flow, valid = flow[:, :, :2], flow[:, :, 2] + flow = (flow - 2**15) / 64.0 + return flow, valid + +def readDispKITTI(filename): + disp = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0 + valid = disp > 0.0 + flow = np.stack([-disp, np.zeros_like(disp)], -1) + return flow, valid + + +def writeFlowKITTI(filename, uv): + uv = 64.0 * uv + 2**15 + valid = np.ones([uv.shape[0], uv.shape[1], 1]) + uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16) + cv2.imwrite(filename, uv[..., ::-1]) + + +def read_gen(file_name, pil=False): + ext = splitext(file_name)[-1] + if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': + return Image.open(file_name) + elif ext == '.bin' or ext == '.raw': + return np.load(file_name) + elif ext == '.flo': + return readFlow(file_name).astype(np.float32) + elif ext == '.pfm': + flow = readPFM(file_name).astype(np.float32) + if len(flow.shape) == 2: + return flow + else: + return flow[:, :, :-1] + return [] \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/raft_core/raft.py b/BiSTNet-NTIRE2023/models/raft_core/raft.py new file mode 100644 index 0000000..31b3f9e --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/raft.py @@ -0,0 +1,149 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from models.raft_core.update import BasicUpdateBlock, SmallUpdateBlock +from models.raft_core.extractor import BasicEncoder, SmallEncoder +from 
models.raft_core.corr import CorrBlock, AlternateCorrBlock +from models.raft_core.utils import bilinear_sampler, coords_grid, upflow8 + +# from update import BasicUpdateBlock, SmallUpdateBlock +# from extractor import BasicEncoder, SmallEncoder +# from corr import CorrBlock, AlternateCorrBlock +# from utils.utils import bilinear_sampler, coords_grid, upflow8 + +try: + autocast = torch.cuda.amp.autocast +except: + # dummy autocast for PyTorch < 1.6 + class autocast: + def __init__(self, enabled): + pass + def __enter__(self): + pass + def __exit__(self, *args): + pass + + +class RAFT(nn.Module): + def __init__(self, args): + super(RAFT, self).__init__() + self.args = args + + if args.small: + self.hidden_dim = hdim = 96 + self.context_dim = cdim = 64 + args.corr_levels = 4 + args.corr_radius = 3 + + else: + self.hidden_dim = hdim = 128 + self.context_dim = cdim = 128 + args.corr_levels = 4 + args.corr_radius = 4 + + if 'dropout' not in self.args: + self.args.dropout = 0 + + if 'alternate_corr' not in self.args: + self.args.alternate_corr = False + + # feature network, context network, and update block + if args.small: + self.fnet = SmallEncoder(output_dim=128, norm_fn='instance', dropout=args.dropout) + self.cnet = SmallEncoder(output_dim=hdim+cdim, norm_fn='none', dropout=args.dropout) + self.update_block = SmallUpdateBlock(self.args, hidden_dim=hdim) + + else: + self.fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=args.dropout) + self.cnet = BasicEncoder(output_dim=hdim+cdim, norm_fn='batch', dropout=args.dropout) + self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim) + + def freeze_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + + def initialize_flow(self, img): + """ Flow is represented as difference between two coordinate grids flow = coords1 - coords0""" + N, C, H, W = img.shape + coords0 = coords_grid(N, H//8, W//8, device=img.device) + coords1 = coords_grid(N, H//8, W//8, device=img.device) + + # optical flow computed as difference: flow = coords1 - coords0 + return coords0, coords1 + + def upsample_flow(self, flow, mask): + """ Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """ + N, _, H, W = flow.shape + mask = mask.view(N, 1, 9, 8, 8, H, W) + mask = torch.softmax(mask, dim=2) + + up_flow = F.unfold(8 * flow, [3,3], padding=1) + up_flow = up_flow.view(N, 2, 9, 1, 1, H, W) + + up_flow = torch.sum(mask * up_flow, dim=2) + up_flow = up_flow.permute(0, 1, 4, 2, 5, 3) + return up_flow.reshape(N, 2, 8*H, 8*W) + + + def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False): + """ Estimate optical flow between pair of frames """ + + image1 = 2 * (image1 / 255.0) - 1.0 + image2 = 2 * (image2 / 255.0) - 1.0 + + image1 = image1.contiguous() + image2 = image2.contiguous() + + hdim = self.hidden_dim + cdim = self.context_dim + + # run the feature network + with autocast(enabled=self.args.mixed_precision): + fmap1, fmap2 = self.fnet([image1, image2]) + + fmap1 = fmap1.float() + fmap2 = fmap2.float() + if self.args.alternate_corr: + corr_fn = AlternateCorrBlock(fmap1, fmap2, radius=self.args.corr_radius) + else: + corr_fn = CorrBlock(fmap1, fmap2, radius=self.args.corr_radius) + + # run the context network + with autocast(enabled=self.args.mixed_precision): + cnet = self.cnet(image1) + net, inp = torch.split(cnet, [hdim, cdim], dim=1) + net = torch.tanh(net) + inp = torch.relu(inp) + + coords0, coords1 = self.initialize_flow(image1) + + if flow_init is not None: + coords1 
= coords1 + flow_init + + flow_predictions = [] + for itr in range(iters): + coords1 = coords1.detach() + corr = corr_fn(coords1) # index correlation volume + + flow = coords1 - coords0 + with autocast(enabled=self.args.mixed_precision): + net, up_mask, delta_flow = self.update_block(net, inp, corr, flow) + + # F(t+1) = F(t) + \Delta(t) + coords1 = coords1 + delta_flow + + # upsample predictions + if up_mask is None: + flow_up = upflow8(coords1 - coords0) + else: + flow_up = self.upsample_flow(coords1 - coords0, up_mask) + + flow_predictions.append(flow_up) + + if test_mode: + return coords1 - coords0, flow_up + + return flow_predictions diff --git a/BiSTNet-NTIRE2023/models/raft_core/update.py b/BiSTNet-NTIRE2023/models/raft_core/update.py new file mode 100644 index 0000000..f940497 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/update.py @@ -0,0 +1,139 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class FlowHead(nn.Module): + def __init__(self, input_dim=128, hidden_dim=256): + super(FlowHead, self).__init__() + self.conv1 = nn.Conv2d(input_dim, hidden_dim, 3, padding=1) + self.conv2 = nn.Conv2d(hidden_dim, 2, 3, padding=1) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + return self.conv2(self.relu(self.conv1(x))) + +class ConvGRU(nn.Module): + def __init__(self, hidden_dim=128, input_dim=192+128): + super(ConvGRU, self).__init__() + self.convz = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1) + self.convr = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1) + self.convq = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1) + + def forward(self, h, x): + hx = torch.cat([h, x], dim=1) + + z = torch.sigmoid(self.convz(hx)) + r = torch.sigmoid(self.convr(hx)) + q = torch.tanh(self.convq(torch.cat([r*h, x], dim=1))) + + h = (1-z) * h + z * q + return h + +class SepConvGRU(nn.Module): + def __init__(self, hidden_dim=128, input_dim=192+128): + super(SepConvGRU, self).__init__() + self.convz1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2)) + self.convr1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2)) + self.convq1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2)) + + self.convz2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0)) + self.convr2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0)) + self.convq2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0)) + + + def forward(self, h, x): + # horizontal + hx = torch.cat([h, x], dim=1) + z = torch.sigmoid(self.convz1(hx)) + r = torch.sigmoid(self.convr1(hx)) + q = torch.tanh(self.convq1(torch.cat([r*h, x], dim=1))) + h = (1-z) * h + z * q + + # vertical + hx = torch.cat([h, x], dim=1) + z = torch.sigmoid(self.convz2(hx)) + r = torch.sigmoid(self.convr2(hx)) + q = torch.tanh(self.convq2(torch.cat([r*h, x], dim=1))) + h = (1-z) * h + z * q + + return h + +class SmallMotionEncoder(nn.Module): + def __init__(self, args): + super(SmallMotionEncoder, self).__init__() + cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2 + self.convc1 = nn.Conv2d(cor_planes, 96, 1, padding=0) + self.convf1 = nn.Conv2d(2, 64, 7, padding=3) + self.convf2 = nn.Conv2d(64, 32, 3, padding=1) + self.conv = nn.Conv2d(128, 80, 3, padding=1) + + def forward(self, flow, corr): + cor = F.relu(self.convc1(corr)) + flo = F.relu(self.convf1(flow)) + flo = F.relu(self.convf2(flo)) + cor_flo = torch.cat([cor, flo], dim=1) + out = F.relu(self.conv(cor_flo)) + return torch.cat([out, flow], dim=1) 
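+# Shape check (illustrative, derived from the defaults set in raft.py): the small
+# model uses corr_levels=4 and corr_radius=3, so the correlation features fed to
+# SmallMotionEncoder above have 4 * (2*3 + 1)**2 = 196 channels; its 80-channel
+# output is concatenated with the 2-channel flow, giving the 82-channel motion
+# feature expected by ConvGRU(input_dim=82+64) in SmallUpdateBlock below. The full
+# model (corr_radius=4) analogously feeds 4 * 81 = 324 channels into BasicMotionEncoder.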
+ +class BasicMotionEncoder(nn.Module): + def __init__(self, args): + super(BasicMotionEncoder, self).__init__() + cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2 + self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0) + self.convc2 = nn.Conv2d(256, 192, 3, padding=1) + self.convf1 = nn.Conv2d(2, 128, 7, padding=3) + self.convf2 = nn.Conv2d(128, 64, 3, padding=1) + self.conv = nn.Conv2d(64+192, 128-2, 3, padding=1) + + def forward(self, flow, corr): + cor = F.relu(self.convc1(corr)) + cor = F.relu(self.convc2(cor)) + flo = F.relu(self.convf1(flow)) + flo = F.relu(self.convf2(flo)) + + cor_flo = torch.cat([cor, flo], dim=1) + out = F.relu(self.conv(cor_flo)) + return torch.cat([out, flow], dim=1) + +class SmallUpdateBlock(nn.Module): + def __init__(self, args, hidden_dim=96): + super(SmallUpdateBlock, self).__init__() + self.encoder = SmallMotionEncoder(args) + self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82+64) + self.flow_head = FlowHead(hidden_dim, hidden_dim=128) + + def forward(self, net, inp, corr, flow): + motion_features = self.encoder(flow, corr) + inp = torch.cat([inp, motion_features], dim=1) + net = self.gru(net, inp) + delta_flow = self.flow_head(net) + + return net, None, delta_flow + +class BasicUpdateBlock(nn.Module): + def __init__(self, args, hidden_dim=128, input_dim=128): + super(BasicUpdateBlock, self).__init__() + self.args = args + self.encoder = BasicMotionEncoder(args) + self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128+hidden_dim) + self.flow_head = FlowHead(hidden_dim, hidden_dim=256) + + self.mask = nn.Sequential( + nn.Conv2d(128, 256, 3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 64*9, 1, padding=0)) + + def forward(self, net, inp, corr, flow, upsample=True): + motion_features = self.encoder(flow, corr) + inp = torch.cat([inp, motion_features], dim=1) + + net = self.gru(net, inp) + delta_flow = self.flow_head(net) + + # scale mask to balence gradients + mask = .25 * self.mask(net) + return net, mask, delta_flow + + + diff --git a/BiSTNet-NTIRE2023/models/raft_core/utils.py b/BiSTNet-NTIRE2023/models/raft_core/utils.py new file mode 100644 index 0000000..741ccfe --- /dev/null +++ b/BiSTNet-NTIRE2023/models/raft_core/utils.py @@ -0,0 +1,82 @@ +import torch +import torch.nn.functional as F +import numpy as np +from scipy import interpolate + + +class InputPadder: + """ Pads images such that dimensions are divisible by 8 """ + def __init__(self, dims, mode='sintel'): + self.ht, self.wd = dims[-2:] + pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8 + pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8 + if mode == 'sintel': + self._pad = [pad_wd//2, pad_wd - pad_wd//2, pad_ht//2, pad_ht - pad_ht//2] + else: + self._pad = [pad_wd//2, pad_wd - pad_wd//2, 0, pad_ht] + + def pad(self, *inputs): + return [F.pad(x, self._pad, mode='replicate') for x in inputs] + + def unpad(self,x): + ht, wd = x.shape[-2:] + c = [self._pad[2], ht-self._pad[3], self._pad[0], wd-self._pad[1]] + return x[..., c[0]:c[1], c[2]:c[3]] + +def forward_interpolate(flow): + flow = flow.detach().cpu().numpy() + dx, dy = flow[0], flow[1] + + ht, wd = dx.shape + x0, y0 = np.meshgrid(np.arange(wd), np.arange(ht)) + + x1 = x0 + dx + y1 = y0 + dy + + x1 = x1.reshape(-1) + y1 = y1.reshape(-1) + dx = dx.reshape(-1) + dy = dy.reshape(-1) + + valid = (x1 > 0) & (x1 < wd) & (y1 > 0) & (y1 < ht) + x1 = x1[valid] + y1 = y1[valid] + dx = dx[valid] + dy = dy[valid] + + flow_x = interpolate.griddata( + (x1, y1), dx, (x0, y0), method='nearest', fill_value=0) + + flow_y = 
interpolate.griddata( + (x1, y1), dy, (x0, y0), method='nearest', fill_value=0) + + flow = np.stack([flow_x, flow_y], axis=0) + return torch.from_numpy(flow).float() + + +def bilinear_sampler(img, coords, mode='bilinear', mask=False): + """ Wrapper for grid_sample, uses pixel coordinates """ + H, W = img.shape[-2:] + xgrid, ygrid = coords.split([1,1], dim=-1) + xgrid = 2*xgrid/(W-1) - 1 + ygrid = 2*ygrid/(H-1) - 1 + + grid = torch.cat([xgrid, ygrid], dim=-1) + img = F.grid_sample(img, grid, align_corners=True) + + if mask: + mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1) + return img, mask.float() + + return img + + +def coords_grid(batch, ht, wd, device): + coords = torch.meshgrid(torch.arange(ht, device=device), torch.arange(wd, device=device)) + coords = torch.stack(coords[::-1], dim=0).float() + return coords[None].repeat(batch, 1, 1, 1) + + +def upflow8(flow, mode='bilinear'): + new_size = (8 * flow.shape[2], 8 * flow.shape[3]) + return 8 * F.interpolate(flow, size=new_size, mode=mode, align_corners=True) diff --git a/BiSTNet-NTIRE2023/models/spectral_normalization.py b/BiSTNet-NTIRE2023/models/spectral_normalization.py new file mode 100644 index 0000000..6801480 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/spectral_normalization.py @@ -0,0 +1,61 @@ +import torch +from torch import nn +from torch.nn import Parameter + + +def l2normalize(v, eps=1e-12): + return v / (v.norm() + eps) + + +class SpectralNorm(nn.Module): + def __init__(self, module, name="weight", power_iterations=1): + super(SpectralNorm, self).__init__() + self.module = module + self.name = name + self.power_iterations = power_iterations + if not self._made_params(): + self._make_params() + + def _update_u_v(self): + u = getattr(self.module, self.name + "_u") + v = getattr(self.module, self.name + "_v") + w = getattr(self.module, self.name + "_bar") + + height = w.data.shape[0] + for _ in range(self.power_iterations): + v.data = l2normalize(torch.mv(torch.t(w.view(height, -1).data), u.data)) + u.data = l2normalize(torch.mv(w.view(height, -1).data, v.data)) + + sigma = u.dot(w.view(height, -1).mv(v)) + setattr(self.module, self.name, w / sigma.expand_as(w)) + + def _made_params(self): + try: + u = getattr(self.module, self.name + "_u") + v = getattr(self.module, self.name + "_v") + w = getattr(self.module, self.name + "_bar") + return True + except AttributeError: + return False + + def _make_params(self): + w = getattr(self.module, self.name) + + height = w.data.shape[0] + width = w.view(height, -1).data.shape[1] + + u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False) + v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False) + u.data = l2normalize(u.data) + v.data = l2normalize(v.data) + w_bar = Parameter(w.data) + + del self.module._parameters[self.name] + + self.module.register_parameter(self.name + "_u", u) + self.module.register_parameter(self.name + "_v", v) + self.module.register_parameter(self.name + "_bar", w_bar) + + def forward(self, *args): + self._update_u_v() + return self.module.forward(*args) diff --git a/BiSTNet-NTIRE2023/models/superslomo_model.py b/BiSTNet-NTIRE2023/models/superslomo_model.py new file mode 100644 index 0000000..bc3e350 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/superslomo_model.py @@ -0,0 +1,361 @@ +import torch +import torchvision +import torchvision.transforms as transforms +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +class down(nn.Module): + """ + A class for creating 
neural network blocks containing layers: + + Average Pooling --> Convlution + Leaky ReLU --> Convolution + Leaky ReLU + + This is used in the UNet Class to create a UNet like NN architecture. + + ... + + Methods + ------- + forward(x) + Returns output tensor after passing input `x` to the neural network + block. + """ + + + def __init__(self, inChannels, outChannels, filterSize): + """ + Parameters + ---------- + inChannels : int + number of input channels for the first convolutional layer. + outChannels : int + number of output channels for the first convolutional layer. + This is also used as input and output channels for the + second convolutional layer. + filterSize : int + filter size for the convolution filter. input N would create + a N x N filter. + """ + + + super(down, self).__init__() + # Initialize convolutional layers. + self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize, stride=1, padding=int((filterSize - 1) / 2)) + self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize, stride=1, padding=int((filterSize - 1) / 2)) + + def forward(self, x): + """ + Returns output tensor after passing input `x` to the neural network + block. + + Parameters + ---------- + x : tensor + input to the NN block. + + Returns + ------- + tensor + output of the NN block. + """ + + + # Average pooling with kernel size 2 (2 x 2). + x = F.avg_pool2d(x, 2) + # Convolution + Leaky ReLU + x = F.leaky_relu(self.conv1(x), negative_slope = 0.1) + # Convolution + Leaky ReLU + x = F.leaky_relu(self.conv2(x), negative_slope = 0.1) + return x + +class up(nn.Module): + """ + A class for creating neural network blocks containing layers: + + Bilinear interpolation --> Convlution + Leaky ReLU --> Convolution + Leaky ReLU + + This is used in the UNet Class to create a UNet like NN architecture. + + ... + + Methods + ------- + forward(x, skpCn) + Returns output tensor after passing input `x` to the neural network + block. + """ + + + def __init__(self, inChannels, outChannels): + """ + Parameters + ---------- + inChannels : int + number of input channels for the first convolutional layer. + outChannels : int + number of output channels for the first convolutional layer. + This is also used for setting input and output channels for + the second convolutional layer. + """ + + + super(up, self).__init__() + # Initialize convolutional layers. + self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1) + # (2 * outChannels) is used for accommodating skip connection. + self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1) + + def forward(self, x, skpCn): + """ + Returns output tensor after passing input `x` to the neural network + block. + + Parameters + ---------- + x : tensor + input to the NN block. + skpCn : tensor + skip connection input to the NN block. + + Returns + ------- + tensor + output of the NN block. + """ + + # Bilinear interpolation with scaling 2. + x = F.interpolate(x, scale_factor=2, mode='bilinear') + # Convolution + Leaky ReLU + x = F.leaky_relu(self.conv1(x), negative_slope = 0.1) + # Convolution + Leaky ReLU on (`x`, `skpCn`) + x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope = 0.1) + return x + + + +class UNet(nn.Module): + """ + A class for creating UNet like architecture as specified by the + Super SloMo paper. + + ... + + Methods + ------- + forward(x) + Returns output tensor after passing input `x` to the neural network + block. 
+ """ + + + def __init__(self, inChannels, outChannels): + """ + Parameters + ---------- + inChannels : int + number of input channels for the UNet. + outChannels : int + number of output channels for the UNet. + """ + + + super(UNet, self).__init__() + # Initialize neural network blocks. + self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3) + self.conv2 = nn.Conv2d(32, 32, 7, stride=1, padding=3) + self.down1 = down(32, 64, 5) + self.down2 = down(64, 128, 3) + self.down3 = down(128, 256, 3) + self.down4 = down(256, 512, 3) + self.down5 = down(512, 512, 3) + self.up1 = up(512, 512) + self.up2 = up(512, 256) + self.up3 = up(256, 128) + self.up4 = up(128, 64) + self.up5 = up(64, 32) + self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1) + + def forward(self, x): + """ + Returns output tensor after passing input `x` to the neural network. + + Parameters + ---------- + x : tensor + input to the UNet. + + Returns + ------- + tensor + output of the UNet. + """ + + + x = F.leaky_relu(self.conv1(x), negative_slope = 0.1) + s1 = F.leaky_relu(self.conv2(x), negative_slope = 0.1) + s2 = self.down1(s1) + s3 = self.down2(s2) + s4 = self.down3(s3) + s5 = self.down4(s4) + x = self.down5(s5) + x = self.up1(x, s5) + x = self.up2(x, s4) + x = self.up3(x, s3) + x = self.up4(x, s2) + x = self.up5(x, s1) + x = F.leaky_relu(self.conv3(x), negative_slope = 0.1) + return x + + +class backWarp(nn.Module): + """ + A class for creating a backwarping object. + + This is used for backwarping to an image: + + Given optical flow from frame I0 to I1 --> F_0_1 and frame I1, + it generates I0 <-- backwarp(F_0_1, I1). + + ... + + Methods + ------- + forward(x) + Returns output tensor after passing input `img` and `flow` to the backwarping + block. + """ + + + def __init__(self, W, H, device): + """ + Parameters + ---------- + W : int + width of the image. + H : int + height of the image. + device : device + computation device (cpu/cuda). + """ + + + super(backWarp, self).__init__() + # create a grid + gridX, gridY = np.meshgrid(np.arange(W), np.arange(H)) + self.W = W + self.H = H + self.gridX = torch.tensor(gridX, requires_grad=False, device=device) + self.gridY = torch.tensor(gridY, requires_grad=False, device=device) + + def forward(self, img, flow): + """ + Returns output tensor after passing input `img` and `flow` to the backwarping + block. + I0 = backwarp(I1, F_0_1) + + Parameters + ---------- + img : tensor + frame I1. + flow : tensor + optical flow from I0 and I1: F_0_1. + + Returns + ------- + tensor + frame I0. + """ + + + # Extract horizontal and vertical flows. + u = flow[:, 0, :, :] + v = flow[:, 1, :, :] + x = self.gridX.unsqueeze(0).expand_as(u).float() + u + y = self.gridY.unsqueeze(0).expand_as(v).float() + v + # range -1 to 1 + x = 2*(x/self.W - 0.5) + y = 2*(y/self.H - 0.5) + # stacking X and Y + grid = torch.stack((x,y), dim=3) + # Sample pixels using bilinear interpolation. + imgOut = torch.nn.functional.grid_sample(img, grid) + return imgOut + + +# Creating an array of `t` values for the 7 intermediate frames between +# reference frames I0 and I1. +t = np.linspace(0.125, 0.875, 7) + +def getFlowCoeff (indices, device): + """ + Gets flow coefficients used for calculating intermediate optical + flows from optical flows between I0 and I1: F_0_1 and F_1_0. 
+ + F_t_0 = C00 x F_0_1 + C01 x F_1_0 + F_t_1 = C10 x F_0_1 + C11 x F_1_0 + + where, + C00 = -(1 - t) x t + C01 = t x t + C10 = (1 - t) x (1 - t) + C11 = -t x (1 - t) + + Parameters + ---------- + indices : tensor + indices corresponding to the intermediate frame positions + of all samples in the batch. + device : device + computation device (cpu/cuda). + + Returns + ------- + tensor + coefficients C00, C01, C10, C11. + """ + + + # Convert indices tensor to numpy array + ind = indices.detach().numpy() + C11 = C00 = - (1 - (t[ind])) * (t[ind]) + C01 = (t[ind]) * (t[ind]) + C10 = (1 - (t[ind])) * (1 - (t[ind])) + return torch.Tensor(C00)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C01)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C10)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C11)[None, None, None, :].permute(3, 0, 1, 2).to(device) + +def getWarpCoeff (indices, device): + """ + Gets coefficients used for calculating final intermediate + frame `It_gen` from backwarped images using flows F_t_0 and F_t_1. + + It_gen = (C0 x V_t_0 x g_I_0_F_t_0 + C1 x V_t_1 x g_I_1_F_t_1) / (C0 x V_t_0 + C1 x V_t_1) + + where, + C0 = 1 - t + C1 = t + + V_t_0, V_t_1 --> visibility maps + g_I_0_F_t_0, g_I_1_F_t_1 --> backwarped intermediate frames + + Parameters + ---------- + indices : tensor + indices corresponding to the intermediate frame positions + of all samples in the batch. + device : device + computation device (cpu/cuda). + + Returns + ------- + tensor + coefficients C0 and C1. + """ + + + # Convert indices tensor to numpy array + ind = indices.detach().numpy() + C0 = 1 - t[ind] + C1 = t[ind] + return torch.Tensor(C0)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C1)[None, None, None, :].permute(3, 0, 1, 2).to(device) \ No newline at end of file diff --git a/BiSTNet-NTIRE2023/models/vgg19_gray.py b/BiSTNet-NTIRE2023/models/vgg19_gray.py new file mode 100644 index 0000000..44ad8e0 --- /dev/null +++ b/BiSTNet-NTIRE2023/models/vgg19_gray.py @@ -0,0 +1,194 @@ +from functools import reduce + +import torch +import torch.nn as nn + + +class LambdaBase(nn.Sequential): + def __init__(self, fn, *args): + super(LambdaBase, self).__init__(*args) + self.lambda_func = fn + + def forward_prepare(self, input): + output = [] + for module in self._modules.values(): + output.append(module(input)) + + return output if output else input + + +class Lambda(LambdaBase): + def forward(self, input): + return self.lambda_func(self.forward_prepare(input)) + + +class LambdaMap(LambdaBase): + def forward(self, input): + return list(map(self.lambda_func, self.forward_prepare(input))) + + +class LambdaReduce(LambdaBase): + def forward(self, input): + return reduce(self.lambda_func, self.forward_prepare(input)) + + +layer_names = [ + "conv1_1", + "relu1_1", + "conv1_2", + "relu1_2", + "pool1", + "conv2_1", + "relu2_1", + "conv2_2", + "relu2_2", + "pool2", + "conv3_1", + "relu3_1", + "conv3_2", + "relu3_2", + "conv3_3", + "relu3_3", + "conv3_4", + "relu3_4", + "pool3", + "conv4_1", + "relu4_1", + "conv4_2", + "relu4_2", + "conv4_3", + "relu4_3", + "conv4_4", + "relu4_4", + "pool4", + "conv5_1", + "relu5_1", + "conv5_2", + "relu5_2", + "conv5_3", + "relu5_3", + "conv5_4", + "relu5_4", + "pool5", + "view1", + "fc6", + "fc6_relu", + "fc7", + "fc7_relu", + "fc8", +] + +model = nn.Sequential( # Sequential, + nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.MaxPool2d((2, 2), (2, 2), (0, 
0), ceil_mode=True), + nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(128, 128, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1)), + nn.ReLU(), + nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True), + Lambda(lambda x: x.view(x.size(0), -1)), # View, + nn.Sequential(Lambda(lambda x: x.view(1, -1) if 1 == len(x.size()) else x), nn.Linear(25088, 4096)), # Linear, + nn.ReLU(), + nn.Sequential(Lambda(lambda x: x.view(1, -1) if 1 == len(x.size()) else x), nn.Linear(4096, 4096)), # Linear, + nn.ReLU(), + nn.Sequential(Lambda(lambda x: x.view(1, -1) if 1 == len(x.size()) else x), nn.Linear(4096, 1000)), # Linear, +) + + +model.load_state_dict(torch.load("data/vgg19_gray.pth")) +vgg19_gray_net = torch.nn.Sequential() +for (name, layer) in model._modules.items(): + vgg19_gray_net.add_module(layer_names[int(name)], model[int(name)]) + +for param in vgg19_gray_net.parameters(): + param.requires_grad = False +vgg19_gray_net.eval() + + +class vgg19_gray(torch.nn.Module): + def __init__(self, requires_grad=False): + super(vgg19_gray, self).__init__() + vgg_pretrained_features = vgg19_gray_net + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + for x in range(12): + self.slice1.add_module(layer_names[x], vgg_pretrained_features[x]) + for x in range(12, 21): + self.slice2.add_module(layer_names[x], vgg_pretrained_features[x]) + for x in range(21, 30): + self.slice3.add_module(layer_names[x], vgg_pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h = self.slice1(X) + h_relu3_1 = h + h = self.slice2(h) + h_relu4_1 = h + h = self.slice3(h) + h_relu5_1 = h + return h_relu3_1, h_relu4_1, h_relu5_1 + + +class vgg19_gray_new(torch.nn.Module): + def __init__(self, requires_grad=False): + super(vgg19_gray_new, self).__init__() + vgg_pretrained_features = vgg19_gray_net + self.slice0 = torch.nn.Sequential() + self.slice1 = torch.nn.Sequential() + self.slice2 = torch.nn.Sequential() + self.slice3 = torch.nn.Sequential() + for x in range(7): + self.slice0.add_module(layer_names[x], vgg_pretrained_features[x]) + for x in range(7, 12): + self.slice1.add_module(layer_names[x], vgg_pretrained_features[x]) + for x in range(12, 21): + self.slice2.add_module(layer_names[x], vgg_pretrained_features[x]) + for x in range(21, 30): + self.slice3.add_module(layer_names[x], vgg_pretrained_features[x]) + if not requires_grad: + for param in self.parameters(): + param.requires_grad = False + + def forward(self, X): + h = self.slice0(X) + h_relu2_1 = h + h = self.slice1(h) + h_relu3_1 = h + h = self.slice2(h) + 
h_relu4_1 = h + h = self.slice3(h) + h_relu5_1 = h + return h_relu2_1, h_relu3_1, h_relu4_1, h_relu5_1 diff --git a/BiSTNet-NTIRE2023/pip_requirements.txt b/BiSTNet-NTIRE2023/pip_requirements.txt new file mode 100644 index 0000000..11bc938 --- /dev/null +++ b/BiSTNet-NTIRE2023/pip_requirements.txt @@ -0,0 +1,67 @@ +absl-py==1.0.0 +addict==2.4.0 +cachetools==4.2.4 +charset-normalizer==2.0.12 +click==8.0.4 +cycler==0.11.0 +dataclasses==0.8 +decorator==4.4.2 +easydict==1.9 +einops==0.4.1 +future==0.18.2 +google-auth==2.6.0 +google-auth-oauthlib==0.4.6 +graphviz==0.19.1 +grpcio==1.44.0 +idna==3.3 +imageio==2.15.0 +imageio-ffmpeg==0.4.7 +importlib-metadata==4.8.3 +importlib-resources==5.4.0 +kiwisolver==1.3.1 +llvmlite==0.36.0 +lmdb==1.3.0 +logger==1.4 +Markdown==3.3.6 +matplotlib==3.3.4 +moviepy==1.0.3 +msgpack==1.0.4 +networkx==2.5.1 +numba==0.53.1 +numpy==1.19.5 +oauthlib==3.2.0 +opencv-contrib-python==4.5.5.64 +packaging==21.3 +pandas==1.1.5 +Pillow==8.4.0 +prefetch-generator==1.0.1 +proglog==0.1.10 +protobuf==3.19.4 +pyarrow==6.0.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyparsing==3.0.7 +pypng==0.0.21 +python-dateutil==2.8.2 +pytz==2022.1 +PyWavelets==1.1.1 +PyYAML==6.0 +requests==2.27.1 +requests-oauthlib==1.3.1 +rsa==4.8 +scikit-image==0.17.2 +scipy==1.2.0 +six==1.16.0 +tensorboard==2.8.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tifffile==2020.9.3 +timm==0.6.7 +tqdm==4.63.0 +typing_extensions==4.1.1 +urllib3==1.26.8 +Werkzeug==2.0.3 +yacs==0.1.8 +yapf==0.32.0 +zipp==3.6.0 +torchcontrib==0.0.2 diff --git a/BiSTNet-NTIRE2023/tensorboardX/__init__.py b/BiSTNet-NTIRE2023/tensorboardX/__init__.py new file mode 100644 index 0000000..e910378 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/__init__.py @@ -0,0 +1,5 @@ +"""A module for visualization with tensorboard +""" + +from .writer import FileWriter, SummaryWriter +from .record_writer import RecordWriter diff --git a/BiSTNet-NTIRE2023/tensorboardX/crc32c.py b/BiSTNet-NTIRE2023/tensorboardX/crc32c.py new file mode 100644 index 0000000..62c9e0c --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/crc32c.py @@ -0,0 +1,123 @@ +import array + + +CRC_TABLE = ( + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 
0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, +) + + +CRC_INIT = 0 + +_MASK = 0xFFFFFFFF + + +def crc_update(crc, data): + """Update CRC-32C checksum with data. + + Args: + crc: 32-bit checksum to update as long. + data: byte array, string or iterable over bytes. + + Returns: + 32-bit updated CRC-32C as long. + """ + + if type(data) != array.array or data.itemsize != 1: + buf = array.array("B", data) + else: + buf = data + + crc ^= _MASK + for b in buf: + table_index = (crc ^ b) & 0xff + crc = (CRC_TABLE[table_index] ^ (crc >> 8)) & _MASK + return crc ^ _MASK + + +def crc_finalize(crc): + """Finalize CRC-32C checksum. + + This function should be called as last step of crc calculation. + + Args: + crc: 32-bit checksum as long. + + Returns: + finalized 32-bit checksum as long + """ + return crc & _MASK + + +def crc32c(data): + """Compute CRC-32C checksum of the data. + + Args: + data: byte array, string or iterable over bytes. + + Returns: + 32-bit CRC-32C checksum of data as long. 
+ """ + return crc_finalize(crc_update(CRC_INIT, data)) diff --git a/BiSTNet-NTIRE2023/tensorboardX/embedding.py b/BiSTNet-NTIRE2023/tensorboardX/embedding.py new file mode 100644 index 0000000..4e1935f --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/embedding.py @@ -0,0 +1,49 @@ +import os + + +def make_tsv(metadata, save_path): + metadata = [str(x) for x in metadata] + with open(os.path.join(save_path, 'metadata.tsv'), 'w') as f: + for x in metadata: + f.write(x + '\n') + + +# https://github.com/tensorflow/tensorboard/issues/44 image label will be squared +def make_sprite(label_img, save_path): + import math + import torch + import torchvision + from .x2num import makenp + # this ensures the sprite image has correct dimension as described in + # https://www.tensorflow.org/get_started/embedding_viz + nrow = int(math.ceil((label_img.size(0)) ** 0.5)) + + label_img = torch.from_numpy(makenp(label_img)) # for other framework + # augment images so that #images equals nrow*nrow + label_img = torch.cat((label_img, torch.randn(nrow ** 2 - label_img.size(0), *label_img.size()[1:]) * 255), 0) + + torchvision.utils.save_image(label_img, os.path.join(save_path, 'sprite.png'), nrow=nrow, padding=0) + + +def append_pbtxt(metadata, label_img, save_path, global_step, tag): + with open(os.path.join(save_path, 'projector_config.pbtxt'), 'a') as f: + # step = os.path.split(save_path)[-1] + f.write('embeddings {\n') + f.write('tensor_name: "{}:{}"\n'.format(tag, global_step)) + f.write('tensor_path: "{}"\n'.format(os.path.join(global_step, 'tensors.tsv'))) + if metadata is not None: + f.write('metadata_path: "{}"\n'.format(os.path.join(global_step, 'metadata.tsv'))) + if label_img is not None: + f.write('sprite {\n') + f.write('image_path: "{}"\n'.format(os.path.join(global_step, 'sprite.png'))) + f.write('single_image_dim: {}\n'.format(label_img.size(3))) + f.write('single_image_dim: {}\n'.format(label_img.size(2))) + f.write('}\n') + f.write('}\n') + + +def make_mat(matlist, save_path): + with open(os.path.join(save_path, 'tensors.tsv'), 'w') as f: + for x in matlist: + x = [str(i) for i in x] + f.write('\t'.join(x) + '\n') diff --git a/BiSTNet-NTIRE2023/tensorboardX/event_file_writer.py b/BiSTNet-NTIRE2023/tensorboardX/event_file_writer.py new file mode 100644 index 0000000..0f99838 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/event_file_writer.py @@ -0,0 +1,196 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Writes events to disk in a logdir.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import os.path +import socket +import threading +import time + +import six + +from .src import event_pb2 +from .record_writer import RecordWriter + + +def directory_check(path): + '''Initialize the directory for log files.''' + # If the direcotry does not exist, create it! 
+ if not os.path.exists(path): + os.makedirs(path) + + +class EventsWriter(object): + '''Writes `Event` protocol buffers to an event file.''' + + def __init__(self, file_prefix): + ''' + Events files have a name of the form + '/some/file/path/events.out.tfevents.[timestamp].[hostname]' + ''' + self._file_prefix = file_prefix + ".out.tfevents." + str(time.time())[:10] + "." + socket.gethostname() + + # Open(Create) the log file with the particular form of name. + logging.basicConfig(filename=self._file_prefix) + + self._num_outstanding_events = 0 + + self._py_recordio_writer = RecordWriter(self._file_prefix) + + # Initialize an event instance. + self._event = event_pb2.Event() + + self._event.wall_time = time.time() + + self.write_event(self._event) + + def write_event(self, event): + '''Append "event" to the file.''' + + # Check if event is of type event_pb2.Event proto. + if not isinstance(event, event_pb2.Event): + raise TypeError("Expected an event_pb2.Event proto, " + " but got %s" % type(event)) + return self._write_serialized_event(event.SerializeToString()) + + def _write_serialized_event(self, event_str): + self._num_outstanding_events += 1 + self._py_recordio_writer.write(event_str) + + def flush(self): + '''Flushes the event file to disk.''' + self._num_outstanding_events = 0 + return True + + def close(self): + '''Call self.flush().''' + return_value = self.flush() + return return_value + + +class EventFileWriter(object): + """Writes `Event` protocol buffers to an event file. + The `EventFileWriter` class creates an event file in the specified directory, + and asynchronously writes Event protocol buffers to the file. The Event file + is encoded using the tfrecord format, which is similar to RecordIO. + @@__init__ + @@add_event + @@flush + @@close + """ + + def __init__(self, logdir, max_queue=10, flush_secs=120): + """Creates a `EventFileWriter` and an event file to write to. + On construction the summary writer creates a new event file in `logdir`. + This event file will contain `Event` protocol buffers, which are written to + disk via the add_event method. + The other arguments to the constructor control the asynchronous writes to + the event file: + * `flush_secs`: How often, in seconds, to flush the added summaries + and events to disk. + * `max_queue`: Maximum number of summaries or events pending to be + written to disk before one of the 'add' calls block. + Args: + logdir: A string. Directory where event file will be written. + max_queue: Integer. Size of the queue for pending events and summaries. + flush_secs: Number. How often, in seconds, to flush the + pending events and summaries to disk. + """ + self._logdir = logdir + directory_check(self._logdir) + self._event_queue = six.moves.queue.Queue(max_queue) + self._ev_writer = EventsWriter(os.path.join(self._logdir, "events")) + self._closed = False + self._worker = _EventLoggerThread(self._event_queue, self._ev_writer, + flush_secs) + + self._worker.start() + + def get_logdir(self): + """Returns the directory where event file will be written.""" + return self._logdir + + def reopen(self): + """Reopens the EventFileWriter. + Can be called after `close()` to add more events in the same directory. + The events will go into a new events file. + Does nothing if the EventFileWriter was not closed. + """ + if self._closed: + self._closed = False + + def add_event(self, event): + """Adds an event to the event file. + Args: + event: An `Event` protocol buffer. 
+ """ + if not self._closed: + self._event_queue.put(event) + + def flush(self): + """Flushes the event file to disk. + Call this method to make sure that all pending events have been written to + disk. + """ + self._event_queue.join() + self._ev_writer.flush() + + def close(self): + """Flushes the event file to disk and close the file. + Call this method when you do not need the summary writer anymore. + """ + self.flush() + self._ev_writer.close() + self._closed = True + + +class _EventLoggerThread(threading.Thread): + """Thread that logs events.""" + + def __init__(self, queue, ev_writer, flush_secs): + """Creates an _EventLoggerThread. + Args: + queue: A Queue from which to dequeue events. + ev_writer: An event writer. Used to log brain events for + the visualizer. + flush_secs: How often, in seconds, to flush the + pending file to disk. + """ + threading.Thread.__init__(self) + self.daemon = True + self._queue = queue + self._ev_writer = ev_writer + self._flush_secs = flush_secs + # The first event will be flushed immediately. + self._next_event_flush_time = 0 + + def run(self): + while True: + event = self._queue.get() + try: + self._ev_writer.write_event(event) + # Flush the event writer every so often. + now = time.time() + if now > self._next_event_flush_time: + self._ev_writer.flush() + # Do it again in two minutes. + self._next_event_flush_time = now + self._flush_secs + finally: + self._queue.task_done() diff --git a/BiSTNet-NTIRE2023/tensorboardX/graph.py b/BiSTNet-NTIRE2023/tensorboardX/graph.py new file mode 100644 index 0000000..8135fd3 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/graph.py @@ -0,0 +1,66 @@ +from .src.graph_pb2 import GraphDef +from .src.node_def_pb2 import NodeDef +from .src.versions_pb2 import VersionDef +from .src.attr_value_pb2 import AttrValue +from .src.tensor_shape_pb2 import TensorShapeProto + +from distutils.version import LooseVersion + + +def replace(name, scope): + return '/'.join([scope[name], name]) + + +def parse(graph): + import torch + scope = {} + for n in graph.nodes(): + inputs = [i.uniqueName() for i in n.inputs()] + for i in range(1, len(inputs)): + scope[inputs[i]] = n.scopeName() + + uname = next(iter(n.outputs())).uniqueName() + assert n.scopeName() != '', '{} has empty scope name'.format(n) + scope[uname] = n.scopeName() + if LooseVersion(torch.__version__) >= LooseVersion("0.4"): + scope['0'] = 'input' + else: + scope['1'] = 'input' + + nodes = [] + for n in graph.nodes(): + attrs = {k: n[k] for k in n.attributeNames()} + attrs = str(attrs).replace("'", ' ') # singlequote will be escaped by tensorboard + if any(i.uniqueName() not in scope.keys() for i in n.inputs()): # 0.3.1 workaround + continue + inputs = [replace(i.uniqueName(), scope) for i in n.inputs()] + uname = next(iter(n.outputs())).uniqueName() # FIXME: only first output is considered + nodes.append({'name': replace(uname, scope), 'op': n.kind(), 'inputs': inputs, 'attr': attrs}) + + for n in graph.inputs(): + uname = n.uniqueName() + if uname not in scope.keys(): + scope[uname] = 'unused' + nodes.append({'name': replace(uname, scope), 'op': 'Parameter', 'inputs': [], 'attr': str(n.type())}) + + return nodes + + +def graph(model, args, verbose=False): + import torch + with torch.onnx.set_training(model, False): + trace, _ = torch.jit.trace(model, args) + if LooseVersion(torch.__version__) >= LooseVersion("0.4"): + torch.onnx._optimize_trace(trace, False) + else: + torch.onnx._optimize_trace(trace) + graph = trace.graph() + if verbose: + print(graph) + list_of_nodes 
= parse(graph) + nodes = [] + for node in list_of_nodes: + nodes.append( + NodeDef(name=node['name'], op=node['op'], input=node['inputs'], + attr={'lanpa': AttrValue(s=node['attr'].encode(encoding='utf_8'))})) + return GraphDef(node=nodes, versions=VersionDef(producer=22)) diff --git a/BiSTNet-NTIRE2023/tensorboardX/graph_onnx.py b/BiSTNet-NTIRE2023/tensorboardX/graph_onnx.py new file mode 100644 index 0000000..96a3534 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/graph_onnx.py @@ -0,0 +1,158 @@ +from .src.graph_pb2 import GraphDef +from .src.node_def_pb2 import NodeDef +from .src.versions_pb2 import VersionDef +from .src.attr_value_pb2 import AttrValue +from .src.tensor_shape_pb2 import TensorShapeProto +# from .src.onnx_pb2 import ModelProto + + +def gg(fname): + import onnx # 0.2.1 + m = onnx.load(fname) + nodes_proto = [] + nodes = [] + g = m.graph + import itertools + for node in itertools.chain(g.input, g.output): + nodes_proto.append(node) + + for node in nodes_proto: + shapeproto = TensorShapeProto( + dim=[TensorShapeProto.Dim(size=d.dim_value) for d in node.type.tensor_type.shape.dim]) + nodes.append(NodeDef( + name=node.name, + op='Variable', + input=[], + attr={ + 'dtype': AttrValue(type=node.type.tensor_type.elem_type), + 'shape': AttrValue(shape=shapeproto), + }) + ) + + for node in g.node: + attr = [] + for s in node.attribute: + attr.append(' = '.join([str(f[1]) for f in s.ListFields()])) + attr = ', '.join(attr).encode(encoding='utf_8') + + nodes.append(NodeDef( + name=node.output[0], + op=node.op_type, + input=node.input, + attr={'parameters': AttrValue(s=attr)}, + )) + # two pass token replacement, appends opname to object id + mapping = {} + for node in nodes: + mapping[node.name] = node.op + '_' + node.name + + nodes, mapping = updatenodes(nodes, mapping) + mapping = smartGrouping(nodes, mapping) + nodes, mapping = updatenodes(nodes, mapping) + + return GraphDef(node=nodes, versions=VersionDef(producer=22)) + + +def updatenodes(nodes, mapping): + for node in nodes: + newname = mapping[node.name] + node.name = newname + newinput = [] + for inputnode in list(node.input): + newinput.append(mapping[inputnode]) + node.input.remove(inputnode) + node.input.extend(newinput) + newmap = {} + for k, v in mapping.items(): + newmap[v] = v + return nodes, newmap + + +def findnode(nodes, name): + """ input: node name + returns: node object + """ + for n in nodes: + if n.name == name: + return n + + +def parser(s, nodes, node): + print(s) + if len(s) == 0: + return + if len(s) > 0: + if s[0] == node.op: + print(s[0], node.name, s[1], node.input) + for n in node.input: + print(n, s[1]) + parser(s[1], nodes, findnode(nodes, n)) + else: + return False + + +# TODO: use recursive parse + +def smartGrouping(nodes, mapping): + # a Fully Conv is: (TODO: check var1.size(0)==var2.size(0)) + # GEMM <-- Variable (c1) + # ^-- Transpose (c2) <-- Variable (c3) + + # a Conv with bias is: (TODO: check var1.size(0)==var2.size(0)) + # Add <-- Conv (c2) <-- Variable (c3) + # ^-- Variable (c1) + # + # gemm = ('Gemm', ('Variable', ('Transpose', ('Variable')))) + + FCcounter = 1 + Convcounter = 1 + for node in nodes: + if node.op == 'Gemm': + c1 = c2 = c3 = False + for name_in in node.input: + n = findnode(nodes, name_in) + if n.op == 'Variable': + c1 = True + c1name = n.name + if n.op == 'Transpose': + c2 = True + c2name = n.name + if len(n.input) == 1: + nn = findnode(nodes, n.input[0]) + if nn.op == 'Variable': + c3 = True + c3name = nn.name + # print(n.op, n.name, c1, c2, c3) + if c1 and c2 and c3: 
+ # print(c1name, c2name, c3name) + mapping[c1name] = 'FC{}/{}'.format(FCcounter, c1name) + mapping[c2name] = 'FC{}/{}'.format(FCcounter, c2name) + mapping[c3name] = 'FC{}/{}'.format(FCcounter, c3name) + mapping[node.name] = 'FC{}/{}'.format(FCcounter, node.name) + FCcounter += 1 + continue + if node.op == 'Add': + c1 = c2 = c3 = False + for name_in in node.input: + n = findnode(nodes, name_in) + if n.op == 'Variable': + c1 = True + c1name = n.name + if n.op == 'Conv': + c2 = True + c2name = n.name + if len(n.input) >= 1: + for nn_name in n.input: + nn = findnode(nodes, nn_name) + if nn.op == 'Variable': + c3 = True + c3name = nn.name + + if c1 and c2 and c3: + # print(c1name, c2name, c3name) + mapping[c1name] = 'Conv{}/{}'.format(Convcounter, c1name) + mapping[c2name] = 'Conv{}/{}'.format(Convcounter, c2name) + mapping[c3name] = 'Conv{}/{}'.format(Convcounter, c3name) + mapping[node.name] = 'Conv{}/{}'.format(Convcounter, node.name) + Convcounter += 1 + return mapping diff --git a/BiSTNet-NTIRE2023/tensorboardX/record_writer.py b/BiSTNet-NTIRE2023/tensorboardX/record_writer.py new file mode 100644 index 0000000..c14bd7b --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/record_writer.py @@ -0,0 +1,47 @@ +""" +To write tf_record into file. Here we use it for tensorboard's event writting. +The code was borrow from https://github.com/TeamHG-Memex/tensorboard_logger +""" + +import re +import struct + +from .crc32c import crc32c + +_VALID_OP_NAME_START = re.compile('^[A-Za-z0-9.]') +_VALID_OP_NAME_PART = re.compile('[A-Za-z0-9_.\\-/]+') + + +class RecordWriter(object): + def __init__(self, path, flush_secs=2): + self._name_to_tf_name = {} + self._tf_names = set() + self.path = path + self.flush_secs = flush_secs # TODO. flush every flush_secs, not every time. + self._writer = None + self._writer = open(path, 'wb') + + def write(self, event_str): + w = self._writer.write + header = struct.pack('Q', len(event_str)) + w(header) + w(struct.pack('I', masked_crc32c(header))) + w(event_str) + w(struct.pack('I', masked_crc32c(event_str))) + self._writer.flush() + + +def masked_crc32c(data): + x = u32(crc32c(data)) + return u32(((x >> 15) | u32(x << 17)) + 0xa282ead8) + + +def u32(x): + return x & 0xffffffff + + +def make_valid_tf_name(name): + if not _VALID_OP_NAME_START.match(name): + # Must make it valid somehow, but don't want to remove stuff + name = '.' + name + return '_'.join(_VALID_OP_NAME_PART.findall(name)) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/__init__.py b/BiSTNet-NTIRE2023/tensorboardX/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/attr_value_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/attr_value_pb2.py new file mode 100644 index 0000000..95fae04 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/attr_value_pb2.py @@ -0,0 +1,372 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: tensorboardX/src/attr_value.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from tensorboardX.src import tensor_pb2 as tensorboardX_dot_src_dot_tensor__pb2 +from tensorboardX.src import tensor_shape_pb2 as tensorboardX_dot_src_dot_tensor__shape__pb2 +from tensorboardX.src import types_pb2 as tensorboardX_dot_src_dot_types__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/attr_value.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n!tensorboardX/src/attr_value.proto\x12\x0btensorboard\x1a\x1dtensorboardX/src/tensor.proto\x1a#tensorboardX/src/tensor_shape.proto\x1a\x1ctensorboardX/src/types.proto\"\xaf\x04\n\tAttrValue\x12\x0b\n\x01s\x18\x02 \x01(\x0cH\x00\x12\x0b\n\x01i\x18\x03 \x01(\x03H\x00\x12\x0b\n\x01\x66\x18\x04 \x01(\x02H\x00\x12\x0b\n\x01\x62\x18\x05 \x01(\x08H\x00\x12%\n\x04type\x18\x06 \x01(\x0e\x32\x15.tensorboard.DataTypeH\x00\x12.\n\x05shape\x18\x07 \x01(\x0b\x32\x1d.tensorboard.TensorShapeProtoH\x00\x12*\n\x06tensor\x18\x08 \x01(\x0b\x32\x18.tensorboard.TensorProtoH\x00\x12\x30\n\x04list\x18\x01 \x01(\x0b\x32 .tensorboard.AttrValue.ListValueH\x00\x12)\n\x04\x66unc\x18\n \x01(\x0b\x32\x19.tensorboard.NameAttrListH\x00\x12\x15\n\x0bplaceholder\x18\t \x01(\tH\x00\x1a\xed\x01\n\tListValue\x12\t\n\x01s\x18\x02 \x03(\x0c\x12\r\n\x01i\x18\x03 \x03(\x03\x42\x02\x10\x01\x12\r\n\x01\x66\x18\x04 \x03(\x02\x42\x02\x10\x01\x12\r\n\x01\x62\x18\x05 \x03(\x08\x42\x02\x10\x01\x12\'\n\x04type\x18\x06 \x03(\x0e\x32\x15.tensorboard.DataTypeB\x02\x10\x01\x12,\n\x05shape\x18\x07 \x03(\x0b\x32\x1d.tensorboard.TensorShapeProto\x12(\n\x06tensor\x18\x08 \x03(\x0b\x32\x18.tensorboard.TensorProto\x12\'\n\x04\x66unc\x18\t \x03(\x0b\x32\x19.tensorboard.NameAttrListB\x07\n\x05value\"\x94\x01\n\x0cNameAttrList\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\x04\x61ttr\x18\x02 \x03(\x0b\x32#.tensorboard.NameAttrList.AttrEntry\x1a\x43\n\tAttrEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.tensorboard.AttrValue:\x02\x38\x01\x42\x30\n\x18org.tensorflow.frameworkB\x0f\x41ttrValueProtosP\x01\xf8\x01\x01\x62\x06proto3') + , + dependencies=[tensorboardX_dot_src_dot_tensor__pb2.DESCRIPTOR,tensorboardX_dot_src_dot_tensor__shape__pb2.DESCRIPTOR,tensorboardX_dot_src_dot_types__pb2.DESCRIPTOR,]) + + + + +_ATTRVALUE_LISTVALUE = _descriptor.Descriptor( + name='ListValue', + full_name='tensorboard.AttrValue.ListValue', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='s', full_name='tensorboard.AttrValue.ListValue.s', index=0, + number=2, type=12, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='i', full_name='tensorboard.AttrValue.ListValue.i', index=1, + number=3, type=3, cpp_type=2, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), 
_b('\020\001'))), + _descriptor.FieldDescriptor( + name='f', full_name='tensorboard.AttrValue.ListValue.f', index=2, + number=4, type=2, cpp_type=6, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='b', full_name='tensorboard.AttrValue.ListValue.b', index=3, + number=5, type=8, cpp_type=7, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='type', full_name='tensorboard.AttrValue.ListValue.type', index=4, + number=6, type=14, cpp_type=8, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='shape', full_name='tensorboard.AttrValue.ListValue.shape', index=5, + number=7, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='tensor', full_name='tensorboard.AttrValue.ListValue.tensor', index=6, + number=8, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='func', full_name='tensorboard.AttrValue.ListValue.func', index=7, + number=9, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=462, + serialized_end=699, +) + +_ATTRVALUE = _descriptor.Descriptor( + name='AttrValue', + full_name='tensorboard.AttrValue', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='s', full_name='tensorboard.AttrValue.s', index=0, + number=2, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='i', full_name='tensorboard.AttrValue.i', index=1, + number=3, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='f', full_name='tensorboard.AttrValue.f', index=2, + number=4, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='b', full_name='tensorboard.AttrValue.b', index=3, + number=5, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + 
message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='type', full_name='tensorboard.AttrValue.type', index=4, + number=6, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='shape', full_name='tensorboard.AttrValue.shape', index=5, + number=7, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='tensor', full_name='tensorboard.AttrValue.tensor', index=6, + number=8, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='list', full_name='tensorboard.AttrValue.list', index=7, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='func', full_name='tensorboard.AttrValue.func', index=8, + number=10, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='placeholder', full_name='tensorboard.AttrValue.placeholder', index=9, + number=9, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_ATTRVALUE_LISTVALUE, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='value', full_name='tensorboard.AttrValue.value', + index=0, containing_type=None, fields=[]), + ], + serialized_start=149, + serialized_end=708, +) + + +_NAMEATTRLIST_ATTRENTRY = _descriptor.Descriptor( + name='AttrEntry', + full_name='tensorboard.NameAttrList.AttrEntry', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='key', full_name='tensorboard.NameAttrList.AttrEntry.key', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='value', full_name='tensorboard.NameAttrList.AttrEntry.value', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')), + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=792, + serialized_end=859, +) + +_NAMEATTRLIST = _descriptor.Descriptor( + name='NameAttrList', + 
full_name='tensorboard.NameAttrList', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='tensorboard.NameAttrList.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='attr', full_name='tensorboard.NameAttrList.attr', index=1, + number=2, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_NAMEATTRLIST_ATTRENTRY, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=711, + serialized_end=859, +) + +_ATTRVALUE_LISTVALUE.fields_by_name['type'].enum_type = tensorboardX_dot_src_dot_types__pb2._DATATYPE +_ATTRVALUE_LISTVALUE.fields_by_name['shape'].message_type = tensorboardX_dot_src_dot_tensor__shape__pb2._TENSORSHAPEPROTO +_ATTRVALUE_LISTVALUE.fields_by_name['tensor'].message_type = tensorboardX_dot_src_dot_tensor__pb2._TENSORPROTO +_ATTRVALUE_LISTVALUE.fields_by_name['func'].message_type = _NAMEATTRLIST +_ATTRVALUE_LISTVALUE.containing_type = _ATTRVALUE +_ATTRVALUE.fields_by_name['type'].enum_type = tensorboardX_dot_src_dot_types__pb2._DATATYPE +_ATTRVALUE.fields_by_name['shape'].message_type = tensorboardX_dot_src_dot_tensor__shape__pb2._TENSORSHAPEPROTO +_ATTRVALUE.fields_by_name['tensor'].message_type = tensorboardX_dot_src_dot_tensor__pb2._TENSORPROTO +_ATTRVALUE.fields_by_name['list'].message_type = _ATTRVALUE_LISTVALUE +_ATTRVALUE.fields_by_name['func'].message_type = _NAMEATTRLIST +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['s']) +_ATTRVALUE.fields_by_name['s'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['i']) +_ATTRVALUE.fields_by_name['i'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['f']) +_ATTRVALUE.fields_by_name['f'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['b']) +_ATTRVALUE.fields_by_name['b'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['type']) +_ATTRVALUE.fields_by_name['type'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['shape']) +_ATTRVALUE.fields_by_name['shape'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['tensor']) +_ATTRVALUE.fields_by_name['tensor'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['list']) +_ATTRVALUE.fields_by_name['list'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['func']) +_ATTRVALUE.fields_by_name['func'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_ATTRVALUE.oneofs_by_name['value'].fields.append( + _ATTRVALUE.fields_by_name['placeholder']) 
+_ATTRVALUE.fields_by_name['placeholder'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] +_NAMEATTRLIST_ATTRENTRY.fields_by_name['value'].message_type = _ATTRVALUE +_NAMEATTRLIST_ATTRENTRY.containing_type = _NAMEATTRLIST +_NAMEATTRLIST.fields_by_name['attr'].message_type = _NAMEATTRLIST_ATTRENTRY +DESCRIPTOR.message_types_by_name['AttrValue'] = _ATTRVALUE +DESCRIPTOR.message_types_by_name['NameAttrList'] = _NAMEATTRLIST +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +AttrValue = _reflection.GeneratedProtocolMessageType('AttrValue', (_message.Message,), dict( + + ListValue = _reflection.GeneratedProtocolMessageType('ListValue', (_message.Message,), dict( + DESCRIPTOR = _ATTRVALUE_LISTVALUE, + __module__ = 'tensorboardX.src.attr_value_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.AttrValue.ListValue) + )) + , + DESCRIPTOR = _ATTRVALUE, + __module__ = 'tensorboardX.src.attr_value_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.AttrValue) + )) +_sym_db.RegisterMessage(AttrValue) +_sym_db.RegisterMessage(AttrValue.ListValue) + +NameAttrList = _reflection.GeneratedProtocolMessageType('NameAttrList', (_message.Message,), dict( + + AttrEntry = _reflection.GeneratedProtocolMessageType('AttrEntry', (_message.Message,), dict( + DESCRIPTOR = _NAMEATTRLIST_ATTRENTRY, + __module__ = 'tensorboardX.src.attr_value_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.NameAttrList.AttrEntry) + )) + , + DESCRIPTOR = _NAMEATTRLIST, + __module__ = 'tensorboardX.src.attr_value_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.NameAttrList) + )) +_sym_db.RegisterMessage(NameAttrList) +_sym_db.RegisterMessage(NameAttrList.AttrEntry) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\017AttrValueProtosP\001\370\001\001')) +_ATTRVALUE_LISTVALUE.fields_by_name['i'].has_options = True +_ATTRVALUE_LISTVALUE.fields_by_name['i']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_ATTRVALUE_LISTVALUE.fields_by_name['f'].has_options = True +_ATTRVALUE_LISTVALUE.fields_by_name['f']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_ATTRVALUE_LISTVALUE.fields_by_name['b'].has_options = True +_ATTRVALUE_LISTVALUE.fields_by_name['b']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_ATTRVALUE_LISTVALUE.fields_by_name['type'].has_options = True +_ATTRVALUE_LISTVALUE.fields_by_name['type']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_NAMEATTRLIST_ATTRENTRY.has_options = True +_NAMEATTRLIST_ATTRENTRY._options = _descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/event_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/event_pb2.py new file mode 100644 index 0000000..459dfbd --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/event_pb2.py @@ -0,0 +1,376 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: tensorboardX/src/event.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from tensorboardX.src import summary_pb2 as tensorboardX_dot_src_dot_summary__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/event.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n\x1ctensorboardX/src/event.proto\x12\x0btensorboard\x1a\x1etensorboardX/src/summary.proto\"\xbf\x02\n\x05\x45vent\x12\x11\n\twall_time\x18\x01 \x01(\x01\x12\x0c\n\x04step\x18\x02 \x01(\x03\x12\x16\n\x0c\x66ile_version\x18\x03 \x01(\tH\x00\x12\x13\n\tgraph_def\x18\x04 \x01(\x0cH\x00\x12\'\n\x07summary\x18\x05 \x01(\x0b\x32\x14.tensorboard.SummaryH\x00\x12.\n\x0blog_message\x18\x06 \x01(\x0b\x32\x17.tensorboard.LogMessageH\x00\x12.\n\x0bsession_log\x18\x07 \x01(\x0b\x32\x17.tensorboard.SessionLogH\x00\x12=\n\x13tagged_run_metadata\x18\x08 \x01(\x0b\x32\x1e.tensorboard.TaggedRunMetadataH\x00\x12\x18\n\x0emeta_graph_def\x18\t \x01(\x0cH\x00\x42\x06\n\x04what\"\x96\x01\n\nLogMessage\x12,\n\x05level\x18\x01 \x01(\x0e\x32\x1d.tensorboard.LogMessage.Level\x12\x0f\n\x07message\x18\x02 \x01(\t\"I\n\x05Level\x12\x0b\n\x07UNKNOWN\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\n\x12\x08\n\x04INFO\x10\x14\x12\x08\n\x04WARN\x10\x1e\x12\t\n\x05\x45RROR\x10(\x12\t\n\x05\x46\x41TAL\x10\x32\"\xb7\x01\n\nSessionLog\x12\x35\n\x06status\x18\x01 \x01(\x0e\x32%.tensorboard.SessionLog.SessionStatus\x12\x17\n\x0f\x63heckpoint_path\x18\x02 \x01(\t\x12\x0b\n\x03msg\x18\x03 \x01(\t\"L\n\rSessionStatus\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\t\n\x05START\x10\x01\x12\x08\n\x04STOP\x10\x02\x12\x0e\n\nCHECKPOINT\x10\x03\"6\n\x11TaggedRunMetadata\x12\x0b\n\x03tag\x18\x01 \x01(\t\x12\x14\n\x0crun_metadata\x18\x02 \x01(\x0c\x42\'\n\x13org.tensorflow.utilB\x0b\x45ventProtosP\x01\xf8\x01\x01\x62\x06proto3') + , + dependencies=[tensorboardX_dot_src_dot_summary__pb2.DESCRIPTOR,]) + + + +_LOGMESSAGE_LEVEL = _descriptor.EnumDescriptor( + name='Level', + full_name='tensorboard.LogMessage.Level', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='UNKNOWN', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DEBUG', index=1, number=10, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='INFO', index=2, number=20, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='WARN', index=3, number=30, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='ERROR', index=4, number=40, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='FATAL', index=5, number=50, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=477, + serialized_end=550, +) +_sym_db.RegisterEnumDescriptor(_LOGMESSAGE_LEVEL) + +_SESSIONLOG_SESSIONSTATUS = _descriptor.EnumDescriptor( + name='SessionStatus', + full_name='tensorboard.SessionLog.SessionStatus', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='STATUS_UNSPECIFIED', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='START', index=1, number=1, + 
options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='STOP', index=2, number=2, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='CHECKPOINT', index=3, number=3, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=660, + serialized_end=736, +) +_sym_db.RegisterEnumDescriptor(_SESSIONLOG_SESSIONSTATUS) + + +_EVENT = _descriptor.Descriptor( + name='Event', + full_name='tensorboard.Event', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='wall_time', full_name='tensorboard.Event.wall_time', index=0, + number=1, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='step', full_name='tensorboard.Event.step', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='file_version', full_name='tensorboard.Event.file_version', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='graph_def', full_name='tensorboard.Event.graph_def', index=3, + number=4, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='summary', full_name='tensorboard.Event.summary', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='log_message', full_name='tensorboard.Event.log_message', index=5, + number=6, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='session_log', full_name='tensorboard.Event.session_log', index=6, + number=7, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='tagged_run_metadata', full_name='tensorboard.Event.tagged_run_metadata', index=7, + number=8, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='meta_graph_def', full_name='tensorboard.Event.meta_graph_def', index=8, + number=9, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + 
_descriptor.OneofDescriptor( + name='what', full_name='tensorboard.Event.what', + index=0, containing_type=None, fields=[]), + ], + serialized_start=78, + serialized_end=397, +) + + +_LOGMESSAGE = _descriptor.Descriptor( + name='LogMessage', + full_name='tensorboard.LogMessage', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='level', full_name='tensorboard.LogMessage.level', index=0, + number=1, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='message', full_name='tensorboard.LogMessage.message', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + _LOGMESSAGE_LEVEL, + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=400, + serialized_end=550, +) + + +_SESSIONLOG = _descriptor.Descriptor( + name='SessionLog', + full_name='tensorboard.SessionLog', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='status', full_name='tensorboard.SessionLog.status', index=0, + number=1, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='checkpoint_path', full_name='tensorboard.SessionLog.checkpoint_path', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='msg', full_name='tensorboard.SessionLog.msg', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + _SESSIONLOG_SESSIONSTATUS, + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=553, + serialized_end=736, +) + + +_TAGGEDRUNMETADATA = _descriptor.Descriptor( + name='TaggedRunMetadata', + full_name='tensorboard.TaggedRunMetadata', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='tag', full_name='tensorboard.TaggedRunMetadata.tag', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='run_metadata', full_name='tensorboard.TaggedRunMetadata.run_metadata', index=1, + number=2, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + 
syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=738, + serialized_end=792, +) + +_EVENT.fields_by_name['summary'].message_type = tensorboardX_dot_src_dot_summary__pb2._SUMMARY +_EVENT.fields_by_name['log_message'].message_type = _LOGMESSAGE +_EVENT.fields_by_name['session_log'].message_type = _SESSIONLOG +_EVENT.fields_by_name['tagged_run_metadata'].message_type = _TAGGEDRUNMETADATA +_EVENT.oneofs_by_name['what'].fields.append( + _EVENT.fields_by_name['file_version']) +_EVENT.fields_by_name['file_version'].containing_oneof = _EVENT.oneofs_by_name['what'] +_EVENT.oneofs_by_name['what'].fields.append( + _EVENT.fields_by_name['graph_def']) +_EVENT.fields_by_name['graph_def'].containing_oneof = _EVENT.oneofs_by_name['what'] +_EVENT.oneofs_by_name['what'].fields.append( + _EVENT.fields_by_name['summary']) +_EVENT.fields_by_name['summary'].containing_oneof = _EVENT.oneofs_by_name['what'] +_EVENT.oneofs_by_name['what'].fields.append( + _EVENT.fields_by_name['log_message']) +_EVENT.fields_by_name['log_message'].containing_oneof = _EVENT.oneofs_by_name['what'] +_EVENT.oneofs_by_name['what'].fields.append( + _EVENT.fields_by_name['session_log']) +_EVENT.fields_by_name['session_log'].containing_oneof = _EVENT.oneofs_by_name['what'] +_EVENT.oneofs_by_name['what'].fields.append( + _EVENT.fields_by_name['tagged_run_metadata']) +_EVENT.fields_by_name['tagged_run_metadata'].containing_oneof = _EVENT.oneofs_by_name['what'] +_EVENT.oneofs_by_name['what'].fields.append( + _EVENT.fields_by_name['meta_graph_def']) +_EVENT.fields_by_name['meta_graph_def'].containing_oneof = _EVENT.oneofs_by_name['what'] +_LOGMESSAGE.fields_by_name['level'].enum_type = _LOGMESSAGE_LEVEL +_LOGMESSAGE_LEVEL.containing_type = _LOGMESSAGE +_SESSIONLOG.fields_by_name['status'].enum_type = _SESSIONLOG_SESSIONSTATUS +_SESSIONLOG_SESSIONSTATUS.containing_type = _SESSIONLOG +DESCRIPTOR.message_types_by_name['Event'] = _EVENT +DESCRIPTOR.message_types_by_name['LogMessage'] = _LOGMESSAGE +DESCRIPTOR.message_types_by_name['SessionLog'] = _SESSIONLOG +DESCRIPTOR.message_types_by_name['TaggedRunMetadata'] = _TAGGEDRUNMETADATA +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Event = _reflection.GeneratedProtocolMessageType('Event', (_message.Message,), dict( + DESCRIPTOR = _EVENT, + __module__ = 'tensorboardX.src.event_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.Event) + )) +_sym_db.RegisterMessage(Event) + +LogMessage = _reflection.GeneratedProtocolMessageType('LogMessage', (_message.Message,), dict( + DESCRIPTOR = _LOGMESSAGE, + __module__ = 'tensorboardX.src.event_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.LogMessage) + )) +_sym_db.RegisterMessage(LogMessage) + +SessionLog = _reflection.GeneratedProtocolMessageType('SessionLog', (_message.Message,), dict( + DESCRIPTOR = _SESSIONLOG, + __module__ = 'tensorboardX.src.event_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.SessionLog) + )) +_sym_db.RegisterMessage(SessionLog) + +TaggedRunMetadata = _reflection.GeneratedProtocolMessageType('TaggedRunMetadata', (_message.Message,), dict( + DESCRIPTOR = _TAGGEDRUNMETADATA, + __module__ = 'tensorboardX.src.event_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.TaggedRunMetadata) + )) +_sym_db.RegisterMessage(TaggedRunMetadata) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\023org.tensorflow.utilB\013EventProtosP\001\370\001\001')) +# @@protoc_insertion_point(module_scope) diff --git 
a/BiSTNet-NTIRE2023/tensorboardX/src/graph_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/graph_pb2.py new file mode 100644 index 0000000..23dda00 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/graph_pb2.py @@ -0,0 +1,92 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorboardX/src/graph.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from tensorboardX.src import node_def_pb2 as tensorboardX_dot_src_dot_node__def__pb2 +from tensorboardX.src import versions_pb2 as tensorboardX_dot_src_dot_versions__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/graph.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n\x1ctensorboardX/src/graph.proto\x12\x0btensorboard\x1a\x1ftensorboardX/src/node_def.proto\x1a\x1ftensorboardX/src/versions.proto\"n\n\x08GraphDef\x12\"\n\x04node\x18\x01 \x03(\x0b\x32\x14.tensorboard.NodeDef\x12)\n\x08versions\x18\x04 \x01(\x0b\x32\x17.tensorboard.VersionDef\x12\x13\n\x07version\x18\x03 \x01(\x05\x42\x02\x18\x01\x42,\n\x18org.tensorflow.frameworkB\x0bGraphProtosP\x01\xf8\x01\x01\x62\x06proto3') + , + dependencies=[tensorboardX_dot_src_dot_node__def__pb2.DESCRIPTOR,tensorboardX_dot_src_dot_versions__pb2.DESCRIPTOR,]) + + + + +_GRAPHDEF = _descriptor.Descriptor( + name='GraphDef', + full_name='tensorboard.GraphDef', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='node', full_name='tensorboard.GraphDef.node', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='versions', full_name='tensorboard.GraphDef.versions', index=1, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='version', full_name='tensorboard.GraphDef.version', index=2, + number=3, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001'))), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=111, + serialized_end=221, +) + +_GRAPHDEF.fields_by_name['node'].message_type = tensorboardX_dot_src_dot_node__def__pb2._NODEDEF +_GRAPHDEF.fields_by_name['versions'].message_type = tensorboardX_dot_src_dot_versions__pb2._VERSIONDEF +DESCRIPTOR.message_types_by_name['GraphDef'] = _GRAPHDEF +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +GraphDef = _reflection.GeneratedProtocolMessageType('GraphDef', (_message.Message,), dict( + DESCRIPTOR = _GRAPHDEF, + __module__ = 'tensorboardX.src.graph_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.GraphDef) + )) 
+_sym_db.RegisterMessage(GraphDef) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\013GraphProtosP\001\370\001\001')) +_GRAPHDEF.fields_by_name['version'].has_options = True +_GRAPHDEF.fields_by_name['version']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/node_def_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/node_def_pb2.py new file mode 100644 index 0000000..11ec64e --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/node_def_pb2.py @@ -0,0 +1,151 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorboardX/src/node_def.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from tensorboardX.src import attr_value_pb2 as tensorboardX_dot_src_dot_attr__value__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/node_def.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n\x1ftensorboardX/src/node_def.proto\x12\x0btensorboard\x1a!tensorboardX/src/attr_value.proto\"\xb5\x01\n\x07NodeDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02op\x18\x02 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12\x0e\n\x06\x64\x65vice\x18\x04 \x01(\t\x12,\n\x04\x61ttr\x18\x05 \x03(\x0b\x32\x1e.tensorboard.NodeDef.AttrEntry\x1a\x43\n\tAttrEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.tensorboard.AttrValue:\x02\x38\x01\x42*\n\x18org.tensorflow.frameworkB\tNodeProtoP\x01\xf8\x01\x01\x62\x06proto3') + , + dependencies=[tensorboardX_dot_src_dot_attr__value__pb2.DESCRIPTOR,]) + + + + +_NODEDEF_ATTRENTRY = _descriptor.Descriptor( + name='AttrEntry', + full_name='tensorboard.NodeDef.AttrEntry', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='key', full_name='tensorboard.NodeDef.AttrEntry.key', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='value', full_name='tensorboard.NodeDef.AttrEntry.value', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')), + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=198, + serialized_end=265, +) + +_NODEDEF = _descriptor.Descriptor( + name='NodeDef', + full_name='tensorboard.NodeDef', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='tensorboard.NodeDef.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, 
default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='op', full_name='tensorboard.NodeDef.op', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='input', full_name='tensorboard.NodeDef.input', index=2, + number=3, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='device', full_name='tensorboard.NodeDef.device', index=3, + number=4, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='attr', full_name='tensorboard.NodeDef.attr', index=4, + number=5, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_NODEDEF_ATTRENTRY, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=84, + serialized_end=265, +) + +_NODEDEF_ATTRENTRY.fields_by_name['value'].message_type = tensorboardX_dot_src_dot_attr__value__pb2._ATTRVALUE +_NODEDEF_ATTRENTRY.containing_type = _NODEDEF +_NODEDEF.fields_by_name['attr'].message_type = _NODEDEF_ATTRENTRY +DESCRIPTOR.message_types_by_name['NodeDef'] = _NODEDEF +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +NodeDef = _reflection.GeneratedProtocolMessageType('NodeDef', (_message.Message,), dict( + + AttrEntry = _reflection.GeneratedProtocolMessageType('AttrEntry', (_message.Message,), dict( + DESCRIPTOR = _NODEDEF_ATTRENTRY, + __module__ = 'tensorboardX.src.node_def_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.NodeDef.AttrEntry) + )) + , + DESCRIPTOR = _NODEDEF, + __module__ = 'tensorboardX.src.node_def_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.NodeDef) + )) +_sym_db.RegisterMessage(NodeDef) +_sym_db.RegisterMessage(NodeDef.AttrEntry) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\tNodeProtoP\001\370\001\001')) +_NODEDEF_ATTRENTRY.has_options = True +_NODEDEF_ATTRENTRY._options = _descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/plugin_pr_curve_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/plugin_pr_curve_pb2.py new file mode 100644 index 0000000..c219d52 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/plugin_pr_curve_pb2.py @@ -0,0 +1,76 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: tensorboardX/src/plugin_pr_curve.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/plugin_pr_curve.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n&tensorboardX/src/plugin_pr_curve.proto\x12\x0btensorboard\"<\n\x11PrCurvePluginData\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x16\n\x0enum_thresholds\x18\x02 \x01(\rb\x06proto3') +) + + + + +_PRCURVEPLUGINDATA = _descriptor.Descriptor( + name='PrCurvePluginData', + full_name='tensorboard.PrCurvePluginData', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='version', full_name='tensorboard.PrCurvePluginData.version', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='num_thresholds', full_name='tensorboard.PrCurvePluginData.num_thresholds', index=1, + number=2, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=55, + serialized_end=115, +) + +DESCRIPTOR.message_types_by_name['PrCurvePluginData'] = _PRCURVEPLUGINDATA +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +PrCurvePluginData = _reflection.GeneratedProtocolMessageType('PrCurvePluginData', (_message.Message,), dict( + DESCRIPTOR = _PRCURVEPLUGINDATA, + __module__ = 'tensorboardX.src.plugin_pr_curve_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.PrCurvePluginData) + )) +_sym_db.RegisterMessage(PrCurvePluginData) + + +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/resource_handle_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/resource_handle_pb2.py new file mode 100644 index 0000000..276703e --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/resource_handle_pb2.py @@ -0,0 +1,99 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: tensorboardX/src/resource_handle.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/resource_handle.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n&tensorboardX/src/resource_handle.proto\x12\x0btensorboard\"r\n\x13ResourceHandleProto\x12\x0e\n\x06\x64\x65vice\x18\x01 \x01(\t\x12\x11\n\tcontainer\x18\x02 \x01(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x11\n\thash_code\x18\x04 \x01(\x04\x12\x17\n\x0fmaybe_type_name\x18\x05 \x01(\tB/\n\x18org.tensorflow.frameworkB\x0eResourceHandleP\x01\xf8\x01\x01\x62\x06proto3') +) + + + + +_RESOURCEHANDLEPROTO = _descriptor.Descriptor( + name='ResourceHandleProto', + full_name='tensorboard.ResourceHandleProto', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='device', full_name='tensorboard.ResourceHandleProto.device', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='container', full_name='tensorboard.ResourceHandleProto.container', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='name', full_name='tensorboard.ResourceHandleProto.name', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='hash_code', full_name='tensorboard.ResourceHandleProto.hash_code', index=3, + number=4, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='maybe_type_name', full_name='tensorboard.ResourceHandleProto.maybe_type_name', index=4, + number=5, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=55, + serialized_end=169, +) + +DESCRIPTOR.message_types_by_name['ResourceHandleProto'] = _RESOURCEHANDLEPROTO +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +ResourceHandleProto = _reflection.GeneratedProtocolMessageType('ResourceHandleProto', (_message.Message,), dict( + DESCRIPTOR = _RESOURCEHANDLEPROTO, + __module__ = 'tensorboardX.src.resource_handle_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.ResourceHandleProto) + )) 
+_sym_db.RegisterMessage(ResourceHandleProto) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\016ResourceHandleP\001\370\001\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/summary_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/summary_pb2.py new file mode 100644 index 0000000..db24ea8 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/summary_pb2.py @@ -0,0 +1,532 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorboardX/src/summary.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from tensorboardX.src import tensor_pb2 as tensorboardX_dot_src_dot_tensor__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/summary.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n\x1etensorboardX/src/summary.proto\x12\x0btensorboard\x1a\x1dtensorboardX/src/tensor.proto\"\'\n\x12SummaryDescription\x12\x11\n\ttype_hint\x18\x01 \x01(\t\"\x87\x01\n\x0eHistogramProto\x12\x0b\n\x03min\x18\x01 \x01(\x01\x12\x0b\n\x03max\x18\x02 \x01(\x01\x12\x0b\n\x03num\x18\x03 \x01(\x01\x12\x0b\n\x03sum\x18\x04 \x01(\x01\x12\x13\n\x0bsum_squares\x18\x05 \x01(\x01\x12\x18\n\x0c\x62ucket_limit\x18\x06 \x03(\x01\x42\x02\x10\x01\x12\x12\n\x06\x62ucket\x18\x07 \x03(\x01\x42\x02\x10\x01\"\x83\x01\n\x0fSummaryMetadata\x12<\n\x0bplugin_data\x18\x01 \x03(\x0b\x32\'.tensorboard.SummaryMetadata.PluginData\x1a\x32\n\nPluginData\x12\x13\n\x0bplugin_name\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\"\xe4\x04\n\x07Summary\x12)\n\x05value\x18\x01 \x03(\x0b\x32\x1a.tensorboard.Summary.Value\x1aX\n\x05Image\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x12\n\ncolorspace\x18\x03 \x01(\x05\x12\x1c\n\x14\x65ncoded_image_string\x18\x04 \x01(\x0c\x1a}\n\x05\x41udio\x12\x13\n\x0bsample_rate\x18\x01 \x01(\x02\x12\x14\n\x0cnum_channels\x18\x02 \x01(\x03\x12\x15\n\rlength_frames\x18\x03 \x01(\x03\x12\x1c\n\x14\x65ncoded_audio_string\x18\x04 \x01(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x05 \x01(\t\x1a\xd4\x02\n\x05Value\x12\x11\n\tnode_name\x18\x07 \x01(\t\x12\x0b\n\x03tag\x18\x01 \x01(\t\x12.\n\x08metadata\x18\t \x01(\x0b\x32\x1c.tensorboard.SummaryMetadata\x12\x16\n\x0csimple_value\x18\x02 \x01(\x02H\x00\x12&\n\x1cobsolete_old_style_histogram\x18\x03 \x01(\x0cH\x00\x12+\n\x05image\x18\x04 \x01(\x0b\x32\x1a.tensorboard.Summary.ImageH\x00\x12,\n\x05histo\x18\x05 \x01(\x0b\x32\x1b.tensorboard.HistogramProtoH\x00\x12+\n\x05\x61udio\x18\x06 \x01(\x0b\x32\x1a.tensorboard.Summary.AudioH\x00\x12*\n\x06tensor\x18\x08 \x01(\x0b\x32\x18.tensorboard.TensorProtoH\x00\x42\x07\n\x05valueB.\n\x18org.tensorflow.frameworkB\rSummaryProtosP\x01\xf8\x01\x01\x62\x06proto3') + , + dependencies=[tensorboardX_dot_src_dot_tensor__pb2.DESCRIPTOR,]) + + + + +_SUMMARYDESCRIPTION = _descriptor.Descriptor( + name='SummaryDescription', + full_name='tensorboard.SummaryDescription', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='type_hint', 
full_name='tensorboard.SummaryDescription.type_hint', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=78, + serialized_end=117, +) + + +_HISTOGRAMPROTO = _descriptor.Descriptor( + name='HistogramProto', + full_name='tensorboard.HistogramProto', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='min', full_name='tensorboard.HistogramProto.min', index=0, + number=1, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='max', full_name='tensorboard.HistogramProto.max', index=1, + number=2, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='num', full_name='tensorboard.HistogramProto.num', index=2, + number=3, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='sum', full_name='tensorboard.HistogramProto.sum', index=3, + number=4, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='sum_squares', full_name='tensorboard.HistogramProto.sum_squares', index=4, + number=5, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='bucket_limit', full_name='tensorboard.HistogramProto.bucket_limit', index=5, + number=6, type=1, cpp_type=5, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='bucket', full_name='tensorboard.HistogramProto.bucket', index=6, + number=7, type=1, cpp_type=5, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=120, + serialized_end=255, +) + + +_SUMMARYMETADATA_PLUGINDATA = _descriptor.Descriptor( + name='PluginData', + full_name='tensorboard.SummaryMetadata.PluginData', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='plugin_name', full_name='tensorboard.SummaryMetadata.PluginData.plugin_name', 
index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='content', full_name='tensorboard.SummaryMetadata.PluginData.content', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=339, + serialized_end=389, +) + +_SUMMARYMETADATA = _descriptor.Descriptor( + name='SummaryMetadata', + full_name='tensorboard.SummaryMetadata', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='plugin_data', full_name='tensorboard.SummaryMetadata.plugin_data', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_SUMMARYMETADATA_PLUGINDATA, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=258, + serialized_end=389, +) + + +_SUMMARY_IMAGE = _descriptor.Descriptor( + name='Image', + full_name='tensorboard.Summary.Image', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='height', full_name='tensorboard.Summary.Image.height', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='width', full_name='tensorboard.Summary.Image.width', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='colorspace', full_name='tensorboard.Summary.Image.colorspace', index=2, + number=3, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='encoded_image_string', full_name='tensorboard.Summary.Image.encoded_image_string', index=3, + number=4, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=446, + serialized_end=534, +) + +_SUMMARY_AUDIO = _descriptor.Descriptor( + name='Audio', + full_name='tensorboard.Summary.Audio', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='sample_rate', full_name='tensorboard.Summary.Audio.sample_rate', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=False, 
default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='num_channels', full_name='tensorboard.Summary.Audio.num_channels', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='length_frames', full_name='tensorboard.Summary.Audio.length_frames', index=2, + number=3, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='encoded_audio_string', full_name='tensorboard.Summary.Audio.encoded_audio_string', index=3, + number=4, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='content_type', full_name='tensorboard.Summary.Audio.content_type', index=4, + number=5, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=536, + serialized_end=661, +) + +_SUMMARY_VALUE = _descriptor.Descriptor( + name='Value', + full_name='tensorboard.Summary.Value', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='node_name', full_name='tensorboard.Summary.Value.node_name', index=0, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='tag', full_name='tensorboard.Summary.Value.tag', index=1, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='metadata', full_name='tensorboard.Summary.Value.metadata', index=2, + number=9, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='simple_value', full_name='tensorboard.Summary.Value.simple_value', index=3, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='obsolete_old_style_histogram', full_name='tensorboard.Summary.Value.obsolete_old_style_histogram', index=4, + number=3, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + 
name='image', full_name='tensorboard.Summary.Value.image', index=5, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='histo', full_name='tensorboard.Summary.Value.histo', index=6, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='audio', full_name='tensorboard.Summary.Value.audio', index=7, + number=6, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='tensor', full_name='tensorboard.Summary.Value.tensor', index=8, + number=8, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='value', full_name='tensorboard.Summary.Value.value', + index=0, containing_type=None, fields=[]), + ], + serialized_start=664, + serialized_end=1004, +) + +_SUMMARY = _descriptor.Descriptor( + name='Summary', + full_name='tensorboard.Summary', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='value', full_name='tensorboard.Summary.value', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_SUMMARY_IMAGE, _SUMMARY_AUDIO, _SUMMARY_VALUE, ], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=392, + serialized_end=1004, +) + +_SUMMARYMETADATA_PLUGINDATA.containing_type = _SUMMARYMETADATA +_SUMMARYMETADATA.fields_by_name['plugin_data'].message_type = _SUMMARYMETADATA_PLUGINDATA +_SUMMARY_IMAGE.containing_type = _SUMMARY +_SUMMARY_AUDIO.containing_type = _SUMMARY +_SUMMARY_VALUE.fields_by_name['metadata'].message_type = _SUMMARYMETADATA +_SUMMARY_VALUE.fields_by_name['image'].message_type = _SUMMARY_IMAGE +_SUMMARY_VALUE.fields_by_name['histo'].message_type = _HISTOGRAMPROTO +_SUMMARY_VALUE.fields_by_name['audio'].message_type = _SUMMARY_AUDIO +_SUMMARY_VALUE.fields_by_name['tensor'].message_type = tensorboardX_dot_src_dot_tensor__pb2._TENSORPROTO +_SUMMARY_VALUE.containing_type = _SUMMARY +_SUMMARY_VALUE.oneofs_by_name['value'].fields.append( + _SUMMARY_VALUE.fields_by_name['simple_value']) +_SUMMARY_VALUE.fields_by_name['simple_value'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value'] +_SUMMARY_VALUE.oneofs_by_name['value'].fields.append( + _SUMMARY_VALUE.fields_by_name['obsolete_old_style_histogram']) +_SUMMARY_VALUE.fields_by_name['obsolete_old_style_histogram'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value'] +_SUMMARY_VALUE.oneofs_by_name['value'].fields.append( + _SUMMARY_VALUE.fields_by_name['image']) 
+_SUMMARY_VALUE.fields_by_name['image'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value'] +_SUMMARY_VALUE.oneofs_by_name['value'].fields.append( + _SUMMARY_VALUE.fields_by_name['histo']) +_SUMMARY_VALUE.fields_by_name['histo'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value'] +_SUMMARY_VALUE.oneofs_by_name['value'].fields.append( + _SUMMARY_VALUE.fields_by_name['audio']) +_SUMMARY_VALUE.fields_by_name['audio'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value'] +_SUMMARY_VALUE.oneofs_by_name['value'].fields.append( + _SUMMARY_VALUE.fields_by_name['tensor']) +_SUMMARY_VALUE.fields_by_name['tensor'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value'] +_SUMMARY.fields_by_name['value'].message_type = _SUMMARY_VALUE +DESCRIPTOR.message_types_by_name['SummaryDescription'] = _SUMMARYDESCRIPTION +DESCRIPTOR.message_types_by_name['HistogramProto'] = _HISTOGRAMPROTO +DESCRIPTOR.message_types_by_name['SummaryMetadata'] = _SUMMARYMETADATA +DESCRIPTOR.message_types_by_name['Summary'] = _SUMMARY +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +SummaryDescription = _reflection.GeneratedProtocolMessageType('SummaryDescription', (_message.Message,), dict( + DESCRIPTOR = _SUMMARYDESCRIPTION, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.SummaryDescription) + )) +_sym_db.RegisterMessage(SummaryDescription) + +HistogramProto = _reflection.GeneratedProtocolMessageType('HistogramProto', (_message.Message,), dict( + DESCRIPTOR = _HISTOGRAMPROTO, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.HistogramProto) + )) +_sym_db.RegisterMessage(HistogramProto) + +SummaryMetadata = _reflection.GeneratedProtocolMessageType('SummaryMetadata', (_message.Message,), dict( + + PluginData = _reflection.GeneratedProtocolMessageType('PluginData', (_message.Message,), dict( + DESCRIPTOR = _SUMMARYMETADATA_PLUGINDATA, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.SummaryMetadata.PluginData) + )) + , + DESCRIPTOR = _SUMMARYMETADATA, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.SummaryMetadata) + )) +_sym_db.RegisterMessage(SummaryMetadata) +_sym_db.RegisterMessage(SummaryMetadata.PluginData) + +Summary = _reflection.GeneratedProtocolMessageType('Summary', (_message.Message,), dict( + + Image = _reflection.GeneratedProtocolMessageType('Image', (_message.Message,), dict( + DESCRIPTOR = _SUMMARY_IMAGE, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.Summary.Image) + )) + , + + Audio = _reflection.GeneratedProtocolMessageType('Audio', (_message.Message,), dict( + DESCRIPTOR = _SUMMARY_AUDIO, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.Summary.Audio) + )) + , + + Value = _reflection.GeneratedProtocolMessageType('Value', (_message.Message,), dict( + DESCRIPTOR = _SUMMARY_VALUE, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.Summary.Value) + )) + , + DESCRIPTOR = _SUMMARY, + __module__ = 'tensorboardX.src.summary_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.Summary) + )) +_sym_db.RegisterMessage(Summary) +_sym_db.RegisterMessage(Summary.Image) +_sym_db.RegisterMessage(Summary.Audio) +_sym_db.RegisterMessage(Summary.Value) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), 
_b('\n\030org.tensorflow.frameworkB\rSummaryProtosP\001\370\001\001')) +_HISTOGRAMPROTO.fields_by_name['bucket_limit'].has_options = True +_HISTOGRAMPROTO.fields_by_name['bucket_limit']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_HISTOGRAMPROTO.fields_by_name['bucket'].has_options = True +_HISTOGRAMPROTO.fields_by_name['bucket']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/tensor_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/tensor_pb2.py new file mode 100644 index 0000000..b54d9c1 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/tensor_pb2.py @@ -0,0 +1,185 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorboardX/src/tensor.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from tensorboardX.src import resource_handle_pb2 as tensorboardX_dot_src_dot_resource__handle__pb2 +from tensorboardX.src import tensor_shape_pb2 as tensorboardX_dot_src_dot_tensor__shape__pb2 +from tensorboardX.src import types_pb2 as tensorboardX_dot_src_dot_types__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/tensor.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n\x1dtensorboardX/src/tensor.proto\x12\x0btensorboard\x1a&tensorboardX/src/resource_handle.proto\x1a#tensorboardX/src/tensor_shape.proto\x1a\x1ctensorboardX/src/types.proto\"\xa6\x03\n\x0bTensorProto\x12$\n\x05\x64type\x18\x01 \x01(\x0e\x32\x15.tensorboard.DataType\x12\x33\n\x0ctensor_shape\x18\x02 \x01(\x0b\x32\x1d.tensorboard.TensorShapeProto\x12\x16\n\x0eversion_number\x18\x03 \x01(\x05\x12\x16\n\x0etensor_content\x18\x04 \x01(\x0c\x12\x14\n\x08half_val\x18\r \x03(\x05\x42\x02\x10\x01\x12\x15\n\tfloat_val\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x16\n\ndouble_val\x18\x06 \x03(\x01\x42\x02\x10\x01\x12\x13\n\x07int_val\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x12\n\nstring_val\x18\x08 \x03(\x0c\x12\x18\n\x0cscomplex_val\x18\t \x03(\x02\x42\x02\x10\x01\x12\x15\n\tint64_val\x18\n \x03(\x03\x42\x02\x10\x01\x12\x14\n\x08\x62ool_val\x18\x0b \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x64\x63omplex_val\x18\x0c \x03(\x01\x42\x02\x10\x01\x12=\n\x13resource_handle_val\x18\x0e \x03(\x0b\x32 .tensorboard.ResourceHandleProtoB-\n\x18org.tensorflow.frameworkB\x0cTensorProtosP\x01\xf8\x01\x01\x62\x06proto3') + , + dependencies=[tensorboardX_dot_src_dot_resource__handle__pb2.DESCRIPTOR,tensorboardX_dot_src_dot_tensor__shape__pb2.DESCRIPTOR,tensorboardX_dot_src_dot_types__pb2.DESCRIPTOR,]) + + + + +_TENSORPROTO = _descriptor.Descriptor( + name='TensorProto', + full_name='tensorboard.TensorProto', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='dtype', full_name='tensorboard.TensorProto.dtype', index=0, + number=1, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='tensor_shape', 
full_name='tensorboard.TensorProto.tensor_shape', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='version_number', full_name='tensorboard.TensorProto.version_number', index=2, + number=3, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='tensor_content', full_name='tensorboard.TensorProto.tensor_content', index=3, + number=4, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='half_val', full_name='tensorboard.TensorProto.half_val', index=4, + number=13, type=5, cpp_type=1, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='float_val', full_name='tensorboard.TensorProto.float_val', index=5, + number=5, type=2, cpp_type=6, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='double_val', full_name='tensorboard.TensorProto.double_val', index=6, + number=6, type=1, cpp_type=5, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='int_val', full_name='tensorboard.TensorProto.int_val', index=7, + number=7, type=5, cpp_type=1, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='string_val', full_name='tensorboard.TensorProto.string_val', index=8, + number=8, type=12, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='scomplex_val', full_name='tensorboard.TensorProto.scomplex_val', index=9, + number=9, type=2, cpp_type=6, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='int64_val', full_name='tensorboard.TensorProto.int64_val', index=10, + number=10, type=3, cpp_type=2, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + 
_descriptor.FieldDescriptor( + name='bool_val', full_name='tensorboard.TensorProto.bool_val', index=11, + number=11, type=8, cpp_type=7, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='dcomplex_val', full_name='tensorboard.TensorProto.dcomplex_val', index=12, + number=12, type=1, cpp_type=5, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), + _descriptor.FieldDescriptor( + name='resource_handle_val', full_name='tensorboard.TensorProto.resource_handle_val', index=13, + number=14, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=154, + serialized_end=576, +) + +_TENSORPROTO.fields_by_name['dtype'].enum_type = tensorboardX_dot_src_dot_types__pb2._DATATYPE +_TENSORPROTO.fields_by_name['tensor_shape'].message_type = tensorboardX_dot_src_dot_tensor__shape__pb2._TENSORSHAPEPROTO +_TENSORPROTO.fields_by_name['resource_handle_val'].message_type = tensorboardX_dot_src_dot_resource__handle__pb2._RESOURCEHANDLEPROTO +DESCRIPTOR.message_types_by_name['TensorProto'] = _TENSORPROTO +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +TensorProto = _reflection.GeneratedProtocolMessageType('TensorProto', (_message.Message,), dict( + DESCRIPTOR = _TENSORPROTO, + __module__ = 'tensorboardX.src.tensor_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.TensorProto) + )) +_sym_db.RegisterMessage(TensorProto) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\014TensorProtosP\001\370\001\001')) +_TENSORPROTO.fields_by_name['half_val'].has_options = True +_TENSORPROTO.fields_by_name['half_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_TENSORPROTO.fields_by_name['float_val'].has_options = True +_TENSORPROTO.fields_by_name['float_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_TENSORPROTO.fields_by_name['double_val'].has_options = True +_TENSORPROTO.fields_by_name['double_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_TENSORPROTO.fields_by_name['int_val'].has_options = True +_TENSORPROTO.fields_by_name['int_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_TENSORPROTO.fields_by_name['scomplex_val'].has_options = True +_TENSORPROTO.fields_by_name['scomplex_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_TENSORPROTO.fields_by_name['int64_val'].has_options = True +_TENSORPROTO.fields_by_name['int64_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +_TENSORPROTO.fields_by_name['bool_val'].has_options = True +_TENSORPROTO.fields_by_name['bool_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), 
_b('\020\001')) +_TENSORPROTO.fields_by_name['dcomplex_val'].has_options = True +_TENSORPROTO.fields_by_name['dcomplex_val']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/tensor_shape_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/tensor_shape_pb2.py new file mode 100644 index 0000000..4934cb6 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/tensor_shape_pb2.py @@ -0,0 +1,125 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorboardX/src/tensor_shape.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/tensor_shape.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n#tensorboardX/src/tensor_shape.proto\x12\x0btensorboard\"{\n\x10TensorShapeProto\x12.\n\x03\x64im\x18\x02 \x03(\x0b\x32!.tensorboard.TensorShapeProto.Dim\x12\x14\n\x0cunknown_rank\x18\x03 \x01(\x08\x1a!\n\x03\x44im\x12\x0c\n\x04size\x18\x01 \x01(\x03\x12\x0c\n\x04name\x18\x02 \x01(\tB2\n\x18org.tensorflow.frameworkB\x11TensorShapeProtosP\x01\xf8\x01\x01\x62\x06proto3') +) + + + + +_TENSORSHAPEPROTO_DIM = _descriptor.Descriptor( + name='Dim', + full_name='tensorboard.TensorShapeProto.Dim', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='size', full_name='tensorboard.TensorShapeProto.Dim.size', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='name', full_name='tensorboard.TensorShapeProto.Dim.name', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=142, + serialized_end=175, +) + +_TENSORSHAPEPROTO = _descriptor.Descriptor( + name='TensorShapeProto', + full_name='tensorboard.TensorShapeProto', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='dim', full_name='tensorboard.TensorShapeProto.dim', index=0, + number=2, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='unknown_rank', full_name='tensorboard.TensorShapeProto.unknown_rank', index=1, + number=3, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_TENSORSHAPEPROTO_DIM, ], + enum_types=[ + ], + 
options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=52, + serialized_end=175, +) + +_TENSORSHAPEPROTO_DIM.containing_type = _TENSORSHAPEPROTO +_TENSORSHAPEPROTO.fields_by_name['dim'].message_type = _TENSORSHAPEPROTO_DIM +DESCRIPTOR.message_types_by_name['TensorShapeProto'] = _TENSORSHAPEPROTO +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +TensorShapeProto = _reflection.GeneratedProtocolMessageType('TensorShapeProto', (_message.Message,), dict( + + Dim = _reflection.GeneratedProtocolMessageType('Dim', (_message.Message,), dict( + DESCRIPTOR = _TENSORSHAPEPROTO_DIM, + __module__ = 'tensorboardX.src.tensor_shape_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.TensorShapeProto.Dim) + )) + , + DESCRIPTOR = _TENSORSHAPEPROTO, + __module__ = 'tensorboardX.src.tensor_shape_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.TensorShapeProto) + )) +_sym_db.RegisterMessage(TensorShapeProto) +_sym_db.RegisterMessage(TensorShapeProto.Dim) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\021TensorShapeProtosP\001\370\001\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/types_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/types_pb2.py new file mode 100644 index 0000000..cd02d02 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/types_pb2.py @@ -0,0 +1,254 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorboardX/src/types.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf.internal import enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/types.proto', + package='tensorboard', + syntax='proto3', + 
serialized_pb=_b('\n\x1ctensorboardX/src/types.proto\x12\x0btensorboard*\xc2\x05\n\x08\x44\x61taType\x12\x0e\n\nDT_INVALID\x10\x00\x12\x0c\n\x08\x44T_FLOAT\x10\x01\x12\r\n\tDT_DOUBLE\x10\x02\x12\x0c\n\x08\x44T_INT32\x10\x03\x12\x0c\n\x08\x44T_UINT8\x10\x04\x12\x0c\n\x08\x44T_INT16\x10\x05\x12\x0b\n\x07\x44T_INT8\x10\x06\x12\r\n\tDT_STRING\x10\x07\x12\x10\n\x0c\x44T_COMPLEX64\x10\x08\x12\x0c\n\x08\x44T_INT64\x10\t\x12\x0b\n\x07\x44T_BOOL\x10\n\x12\x0c\n\x08\x44T_QINT8\x10\x0b\x12\r\n\tDT_QUINT8\x10\x0c\x12\r\n\tDT_QINT32\x10\r\x12\x0f\n\x0b\x44T_BFLOAT16\x10\x0e\x12\r\n\tDT_QINT16\x10\x0f\x12\x0e\n\nDT_QUINT16\x10\x10\x12\r\n\tDT_UINT16\x10\x11\x12\x11\n\rDT_COMPLEX128\x10\x12\x12\x0b\n\x07\x44T_HALF\x10\x13\x12\x0f\n\x0b\x44T_RESOURCE\x10\x14\x12\x10\n\x0c\x44T_FLOAT_REF\x10\x65\x12\x11\n\rDT_DOUBLE_REF\x10\x66\x12\x10\n\x0c\x44T_INT32_REF\x10g\x12\x10\n\x0c\x44T_UINT8_REF\x10h\x12\x10\n\x0c\x44T_INT16_REF\x10i\x12\x0f\n\x0b\x44T_INT8_REF\x10j\x12\x11\n\rDT_STRING_REF\x10k\x12\x14\n\x10\x44T_COMPLEX64_REF\x10l\x12\x10\n\x0c\x44T_INT64_REF\x10m\x12\x0f\n\x0b\x44T_BOOL_REF\x10n\x12\x10\n\x0c\x44T_QINT8_REF\x10o\x12\x11\n\rDT_QUINT8_REF\x10p\x12\x11\n\rDT_QINT32_REF\x10q\x12\x13\n\x0f\x44T_BFLOAT16_REF\x10r\x12\x11\n\rDT_QINT16_REF\x10s\x12\x12\n\x0e\x44T_QUINT16_REF\x10t\x12\x11\n\rDT_UINT16_REF\x10u\x12\x15\n\x11\x44T_COMPLEX128_REF\x10v\x12\x0f\n\x0b\x44T_HALF_REF\x10w\x12\x13\n\x0f\x44T_RESOURCE_REF\x10xB,\n\x18org.tensorflow.frameworkB\x0bTypesProtosP\x01\xf8\x01\x01\x62\x06proto3') +) + +_DATATYPE = _descriptor.EnumDescriptor( + name='DataType', + full_name='tensorboard.DataType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='DT_INVALID', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_FLOAT', index=1, number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_DOUBLE', index=2, number=2, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT32', index=3, number=3, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_UINT8', index=4, number=4, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT16', index=5, number=5, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT8', index=6, number=6, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_STRING', index=7, number=7, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_COMPLEX64', index=8, number=8, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT64', index=9, number=9, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_BOOL', index=10, number=10, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QINT8', index=11, number=11, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QUINT8', index=12, number=12, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QINT32', index=13, number=13, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_BFLOAT16', index=14, number=14, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QINT16', index=15, number=15, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QUINT16', index=16, number=16, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_UINT16', index=17, number=17, + options=None, + type=None), + 
_descriptor.EnumValueDescriptor( + name='DT_COMPLEX128', index=18, number=18, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_HALF', index=19, number=19, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_RESOURCE', index=20, number=20, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_FLOAT_REF', index=21, number=101, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_DOUBLE_REF', index=22, number=102, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT32_REF', index=23, number=103, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_UINT8_REF', index=24, number=104, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT16_REF', index=25, number=105, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT8_REF', index=26, number=106, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_STRING_REF', index=27, number=107, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_COMPLEX64_REF', index=28, number=108, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_INT64_REF', index=29, number=109, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_BOOL_REF', index=30, number=110, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QINT8_REF', index=31, number=111, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QUINT8_REF', index=32, number=112, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QINT32_REF', index=33, number=113, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_BFLOAT16_REF', index=34, number=114, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QINT16_REF', index=35, number=115, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_QUINT16_REF', index=36, number=116, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_UINT16_REF', index=37, number=117, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_COMPLEX128_REF', index=38, number=118, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_HALF_REF', index=39, number=119, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='DT_RESOURCE_REF', index=40, number=120, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=46, + serialized_end=752, +) +_sym_db.RegisterEnumDescriptor(_DATATYPE) + +DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) +DT_INVALID = 0 +DT_FLOAT = 1 +DT_DOUBLE = 2 +DT_INT32 = 3 +DT_UINT8 = 4 +DT_INT16 = 5 +DT_INT8 = 6 +DT_STRING = 7 +DT_COMPLEX64 = 8 +DT_INT64 = 9 +DT_BOOL = 10 +DT_QINT8 = 11 +DT_QUINT8 = 12 +DT_QINT32 = 13 +DT_BFLOAT16 = 14 +DT_QINT16 = 15 +DT_QUINT16 = 16 +DT_UINT16 = 17 +DT_COMPLEX128 = 18 +DT_HALF = 19 +DT_RESOURCE = 20 +DT_FLOAT_REF = 101 +DT_DOUBLE_REF = 102 +DT_INT32_REF = 103 +DT_UINT8_REF = 104 +DT_INT16_REF = 105 +DT_INT8_REF = 106 +DT_STRING_REF = 107 +DT_COMPLEX64_REF = 108 +DT_INT64_REF = 109 +DT_BOOL_REF = 110 +DT_QINT8_REF = 111 +DT_QUINT8_REF = 112 +DT_QINT32_REF = 113 +DT_BFLOAT16_REF = 114 +DT_QINT16_REF = 115 +DT_QUINT16_REF = 116 +DT_UINT16_REF = 117 +DT_COMPLEX128_REF = 118 +DT_HALF_REF = 119 +DT_RESOURCE_REF = 120 + + +DESCRIPTOR.enum_types_by_name['DataType'] = _DATATYPE 
+_sym_db.RegisterFileDescriptor(DESCRIPTOR) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\013TypesProtosP\001\370\001\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/src/versions_pb2.py b/BiSTNet-NTIRE2023/tensorboardX/src/versions_pb2.py new file mode 100644 index 0000000..1daceb7 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/src/versions_pb2.py @@ -0,0 +1,85 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorboardX/src/versions.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='tensorboardX/src/versions.proto', + package='tensorboard', + syntax='proto3', + serialized_pb=_b('\n\x1ftensorboardX/src/versions.proto\x12\x0btensorboard\"K\n\nVersionDef\x12\x10\n\x08producer\x18\x01 \x01(\x05\x12\x14\n\x0cmin_consumer\x18\x02 \x01(\x05\x12\x15\n\rbad_consumers\x18\x03 \x03(\x05\x42/\n\x18org.tensorflow.frameworkB\x0eVersionsProtosP\x01\xf8\x01\x01\x62\x06proto3') +) + + + + +_VERSIONDEF = _descriptor.Descriptor( + name='VersionDef', + full_name='tensorboard.VersionDef', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='producer', full_name='tensorboard.VersionDef.producer', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='min_consumer', full_name='tensorboard.VersionDef.min_consumer', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='bad_consumers', full_name='tensorboard.VersionDef.bad_consumers', index=2, + number=3, type=5, cpp_type=1, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=48, + serialized_end=123, +) + +DESCRIPTOR.message_types_by_name['VersionDef'] = _VERSIONDEF +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +VersionDef = _reflection.GeneratedProtocolMessageType('VersionDef', (_message.Message,), dict( + DESCRIPTOR = _VERSIONDEF, + __module__ = 'tensorboardX.src.versions_pb2' + # @@protoc_insertion_point(class_scope:tensorboard.VersionDef) + )) +_sym_db.RegisterMessage(VersionDef) + + +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\n\030org.tensorflow.frameworkB\016VersionsProtosP\001\370\001\001')) +# @@protoc_insertion_point(module_scope) diff --git a/BiSTNet-NTIRE2023/tensorboardX/summary.py b/BiSTNet-NTIRE2023/tensorboardX/summary.py 
new file mode 100644 index 0000000..80d3c69 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/summary.py @@ -0,0 +1,264 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""## Generation of summaries. +### Class for writing Summaries +@@FileWriter +@@FileWriterCache +### Summary Ops +@@tensor_summary +@@scalar +@@histogram +@@audio +@@image +@@merge +@@merge_all +## Utilities +@@get_summary_description +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import re as _re +import bisect +from six import StringIO +from six.moves import range +import numpy as np +# pylint: disable=unused-import +from .src.summary_pb2 import Summary +from .src.summary_pb2 import HistogramProto +from .src.summary_pb2 import SummaryMetadata +from .src.tensor_pb2 import TensorProto +from .src.tensor_shape_pb2 import TensorShapeProto +from .src.plugin_pr_curve_pb2 import PrCurvePluginData +from .x2num import makenp + +_INVALID_TAG_CHARACTERS = _re.compile(r'[^-/\w\.]') + + +def _clean_tag(name): + # In the past, the first argument to summary ops was a tag, which allowed + # arbitrary characters. Now we are changing the first argument to be the node + # name. This has a number of advantages (users of summary ops now can + # take advantage of the tf name scope system) but risks breaking existing + # usage, because a much smaller set of characters are allowed in node names. + # This function replaces all illegal characters with _s, and logs a warning. + # It also strips leading slashes from the name. + if name is not None: + new_name = _INVALID_TAG_CHARACTERS.sub('_', name) + new_name = new_name.lstrip('/') # Remove leading slashes + if new_name != name: + logging.info('Summary name %s is illegal; using %s instead.' % (name, new_name)) + name = new_name + return name + + +def scalar(name, scalar, collections=None): + """Outputs a `Summary` protocol buffer containing a single scalar value. + The generated Summary has a Tensor.proto containing the input Tensor. + Args: + name: A name for the generated node. Will also serve as the series name in + TensorBoard. + tensor: A real numeric Tensor containing a single value. + collections: Optional list of graph collections keys. The new summary op is + added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. + Returns: + A scalar `Tensor` of type `string`. Which contains a `Summary` protobuf. + Raises: + ValueError: If tensor has the wrong shape or type. + """ + name = _clean_tag(name) + scalar = makenp(scalar) + assert(scalar.squeeze().ndim == 0), 'scalar should be 0D' + scalar = float(scalar) + return Summary(value=[Summary.Value(tag=name, simple_value=scalar)]) + + +def histogram(name, values, bins, collections=None): + # pylint: disable=line-too-long + """Outputs a `Summary` protocol buffer with a histogram. 
+ The generated + [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) + has one summary value containing a histogram for `values`. + This op reports an `InvalidArgument` error if any value is not finite. + Args: + name: A name for the generated node. Will also serve as a series name in + TensorBoard. + values: A real numeric `Tensor`. Any shape. Values to use to + build the histogram. + collections: Optional list of graph collections keys. The new summary op is + added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. + Returns: + A scalar `Tensor` of type `string`. The serialized `Summary` protocol + buffer. + """ + name = _clean_tag(name) + values = makenp(values) + hist = make_histogram(values.astype(float), bins) + return Summary(value=[Summary.Value(tag=name, histo=hist)]) + + +def make_histogram(values, bins): + """Convert values into a histogram proto using logic from histogram.cc.""" + values = values.reshape(-1) + counts, limits = np.histogram(values, bins=bins) + limits = limits[1:] + + sum_sq = values.dot(values) + return HistogramProto(min=values.min(), + max=values.max(), + num=len(values), + sum=values.sum(), + sum_squares=sum_sq, + bucket_limit=limits, + bucket=counts) + + +def image(tag, tensor): + """Outputs a `Summary` protocol buffer with images. + The summary has up to `max_images` summary values containing images. The + images are built from `tensor` which must be 3-D with shape `[height, width, + channels]` and where `channels` can be: + * 1: `tensor` is interpreted as Grayscale. + * 3: `tensor` is interpreted as RGB. + * 4: `tensor` is interpreted as RGBA. + The `name` in the outputted Summary.Value protobufs is generated based on the + name, with a suffix depending on the max_outputs setting: + * If `max_outputs` is 1, the summary value tag is '*name*/image'. + * If `max_outputs` is greater than 1, the summary value tags are + generated sequentially as '*name*/image/0', '*name*/image/1', etc. + Args: + tag: A name for the generated node. Will also serve as a series name in + TensorBoard. + tensor: A 3-D `uint8` or `float32` `Tensor` of shape `[height, width, + channels]` where `channels` is 1, 3, or 4. + Returns: + A scalar `Tensor` of type `string`. The serialized `Summary` protocol + buffer. + """ + tag = _clean_tag(tag) + tensor = makenp(tensor, 'IMG') + tensor = tensor.astype(np.float32) + tensor = (tensor * 255).astype(np.uint8) + image = make_image(tensor) + return Summary(value=[Summary.Value(tag=tag, image=image)]) + + +def make_image(tensor): + """Convert an numpy representation image to Image protobuf""" + from PIL import Image + height, width, channel = tensor.shape + image = Image.fromarray(tensor) + import io + output = io.BytesIO() + image.save(output, format='PNG') + image_string = output.getvalue() + output.close() + return Summary.Image(height=height, + width=width, + colorspace=channel, + encoded_image_string=image_string) + + +def audio(tag, tensor, sample_rate=44100): + tensor = makenp(tensor) + tensor = tensor.squeeze() + if abs(tensor).max() > 1: + print('warning: audio amplitude out of range, auto clipped.') + tensor = tensor.clip(-1, 1) + assert(tensor.ndim == 1), 'input tensor should be 1 dimensional.' 
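+    # The samples are in [-1, 1]; scale them to signed 16-bit PCM integers and
+    # pack them into an in-memory WAV container below.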
+ + tensor_list = [int(32767.0 * x) for x in tensor] + import io + import wave + import struct + fio = io.BytesIO() + Wave_write = wave.open(fio, 'wb') + Wave_write.setnchannels(1) + Wave_write.setsampwidth(2) + Wave_write.setframerate(sample_rate) + tensor_enc = b'' + for v in tensor_list: + tensor_enc += struct.pack(' 127: # wierd, value > 127 breaks protobuf + num_thresholds = 127 + data = compute_curve(labels, predictions, num_thresholds=num_thresholds, weights=weights) + pr_curve_plugin_data = PrCurvePluginData(version=0, num_thresholds=num_thresholds).SerializeToString() + PluginData = [SummaryMetadata.PluginData(plugin_name='pr_curves', content=pr_curve_plugin_data)] + smd = SummaryMetadata(plugin_data=PluginData) + tensor = TensorProto(dtype='DT_FLOAT', + float_val=data.reshape(-1).tolist(), + tensor_shape=TensorShapeProto( + dim=[TensorShapeProto.Dim(size=data.shape[0]), TensorShapeProto.Dim(size=data.shape[1])])) + return Summary(value=[Summary.Value(tag=tag, metadata=smd, tensor=tensor)]) + + +# https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/pr_curve/summary.py +def compute_curve(labels, predictions, num_thresholds=None, weights=None): + _MINIMUM_COUNT = 1e-7 + + if weights is None: + weights = 1.0 + + # Compute bins of true positives and false positives. + bucket_indices = np.int32(np.floor(predictions * (num_thresholds - 1))) + float_labels = labels.astype(np.float) + histogram_range = (0, num_thresholds - 1) + tp_buckets, _ = np.histogram( + bucket_indices, + bins=num_thresholds, + range=histogram_range, + weights=float_labels * weights) + fp_buckets, _ = np.histogram( + bucket_indices, + bins=num_thresholds, + range=histogram_range, + weights=(1.0 - float_labels) * weights) + + # Obtain the reverse cumulative sum. + tp = np.cumsum(tp_buckets[::-1])[::-1] + fp = np.cumsum(fp_buckets[::-1])[::-1] + tn = fp[0] - fp + fn = tp[0] - tp + precision = tp / np.maximum(_MINIMUM_COUNT, tp + fp) + recall = tp / np.maximum(_MINIMUM_COUNT, tp + fn) + return np.stack((tp, fp, tn, fn, precision, recall)) diff --git a/BiSTNet-NTIRE2023/tensorboardX/writer.py b/BiSTNet-NTIRE2023/tensorboardX/writer.py new file mode 100644 index 0000000..3578c1c --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/writer.py @@ -0,0 +1,485 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Provides an API for generating Event protocol buffers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import json +import os +from .src import event_pb2 +from .src import summary_pb2 +from .src import graph_pb2 +from .event_file_writer import EventFileWriter +from .summary import scalar, histogram, image, audio, text, pr_curve +from .graph import graph +from .graph_onnx import gg +from .embedding import make_mat, make_sprite, make_tsv, append_pbtxt + + +class SummaryToEventTransformer(object): + """Abstractly implements the SummaryWriter API. + This API basically implements a number of endpoints (add_summary, + add_session_log, etc). The endpoints all generate an event protobuf, which is + passed to the contained event_writer. + @@__init__ + @@add_summary + @@add_session_log + @@add_graph + @@add_meta_graph + @@add_run_metadata + """ + + def __init__(self, event_writer, graph=None, graph_def=None): + """Creates a `SummaryWriter` and an event file. + On construction the summary writer creates a new event file in `logdir`. + This event file will contain `Event` protocol buffers constructed when you + call one of the following functions: `add_summary()`, `add_session_log()`, + `add_event()`, or `add_graph()`. + If you pass a `Graph` to the constructor it is added to + the event file. (This is equivalent to calling `add_graph()` later). + TensorBoard will pick the graph from the file and display it graphically so + you can interactively explore the graph you built. You will usually pass + the graph from the session in which you launched it: + ```python + ...create a graph... + # Launch the graph in a session. + sess = tf.Session() + # Create a summary writer, add the 'graph' to the event file. + writer = tf.summary.FileWriter(, sess.graph) + ``` + Args: + event_writer: An EventWriter. Implements add_event method. + graph: A `Graph` object, such as `sess.graph`. + graph_def: DEPRECATED: Use the `graph` argument instead. + """ + self.event_writer = event_writer + # For storing used tags for session.run() outputs. + self._session_run_tags = {} + # TODO(zihaolucky). pass this an empty graph to check whether it's necessary. + # currently we don't support graph in MXNet using tensorboard. + + def add_summary(self, summary, global_step=None): + """Adds a `Summary` protocol buffer to the event file. + This method wraps the provided summary in an `Event` protocol buffer + and adds it to the event file. + You can pass the result of evaluating any summary op, using + [`Session.run()`](client.md#Session.run) or + [`Tensor.eval()`](framework.md#Tensor.eval), to this + function. Alternatively, you can pass a `tf.Summary` protocol + buffer that you populate with your own data. The latter is + commonly done to report evaluation results in event files. + Args: + summary: A `Summary` protocol buffer, optionally serialized as a string. + global_step: Number. Optional global step value to record with the + summary. + """ + if isinstance(summary, bytes): + summ = summary_pb2.Summary() + summ.ParseFromString(summary) + summary = summ + event = event_pb2.Event(summary=summary) + self._add_event(event, global_step) + + def add_graph_onnx(self, graph): + """Adds a `Graph` protocol buffer to the event file. 
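+        The graph is serialized with ``SerializeToString()`` and wrapped in an
+        ``Event`` protobuf before being passed to the event writer.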
+ """ + event = event_pb2.Event(graph_def=graph.SerializeToString()) + self._add_event(event, None) + + def add_graph(self, graph): + """Adds a `Graph` protocol buffer to the event file. + """ + event = event_pb2.Event(graph_def=graph.SerializeToString()) + self._add_event(event, None) + + def add_session_log(self, session_log, global_step=None): + """Adds a `SessionLog` protocol buffer to the event file. + This method wraps the provided session in an `Event` protocol buffer + and adds it to the event file. + Args: + session_log: A `SessionLog` protocol buffer. + global_step: Number. Optional global step value to record with the + summary. + """ + event = event_pb2.Event(session_log=session_log) + self._add_event(event, global_step) + + def _add_event(self, event, step): + event.wall_time = time.time() + if step is not None: + event.step = int(step) + self.event_writer.add_event(event) + + +class FileWriter(SummaryToEventTransformer): + """Writes `Summary` protocol buffers to event files. + The `FileWriter` class provides a mechanism to create an event file in a + given directory and add summaries and events to it. The class updates the + file contents asynchronously. This allows a training program to call methods + to add data to the file directly from the training loop, without slowing down + training. + @@__init__ + @@add_summary + @@add_session_log + @@add_event + @@add_graph + @@add_run_metadata + @@get_logdir + @@flush + @@close + """ + + def __init__(self, + logdir, + graph=None, + max_queue=10, + flush_secs=120, + graph_def=None): + """Creates a `FileWriter` and an event file. + On construction the summary writer creates a new event file in `logdir`. + This event file will contain `Event` protocol buffers constructed when you + call one of the following functions: `add_summary()`, `add_session_log()`, + `add_event()`, or `add_graph()`. + If you pass a `Graph` to the constructor it is added to + the event file. (This is equivalent to calling `add_graph()` later). + TensorBoard will pick the graph from the file and display it graphically so + you can interactively explore the graph you built. You will usually pass + the graph from the session in which you launched it: + ```python + ...create a graph... + # Launch the graph in a session. + sess = tf.Session() + # Create a summary writer, add the 'graph' to the event file. + writer = tf.summary.FileWriter(, sess.graph) + ``` + The other arguments to the constructor control the asynchronous writes to + the event file: + * `flush_secs`: How often, in seconds, to flush the added summaries + and events to disk. + * `max_queue`: Maximum number of summaries or events pending to be + written to disk before one of the 'add' calls block. + Args: + logdir: A string. Directory where event file will be written. + graph: A `Graph` object, such as `sess.graph`. + max_queue: Integer. Size of the queue for pending events and summaries. + flush_secs: Number. How often, in seconds, to flush the + pending events and summaries to disk. + graph_def: DEPRECATED: Use the `graph` argument instead. + """ + event_writer = EventFileWriter(logdir, max_queue, flush_secs) + super(FileWriter, self).__init__(event_writer, graph, graph_def) + + def get_logdir(self): + """Returns the directory where event file will be written.""" + return self.event_writer.get_logdir() + + def add_event(self, event): + """Adds an event to the event file. + Args: + event: An `Event` protocol buffer. 
+ """ + self.event_writer.add_event(event) + + def flush(self): + """Flushes the event file to disk. + Call this method to make sure that all pending events have been written to + disk. + """ + self.event_writer.flush() + + def close(self): + """Flushes the event file to disk and close the file. + Call this method when you do not need the summary writer anymore. + """ + self.event_writer.close() + + def reopen(self): + """Reopens the EventFileWriter. + Can be called after `close()` to add more events in the same directory. + The events will go into a new events file. + Does nothing if the EventFileWriter was not closed. + """ + self.event_writer.reopen() + + +class SummaryWriter(object): + """Writes `Summary` directly to event files. + The `SummaryWriter` class provides a high-level api to create an event file in a + given directory and add summaries and events to it. The class updates the + file contents asynchronously. This allows a training program to call methods + to add data to the file directly from the training loop, without slowing down + training. + """ + def __init__(self, log_dir=None, comment=''): + """ + Args: + log_dir (string): save location, default is: runs/**CURRENT_DATETIME_HOSTNAME**, which changes after each + run. Use hierarchical folder structure to compare between runs easily. e.g. 'runs/exp1', 'runs/exp2' + comment (string): comment that appends to the default log_dir + """ + if not log_dir: + import socket + from datetime import datetime + current_time = datetime.now().strftime('%b%d_%H-%M-%S') + log_dir = os.path.join('runs', current_time + '_' + socket.gethostname() + comment) + self.file_writer = FileWriter(logdir=log_dir) + v = 1E-12 + buckets = [] + neg_buckets = [] + while v < 1E20: + buckets.append(v) + neg_buckets.append(-v) + v *= 1.1 + self.default_bins = neg_buckets[::-1] + [0] + buckets + self.text_tags = [] + # + self.all_writers = {self.file_writer.get_logdir(): self.file_writer} + self.scalar_dict = {} # {writer_id : [[timestamp, step, value],...],...} + + def __append_to_scalar_dict(self, tag, scalar_value, global_step, + timestamp): + """This adds an entry to the self.scalar_dict datastructure with format + {writer_id : [[timestamp, step, value], ...], ...}. + """ + from .x2num import makenp + if tag not in self.scalar_dict.keys(): + self.scalar_dict[tag] = [] + self.scalar_dict[tag].append([timestamp, global_step, float(makenp(scalar_value))]) + + def add_scalar(self, tag, scalar_value, global_step=None): + """Add scalar data to summary. + + Args: + tag (string): Data identifier + scalar_value (float): Value to save + global_step (int): Global step value to record + """ + self.file_writer.add_summary(scalar(tag, scalar_value), global_step) + self.__append_to_scalar_dict(tag, scalar_value, global_step, time.time()) + + def add_scalars(self, main_tag, tag_scalar_dict, global_step=None): + """Adds many scalar data to summary. + + Args: + tag (string): Data identifier + main_tag (string): The parent name for the tags + tag_scalar_dict (dict): Key-value pair storing the tag and corresponding values + global_step (int): Global step value to record + + Examples:: + + writer.add_scalars('run_14h',{'xsinx':i*np.sin(i/r), + 'xcosx':i*np.cos(i/r), + 'arctanx': numsteps*np.arctan(i/r)}, i) + # This function adds three values to the same scalar plot with the tag + # 'run_14h' in TensorBoard's scalar section. 
+ """ + timestamp = time.time() + fw_logdir = self.file_writer.get_logdir() + for tag, scalar_value in tag_scalar_dict.items(): + fw_tag = fw_logdir + "/" + main_tag + "/" + tag + if fw_tag in self.all_writers.keys(): + fw = self.all_writers[fw_tag] + else: + fw = FileWriter(logdir=fw_tag) + self.all_writers[fw_tag] = fw + fw.add_summary(scalar(main_tag, scalar_value), global_step) + self.__append_to_scalar_dict(fw_tag, scalar_value, global_step, timestamp) + + def export_scalars_to_json(self, path): + """Exports to the given path an ASCII file containing all the scalars written + so far by this instance, with the following format: + {writer_id : [[timestamp, step, value], ...], ...} + """ + with open(path, "w") as f: + json.dump(self.scalar_dict, f) + + def add_histogram(self, tag, values, global_step=None, bins='tensorflow'): + """Add histogram to summary. + + Args: + tag (string): Data identifier + values (numpy.array): Values to build histogram + global_step (int): Global step value to record + bins (string): one of {'tensorflow','auto', 'fd', ...}, this determines how the bins are made. You can find + other options in: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html + """ + if bins == 'tensorflow': + bins = self.default_bins + self.file_writer.add_summary(histogram(tag, values, bins), global_step) + + def add_image(self, tag, img_tensor, global_step=None): + """Add image data to summary. + + Note that this requires the ``pillow`` package. + + Args: + tag (string): Data identifier + img_tensor (torch.Tensor): Image data + global_step (int): Global step value to record + Shape: + img_tensor: :math:`(3, H, W)`. Use ``torchvision.utils.make_grid()`` to prepare it is a good idea. + """ + self.file_writer.add_summary(image(tag, img_tensor), global_step) + + def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100): + """Add audio data to summary. + + Args: + tag (string): Data identifier + snd_tensor (torch.Tensor): Sound data + global_step (int): Global step value to record + sample_rate (int): sample rate in Hz + + Shape: + snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1]. + """ + self.file_writer.add_summary(audio(tag, snd_tensor, sample_rate=sample_rate), global_step) + + def add_text(self, tag, text_string, global_step=None): + """Add text data to summary. + + Args: + tag (string): Data identifier + text_string (string): String to save + global_step (int): Global step value to record + + Examples:: + + writer.add_text('lstm', 'This is an lstm', 0) + writer.add_text('rnn', 'This is an rnn', 10) + """ + self.file_writer.add_summary(text(tag, text_string), global_step) + if tag not in self.text_tags: + self.text_tags.append(tag) + extension_dir = self.file_writer.get_logdir() + '/plugins/tensorboard_text/' + if not os.path.exists(extension_dir): + os.makedirs(extension_dir) + with open(extension_dir + 'tensors.json', 'w') as fp: + json.dump(self.text_tags, fp) + + def add_graph_onnx(self, prototxt): + self.file_writer.add_graph_onnx(gg(prototxt)) + + def add_graph(self, model, input_to_model, verbose=False): + # prohibit second call? + # no, let tensorboard handles it and show its warning message. + """Add graph data to summary. + + Args: + model (torch.nn.Module): model to draw. + input_to_model (torch.autograd.Variable): a variable or a tuple of variables to be fed. 
+ + """ + import torch + from distutils.version import LooseVersion + if LooseVersion(torch.__version__) >= LooseVersion("0.3.1"): + pass + else: + if LooseVersion(torch.__version__) >= LooseVersion("0.3.0"): + print('You are using PyTorch==0.3.0, use add_graph_onnx()') + return + if not hasattr(torch.autograd.Variable, 'grad_fn'): + print('add_graph() only supports PyTorch v0.2.') + return + self.file_writer.add_graph(graph(model, input_to_model, verbose)) + + def add_embedding(self, mat, metadata=None, label_img=None, global_step=None, tag='default'): + """Add embedding projector data to summary. + + Args: + mat (torch.Tensor): A matrix which each row is the feature vector of the data point + metadata (list): A list of labels, each element will be convert to string + label_img (torch.Tensor): Images correspond to each data point + global_step (int): Global step value to record + tag (string): Name for the embedding + Shape: + mat: :math:`(N, D)`, where N is number of data and D is feature dimension + + label_img: :math:`(N, C, H, W)` + + Examples:: + + import keyword + import torch + meta = [] + while len(meta)<100: + meta = meta+keyword.kwlist # get some strings + meta = meta[:100] + + for i, v in enumerate(meta): + meta[i] = v+str(i) + + label_img = torch.rand(100, 3, 10, 32) + for i in range(100): + label_img[i]*=i/100.0 + + writer.add_embedding(torch.randn(100, 5), metadata=meta, label_img=label_img) + writer.add_embedding(torch.randn(100, 5), label_img=label_img) + writer.add_embedding(torch.randn(100, 5), metadata=meta) + """ + if global_step is None: + global_step = 0 + # clear pbtxt? + save_path = os.path.join(self.file_writer.get_logdir(), str(global_step).zfill(5)) + try: + os.makedirs(save_path) + except OSError: + print('warning: Embedding dir exists, did you set global_step for add_embedding()?') + if metadata is not None: + assert mat.size(0) == len(metadata), '#labels should equal with #data points' + make_tsv(metadata, save_path) + if label_img is not None: + assert mat.size(0) == label_img.size(0), '#images should equal with #data points' + make_sprite(label_img, save_path) + assert mat.dim() == 2, 'mat should be 2D, where mat.size(0) is the number of data points' + make_mat(mat.tolist(), save_path) + # new funcion to append to the config file a new embedding + append_pbtxt(metadata, label_img, self.file_writer.get_logdir(), str(global_step).zfill(5), tag) + + def add_pr_curve(self, tag, labels, predictions, global_step=None, num_thresholds=127, weights=None): + """Adds precision recall curve. + + Args: + tag (string): Data identifier + labels (torch.Tensor): Ground truth data. Binary label for each element. + predictions (torch.Tensor): The probability that an element be classified as true. Value should in [0, 1] + global_step (int): Global step value to record + num_thresholds (int): Number of thresholds used to draw the curve. 
+ + """ + from .x2num import makenp + labels = makenp(labels) + predictions = makenp(predictions) + self.file_writer.add_summary(pr_curve(tag, labels, predictions, num_thresholds, weights), global_step) + + def close(self): + if self.file_writer is None: + return # ignore double close + self.file_writer.flush() + self.file_writer.close() + for path, writer in self.all_writers.items(): + writer.flush() + writer.close() + self.file_writer = self.all_writers = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() diff --git a/BiSTNet-NTIRE2023/tensorboardX/x2num.py b/BiSTNet-NTIRE2023/tensorboardX/x2num.py new file mode 100644 index 0000000..f2f89d5 --- /dev/null +++ b/BiSTNet-NTIRE2023/tensorboardX/x2num.py @@ -0,0 +1,90 @@ +# DO NOT alter/distruct/free input object ! + +import numpy as np + + +def makenp(x, modality=None): + # if already numpy, return + if isinstance(x, np.ndarray): + if modality == 'IMG' and x.dtype == np.uint8: + return x.astype(np.float32) / 255.0 + return x + if np.isscalar(x): + return np.array([x]) + if 'torch' in str(type(x)): + return pytorch_np(x, modality) + if 'chainer' in str(type(x)): + return chainer_np(x, modality) + if 'mxnet' in str(type(x)): + return mxnet_np(x, modality) + + +def pytorch_np(x, modality): + import torch + if isinstance(x, torch.autograd.Variable): + x = x.data + x = x.cpu().numpy() + if modality == 'IMG': + x = _prepare_image(x) + return x + + +def theano_np(x): + import theano + pass + + +def caffe2_np(x): + pass + + +def mxnet_np(x, modality): + x = x.asnumpy() + if modality == 'IMG': + x = _prepare_image(x) + return x + + +def chainer_np(x, modality): + import chainer + x = chainer.cuda.to_cpu(x.data) + if modality == 'IMG': + x = _prepare_image(x) + return x + + +def make_grid(I, ncols=8): + assert isinstance(I, np.ndarray), 'plugin error, should pass numpy array here' + assert I.ndim == 4 and I.shape[1] == 3 + nimg = I.shape[0] + H = I.shape[2] + W = I.shape[3] + ncols = min(nimg, ncols) + nrows = int(np.ceil(float(nimg) / ncols)) + canvas = np.zeros((3, H * nrows, W * ncols)) + i = 0 + for y in range(nrows): + for x in range(ncols): + if i >= nimg: + break + canvas[:, y * H:(y + 1) * H, x * W:(x + 1) * W] = I[i] + i = i + 1 + return canvas + + +def _prepare_image(I): + assert isinstance(I, np.ndarray), 'plugin error, should pass numpy array here' + assert I.ndim == 2 or I.ndim == 3 or I.ndim == 4 + if I.ndim == 4: # NCHW + if I.shape[1] == 1: # N1HW + I = np.concatenate((I, I, I), 1) # N3HW + assert I.shape[1] == 3 + I = make_grid(I) # 3xHxW + if I.ndim == 3 and I.shape[0] == 1: # 1xHxW + I = np.concatenate((I, I, I), 0) # 3xHxW + if I.ndim == 2: # HxW + I = np.expand_dims(I, 0) # 1xHxW + I = np.concatenate((I, I, I), 0) # 3xHxW + I = I.transpose(1, 2, 0) + + return I diff --git a/BiSTNet-NTIRE2023/test_NTIRE23_Track_1_FID.py b/BiSTNet-NTIRE2023/test_NTIRE23_Track_1_FID.py new file mode 100644 index 0000000..e51bc9e --- /dev/null +++ b/BiSTNet-NTIRE2023/test_NTIRE23_Track_1_FID.py @@ -0,0 +1,537 @@ +from __future__ import print_function + +import argparse +import glob +import os +import time + +import cv2 +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torchvision.transforms as transform_lib +import PIL +from PIL import Image +from tqdm import tqdm + +import lib.TestTransforms as transforms +from models.ColorVidNet import SPyNet + +from models.FrameColor import frame_colorization_20230311_tcvc as frame_colorization + +from models.NonlocalNet 
import VGG19_pytorch, WarpNet_debug +from utils.util import (batch_lab2rgb_transpose_mc, folder2vid, mkdir_if_not, + save_frames, save_frames_wOriName, tensor_lab2rgb, uncenter_l) +from utils.util_distortion import CenterPad, Normalize, RGB2Lab, ToTensor + +from collections import OrderedDict + +from torchvision import utils as vutils +from utils.util import gray2rgb_batch +import cv2 + +# PSNR SSIM +from skimage.metrics import mean_squared_error +from skimage.metrics import peak_signal_noise_ratio +import numpy as np + +# mmedit flow_warp +from mmedit.models.common import (PixelShufflePack, ResidualBlockNoBN, + flow_warp, make_layer) + + +# ATB block +from models.ColorVidNet import ColorVidNet_wBasicVSR_v2 as ColorVidNet +from models.ColorVidNet import ColorVidNet_wBasicVSR_v3 +from models.ColorVidNet import ATB_block as ATB + +# RAFT +from models.raft_core.raft import RAFT + + +# SuperSloMo +import models.superslomo_model as Superslomo +from torchvision import transforms as superslomo_transforms +from torch.functional import F +from collections import OrderedDict + + +# HED +from models.hed import Network as Hed + + +# Proto Seg +import pickle +from models.protoseg_core.segmentor.tester import Tester_inference as Tester +from models.protoseg_core.lib.utils.tools.logger import Logger as Log +from models.protoseg_core.lib.utils.tools.configer import Configer +from PIL import Image +from models.protoseg_core.lib.vis.palette import get_cityscapes_colors, get_ade_colors, get_lip_colors, get_camvid_colors +from models.protoseg_core.lib.utils.helpers.file_helper import FileHelper +from models.protoseg_core.lib.utils.helpers.image_helper import ImageHelper + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" +torch.cuda.set_device(0) + +def tensor_gray2rgb(input): + gray_IA = input + ab_channal = torch.cat([torch.zeros_like(input), torch.zeros_like(input)], dim=1) + gray_IA_rgb_from_gray = batch_lab2rgb_transpose_mc(gray_IA, ab_channal) + return gray_IA_rgb_from_gray + +def exists_or_mkdir(path, verbose=False): + try: + if not os.path.exists(path): + if verbose: + print("creates %s ..."%path) + os.makedirs(path) + return False + else: + if verbose: + print("%s exists ..."%path) + return True + except Exception as e: + print(e) + +def save_image_tensor(input_tensor: torch.Tensor, filename): + assert (len(input_tensor.shape) == 4 and input_tensor.shape[0] == 1) + input_tensor = input_tensor.clone().detach() + input_tensor = input_tensor.to(torch.device('cpu')) + vutils.save_image(input_tensor, filename) + + +def save_image_tensor_lab(input_tensor: torch.Tensor, filename): + assert (len(input_tensor.shape) == 4 and input_tensor.shape[0] == 1) + input_tensor = input_tensor.clone().detach() + input_tensor = input_tensor.to(torch.device('cpu')) + vutils.save_image(input_tensor, filename) + +def ColorVid_inference(I_list, I1reference_video, features_B, vggnet, nonlocal_net, colornet, joint_training=False, flag_forward=True): + # ref1 + I_last_lab_predict = None + colorvid1 = [] + similarity_map_list = [] + I_reference_lab = I1reference_video + + iter_item = range(len(I_list)) if flag_forward else range(len(I_list)-1, -1, -1) + print('ColorVid_inference1') if flag_forward else print('ColorVid_inference2') + for index, i_idx in enumerate(tqdm(iter_item)): + # for i_idx in iter_item: + with torch.autograd.set_grad_enabled(joint_training): + I_current_lab = I_list[i_idx] + if I_last_lab_predict is None: + I_last_lab_predict = torch.zeros_like(I_current_lab).cuda() + + 
I_current_nonlocal_lab_predict, similarity_map = frame_colorization( + I_current_lab, + I_reference_lab, + I_last_lab_predict, + features_B, + vggnet, + nonlocal_net, + colornet, + joint_training=joint_training, + feature_noise=0, + temperature=1e-10, + ) + # I_last_lab_predict = torch.cat((I_current_lab[:,:1,:,:], I_current_ab_predict), dim=1) + colorvid1.append(I_current_nonlocal_lab_predict) + similarity_map_list.append(similarity_map) + + return colorvid1, similarity_map_list + +def compute_flow(lrs, raft, flag_save_flow_warp): + n, t, c, h, w = lrs.size() + flows_forward = [] + flows_backward = [] + with torch.no_grad(): + idx = 0 + for image1, image2 in zip(lrs[0,:-1,:,:,:], lrs[0,1:,:,:,:]): + image1 = image1.unsqueeze(0) * 255. + image2 = image2.unsqueeze(0) * 255. + + flow_low, flow_forward = raft(image2, image1, iters=20, test_mode=True) + flow_low, flow_backward = raft(image1, image2, iters=20, test_mode=True) + flows_forward.append(flow_forward) + flows_backward.append(flow_backward) + return flows_forward, flows_backward + + +def bipropagation(colorvid1, colorvid2, I_list, flownet, atb, flag_save_flow_warp): + I_gray2rgbbatach_list = [gray2rgb_batch(I[:,:1,:,:]).unsqueeze(0) for I in I_list] + + lrs = torch.cat(I_gray2rgbbatach_list, dim = 1) + n, t, c, h, w = lrs.size() + flows_forward, flows_backward = compute_flow(lrs, flownet, flag_save_flow_warp) + + # return fused + return flows_forward, flows_backward + +def HED_EdgeMask(I_list): + joint_training = False + I_current_l = torch.cat(I_list, dim = 0)[:,:1,:,:] + I_current_lll = torch.cat([I_current_l, I_current_l, I_current_l], dim=1) + + ###### HED: Edge Detection ###### + tenInput2 = I_current_lll + + with torch.autograd.set_grad_enabled(joint_training): + hed_edge2 = hed(tenInput2).clip(0.0, 1.0) + + hed_edge_ori2 = hed_edge2 + return hed_edge_ori2 + +def proto_segmask(I_list, flag_save_protoseg=False): + # trans input resolution + I_current_l = torch.cat(I_list, dim = 0)[:,:1,:,:] + I_current_lll = torch.cat([I_current_l, I_current_l, I_current_l], dim=1) + input_protoseg = trans_forward_protoseg_lll(I_current_lll) + + configer = Configer() + data_dir = configer.get('data', 'data_dir') + if isinstance(data_dir, str): + data_dir = [data_dir] + abs_data_dir = [os.path.expanduser(x) for x in data_dir] + configer.update(['data', 'data_dir'], abs_data_dir) + project_dir = os.path.dirname(os.path.realpath(__file__)) + configer.add(['project_dir'], project_dir) + if configer.get('logging', 'log_to_file'): + log_file = configer.get('logging', 'log_file') + new_log_file = '{}_{}'.format(log_file, time.strftime("%Y-%m-%d_%X", time.localtime())) + configer.update(['logging', 'log_file'], new_log_file) + else: + configer.update(['logging', 'logfile_level'], None) + Log.init(logfile_level=configer.get('logging', 'logfile_level'), + stdout_level=configer.get('logging', 'stdout_level'), + log_file=configer.get('logging', 'log_file'), + log_format=configer.get('logging', 'log_format'), + rewrite=configer.get('logging', 'rewrite')) + + model = Tester(configer) + + with torch.no_grad(): + outputs = model.test_deep_exemplar(input_protoseg) + return outputs + + +def colorize_video(opt, input_path, reference_file, output_path, nonlocal_net, colornet, fusenet, vggnet, flownet, flag_lf_split_test_set, start_idx, end_idx): + # parameters for wls filter + wls_filter_on = True + lambda_value = 500 + sigma_color = 4 + + # processing folders + mkdir_if_not(output_path) + files = glob.glob(output_path + "*") + print("processing the folder:", 
input_path) + path, dirs, filenames = os.walk(input_path).__next__() + file_count = len(filenames) + filenames.sort(key=lambda f: int("".join(filter(str.isdigit, f) or -1))) + + if flag_lf_split_test_set: + filenames = filenames[start_idx:end_idx] + print('num of testing images: %s starts from: %s ends from: %s'%(len(filenames), filenames[0], filenames[-1])) + + transform = transforms.Compose( + # [CenterPad(opt.image_size), transform_lib.CenterCrop(opt.image_size), RGB2Lab(), ToTensor(), Normalize()] + [superslomo_transforms.Resize(opt_image_size), RGB2Lab(), ToTensor(), Normalize()] + ) + + transform_full_l = transforms.Compose( + # [CenterPad(opt.image_size), transform_lib.CenterCrop(opt.image_size), RGB2Lab(), ToTensor(), Normalize()] + [RGB2Lab(), ToTensor(), Normalize()] + ) + + I_list = [Image.open(os.path.join(input_path, frame_name)).convert('RGB') for frame_name in filenames] + I_list_large = [transform(frame1).unsqueeze(0).cuda() for frame1 in I_list] + + I_list_large_full_l = [transform_full_l(frame1).unsqueeze(0).cuda() for frame1 in I_list] + + I_list = [torch.nn.functional.interpolate(IA_lab_large, scale_factor=0.5, mode="bilinear") for IA_lab_large in I_list_large] + + print("reference name1:", reference_file[start_idx]) + ref_name1 = reference_file[start_idx] + with torch.no_grad(): + frame_ref = Image.open(ref_name1).convert('RGB') + IB_lab_large = transform(frame_ref).unsqueeze(0).cuda() + IB_lab1 = torch.nn.functional.interpolate(IB_lab_large, scale_factor=0.5, mode="bilinear") + I_reference_rgb_from_gray = gray2rgb_batch(IB_lab1[:, 0:1, :, :]) + features_B1 = vggnet(I_reference_rgb_from_gray, ["r12", "r22", "r32", "r42", "r52"], preprocess=True) + + print("reference name2:", reference_file[end_idx-1]) + ref_name2 = reference_file[end_idx-1] + with torch.no_grad(): + frame_ref = Image.open(ref_name2).convert('RGB') + IB_lab_large = transform(frame_ref).unsqueeze(0).cuda() + IB_lab2 = torch.nn.functional.interpolate(IB_lab_large, scale_factor=0.5, mode="bilinear") + I_reference_rgb_from_gray = gray2rgb_batch(IB_lab2[:, 0:1, :, :]) + features_B2 = vggnet(I_reference_rgb_from_gray, ["r12", "r22", "r32", "r42", "r52"], preprocess=True) + + + # ColorVid inference + colorvid1, similarity_map_list1 = ColorVid_inference(I_list, IB_lab1, features_B1, vggnet, nonlocal_net, colornet, joint_training=False, flag_forward=True) + colorvid2, similarity_map_list2 = ColorVid_inference(I_list, IB_lab2, features_B2, vggnet, nonlocal_net, colornet, joint_training=False, flag_forward=False) + colorvid2.reverse() + similarity_map_list2.reverse() + + # FUSION SimilarityMap + similarityMap = [] + for i in range(len(similarity_map_list1)): + # Fusion Mask Test + FusionMask = torch.gt(similarity_map_list1[i], similarity_map_list2[i]) + FusionMask = torch.cat([FusionMask,FusionMask,FusionMask], dim = 1) + + Fused_Color = colorvid2[i] + Fused_Color[FusionMask] = colorvid1[i][FusionMask] + similarityMap.append(Fused_Color) + + # HED EdgeMask + edgemask = HED_EdgeMask(I_list) + + # Proto Seg + segmask = proto_segmask(I_list, flag_save_protoseg=False) + + flows_forward, flows_backward = bipropagation(colorvid1, colorvid2, I_list, flownet, atb, flag_save_flow_warp=False) + + print('fusenet v1: concat ref1+ref2') + joint_training = False + for index, i_idx in enumerate(tqdm(range(len(I_list)))): + I_current_l = I_list[i_idx][:,:1,:,:] + I_current_ab = I_list[i_idx][:,1:,:,:] + + # module: atb_test + feat_fused, ab_fuse_videointerp, ab_fuse_atb = atb(colorvid1, colorvid2, flows_forward, flows_backward) + 
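+ # The fusion input below concatenates, along the channel dimension: the L
+ # channel of the current frame, the ab predictions from both references
+ # (colorvid1 / colorvid2), the ATB-fused feature, the ProtoSeg semantic mask,
+ # the HED edge mask, and the ab channels of the fused similarity map.
+ # fusenet is constructed with 33 input channels in the main script, so the
+ # concatenation is expected to total 33; the split between the feat_fused and
+ # segmask channels depends on the ATB block and the segmentation model.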
+ fuse_input = torch.cat([I_list[i_idx][:,:1,:,:], colorvid1[i_idx][:,1:,:,:], colorvid2[i_idx][:,1:,:,:], feat_fused[i_idx], segmask[i_idx,:,:,:].unsqueeze(0), edgemask[i_idx,:,:,:].unsqueeze(0), similarityMap[i_idx][:,1:,:,:]], dim=1) + + with torch.no_grad(): + level1_shape = [fuse_input.shape[2], fuse_input.shape[3]] + level2_shape = [int(fuse_input.shape[2]/2), int(fuse_input.shape[3]/2)] + level3_shape = [int(fuse_input.shape[2]/4), int(fuse_input.shape[3]/4)] + + # v0 + resize_b1tob2 = transform_lib.Resize(level2_shape) + resize_b2tob3 = transform_lib.Resize(level3_shape) + + input_pyr_b1 = fuse_input + input_pyr_b2 = resize_b1tob2(fuse_input) + input_pyr_b3 = resize_b2tob3(input_pyr_b2) + + + input_fusenet = [input_pyr_b1, input_pyr_b2, input_pyr_b3] + output_fusenet = fusenet(input_fusenet) + + I_current_ab_predict = output_fusenet[0] + + + IA_lab_large = I_list_large_full_l[i_idx] + curr_bs_l = IA_lab_large[:, 0:1, :, :] + curr_predict = ( + torch.nn.functional.interpolate(I_current_ab_predict.data.cpu(), scale_factor=2, mode="bilinear") * 1.25 + ) + curr_predict = ( + torch.nn.functional.interpolate(curr_predict, size=opt_image_size_ori, mode="bilinear") + ) + + + # filtering + if wls_filter_on: + guide_image = uncenter_l(curr_bs_l) * 255 / 100 + wls_filter = cv2.ximgproc.createFastGlobalSmootherFilter( + guide_image[0, 0, :, :].cpu().numpy().astype(np.uint8), lambda_value, sigma_color + ) + curr_predict_a = wls_filter.filter(curr_predict[0, 0, :, :].cpu().numpy()) + curr_predict_b = wls_filter.filter(curr_predict[0, 1, :, :].cpu().numpy()) + curr_predict_a = torch.from_numpy(curr_predict_a).unsqueeze(0).unsqueeze(0) + curr_predict_b = torch.from_numpy(curr_predict_b).unsqueeze(0).unsqueeze(0) + curr_predict_filter = torch.cat((curr_predict_a, curr_predict_b), dim=1) + IA_predict_rgb = batch_lab2rgb_transpose_mc(curr_bs_l[:32], curr_predict_filter[:32, ...]) + else: + IA_predict_rgb = batch_lab2rgb_transpose_mc(curr_bs_l[:32], curr_predict[:32, ...]) + + save_frames_wOriName(IA_predict_rgb, output_path, image_name=filenames[index]) + +def load_pth(model, pth_path): + nonlocal_test_path = pth_path + state_dict_nonlocal_net = torch.load(nonlocal_test_path) + new_state_dict = OrderedDict() + for k, v in state_dict_nonlocal_net.items(): + param = k.split(".") + k = ".".join(param[1:]) + new_state_dict[k] = v + model.load_state_dict(new_state_dict) + +if __name__ == "__main__": + flag_ntire23 = True # else use DAVIS raw ref dataset structure + flag_ntire23_OOMSplitVideo = False # else use DAVIS raw ref dataset structure && split videos to F300 F300-600 F600 + flag_ntire23_OOMSplitVideo_v2Automatic = True # split videos to len_interval + + epoch = 105000 + dirName_ckp = '20230311_NTIRE2023' + nonlocal_test_path = os.path.join("checkpoints/", "finetune_test0610/nonlocal_net_iter_6000.pth") + color_test_path = os.path.join("checkpoints/", "finetune_test0610/colornet_iter_6000.pth") + fusenet_path = os.path.join("checkpoints/", "%s/fusenet_iter_%s.pth"%(dirName_ckp, epoch)) + atb_path = os.path.join("checkpoints/", "%s/atb_iter_%s.pth"%(dirName_ckp, epoch)) + + + parser = argparse.ArgumentParser() + parser.add_argument( + "--frame_propagate", default=False, type=bool, help="propagation mode, , please check the paper" + ) + parser.add_argument("--image_size", type=int, default=[448 , 896], help="the image size, eg. 
[216,384]") + parser.add_argument("--cuda", action="store_false") + parser.add_argument("--gpu_ids", type=str, default="0", help="separate by comma") + + + # 20230215 ntire test set + parser.add_argument("--clip_path", type=str, default="../demo_dataset/input", help="path of input clips") + parser.add_argument("--ref_path", type=str, default="../demo_dataset/ref", help="path of refernce images") + parser.add_argument("--output_path", type=str, default="../demo_dataset/output", help="path of output clips") + + start_idx = 0 + end_idx = -1 + + # RAFT params + parser.add_argument('--model', default='data/raft-sintel.pth', type=str, help="restore checkpoint") + parser.add_argument('--path', help="dataset for evaluation") + parser.add_argument('--small', action='store_true', help='use small model') + parser.add_argument('--mixed_precision', action='store_true', help='use mixed precision') + parser.add_argument('--alternate_corr', action='store_true', help='use efficent correlation implementation') + + opt = parser.parse_args() + opt.gpu_ids = [int(x) for x in opt.gpu_ids.split(",")] + cudnn.benchmark = True + print("running on GPU", opt.gpu_ids) + + opt_clip_path = opt.clip_path + opt_ref_path = opt.ref_path + opt_output_path = opt.output_path + + nonlocal_net = WarpNet_debug(1) + colornet = ColorVidNet(7) + vggnet = VGG19_pytorch() + fusenet = ColorVidNet_wBasicVSR_v3(33, flag_propagation = False) + + + ### Flownet: raft version + flownet = RAFT(opt) + + ### ATB + atb = ATB() + + vggnet.load_state_dict(torch.load("data/vgg19_conv.pth")) + for param in vggnet.parameters(): + param.requires_grad = False + + load_pth(nonlocal_net, nonlocal_test_path) + load_pth(colornet, color_test_path) + load_pth(fusenet, fusenet_path) + load_pth(flownet, opt.model) + load_pth(atb, atb_path) + print("succesfully load nonlocal model: ", nonlocal_test_path) + print("succesfully load color model: ", color_test_path) + print("succesfully load fusenet model: ", fusenet_path) + print("succesfully load flownet model: ", 'raft') + print("succesfully load atb model: ", atb_path) + + fusenet.eval() + fusenet.cuda() + flownet.eval() + flownet.cuda() + atb.eval() + atb.cuda() + nonlocal_net.eval() + colornet.eval() + vggnet.eval() + nonlocal_net.cuda() + colornet.cuda() + vggnet.cuda() + + + opt_image_size = opt.image_size + + # HED + hed = Hed().cuda().eval() + w0, h0 = opt_image_size[0], opt_image_size[1] + w, h = (w0 // 32) * 32, (h0 // 32) * 32 + # forward l + intWidth = 480 + intHeight = 320 + meanlab = [-50, -50, -50] # (A - mean) / std + stdlab = [100, 100, 100] # (A - mean) / std + trans_forward_hed_lll = superslomo_transforms.Compose([superslomo_transforms.Normalize(mean=meanlab, std=stdlab), superslomo_transforms.Resize([intHeight, intWidth])]) + # backward + trans_backward = superslomo_transforms.Compose([superslomo_transforms.Resize([w0,h0])]) + + # proto seg + meanlab_protoseg = [0.485, 0.485, 0.485] # (A - mean) / std + stdlab_protoseg = [0.229, 0.229, 0.229] # (A - mean) / std + trans_forward_protoseg_lll = superslomo_transforms.Compose([superslomo_transforms.Normalize(mean=meanlab, std=stdlab), superslomo_transforms.Normalize(mean=meanlab_protoseg, std=stdlab_protoseg)]) + + + # dataset preprocessing for batch testing + clips = sorted(os.listdir(opt_clip_path)) + opt_clip_path_ori = opt_clip_path + opt_ref_path_ori = opt_ref_path + opt_output_path_ori = opt_output_path + + for idx_clip, clip in enumerate(clips): + dirTestImageName = os.path.join(opt_clip_path_ori, 
sorted(os.listdir(opt_clip_path_ori))[idx_clip]) + TestImageName = os.path.join(opt_clip_path_ori, sorted(os.listdir(opt_clip_path_ori))[idx_clip], os.listdir(dirTestImageName)[0]) + test_img = Image.open(TestImageName).convert('RGB') + opt_image_size_ori = np.shape(test_img)[:2] + + opt_image_size = opt.image_size + + dirName_input = os.path.join(opt_clip_path_ori, clip) + dirName_ref = os.path.join(opt_ref_path_ori, clip) + dirName_output = os.path.join(opt_output_path_ori, clip) + + opt_clip_path = dirName_input + opt_ref_path = dirName_ref + opt_output_path = dirName_output + + print(idx_clip, clip, opt_clip_path, opt_ref_path, opt_output_path) + + exists_or_mkdir(dirName_output) + clip_name = opt_clip_path.split("/")[-1] + refs = os.listdir(opt_ref_path) + refs.sort() + + ref_name = refs[start_idx].split('.')[0] + '_' + refs[end_idx].split('.')[0] + + len_interval = 50 + flag_lf_split_test_set = True + + for i in range(0, len(refs), len_interval): + if i != 0: + sub_ref = refs[i-1:i + len_interval] + ActStartIdx = i-1 + ActEndIdx = i + len_interval + else: + sub_ref = refs[i:i + len_interval] + ActStartIdx = i + ActEndIdx = i + len_interval + ActEndIdx = min(ActEndIdx, len(refs)) + + print(i, 'startImg: %s endImg: %s, ActStartIdx: %s, ActEndIdx: %s'%(sub_ref[0], sub_ref[-1], ActStartIdx, ActEndIdx)) + + colorize_video( + opt, + opt_clip_path, + [os.path.join(opt_ref_path, name) for name in refs], + # os.path.join(opt_output_path, clip_name + "_" + ref_name.split(".")[0]), + os.path.join(opt_output_path), + nonlocal_net, + colornet, + fusenet, + vggnet, + flownet, + flag_lf_split_test_set, + ActStartIdx, + ActEndIdx, + ) + + + diff --git a/BiSTNet-NTIRE2023/utils/__init__.py b/BiSTNet-NTIRE2023/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/BiSTNet-NTIRE2023/utils/flowlib.py b/BiSTNet-NTIRE2023/utils/flowlib.py new file mode 100644 index 0000000..5dae596 --- /dev/null +++ b/BiSTNet-NTIRE2023/utils/flowlib.py @@ -0,0 +1,523 @@ +#!/usr/bin/python +""" +# ============================== +# flowlib.py +# library for optical flow processing +# Author: Ruoteng Li +# Date: 6th Aug 2016 +# ============================== +""" +import matplotlib.colors as cl +import matplotlib.pyplot as plt +import numpy as np +import png +from PIL import Image + +UNKNOWN_FLOW_THRESH = 1e7 +SMALLFLOW = 0.0 +LARGEFLOW = 1e8 + +""" +============= +Flow Section +============= +""" + + +def show_flow(filename): + """ + visualize optical flow map using matplotlib + :param filename: optical flow file + :return: None + """ + flow = read_flow(filename) + img = flow_to_image(flow) + plt.imshow(img) + plt.show() + + +def visualize_flow(flow, mode="Y"): + """ + this function visualize the input flow + :param flow: input flow in array + :param mode: choose which color mode to visualize the flow (Y: Ccbcr, RGB: RGB color) + :return: None + """ + if mode == "Y": + # Ccbcr color wheel + img = flow_to_image(flow) + plt.imshow(img) + plt.show() + elif mode == "RGB": + (h, w) = flow.shape[0:2] + du = flow[:, :, 0] + dv = flow[:, :, 1] + valid = flow[:, :, 2] + max_flow = max(np.max(du), np.max(dv)) + img = np.zeros((h, w, 3), dtype=np.float64) + # angle layer + img[:, :, 0] = np.arctan2(dv, du) / (2 * np.pi) + # magnitude layer, normalized to 1 + img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow + # phase layer + img[:, :, 2] = 8 - img[:, :, 1] + # clip to [0,1] + small_idx = img[:, :, 0:3] < 0 + large_idx = img[:, :, 0:3] > 1 + img[small_idx] = 0 + img[large_idx] = 1 + # convert to rgb + img = 
cl.hsv_to_rgb(img) + # remove invalid point + img[:, :, 0] = img[:, :, 0] * valid + img[:, :, 1] = img[:, :, 1] * valid + img[:, :, 2] = img[:, :, 2] * valid + # show + plt.imshow(img) + plt.show() + + return None + + +def read_flow(filename): + """ + read optical flow from Middlebury .flo file + :param filename: name of the flow file + :return: optical flow data in matrix + """ + f = open(filename, "rb") + try: + magic = np.fromfile(f, np.float32, count=1)[0] # For Python3.x + except: + magic = np.fromfile(f, np.float32, count=1) # For Python2.x + data2d = None + + if 202021.25 != magic: + print("Magic number incorrect. Invalid .flo file") + else: + w = np.fromfile(f, np.int32, count=1)[0] + h = np.fromfile(f, np.int32, count=1)[0] + # print("Reading %d x %d flo file" % (h, w)) + data2d = np.fromfile(f, np.float32, count=2 * w * h) + # reshape data into 3D array (columns, rows, channels) + data2d = np.resize(data2d, (h, w, 2)) + f.close() + return data2d + + +def read_flow_png(flow_file): + """ + Read optical flow from KITTI .png file + :param flow_file: name of the flow file + :return: optical flow data in matrix + """ + flow_object = png.Reader(filename=flow_file) + flow_direct = flow_object.asDirect() + flow_data = list(flow_direct[2]) + (w, h) = flow_direct[3]["size"] + flow = np.zeros((h, w, 3), dtype=np.float64) + for i in range(len(flow_data)): + flow[i, :, 0] = flow_data[i][0::3] + flow[i, :, 1] = flow_data[i][1::3] + flow[i, :, 2] = flow_data[i][2::3] + + invalid_idx = flow[:, :, 2] == 0 + flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0 + flow[invalid_idx, 0] = 0 + flow[invalid_idx, 1] = 0 + return flow + + +def write_flow(flow, filename): + """ + write optical flow in Middlebury .flo format + :param flow: optical flow map + :param filename: optical flow file path to be saved + :return: None + """ + with open(filename, "wb") as f: + magic = np.array([202021.25], dtype=np.float32) + (height, width) = flow.shape[0:2] + w = np.array([width], dtype=np.int32) + h = np.array([height], dtype=np.int32) + magic.tofile(f) + w.tofile(f) + h.tofile(f) + flow.tofile(f) + + +def segment_flow(flow): + h = flow.shape[0] + w = flow.shape[1] + u = flow[:, :, 0] + v = flow[:, :, 1] + + idx = (abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW) + idx2 = abs(u) == SMALLFLOW + class0 = (v == 0) & (u == 0) + u[idx2] = 0.00001 + tan_value = v / u + + class1 = (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0) + class2 = (tan_value >= 1) & (u >= 0) & (v >= 0) + class3 = (tan_value < -1) & (u <= 0) & (v >= 0) + class4 = (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0) + class8 = (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0) + class7 = (tan_value < -1) & (u >= 0) & (v <= 0) + class6 = (tan_value >= 1) & (u <= 0) & (v <= 0) + class5 = (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0) + + seg = np.zeros((h, w)) + + seg[class1] = 1 + seg[class2] = 2 + seg[class3] = 3 + seg[class4] = 4 + seg[class5] = 5 + seg[class6] = 6 + seg[class7] = 7 + seg[class8] = 8 + seg[class0] = 0 + seg[idx] = 0 + + return seg + + +def flow_error(tu, tv, u, v): + """ + Calculate average end point error + :param tu: ground-truth horizontal flow map + :param tv: ground-truth vertical flow map + :param u: estimated horizontal flow map + :param v: estimated vertical flow map + :return: End point error of the estimated flow + """ + smallflow = 0.0 + """ + stu = tu[bord+1:end-bord,bord+1:end-bord] + stv = tv[bord+1:end-bord,bord+1:end-bord] + su = u[bord+1:end-bord,bord+1:end-bord] + sv = 
v[bord+1:end-bord,bord+1:end-bord] + """ + stu = tu[:] + stv = tv[:] + su = u[:] + sv = v[:] + + idxUnknow = (abs(stu) > UNKNOWN_FLOW_THRESH) | (abs(stv) > UNKNOWN_FLOW_THRESH) + stu[idxUnknow] = 0 + stv[idxUnknow] = 0 + su[idxUnknow] = 0 + sv[idxUnknow] = 0 + + ind2 = [(np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)] + index_su = su[ind2] + index_sv = sv[ind2] + an = 1.0 / np.sqrt(index_su ** 2 + index_sv ** 2 + 1) + un = index_su * an + vn = index_sv * an + + index_stu = stu[ind2] + index_stv = stv[ind2] + tn = 1.0 / np.sqrt(index_stu ** 2 + index_stv ** 2 + 1) + tun = index_stu * tn + tvn = index_stv * tn + + """ + angle = un * tun + vn * tvn + (an * tn) + index = [angle == 1.0] + angle[index] = 0.999 + ang = np.arccos(angle) + mang = np.mean(ang) + mang = mang * 180 / np.pi + """ + + epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2) + epe = epe[ind2] + return np.mean(epe) + + +def flow_to_image(flow, display=False): + """ + Convert flow into middlebury color code image + :param flow: optical flow map + :return: optical flow image in middlebury color + """ + u = flow[:, :, 0] + v = flow[:, :, 1] + + maxu = -999.0 + maxv = -999.0 + minu = 999.0 + minv = 999.0 + + idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) + u[idxUnknow] = 0 + v[idxUnknow] = 0 + + maxu = max(maxu, np.max(u)) + minu = min(minu, np.min(u)) + + maxv = max(maxv, np.max(v)) + minv = min(minv, np.min(v)) + + rad = np.sqrt(u ** 2 + v ** 2) + maxrad = max(-1, np.max(rad)) + + if display: + print("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. %.3f" % (maxrad, minu, maxu, minv, maxv)) + + u = u / (maxrad + np.finfo(float).eps) + v = v / (maxrad + np.finfo(float).eps) + + img = compute_color(u, v) + + idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) + img[idx] = 0 + + return np.uint8(img) + + +def evaluate_flow_file(gt, pred): + """ + evaluate the estimated optical flow end point error according to ground truth provided + :param gt: ground truth file path + :param pred: estimated optical flow file path + :return: end point error, float32 + """ + # Read flow files and calculate the errors + gt_flow = read_flow(gt) # ground truth flow + eva_flow = read_flow(pred) # predicted flow + # Calculate errors + average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1]) + return average_pe + + +def evaluate_flow(gt_flow, pred_flow): + """ + gt: ground-truth flow + pred: estimated flow + """ + return flow_error( + gt_flow[:, :, 0], + gt_flow[:, :, 1], + pred_flow[:, :, 0], + pred_flow[:, :, 1], + ) + + +""" +============== +Disparity Section +============== +""" + + +def read_disp_png(file_name): + """ + Read optical flow from KITTI .png file + :param file_name: name of the flow file + :return: optical flow data in matrix + """ + image_object = png.Reader(filename=file_name) + image_direct = image_object.asDirect() + image_data = list(image_direct[2]) + (w, h) = image_direct[3]["size"] + channel = len(image_data[0]) / w + flow = np.zeros((h, w, channel), dtype=np.uint16) + for i in range(len(image_data)): + for j in range(channel): + flow[i, :, j] = image_data[i][j::channel] + return flow[:, :, 0] / 256 + + +def disp_to_flowfile(disp, filename): + """ + Read KITTI disparity file in png format + :param disp: disparity matrix + :param filename: the flow file name to save + :return: None + """ + f = open(filename, "wb") + magic = np.array([202021.25], dtype=np.float32) + (height, width) = disp.shape[0:2] + w = np.array([width], dtype=np.int32) 
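+ # Middlebury .flo layout, matching read_flow/write_flow above: a float32 magic
+ # number 202021.25, then int32 width and int32 height, then width*height*2
+ # float32 values with u and v interleaved per pixel (row-major).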
+ h = np.array([height], dtype=np.int32) + empty_map = np.zeros((height, width), dtype=np.float32) + data = np.dstack((disp, empty_map)) + magic.tofile(f) + w.tofile(f) + h.tofile(f) + data.tofile(f) + f.close() + + +""" +============== +Image Section +============== +""" + + +def read_image(filename): + """ + Read normal image of any format + :param filename: name of the image file + :return: image data in matrix uint8 type + """ + img = Image.open(filename) + return np.array(img) + + +def warp_image(im, flow): + """ + Use optical flow to warp image to the next + :param im: image to warp + :param flow: optical flow + :return: warped image + """ + from scipy import interpolate + + image_height = im.shape[0] + image_width = im.shape[1] + flow_height = flow.shape[0] + flow_width = flow.shape[1] + n = image_height * image_width + (iy, ix) = np.float32(np.mgrid[0:image_height, 0:image_width]) + (fy, fx) = np.float32(np.mgrid[0:flow_height, 0:flow_width]) + fx += flow[:, :, 0].astype(np.float) + fy += flow[:, :, 1] + mask = np.logical_or(fx < 0, fx > flow_width) + mask = np.logical_or(mask, fy < 0) + mask = np.logical_or(mask, fy > flow_height) + fx = np.minimum(np.maximum(fx, 0), flow_width) + fy = np.minimum(np.maximum(fy, 0), flow_height) + points = np.concatenate((ix.reshape(n, 1), iy.reshape(n, 1)), axis=1) + xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n, 1)), axis=1) + warp = np.zeros((image_height, image_width, im.shape[2])) + for i in range(im.shape[2]): + channel = im[:, :, i] + # plt.imshow(channel, cmap='gray') + values = channel.reshape(n, 1) + new_channel = interpolate.griddata(points, values, xi, method="cubic") + new_channel = np.reshape(new_channel, [flow_height, flow_width]) + new_channel[mask] = 1 + warp[:, :, i] = new_channel.astype(np.uint8) + + return warp.astype(np.uint8) + + +""" +============== +Others +============== +""" + + +def scale_image(image, new_range): + """ + Linearly scale the image into desired range + :param image: input image + :param new_range: the new range to be aligned + :return: image normalized in new range + """ + min_val = np.min(image).astype(np.float32) + max_val = np.max(image).astype(np.float32) + min_val_new = np.array(min(new_range), dtype=np.float32) + max_val_new = np.array(max(new_range), dtype=np.float32) + scaled_image = (image - min_val) / (max_val - min_val) * (max_val_new - min_val_new) + min_val_new + return scaled_image.astype(np.uint8) + + +def compute_color(u, v): + """ + compute optical flow color map + :param u: optical flow horizontal map + :param v: optical flow vertical map + :return: optical flow in color code + """ + [h, w] = u.shape + img = np.zeros([h, w, 3]) + nanIdx = np.isnan(u) | np.isnan(v) + u[nanIdx] = 0 + v[nanIdx] = 0 + + colorwheel = make_color_wheel() + ncols = np.size(colorwheel, 0) + + rad = np.sqrt(u ** 2 + v ** 2) + + a = np.arctan2(-v, -u) / np.pi + + fk = (a + 1) / 2 * (ncols - 1) + 1 + + k0 = np.floor(fk).astype(int) + + k1 = k0 + 1 + k1[k1 == ncols + 1] = 1 + f = fk - k0 + + for i in range(np.size(colorwheel, 1)): + tmp = colorwheel[:, i] + col0 = tmp[k0 - 1] / 255 + col1 = tmp[k1 - 1] / 255 + col = (1 - f) * col0 + f * col1 + + idx = rad <= 1 + col[idx] = 1 - rad[idx] * (1 - col[idx]) + notidx = np.logical_not(idx) + + col[notidx] *= 0.75 + img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx))) + + return img + + +def make_color_wheel(): + """ + Generate color wheel according Middlebury color code + :return: Color wheel + """ + RY = 15 + YG = 6 + GC = 4 + CB = 11 + BM = 13 + MR = 6 + + ncols 
= RY + YG + GC + CB + BM + MR + + colorwheel = np.zeros([ncols, 3]) + + col = 0 + + # RY + colorwheel[0:RY, 0] = 255 + colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) + col += RY + + # YG + colorwheel[col : col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) + colorwheel[col : col + YG, 1] = 255 + col += YG + + # GC + colorwheel[col : col + GC, 1] = 255 + colorwheel[col : col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) + col += GC + + # CB + colorwheel[col : col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) + colorwheel[col : col + CB, 2] = 255 + col += CB + + # BM + colorwheel[col : col + BM, 2] = 255 + colorwheel[col : col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) + col += +BM + + # MR + colorwheel[col : col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) + colorwheel[col : col + MR, 0] = 255 + + return colorwheel diff --git a/BiSTNet-NTIRE2023/utils/tb_image_recorder.py b/BiSTNet-NTIRE2023/utils/tb_image_recorder.py new file mode 100644 index 0000000..39ee0e4 --- /dev/null +++ b/BiSTNet-NTIRE2023/utils/tb_image_recorder.py @@ -0,0 +1,28 @@ +import threading + +import numpy as np + + +class TBImageRecorder(threading.Thread): + """ + TBImageRecorder + """ + + def __init__(self, tb_writer, func, queue): + super(TBImageRecorder, self).__init__() + self._tb_writer = tb_writer + self._func = func + self._queue = queue + + def run(self): + while True: + msgs, iter_index = self._queue.get() + if msgs: + img_info = self._func(*msgs) + + print("logging the images") + for tag, images in img_info.items(): + if images is not None: + self._tb_writer.add_image(tag, np.clip(images, 0, 255).astype(np.uint8), iter_index) + else: + break diff --git a/BiSTNet-NTIRE2023/utils/util.py b/BiSTNet-NTIRE2023/utils/util.py new file mode 100644 index 0000000..e354fab --- /dev/null +++ b/BiSTNet-NTIRE2023/utils/util.py @@ -0,0 +1,402 @@ +import os +import shutil +import sys + +import cv2 +import matplotlib.pyplot as plt +import numpy as np +import torch +import torchvision.utils as vutils +from skimage import color, io +from torch.autograd import Variable + +import torch.nn as nn +import torch.nn.functional as F + +cv2.setNumThreads(0) + +# l: [-50,50] +# ab: [-128, 128] +l_norm, ab_norm = 1.0, 1.0 +l_mean, ab_mean = 50.0, 0 + + +###### utility ###### +def to_np(x): + return x.data.cpu().numpy() + + +def utf8_str(in_str): + try: + in_str = in_str.decode("UTF-8") + except Exception: + in_str = in_str.encode("UTF-8").decode("UTF-8") + return in_str + + +class MovingAvg(object): + def __init__(self, pool_size=100): + from queue import Queue + + self.pool = Queue(maxsize=pool_size) + self.sum = 0 + self.curr_pool_size = 0 + + def set_curr_val(self, val): + if not self.pool.full(): + self.curr_pool_size += 1 + self.pool.put_nowait(val) + else: + last_first_val = self.pool.get_nowait() + self.pool.put_nowait(val) + self.sum -= last_first_val + + self.sum += val + return self.sum / self.curr_pool_size + + +###### image normalization ###### +def center_l(l): + # normalization for l + l_mc = (l - l_mean) / l_norm + return l_mc + + +# denormalization for l +def uncenter_l(l): + return l * l_norm + l_mean + + +# normalization for ab +def center_ab(ab): + return (ab - ab_mean) / ab_norm + +# normalization for ab +def uncenter_ab(ab): + return ab * ab_norm + ab_mean + + +# normalization for lab image +def center_lab_img(img_lab): + return ( + img_lab / np.array((l_norm, ab_norm, ab_norm))[:, 
np.newaxis, np.newaxis] + - np.array((l_mean / l_norm, ab_mean / ab_norm, ab_mean / ab_norm))[:, np.newaxis, np.newaxis] + ) + + +###### color space transformation ###### +def rgb2lab_transpose(img_rgb): + return color.rgb2lab(img_rgb).transpose((2, 0, 1)) + + +def lab2rgb(img_l, img_ab): + """INPUTS + img_l XxXx1 [0,100] + img_ab XxXx2 [-100,100] + OUTPUTS + returned value is XxXx3""" + pred_lab = np.concatenate((img_l, img_ab), axis=2).astype("float64") + pred_rgb = color.lab2rgb(pred_lab) + pred_rgb = (np.clip(pred_rgb, 0, 1) * 255).astype("uint8") + return pred_rgb + + +def gray2rgb_batch(l): + # gray image tensor to rgb image tensor + l_uncenter = uncenter_l(l) + l_uncenter = l_uncenter / (2 * l_mean) + return torch.cat((l_uncenter, l_uncenter, l_uncenter), dim=1) + + +def lab2rgb_transpose(img_l, img_ab): + """INPUTS + img_l 1xXxX [0,100] + img_ab 2xXxX [-100,100] + OUTPUTS + returned value is XxXx3""" + pred_lab = np.concatenate((img_l, img_ab), axis=0).transpose((1, 2, 0)) + return (np.clip(color.lab2rgb(pred_lab), 0, 1) * 255).astype("uint8") + + +def lab2rgb_transpose_mc(img_l_mc, img_ab_mc): + if isinstance(img_l_mc, Variable): + img_l_mc = img_l_mc.data.cpu() + if isinstance(img_ab_mc, Variable): + img_ab_mc = img_ab_mc.data.cpu() + + if img_l_mc.is_cuda: + img_l_mc = img_l_mc.cpu() + if img_ab_mc.is_cuda: + img_ab_mc = img_ab_mc.cpu() + + assert img_l_mc.dim() == 3 and img_ab_mc.dim() == 3, "only for batch input" + + img_l = img_l_mc * l_norm + l_mean + img_ab = img_ab_mc * ab_norm + ab_mean + pred_lab = torch.cat((img_l, img_ab), dim=0) + grid_lab = pred_lab.numpy().astype("float64") + return (np.clip(color.lab2rgb(grid_lab.transpose((1, 2, 0))), 0, 1) * 255).astype("uint8") + + +def batch_lab2rgb_transpose_mc(img_l_mc, img_ab_mc, nrow=8): + if isinstance(img_l_mc, Variable): + img_l_mc = img_l_mc.data.cpu() + if isinstance(img_ab_mc, Variable): + img_ab_mc = img_ab_mc.data.cpu() + + if img_l_mc.is_cuda: + img_l_mc = img_l_mc.cpu() + if img_ab_mc.is_cuda: + img_ab_mc = img_ab_mc.cpu() + + assert img_l_mc.dim() == 4 and img_ab_mc.dim() == 4, "only for batch input" + + img_l = img_l_mc * l_norm + l_mean + img_ab = img_ab_mc * ab_norm + ab_mean + pred_lab = torch.cat((img_l, img_ab), dim=1) + grid_lab = vutils.make_grid(pred_lab, nrow=nrow).numpy().astype("float64") + return (np.clip(color.lab2rgb(grid_lab.transpose((1, 2, 0))), 0, 1) * 255).astype("uint8") + + +###### loss functions ###### +def feature_normalize(feature_in): + feature_in_norm = torch.norm(feature_in, 2, 1, keepdim=True) + sys.float_info.epsilon + feature_in_norm = torch.div(feature_in, feature_in_norm) + return feature_in_norm + + +def statistics_matching(feature1, feature2): + N, C, H, W = feature1.shape + feature1 = feature1.view(N, C, -1) + feature2 = feature2.view(N, C, -1) + + mean1 = feature1.mean(dim=-1) + mean2 = feature2.mean(dim=-1) + std1 = feature1.var(dim=-1).sqrt() + std2 = feature2.var(dim=-1).sqrt() + + return mse_loss(mean1, mean2) + mse_loss(std1, std2) + + +def cosine_similarity(input, target): + input_norm = torch.norm(input, 2, 1, keepdim=True) + sys.float_info.epsilon + target_norm = torch.norm(target, 2, 1, keepdim=True) + sys.float_info.epsilon + normalized_input = torch.div(input, input_norm) + normalized_target = torch.div(target, target_norm) + cos_similarity = torch.mul(normalized_input, normalized_target) + return torch.sum(cos_similarity, dim=1, keepdim=True) + + +def mse_loss(input, target=0): + return torch.mean((input - target) ** 2) + + +def l1_loss(input, target=0): + return 
torch.mean(torch.abs(input - target)) + +def edge_loss(input, target=0): + return torch.mean(torch.abs(input - target)) + +def calc_ab_gradient(input_ab): + x_grad = input_ab[:, :, :, 1:] - input_ab[:, :, :, :-1] + y_grad = input_ab[:, :, 1:, :] - input_ab[:, :, :-1, :] + return x_grad, y_grad + + +def calc_tv_loss(input): + x_grad = input[:, :, :, 1:] - input[:, :, :, :-1] + y_grad = input[:, :, 1:, :] - input[:, :, :-1, :] + return torch.sum(x_grad ** 2) / x_grad.nelement() + torch.sum(y_grad ** 2) / y_grad.nelement() + + +def calc_cosine_dist_loss(input, target): + input_norm = torch.norm(input, 2, 1, keepdim=True) + sys.float_info.epsilon + target_norm = torch.norm(target, 2, 1, keepdim=True) + sys.float_info.epsilon + normalized_input = torch.div(input, input_norm) + normalized_target = torch.div(target, target_norm) + cos_dist = torch.mul(normalized_input, normalized_target) + return torch.mean(1 - torch.sum(cos_dist, dim=1)) + + +###### video related ####### +def save_frames(image, image_folder, index=None, image_name=None): + if image is not None: + image = np.clip(image, 0, 255).astype(np.uint8) + if image_name: + io.imsave(os.path.join(image_folder, image_name), image) + else: + # io.imsave(os.path.join(image_folder, str(index).zfill(5) + ".jpg"), image) + io.imsave(os.path.join(image_folder, str(index).zfill(5) + ".png"), image) + +def save_frames_wOriName(image, image_folder, index=None, image_name=None): + if image is not None: + image = np.clip(image, 0, 255).astype(np.uint8) + if image_name: + io.imsave(os.path.join(image_folder, image_name), image) + else: + # io.imsave(os.path.join(image_folder, str(index).zfill(5) + ".jpg"), image) + io.imsave(os.path.join(image_folder, str(index).zfill(5) + ".png"), image) + +def folder2vid(image_folder, output_dir, filename): + images = [img for img in os.listdir(image_folder) if img.endswith(".jpg") or img.endswith(".png")] + images.sort() + frame = cv2.imread(os.path.join(image_folder, images[0])) + height, width, layers = frame.shape + print("writing to video " + os.path.join(output_dir, filename)) + video = cv2.VideoWriter( + os.path.join(output_dir, filename), cv2.VideoWriter_fourcc("D", "I", "V", "X"), 24, (width, height) + ) + + for image in images: + video.write(cv2.imread(os.path.join(image_folder, image))) + + video.release() + + # import imageio + # frames = [] + # for image in images: + # frames.append(imageio.imread(os.path.join(image_folder, image))) + # imageio.mimsave('movie.gif', frames) + + +###### file system ###### +def get_size(start_path="."): + total_size = 0 + for dirpath, dirnames, filenames in os.walk(start_path): + for f in filenames: + fp = os.path.join(dirpath, f) + total_size += os.path.getsize(fp) + return total_size + + +def mkdir_if_not(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + + +def parse(parser, save=True): + opt = parser.parse_args(args=[]) + args = vars(opt) + + from time import gmtime, strftime + + print("------------ Options -------------") + for k, v in sorted(args.items()): + print("%s: %s" % (str(k), str(v))) + print("-------------- End ----------------") + + # save to the disk + if save: + file_name = os.path.join("opt.txt") + with open(file_name, "wt") as opt_file: + opt_file.write(os.path.basename(sys.argv[0]) + " " + strftime("%Y-%m-%d %H:%M:%S", gmtime()) + "\n") + opt_file.write("------------ Options -------------\n") + for k, v in sorted(args.items()): + opt_file.write("%s: %s\n" % (str(k), str(v))) + opt_file.write("-------------- End ----------------\n") + 
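+ # Note: parse_args(args=[]) parses an empty argument list, so any real
+ # command-line flags are ignored here and only the defaults registered on the
+ # parser are used; with save=True the resolved options are also dumped to opt.txt.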
return opt + + +###### interactive ###### +def clean_tensorboard(directory): + folder_list = os.walk(directory).__next__()[1] + for folder in folder_list: + folder = directory + folder + if get_size(folder) < 10000000: + print("delete the folder of " + folder) + shutil.rmtree(folder) + + +def imshow(input_image, title=None, type_conversion=False): + inp = input_image + if type_conversion or type(input_image) is torch.Tensor: + inp = input_image.numpy() + else: + inp = input_image + fig = plt.figure() + if inp.ndim == 2: + fig = plt.imshow(inp, cmap="gray", clim=[0, 255]) + else: + fig = plt.imshow(np.transpose(inp, [1, 2, 0]).astype(np.uint8)) + plt.axis("off") + fig.axes.get_xaxis().set_visible(False) + fig.axes.get_yaxis().set_visible(False) + plt.title(title) + + +def imshow_lab(input_lab): + plt.imshow((batch_lab2rgb_transpose_mc(input_lab[:32, 0:1, :, :], input_lab[:32, 1:3, :, :])).astype(np.uint8)) + + +###### vgg preprocessing ###### +def vgg_preprocess(tensor): + # input is RGB tensor which ranges in [0,1] + # output is BGR tensor which ranges in [0,255] + tensor_bgr = torch.cat((tensor[:, 2:3, :, :], tensor[:, 1:2, :, :], tensor[:, 0:1, :, :]), dim=1) + tensor_bgr_ml = tensor_bgr - torch.Tensor([0.40760392, 0.45795686, 0.48501961]).type_as(tensor_bgr).view(1, 3, 1, 1) + return tensor_bgr_ml * 255 + + +def torch_vgg_preprocess(tensor): + # pytorch version normalization + # note that both input and output are RGB tensors; + # input and output ranges in [0,1] + # normalize the tensor with mean and variance + tensor_mc = tensor - torch.Tensor([0.485, 0.456, 0.406]).type_as(tensor).view(1, 3, 1, 1) + return tensor_mc / torch.Tensor([0.229, 0.224, 0.225]).type_as(tensor_mc).view(1, 3, 1, 1) + + +def network_gradient(net, gradient_on=True): + for param in net.parameters(): + param.requires_grad = bool(gradient_on) + return net + + +##### color space +xyz_from_rgb = np.array( + [[0.412453, 0.357580, 0.180423], [0.212671, 0.715160, 0.072169], [0.019334, 0.119193, 0.950227]] +) +rgb_from_xyz = np.array( + [[3.24048134, -0.96925495, 0.05564664], [-1.53715152, 1.87599, -0.20404134], [-0.49853633, 0.04155593, 1.05731107]] +) + + +def tensor_lab2rgb(input): + """ + n * 3* h *w + """ + input_trans = input.transpose(1, 2).transpose(2, 3) # n * h * w * 3 + L, a, b = input_trans[:, :, :, 0:1], input_trans[:, :, :, 1:2], input_trans[:, :, :, 2:] + y = (L + 16.0) / 116.0 + x = (a / 500.0) + y + z = y - (b / 200.0) + + neg_mask = z.data < 0 + z[neg_mask] = 0 + xyz = torch.cat((x, y, z), dim=3) + + mask = xyz.data > 0.2068966 + mask_xyz = xyz.clone() + mask_xyz[mask] = torch.pow(xyz[mask], 3.0) + mask_xyz[~mask] = (xyz[~mask] - 16.0 / 116.0) / 7.787 + mask_xyz[:, :, :, 0] = mask_xyz[:, :, :, 0] * 0.95047 + mask_xyz[:, :, :, 2] = mask_xyz[:, :, :, 2] * 1.08883 + + rgb_trans = torch.mm(mask_xyz.view(-1, 3), torch.from_numpy(rgb_from_xyz).type_as(xyz)).view( + input.size(0), input.size(2), input.size(3), 3 + ) + rgb = rgb_trans.transpose(2, 3).transpose(1, 2) + + mask = rgb > 0.0031308 + mask_rgb = rgb.clone() + mask_rgb[mask] = 1.055 * torch.pow((rgb[mask].abs() + 1e-8), 1 / 2.4) - 0.055 + mask_rgb[~mask] = rgb[~mask] * 12.92 + + neg_mask = mask_rgb.data < 0 + large_mask = mask_rgb.data > 1 + mask_rgb[neg_mask] = 0 + mask_rgb[large_mask] = 1 + return mask_rgb + + + diff --git a/BiSTNet-NTIRE2023/utils/util_distortion.py b/BiSTNet-NTIRE2023/utils/util_distortion.py new file mode 100644 index 0000000..73dc45d --- /dev/null +++ b/BiSTNet-NTIRE2023/utils/util_distortion.py @@ -0,0 +1,583 @@ +import math 
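+# Transforms for data distortion and padding (Lab conversion, noise, elastic
+# distortion, center padding/cropping) plus numba-accelerated forward flow
+# warping helpers, used by the data preprocessing pipelines.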
+import random + +import cv2 +import lib.functional as F +import numpy as np +from PIL import Image +from scipy.ndimage.filters import gaussian_filter +from skimage import color +from skimage.draw import random_shapes +from skimage.filters import gaussian +from skimage.transform import resize + +cv2.setNumThreads(0) +from numba import jit, u1, u2 + + +class RGB2Lab(object): + def __init__(self): + pass + + def __call__(self, inputs): + return color.rgb2lab(inputs) + + +class Guassian_noise(object): + """Elastic distortion""" + + def __init__(self, noise_sigma=0.1): + self.noise_sigma = noise_sigma + + def __call__(self, inputs): + h = inputs.shape[0] + w = inputs.shape[1] + noisy_image = inputs + noise = np.random.randn(h, w) * self.noise_sigma + noisy_image[:, :, 0] = inputs[:, :, 0] + noise + + return noisy_image + + +class Distortion(object): + """Elastic distortion""" + + def __init__(self, distortion_level=3, flip_probability=0): + self.alpha_max = distortion_level + self.flip_probability = flip_probability + + def __call__(self, inputs): + if np.random.rand() < self.flip_probability: + inputs = inputs.transpose(Image.FLIP_LEFT_RIGHT) + + inputs = np.array(inputs) + alpha = np.random.rand() * self.alpha_max + sigma = 50 + random_state = np.random.RandomState(None) + shape = inputs.shape[0], inputs.shape[1] + + dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha * 1000 + dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha * 1000 + + x, y = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing="ij") + remap_image = cv2.remap( + inputs, (dy + y).astype(np.float32), (dx + x).astype(np.float32), interpolation=cv2.INTER_LINEAR + ) + + return Image.fromarray(remap_image) + + +class Distortion_with_flow(object): + """Elastic distortion""" + + def __init__(self): + return + + def __call__(self, inputs, dx, dy): + inputs = np.array(inputs) + shape = inputs.shape[0], inputs.shape[1] + inputs = np.array(inputs) + remap_image = forward_mapping(inputs, dy, dx, maxIter=3, precision=1e-3) + + return Image.fromarray(remap_image) + + +class Normalize(object): + def __init__(self): + pass + + def __call__(self, inputs): + inputs[0:1, :, :] = F.normalize(inputs[0:1, :, :], 50, 1) + inputs[1:3, :, :] = F.normalize(inputs[1:3, :, :], (0, 0), (1, 1)) + return inputs + + +class ToTensor(object): + def __init__(self): + pass + + def __call__(self, inputs): + return F.to_mytensor(inputs) + + +class RandomErasing(object): + """ + Class that performs Random Erasing in Random Erasing Data Augmentation by Zhong et al. + ------------------------------------------------------------------------------------- + probability: The probability that the operation will be performed. 
+ sl: min erasing area + sh: max erasing area + r1: min aspect ratio + mean: erasing value + ------------------------------------------------------------------------------------- + """ + + def __init__(self, probability=0.6, sl=0.05, sh=0.6): + self.probability = probability + self.sl = sl + self.sh = sh + + def __call__(self, img): + img = np.array(img) + if random.uniform(0, 1) > self.probability: + return Image.fromarray(img) + + area = img.shape[0] * img.shape[1] + h0 = img.shape[0] + w0 = img.shape[1] + channel = img.shape[2] + + h = int(round(random.uniform(self.sl, self.sh) * h0)) + w = int(round(random.uniform(self.sl, self.sh) * w0)) + + if w < img.shape[1] and h < img.shape[0]: + x1 = random.randint(0, img.shape[0] - h) + y1 = random.randint(0, img.shape[1] - w) + img[x1 : x1 + h, y1 : y1 + w, :] = np.random.rand(h, w, channel) * 255 + + return Image.fromarray(img) + + return Image.fromarray(img) + + +class CenteredPad(object): + """ + pad the frame with black border, + make square image for processing + """ + + def __init__(self, image_size): + self.image_size = image_size + + def __call__(self, img): + img = np.array(img) + width = np.size(img, 1) + height = np.size(img, 0) + old_size = [height, width] + + ratio = float(self.image_size) / max(height, width) + new_size = [int(x * ratio) for x in old_size] + I_resize = resize(img, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_new = np.size(I_resize, 1) + height_new = np.size(I_resize, 0) + + I_pad = np.zeros((self.image_size, self.image_size, 3)) + start_height = (self.image_size - new_size[0]) // 2 + start_width = (self.image_size - new_size[1]) // 2 + I_pad[start_height : (start_height + height_new), start_width : (start_width + width_new), :] = I_resize + + return Image.fromarray(I_pad.astype(np.uint8)) + + +class centeredPad_with_height(object): + """ + pad the image according to the height + """ + + def __init__(self, image_size): + self.height = image_size[0] + self.width = image_size[1] + + def __call__(self, image): + # pad the image to 16:9 + # pad height + I = np.array(image) + + # for padded input + if I.shape[0] == I.shape[1] and I.shape[1] == self.width: + return Image.fromarray(I.astype(np.uint8)) + + width0 = np.size(I, 1) + height0 = np.size(I, 0) + old_size = [height0, width0] + height = self.height + width = self.width + + ratio = height / height0 + new_size = [int(x * ratio) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_new = np.size(I_resize, 1) + height_new = np.size(I_resize, 0) + + # if exceed the expected width + if width_new > width: + I_resize = I_resize[:, math.floor(width_new - width) // 2 : (math.floor(width_new - width) // 2 + width), :] + width_new = np.size(I_resize, 1) + height_new = np.size(I_resize, 0) + + # lines: 56~200 + I_pad = np.zeros((width, width, 3)) + start_height = (width - height_new) // 2 + start_width = (width - width_new) // 2 + I_pad[start_height : (start_height + height_new), start_width : (start_width + width_new), :] = I_resize + + return Image.fromarray(I_pad.astype(np.uint8)) + + +class CenterPad(object): + def __init__(self, image_size): + self.height = image_size[0] + self.width = image_size[1] + + def __call__(self, image): + # pad the image to 16:9 + # pad height + I = np.array(image) + + # for padded input + height_old = np.size(I, 0) + width_old = np.size(I, 1) + old_size = [height_old, width_old] + height = self.height + width = self.width + 
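+ # Despite the class name, this variant adds no black borders: the frame is
+ # rescaled so that the target aspect ratio (height/width) is preserved, and the
+ # dimension that overflows is center-cropped (the height when the input is
+ # taller than the target ratio, the width otherwise).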
I_pad = np.zeros((height, width, np.size(I, 2))) + + ratio = height / width + if height_old / width_old == ratio: + if height_old == height: + return Image.fromarray(I.astype(np.uint8)) + new_size = [int(x * height / height_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + return Image.fromarray(I_resize.astype(np.uint8)) + + if height_old / width_old > ratio: # pad the width and crop + new_size = [int(x * width / width_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_resize = np.size(I_resize, 1) + height_resize = np.size(I_resize, 0) + start_height = (height_resize - height) // 2 + I_pad[:, :, :] = I_resize[start_height : (start_height + height), :, :] + else: # pad the height and crop + new_size = [int(x * height / height_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_resize = np.size(I_resize, 1) + height_resize = np.size(I_resize, 0) + start_width = (width_resize - width) // 2 + I_pad[:, :, :] = I_resize[:, start_width : (start_width + width), :] + + return Image.fromarray(I_pad.astype(np.uint8)) + + +class CenterPad_threshold(object): + def __init__(self, image_size, threshold=3 / 4): + self.height = image_size[0] + self.width = image_size[1] + self.threshold = threshold + + def __call__(self, image): + # pad the image to 16:9 + # pad height + I = np.array(image) + + # for padded input + height_old = np.size(I, 0) + width_old = np.size(I, 1) + old_size = [height_old, width_old] + height = self.height + width = self.width + I_pad = np.zeros((height, width, np.size(I, 2))) + + ratio = height / width + + if height_old / width_old == ratio: + if height_old == height: + return Image.fromarray(I.astype(np.uint8)) + new_size = [int(x * height / height_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + return Image.fromarray(I_resize.astype(np.uint8)) + + if height_old / width_old > self.threshold: + width_new, height_new = width_old, int(width_old * self.threshold) + height_margin = height_old - height_new + height_crop_start = height_margin // 2 + I_crop = I[height_crop_start : (height_crop_start + height_new), :, :] + I_resize = resize( + I_crop, [height, width], mode="reflect", preserve_range=True, clip=False, anti_aliasing=True + ) + + return Image.fromarray(I_resize.astype(np.uint8)) + + if height_old / width_old > ratio: # pad the width and crop + new_size = [int(x * width / width_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_resize = np.size(I_resize, 1) + height_resize = np.size(I_resize, 0) + start_height = (height_resize - height) // 2 + I_pad[:, :, :] = I_resize[start_height : (start_height + height), :, :] + else: # pad the height and crop + new_size = [int(x * height / height_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_resize = np.size(I_resize, 1) + height_resize = np.size(I_resize, 0) + start_width = (width_resize - width) // 2 + I_pad[:, :, :] = I_resize[:, start_width : (start_width + width), :] + + return Image.fromarray(I_pad.astype(np.uint8)) + + +class CenterPadCrop_numpy(object): + """ + pad the image according to the height + """ + + def __init__(self, image_size): + 
self.height = image_size[0] + self.width = image_size[1] + + def __call__(self, image, threshold=3 / 4): + # pad the image to 16:9 + # pad height + I = np.array(image) + # for padded input + height_old = np.size(I, 0) + width_old = np.size(I, 1) + old_size = [height_old, width_old] + height = self.height + width = self.width + padding_size = width + if image.ndim == 2: + I_pad = np.zeros((width, width)) + else: + I_pad = np.zeros((width, width, I.shape[2])) + + ratio = height / width + if height_old / width_old == ratio: + return I + + if height_old / width_old > threshold: + width_new, height_new = width_old, int(width_old * threshold) + height_margin = height_old - height_new + height_crop_start = height_margin // 2 + I_crop = I[height_start : (height_start + height_new), :] + I_resize = resize( + I_crop, [height, width], mode="reflect", preserve_range=True, clip=False, anti_aliasing=True + ) + return I_resize + + if height_old / width_old > ratio: # pad the width and crop + new_size = [int(x * width / width_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_resize = np.size(I_resize, 1) + height_resize = np.size(I_resize, 0) + start_height = (height_resize - height) // 2 + start_height_block = (padding_size - height) // 2 + if image.ndim == 2: + I_pad[start_height_block : (start_height_block + height), :] = I_resize[ + start_height : (start_height + height), : + ] + else: + I_pad[start_height_block : (start_height_block + height), :, :] = I_resize[ + start_height : (start_height + height), :, : + ] + else: # pad the height and crop + new_size = [int(x * height / height_old) for x in old_size] + I_resize = resize(I, new_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True) + width_resize = np.size(I_resize, 1) + height_resize = np.size(I_resize, 0) + start_width = (width_resize - width) // 2 + start_width_block = (padding_size - width) // 2 + if image.ndim == 2: + I_pad[:, start_width_block : (start_width_block + width)] = I_resize[ + :, start_width : (start_width + width) + ] + + else: + I_pad[:, start_width_block : (start_width_block + width), :] = I_resize[ + :, start_width : (start_width + width), : + ] + + crop_start_height = (I_pad.shape[0] - height) // 2 + crop_start_width = (I_pad.shape[1] - width) // 2 + + if image.ndim == 2: + return I_pad[ + crop_start_height : (crop_start_height + height), crop_start_width : (crop_start_width + width) + ] + else: + return I_pad[ + crop_start_height : (crop_start_height + height), crop_start_width : (crop_start_width + width), : + ] + + +@jit(nopython=True) +def iterSearchShader(padu, padv, xr, yr, W, H, maxIter, precision): + # print('processing location', (xr, yr)) + # + if abs(padu[yr, xr]) < precision and abs(padv[yr, xr]) < precision: + return xr, yr + + # Our initialize method in this paper, can see the overleaf for detail + if (xr + 1) <= (W - 1): + dif = padu[yr, xr + 1] - padu[yr, xr] + else: + dif = padu[yr, xr] - padu[yr, xr - 1] + u_next = padu[yr, xr] / (1 + dif) + if (yr + 1) <= (H - 1): + dif = padv[yr + 1, xr] - padv[yr, xr] + else: + dif = padv[yr, xr] - padv[yr - 1, xr] + v_next = padv[yr, xr] / (1 + dif) + i = xr - u_next + j = yr - v_next + i_int = int(i) + j_int = int(j) + + # The same as traditional iterative search method + for _ in range(maxIter): + if not 0 <= i <= (W - 1) or not 0 <= j <= (H - 1): + return i, j + + u11 = padu[j_int, i_int] + v11 = padv[j_int, i_int] + + u12 = padu[j_int, i_int + 1] + v12 = 
padv[j_int, i_int + 1] + + int1 = padu[j_int + 1, i_int] + v21 = padv[j_int + 1, i_int] + + int2 = padu[j_int + 1, i_int + 1] + v22 = padv[j_int + 1, i_int + 1] + + u = ( + u11 * (i_int + 1 - i) * (j_int + 1 - j) + + u12 * (i - i_int) * (j_int + 1 - j) + + int1 * (i_int + 1 - i) * (j - j_int) + + int2 * (i - i_int) * (j - j_int) + ) + + v = ( + v11 * (i_int + 1 - i) * (j_int + 1 - j) + + v12 * (i - i_int) * (j_int + 1 - j) + + v21 * (i_int + 1 - i) * (j - j_int) + + v22 * (i - i_int) * (j - j_int) + ) + + i_next = xr - u + j_next = yr - v + + if abs(i - i_next) < precision and abs(j - j_next) < precision: + return i, j + + i = i_next + j = j_next + + # if the search doesn't converge within max iter, it will return the last iter result + return i_next, j_next + + +# Bilinear interpolation +@jit(nopython=True) +def biInterpolation(distorted, i, j): + i = u2(i) + j = u2(j) + Q11 = distorted[j, i] + Q12 = distorted[j, i + 1] + Q21 = distorted[j + 1, i] + Q22 = distorted[j + 1, i + 1] + + return u1( + Q11 * (i + 1 - i) * (j + 1 - j) + + Q12 * (i - i) * (j + 1 - j) + + Q21 * (i + 1 - i) * (j - j) + + Q22 * (i - i) * (j - j) + ) + + +@jit(nopython=True) +def iterSearch(distortImg, resultImg, padu, padv, W, H, maxIter=5, precision=1e-2): + for xr in range(W): + for yr in range(H): + # (xr, yr) is the point in result image, (i, j) is the search result in distorted image + i, j = iterSearchShader(padu, padv, xr, yr, W, H, maxIter, precision) + + # reflect the pixels outside the border + if i > W - 1: + i = 2 * W - 1 - i + if i < 0: + i = -i + if j > H - 1: + j = 2 * H - 1 - j + if j < 0: + j = -j + + # Bilinear interpolation to get the pixel at (i, j) in distorted image + resultImg[yr, xr, 0] = biInterpolation( + distortImg[:, :, 0], + i, + j, + ) + resultImg[yr, xr, 1] = biInterpolation( + distortImg[:, :, 1], + i, + j, + ) + resultImg[yr, xr, 2] = biInterpolation( + distortImg[:, :, 2], + i, + j, + ) + return None + + +def forward_mapping(source_image, u, v, maxIter=5, precision=1e-2): + """ + warp the image according to the forward flow + u: horizontal + v: vertical + """ + H = source_image.shape[0] + W = source_image.shape[1] + + distortImg = np.array(np.zeros((H + 1, W + 1, 3)), dtype=np.uint8) + distortImg[0:H, 0:W] = source_image[0:H, 0:W] + distortImg[H, 0:W] = source_image[H - 1, 0:W] + distortImg[0:H, W] = source_image[0:H, W - 1] + distortImg[H, W] = source_image[H - 1, W - 1] + + padu = np.array(np.zeros((H + 1, W + 1)), dtype=np.float32) + padu[0:H, 0:W] = u[0:H, 0:W] + padu[H, 0:W] = u[H - 1, 0:W] + padu[0:H, W] = u[0:H, W - 1] + padu[H, W] = u[H - 1, W - 1] + + padv = np.array(np.zeros((H + 1, W + 1)), dtype=np.float32) + padv[0:H, 0:W] = v[0:H, 0:W] + padv[H, 0:W] = v[H - 1, 0:W] + padv[0:H, W] = v[0:H, W - 1] + padv[H, W] = v[H - 1, W - 1] + + resultImg = np.array(np.zeros((H, W, 3)), dtype=np.uint8) + iterSearch(distortImg, resultImg, padu, padv, W, H, maxIter, precision) + return resultImg + + +def random_mask(H, W, mask_size=200): + """ + mask: ranges in [0,1] + """ + masked_image = np.zeros([H, W, 3]) + mask = random_shapes( + [H, W], + max_shapes=1, + min_shapes=1, + max_size=mask_size, + min_size=mask_size / 2, + multichannel=False, + intensity_range=[0, 0], + )[0] + mask = np.stack((mask, mask, mask), axis=-1) + random_state = np.random.RandomState(None) + distortion_range = 50 + alpha = np.random.rand() * 6 + forward_dx = ( + gaussian_filter((random_state.rand(H, W) * 2 - 1), distortion_range, mode="constant", cval=0) * alpha * 1000 + ) + forward_dy = ( + 
gaussian_filter((random_state.rand(H, W) * 2 - 1), distortion_range, mode="constant", cval=0) * alpha * 1000 + ) + mask = forward_mapping(mask, forward_dy, forward_dx, maxIter=3, precision=1e-3) / 255 + mask = 1 - gaussian(mask, sigma=0, preserve_range=True, multichannel=False, anti_aliasing=True) + mask = mask[:, :, 0] + + return mask diff --git a/BiSTNet-NTIRE2023/utils/util_tensorboard.py b/BiSTNet-NTIRE2023/utils/util_tensorboard.py new file mode 100644 index 0000000..a0e68cc --- /dev/null +++ b/BiSTNet-NTIRE2023/utils/util_tensorboard.py @@ -0,0 +1,43 @@ +import threading + +import numpy as np + +from utils.util import to_np + + +def histogram_logger(tb_writer, iter_index, net=None): + if net is not None: + for tag, value in net.named_parameters(): + tag = tag.replace('.', '/') + tb_writer.add_histogram(tag, to_np(value), iter_index) + tb_writer.add_histogram(tag + '/grad', to_np(value.grad), iter_index) + + +def value_logger(tb_writer, iter_index, loss_info): + for tag, value in loss_info.items(): + tb_writer.add_scalar(tag, value, iter_index) + + +class TBImageRecorder(threading.Thread): + """ + TBImageRecorder + """ + + def __init__(self, tb_writer, func, queue): + super(TBImageRecorder, self).__init__() + self._tb_writer = tb_writer + self._func = func + self._queue = queue + + def run(self): + while True: + msgs, iter_index = self._queue.get() + if msgs: + img_info = self._func(*msgs) + + print("logging the images") + for tag, images in img_info.items(): + if images is not None: + self._tb_writer.add_image(tag, np.clip(images, 0, 255).astype(np.uint8), iter_index) + else: + break diff --git a/BiSTNet-NTIRE2023/utils/vgg_util.py b/BiSTNet-NTIRE2023/utils/vgg_util.py new file mode 100644 index 0000000..05652fd --- /dev/null +++ b/BiSTNet-NTIRE2023/utils/vgg_util.py @@ -0,0 +1,133 @@ +import collections +import os + +import torch +import torch.nn as nn +import torchvision.models as models +import torchvision.transforms as transforms +from PIL import Image + + +def preprocess(img, scale_size=None): + """PILimg: RGB: HxWxC""" + if scale_size is not None: + scale_transforms = MaxScale(scale_size) + img = scale_transforms(img) + + prep_transforms = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]), # turn to BGR + transforms.Normalize(mean=[0.40760392, 0.45795686, 0.48501961], std=[1, 1, 1]), # subtract imagenet mean + transforms.Lambda(lambda x: x.mul_(255)), + ] + ) + img = prep_transforms(img) + return img + + +def deprocess(img): + post_transforms_a = transforms.Compose( + [ + transforms.Lambda(lambda x: x.mul_(1.0 / 255)), + transforms.Normalize(mean=[-0.40760392, -0.45795686, -0.48501961], std=[1, 1, 1]), # add imagenet mean + transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]), # turn to RGB + ] + ) + post_transforms_b = transforms.Compose([transforms.ToPILImage()]) + img = post_transforms_a(img) + img[img > 1] = 1 + img[img < 0] = 0 + img = post_transforms_b(img) + return img + + +def get_renamed_vgg(): + cache_file = "data/vgg19_conv.pth" + vgg = models.vgg19().features + renamed_vgg = nn.Sequential() + part_idx, layer_idx = 1, 1 + for layer in list(vgg): + if isinstance(layer, nn.Conv2d): + name = "conv{}_{}".format(part_idx, layer_idx) + renamed_vgg.add_module(name, layer) + elif isinstance(layer, nn.ReLU): + name = "relu{}_{}".format(part_idx, layer_idx) + renamed_vgg.add_module(name, layer) + layer_idx += 1 + elif isinstance(layer, nn.MaxPool2d): + name = "pool{}".format(part_idx) + 
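+ # Layers are renamed to conv{block}_{index} / relu{block}_{index} / pool{block}
+ # (e.g. "relu3_1"), so callers can select intermediate activations by the usual
+ # VGG19 layer names instead of positional indices.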
renamed_vgg.add_module(name, layer) + part_idx += 1 + layer_idx = 1 + renamed_vgg.load_state_dict(torch.load(cache_file)) + + return renamed_vgg + + +def get_renamed_vgg_johnson(): + cache_file = "data/vgg19-d01eb7cb.pth" + vgg = models.vgg19() + model_dict = vgg.state_dict() + pretrained_dict = torch.load(cache_file) + pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + vgg.load_state_dict(model_dict) + vgg = vgg.features + renamed_vgg = nn.Sequential() + part_idx, layer_idx = 1, 1 + for layer in list(vgg): + if isinstance(layer, nn.Conv2d): + name = "conv{}_{}".format(part_idx, layer_idx) + renamed_vgg.add_module(name, layer) + elif isinstance(layer, nn.ReLU): + name = "relu{}_{}".format(part_idx, layer_idx) + renamed_vgg.add_module(name, layer) + layer_idx += 1 + elif isinstance(layer, nn.MaxPool2d): + name = "pool{}".format(part_idx) + renamed_vgg.add_module(name, layer) + part_idx += 1 + layer_idx = 1 + + return renamed_vgg + + +class MaxScale(object): + """Rescale the input PIL.Image to the given size. + + Args: + size (sequence or int): Desired output size. If size is a sequence like + (w, h), output size will be matched to this. If size is an int, + smaller edge of the image will be matched to this number. + i.e, if height > width, then image will be rescaled to + (size * height / width, size) + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + """ + + def __init__(self, size, interpolation=Image.BILINEAR): + assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2) + self.size = size + self.interpolation = interpolation + + def __call__(self, img): + """ + Args: + img (PIL.Image): Image to be scaled. + + Returns: + PIL.Image: Rescaled image. 
+ """ + if not isinstance(self.size, int): + return img.resize(self.size, self.interpolation) + w, h = img.size + if (w <= h == self.size) or (h <= w == self.size): + return img + if w < h: + oh = self.size + ow = int(self.size * w / h) + else: + ow = self.size + oh = int(self.size * h / w) + return img.resize((ow, oh), self.interpolation) diff --git a/BiSTNet-NTIRE2023/utils/warping.py b/BiSTNet-NTIRE2023/utils/warping.py new file mode 100644 index 0000000..7df783d --- /dev/null +++ b/BiSTNet-NTIRE2023/utils/warping.py @@ -0,0 +1,30 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def get_grid(x): + torchHorizontal = ( + torch.linspace(-1.0, 1.0, x.size(3)).view(1, 1, 1, x.size(3)).expand(x.size(0), 1, x.size(2), x.size(3)) + ) + torchVertical = ( + torch.linspace(-1.0, 1.0, x.size(2)).view(1, 1, x.size(2), 1).expand(x.size(0), 1, x.size(2), x.size(3)) + ) + return torch.cat([torchHorizontal, torchVertical], 1).cuda() + + +class WarpingLayer(nn.Module): + def __init__(self, device): + super(WarpingLayer, self).__init__() + self.device = device + + def forward(self, x, flow): + # WarpingLayer uses F.grid_sample, which expects normalized grid + # we still output unnormalized flow for the convenience of comparing EPEs with FlowNet2 and original code + # so here we need to denormalize the flow + flow_for_grip = torch.zeros_like(flow).cuda() + flow_for_grip[:, 0, :, :] = flow[:, 0, :, :] / ((flow.size(3) - 1.0) / 2.0) + flow_for_grip[:, 1, :, :] = flow[:, 1, :, :] / ((flow.size(2) - 1.0) / 2.0) + + grid = (get_grid(x) + flow_for_grip).permute(0, 2, 3, 1) + return F.grid_sample(x, grid, align_corners=True) diff --git a/README.md b/README.md new file mode 100644 index 0000000..060bca2 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +## [NTIRE 2023 Video Colorization Challenge](https://tianchi.aliyun.com/competition/entrance/532054/rankingList) @ CVPR 2023 +## Track 1: Fréchet Inception Distance (FID) Optimization + +Please visit [test_NTIRE23_Track_1_FID.py](https://github.com/yyang181/NTIRE23-VIDEO-COLORIZATION/blob/main/BiSTNet-NTIRE2023/test_NTIRE23_Track_1_FID.py) to evaluate our model. + +We provide the colorized images [HERE](https://drive.google.com/drive/folders/1jwVKK2IfAp01C6KpuqB3Wcm0uT4yXZBB?usp=share_link), and the reference images used to obtain the results [HERE](https://drive.google.com/drive/folders/1miA49ALEKDxsDlmsdZX38tik9V4ECqES?usp=share_link). + +## :briefcase: Dependencies and Installation + +- PyTorch >= 1.8.0 + +- CUDA >= 10.2 + +- Other required packages + + ``` + # git clone this repository + git clone https://github.com/yyang181/NTIRE23-VIDEO-COLORIZATION.git + cd NTIRE23-VIDEO-COLORIZATION + ``` + +#### Environment configuration: + +``` +cd BiSTNet-NTIRE2023 + +# create a new anaconda env +conda create -n bistnet python=3.6 +conda activate bistnet + +# install pytortch +conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=11.3 -c pytorch -c conda-forge + +# mmcv install +pip install -U openmim +mim install mmcv-full + +# install mmediting +git clone https://github.com/open-mmlab/mmediting.git +cd mmediting +pip3 install -e . + +# install other pip pkgs +cd .. 
&& pip install -r pip_requirements.txt +``` + + + + +## :gift: Checkpoints + +| Name | URL | Script | FID | CDC | +| :-----: | :----------------------------------------------------------: | :----------------------------------------------------------: | :-----: | :------: | +| BiSTNet | [model](https://drive.google.com/drive/folders/1nakixYiDq6qmP4MZw_gbAhtud4a9yz5h?usp=share_link) | [test_NTIRE23_Track_1_FID.py](https://github.com/yyang181/NTIRE23-VIDEO-COLORIZATION/blob/main/BiSTNet-NTIRE2023/test_NTIRE23_Track_1_FID.py) | 21.5372 | 0.001717 | + +## :zap: Quick Inference + +- **Download Pre-trained Models**: download the pretrained colorization models from the link in the table above and put them into `./BiSTNet-NTIRE2023/`, e.g. `./BiSTNet-NTIRE2023/checkpoints`, `./BiSTNet-NTIRE2023/data` and `./BiSTNet-NTIRE2023/models/protoseg_core/checkpoints`. +- **Prepare Testing Data**: put the testing images in a folder such as `./demo_dataset` (see the preparation sketch below). + - `demo_dataset/input`: the directory of input grayscale images. + - `demo_dataset/ref`: the directory of reference images (only `f001.png`, `f050.png` and `f100.png` are color images). + - `demo_dataset/output`: the directory to save the colorization results. +- **Test on Images**: + +``` +conda activate bistnet && cd BiSTNet-NTIRE2023 +CUDA_VISIBLE_DEVICES=0 python test_NTIRE23_Track_1_FID.py +``` + +For more details, please refer to [test_NTIRE23_Track_1_FID.py](https://github.com/yyang181/NTIRE23-VIDEO-COLORIZATION/blob/main/BiSTNet-NTIRE2023/test_NTIRE23_Track_1_FID.py). + +## Acknowledgement + +Parts of our code are borrowed from [DeepExemplar](https://github.com/zhangmozhe/Deep-Exemplar-based-Video-Colorization), [RAFT](https://github.com/princeton-vl/RAFT), [HED](https://github.com/sniklaus/pytorch-hed) and [ProtoSeg](https://github.com/tfzhou/ProtoSeg). Thanks for their awesome work. 
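The following is a minimal sketch, not part of the released code, for building the `demo_dataset` layout used in the Quick Inference step above: grayscale copies of every frame go to `demo_dataset/input/<clip>/`, and `demo_dataset/ref/<clip>/` holds the same frames in grayscale except for the designated color references `f001.png`, `f050.png` and `f100.png`. The helper name `prepare_demo_clip` and the source folder `./my_color_frames` are illustrative assumptions.

```
# Hypothetical helper (editor's sketch) for preparing the demo_dataset layout described above.
import os
from PIL import Image

def prepare_demo_clip(src_dir, dst_root, clip="001", ref_names=("f001.png", "f050.png", "f100.png")):
    """Build demo_dataset/{input,ref,output}/<clip> from a folder of color PNG frames."""
    input_dir = os.path.join(dst_root, "input", clip)
    ref_dir = os.path.join(dst_root, "ref", clip)
    out_dir = os.path.join(dst_root, "output", clip)
    for d in (input_dir, ref_dir, out_dir):
        os.makedirs(d, exist_ok=True)

    for name in sorted(os.listdir(src_dir)):
        if not name.endswith(".png"):
            continue
        img = Image.open(os.path.join(src_dir, name)).convert("RGB")
        # inputs are grayscale versions of every frame (kept 3-channel for convenience)
        img.convert("L").convert("RGB").save(os.path.join(input_dir, name))
        # references keep color only for the designated anchor frames
        ref = img if name in ref_names else img.convert("L").convert("RGB")
        ref.save(os.path.join(ref_dir, name))

# example: prepare_demo_clip("./my_color_frames", "./demo_dataset")
```

If a clip uses different anchor frames, pass them via `ref_names`; the test script then reads the clip from `demo_dataset/input` and `demo_dataset/ref` as described above.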
diff --git a/demo_dataset/input/001/f001.png b/demo_dataset/input/001/f001.png new file mode 100644 index 0000000..0d54ed8 Binary files /dev/null and b/demo_dataset/input/001/f001.png differ diff --git a/demo_dataset/input/001/f002.png b/demo_dataset/input/001/f002.png new file mode 100644 index 0000000..c2d4330 Binary files /dev/null and b/demo_dataset/input/001/f002.png differ diff --git a/demo_dataset/input/001/f003.png b/demo_dataset/input/001/f003.png new file mode 100644 index 0000000..4c4a85b Binary files /dev/null and b/demo_dataset/input/001/f003.png differ diff --git a/demo_dataset/input/001/f004.png b/demo_dataset/input/001/f004.png new file mode 100644 index 0000000..7049dda Binary files /dev/null and b/demo_dataset/input/001/f004.png differ diff --git a/demo_dataset/input/001/f005.png b/demo_dataset/input/001/f005.png new file mode 100644 index 0000000..60978a9 Binary files /dev/null and b/demo_dataset/input/001/f005.png differ diff --git a/demo_dataset/input/001/f006.png b/demo_dataset/input/001/f006.png new file mode 100644 index 0000000..9a2d228 Binary files /dev/null and b/demo_dataset/input/001/f006.png differ diff --git a/demo_dataset/input/001/f007.png b/demo_dataset/input/001/f007.png new file mode 100644 index 0000000..84be4f5 Binary files /dev/null and b/demo_dataset/input/001/f007.png differ diff --git a/demo_dataset/input/001/f008.png b/demo_dataset/input/001/f008.png new file mode 100644 index 0000000..287eca3 Binary files /dev/null and b/demo_dataset/input/001/f008.png differ diff --git a/demo_dataset/input/001/f009.png b/demo_dataset/input/001/f009.png new file mode 100644 index 0000000..c2abc7b Binary files /dev/null and b/demo_dataset/input/001/f009.png differ diff --git a/demo_dataset/input/001/f010.png b/demo_dataset/input/001/f010.png new file mode 100644 index 0000000..1edc354 Binary files /dev/null and b/demo_dataset/input/001/f010.png differ diff --git a/demo_dataset/input/001/f011.png b/demo_dataset/input/001/f011.png new file mode 100644 index 0000000..a87b017 Binary files /dev/null and b/demo_dataset/input/001/f011.png differ diff --git a/demo_dataset/input/001/f012.png b/demo_dataset/input/001/f012.png new file mode 100644 index 0000000..432d048 Binary files /dev/null and b/demo_dataset/input/001/f012.png differ diff --git a/demo_dataset/input/001/f013.png b/demo_dataset/input/001/f013.png new file mode 100644 index 0000000..b493c9e Binary files /dev/null and b/demo_dataset/input/001/f013.png differ diff --git a/demo_dataset/input/001/f014.png b/demo_dataset/input/001/f014.png new file mode 100644 index 0000000..9d4b7ff Binary files /dev/null and b/demo_dataset/input/001/f014.png differ diff --git a/demo_dataset/input/001/f015.png b/demo_dataset/input/001/f015.png new file mode 100644 index 0000000..b5b4c49 Binary files /dev/null and b/demo_dataset/input/001/f015.png differ diff --git a/demo_dataset/input/001/f016.png b/demo_dataset/input/001/f016.png new file mode 100644 index 0000000..5dd58ee Binary files /dev/null and b/demo_dataset/input/001/f016.png differ diff --git a/demo_dataset/input/001/f017.png b/demo_dataset/input/001/f017.png new file mode 100644 index 0000000..aabe19a Binary files /dev/null and b/demo_dataset/input/001/f017.png differ diff --git a/demo_dataset/input/001/f018.png b/demo_dataset/input/001/f018.png new file mode 100644 index 0000000..feb374d Binary files /dev/null and b/demo_dataset/input/001/f018.png differ diff --git a/demo_dataset/input/001/f019.png b/demo_dataset/input/001/f019.png new file mode 100644 index 
0000000..650325b Binary files /dev/null and b/demo_dataset/input/001/f019.png differ diff --git a/demo_dataset/input/001/f020.png b/demo_dataset/input/001/f020.png new file mode 100644 index 0000000..f28b088 Binary files /dev/null and b/demo_dataset/input/001/f020.png differ diff --git a/demo_dataset/input/001/f021.png b/demo_dataset/input/001/f021.png new file mode 100644 index 0000000..7045175 Binary files /dev/null and b/demo_dataset/input/001/f021.png differ diff --git a/demo_dataset/input/001/f022.png b/demo_dataset/input/001/f022.png new file mode 100644 index 0000000..8febcd3 Binary files /dev/null and b/demo_dataset/input/001/f022.png differ diff --git a/demo_dataset/input/001/f023.png b/demo_dataset/input/001/f023.png new file mode 100644 index 0000000..9071a53 Binary files /dev/null and b/demo_dataset/input/001/f023.png differ diff --git a/demo_dataset/input/001/f024.png b/demo_dataset/input/001/f024.png new file mode 100644 index 0000000..96cfed2 Binary files /dev/null and b/demo_dataset/input/001/f024.png differ diff --git a/demo_dataset/input/001/f025.png b/demo_dataset/input/001/f025.png new file mode 100644 index 0000000..d015bdb Binary files /dev/null and b/demo_dataset/input/001/f025.png differ diff --git a/demo_dataset/input/001/f026.png b/demo_dataset/input/001/f026.png new file mode 100644 index 0000000..21ee589 Binary files /dev/null and b/demo_dataset/input/001/f026.png differ diff --git a/demo_dataset/input/001/f027.png b/demo_dataset/input/001/f027.png new file mode 100644 index 0000000..d3f7f64 Binary files /dev/null and b/demo_dataset/input/001/f027.png differ diff --git a/demo_dataset/input/001/f028.png b/demo_dataset/input/001/f028.png new file mode 100644 index 0000000..1bc16ef Binary files /dev/null and b/demo_dataset/input/001/f028.png differ diff --git a/demo_dataset/input/001/f029.png b/demo_dataset/input/001/f029.png new file mode 100644 index 0000000..dd470dd Binary files /dev/null and b/demo_dataset/input/001/f029.png differ diff --git a/demo_dataset/input/001/f030.png b/demo_dataset/input/001/f030.png new file mode 100644 index 0000000..0cc8a7a Binary files /dev/null and b/demo_dataset/input/001/f030.png differ diff --git a/demo_dataset/input/001/f031.png b/demo_dataset/input/001/f031.png new file mode 100644 index 0000000..a2d64d0 Binary files /dev/null and b/demo_dataset/input/001/f031.png differ diff --git a/demo_dataset/input/001/f032.png b/demo_dataset/input/001/f032.png new file mode 100644 index 0000000..bd32ebd Binary files /dev/null and b/demo_dataset/input/001/f032.png differ diff --git a/demo_dataset/input/001/f033.png b/demo_dataset/input/001/f033.png new file mode 100644 index 0000000..b05c096 Binary files /dev/null and b/demo_dataset/input/001/f033.png differ diff --git a/demo_dataset/input/001/f034.png b/demo_dataset/input/001/f034.png new file mode 100644 index 0000000..6371d2e Binary files /dev/null and b/demo_dataset/input/001/f034.png differ diff --git a/demo_dataset/input/001/f035.png b/demo_dataset/input/001/f035.png new file mode 100644 index 0000000..00bc056 Binary files /dev/null and b/demo_dataset/input/001/f035.png differ diff --git a/demo_dataset/input/001/f036.png b/demo_dataset/input/001/f036.png new file mode 100644 index 0000000..3dd86cd Binary files /dev/null and b/demo_dataset/input/001/f036.png differ diff --git a/demo_dataset/input/001/f037.png b/demo_dataset/input/001/f037.png new file mode 100644 index 0000000..f11fb3b Binary files /dev/null and b/demo_dataset/input/001/f037.png differ diff --git 
a/demo_dataset/input/001/f038.png b/demo_dataset/input/001/f038.png new file mode 100644 index 0000000..4112233 Binary files /dev/null and b/demo_dataset/input/001/f038.png differ diff --git a/demo_dataset/input/001/f039.png b/demo_dataset/input/001/f039.png new file mode 100644 index 0000000..a7de825 Binary files /dev/null and b/demo_dataset/input/001/f039.png differ diff --git a/demo_dataset/input/001/f040.png b/demo_dataset/input/001/f040.png new file mode 100644 index 0000000..96c185f Binary files /dev/null and b/demo_dataset/input/001/f040.png differ diff --git a/demo_dataset/input/001/f041.png b/demo_dataset/input/001/f041.png new file mode 100644 index 0000000..cc0cc6a Binary files /dev/null and b/demo_dataset/input/001/f041.png differ diff --git a/demo_dataset/input/001/f042.png b/demo_dataset/input/001/f042.png new file mode 100644 index 0000000..c845c01 Binary files /dev/null and b/demo_dataset/input/001/f042.png differ diff --git a/demo_dataset/input/001/f043.png b/demo_dataset/input/001/f043.png new file mode 100644 index 0000000..17bc33e Binary files /dev/null and b/demo_dataset/input/001/f043.png differ diff --git a/demo_dataset/input/001/f044.png b/demo_dataset/input/001/f044.png new file mode 100644 index 0000000..1245533 Binary files /dev/null and b/demo_dataset/input/001/f044.png differ diff --git a/demo_dataset/input/001/f045.png b/demo_dataset/input/001/f045.png new file mode 100644 index 0000000..b17f00e Binary files /dev/null and b/demo_dataset/input/001/f045.png differ diff --git a/demo_dataset/input/001/f046.png b/demo_dataset/input/001/f046.png new file mode 100644 index 0000000..a872878 Binary files /dev/null and b/demo_dataset/input/001/f046.png differ diff --git a/demo_dataset/input/001/f047.png b/demo_dataset/input/001/f047.png new file mode 100644 index 0000000..46a7872 Binary files /dev/null and b/demo_dataset/input/001/f047.png differ diff --git a/demo_dataset/input/001/f048.png b/demo_dataset/input/001/f048.png new file mode 100644 index 0000000..15b2cbc Binary files /dev/null and b/demo_dataset/input/001/f048.png differ diff --git a/demo_dataset/input/001/f049.png b/demo_dataset/input/001/f049.png new file mode 100644 index 0000000..6ead343 Binary files /dev/null and b/demo_dataset/input/001/f049.png differ diff --git a/demo_dataset/input/001/f050.png b/demo_dataset/input/001/f050.png new file mode 100644 index 0000000..a8a3830 Binary files /dev/null and b/demo_dataset/input/001/f050.png differ diff --git a/demo_dataset/input/001/f051.png b/demo_dataset/input/001/f051.png new file mode 100644 index 0000000..a01ffd5 Binary files /dev/null and b/demo_dataset/input/001/f051.png differ diff --git a/demo_dataset/input/001/f052.png b/demo_dataset/input/001/f052.png new file mode 100644 index 0000000..152ee22 Binary files /dev/null and b/demo_dataset/input/001/f052.png differ diff --git a/demo_dataset/input/001/f053.png b/demo_dataset/input/001/f053.png new file mode 100644 index 0000000..f742be4 Binary files /dev/null and b/demo_dataset/input/001/f053.png differ diff --git a/demo_dataset/input/001/f054.png b/demo_dataset/input/001/f054.png new file mode 100644 index 0000000..fbb004d Binary files /dev/null and b/demo_dataset/input/001/f054.png differ diff --git a/demo_dataset/input/001/f055.png b/demo_dataset/input/001/f055.png new file mode 100644 index 0000000..114017c Binary files /dev/null and b/demo_dataset/input/001/f055.png differ diff --git a/demo_dataset/input/001/f056.png b/demo_dataset/input/001/f056.png new file mode 100644 index 0000000..7f3d05b 
Binary files /dev/null and b/demo_dataset/input/001/f056.png differ diff --git a/demo_dataset/input/001/f057.png b/demo_dataset/input/001/f057.png new file mode 100644 index 0000000..493c7b7 Binary files /dev/null and b/demo_dataset/input/001/f057.png differ diff --git a/demo_dataset/input/001/f058.png b/demo_dataset/input/001/f058.png new file mode 100644 index 0000000..68ef3a4 Binary files /dev/null and b/demo_dataset/input/001/f058.png differ diff --git a/demo_dataset/input/001/f059.png b/demo_dataset/input/001/f059.png new file mode 100644 index 0000000..53d0823 Binary files /dev/null and b/demo_dataset/input/001/f059.png differ diff --git a/demo_dataset/input/001/f060.png b/demo_dataset/input/001/f060.png new file mode 100644 index 0000000..62d79f9 Binary files /dev/null and b/demo_dataset/input/001/f060.png differ diff --git a/demo_dataset/input/001/f061.png b/demo_dataset/input/001/f061.png new file mode 100644 index 0000000..319d534 Binary files /dev/null and b/demo_dataset/input/001/f061.png differ diff --git a/demo_dataset/input/001/f062.png b/demo_dataset/input/001/f062.png new file mode 100644 index 0000000..0269431 Binary files /dev/null and b/demo_dataset/input/001/f062.png differ diff --git a/demo_dataset/input/001/f063.png b/demo_dataset/input/001/f063.png new file mode 100644 index 0000000..3ec2f92 Binary files /dev/null and b/demo_dataset/input/001/f063.png differ diff --git a/demo_dataset/input/001/f064.png b/demo_dataset/input/001/f064.png new file mode 100644 index 0000000..252b26b Binary files /dev/null and b/demo_dataset/input/001/f064.png differ diff --git a/demo_dataset/input/001/f065.png b/demo_dataset/input/001/f065.png new file mode 100644 index 0000000..3c5ccbf Binary files /dev/null and b/demo_dataset/input/001/f065.png differ diff --git a/demo_dataset/input/001/f066.png b/demo_dataset/input/001/f066.png new file mode 100644 index 0000000..1318ae3 Binary files /dev/null and b/demo_dataset/input/001/f066.png differ diff --git a/demo_dataset/input/001/f067.png b/demo_dataset/input/001/f067.png new file mode 100644 index 0000000..bb3b5dc Binary files /dev/null and b/demo_dataset/input/001/f067.png differ diff --git a/demo_dataset/input/001/f068.png b/demo_dataset/input/001/f068.png new file mode 100644 index 0000000..dd983e4 Binary files /dev/null and b/demo_dataset/input/001/f068.png differ diff --git a/demo_dataset/input/001/f069.png b/demo_dataset/input/001/f069.png new file mode 100644 index 0000000..c1d1e6b Binary files /dev/null and b/demo_dataset/input/001/f069.png differ diff --git a/demo_dataset/input/001/f070.png b/demo_dataset/input/001/f070.png new file mode 100644 index 0000000..160ccb7 Binary files /dev/null and b/demo_dataset/input/001/f070.png differ diff --git a/demo_dataset/input/001/f071.png b/demo_dataset/input/001/f071.png new file mode 100644 index 0000000..fd9f9e5 Binary files /dev/null and b/demo_dataset/input/001/f071.png differ diff --git a/demo_dataset/input/001/f072.png b/demo_dataset/input/001/f072.png new file mode 100644 index 0000000..ef42602 Binary files /dev/null and b/demo_dataset/input/001/f072.png differ diff --git a/demo_dataset/input/001/f073.png b/demo_dataset/input/001/f073.png new file mode 100644 index 0000000..6e34055 Binary files /dev/null and b/demo_dataset/input/001/f073.png differ diff --git a/demo_dataset/input/001/f074.png b/demo_dataset/input/001/f074.png new file mode 100644 index 0000000..0c83740 Binary files /dev/null and b/demo_dataset/input/001/f074.png differ diff --git a/demo_dataset/input/001/f075.png 
b/demo_dataset/input/001/f075.png new file mode 100644 index 0000000..84c5bc0 Binary files /dev/null and b/demo_dataset/input/001/f075.png differ diff --git a/demo_dataset/input/001/f076.png b/demo_dataset/input/001/f076.png new file mode 100644 index 0000000..5efffec Binary files /dev/null and b/demo_dataset/input/001/f076.png differ diff --git a/demo_dataset/input/001/f077.png b/demo_dataset/input/001/f077.png new file mode 100644 index 0000000..7171cd0 Binary files /dev/null and b/demo_dataset/input/001/f077.png differ diff --git a/demo_dataset/input/001/f078.png b/demo_dataset/input/001/f078.png new file mode 100644 index 0000000..dfd5298 Binary files /dev/null and b/demo_dataset/input/001/f078.png differ diff --git a/demo_dataset/input/001/f079.png b/demo_dataset/input/001/f079.png new file mode 100644 index 0000000..75905fe Binary files /dev/null and b/demo_dataset/input/001/f079.png differ diff --git a/demo_dataset/input/001/f080.png b/demo_dataset/input/001/f080.png new file mode 100644 index 0000000..dcc6433 Binary files /dev/null and b/demo_dataset/input/001/f080.png differ diff --git a/demo_dataset/input/001/f081.png b/demo_dataset/input/001/f081.png new file mode 100644 index 0000000..a22c2d5 Binary files /dev/null and b/demo_dataset/input/001/f081.png differ diff --git a/demo_dataset/input/001/f082.png b/demo_dataset/input/001/f082.png new file mode 100644 index 0000000..3840d46 Binary files /dev/null and b/demo_dataset/input/001/f082.png differ diff --git a/demo_dataset/input/001/f083.png b/demo_dataset/input/001/f083.png new file mode 100644 index 0000000..3f66eca Binary files /dev/null and b/demo_dataset/input/001/f083.png differ diff --git a/demo_dataset/input/001/f084.png b/demo_dataset/input/001/f084.png new file mode 100644 index 0000000..b9ea81d Binary files /dev/null and b/demo_dataset/input/001/f084.png differ diff --git a/demo_dataset/input/001/f085.png b/demo_dataset/input/001/f085.png new file mode 100644 index 0000000..bd79542 Binary files /dev/null and b/demo_dataset/input/001/f085.png differ diff --git a/demo_dataset/input/001/f086.png b/demo_dataset/input/001/f086.png new file mode 100644 index 0000000..cb6f5c2 Binary files /dev/null and b/demo_dataset/input/001/f086.png differ diff --git a/demo_dataset/input/001/f087.png b/demo_dataset/input/001/f087.png new file mode 100644 index 0000000..aad8051 Binary files /dev/null and b/demo_dataset/input/001/f087.png differ diff --git a/demo_dataset/input/001/f088.png b/demo_dataset/input/001/f088.png new file mode 100644 index 0000000..2f40f61 Binary files /dev/null and b/demo_dataset/input/001/f088.png differ diff --git a/demo_dataset/input/001/f089.png b/demo_dataset/input/001/f089.png new file mode 100644 index 0000000..7eb9878 Binary files /dev/null and b/demo_dataset/input/001/f089.png differ diff --git a/demo_dataset/input/001/f090.png b/demo_dataset/input/001/f090.png new file mode 100644 index 0000000..fdc98c0 Binary files /dev/null and b/demo_dataset/input/001/f090.png differ diff --git a/demo_dataset/input/001/f091.png b/demo_dataset/input/001/f091.png new file mode 100644 index 0000000..9362e38 Binary files /dev/null and b/demo_dataset/input/001/f091.png differ diff --git a/demo_dataset/input/001/f092.png b/demo_dataset/input/001/f092.png new file mode 100644 index 0000000..6e22225 Binary files /dev/null and b/demo_dataset/input/001/f092.png differ diff --git a/demo_dataset/input/001/f093.png b/demo_dataset/input/001/f093.png new file mode 100644 index 0000000..ca642df Binary files /dev/null and 
b/demo_dataset/input/001/f093.png differ diff --git a/demo_dataset/input/001/f094.png b/demo_dataset/input/001/f094.png new file mode 100644 index 0000000..2d87995 Binary files /dev/null and b/demo_dataset/input/001/f094.png differ diff --git a/demo_dataset/input/001/f095.png b/demo_dataset/input/001/f095.png new file mode 100644 index 0000000..283a1c9 Binary files /dev/null and b/demo_dataset/input/001/f095.png differ diff --git a/demo_dataset/input/001/f096.png b/demo_dataset/input/001/f096.png new file mode 100644 index 0000000..461ef40 Binary files /dev/null and b/demo_dataset/input/001/f096.png differ diff --git a/demo_dataset/input/001/f097.png b/demo_dataset/input/001/f097.png new file mode 100644 index 0000000..4e2e687 Binary files /dev/null and b/demo_dataset/input/001/f097.png differ diff --git a/demo_dataset/input/001/f098.png b/demo_dataset/input/001/f098.png new file mode 100644 index 0000000..b4073da Binary files /dev/null and b/demo_dataset/input/001/f098.png differ diff --git a/demo_dataset/input/001/f099.png b/demo_dataset/input/001/f099.png new file mode 100644 index 0000000..0b08ce7 Binary files /dev/null and b/demo_dataset/input/001/f099.png differ diff --git a/demo_dataset/input/001/f100.png b/demo_dataset/input/001/f100.png new file mode 100644 index 0000000..818c07f Binary files /dev/null and b/demo_dataset/input/001/f100.png differ diff --git a/demo_dataset/ref/001/f001.png b/demo_dataset/ref/001/f001.png new file mode 100644 index 0000000..caffe5e Binary files /dev/null and b/demo_dataset/ref/001/f001.png differ diff --git a/demo_dataset/ref/001/f002.png b/demo_dataset/ref/001/f002.png new file mode 100644 index 0000000..c2d4330 Binary files /dev/null and b/demo_dataset/ref/001/f002.png differ diff --git a/demo_dataset/ref/001/f003.png b/demo_dataset/ref/001/f003.png new file mode 100644 index 0000000..4c4a85b Binary files /dev/null and b/demo_dataset/ref/001/f003.png differ diff --git a/demo_dataset/ref/001/f004.png b/demo_dataset/ref/001/f004.png new file mode 100644 index 0000000..7049dda Binary files /dev/null and b/demo_dataset/ref/001/f004.png differ diff --git a/demo_dataset/ref/001/f005.png b/demo_dataset/ref/001/f005.png new file mode 100644 index 0000000..60978a9 Binary files /dev/null and b/demo_dataset/ref/001/f005.png differ diff --git a/demo_dataset/ref/001/f006.png b/demo_dataset/ref/001/f006.png new file mode 100644 index 0000000..9a2d228 Binary files /dev/null and b/demo_dataset/ref/001/f006.png differ diff --git a/demo_dataset/ref/001/f007.png b/demo_dataset/ref/001/f007.png new file mode 100644 index 0000000..84be4f5 Binary files /dev/null and b/demo_dataset/ref/001/f007.png differ diff --git a/demo_dataset/ref/001/f008.png b/demo_dataset/ref/001/f008.png new file mode 100644 index 0000000..287eca3 Binary files /dev/null and b/demo_dataset/ref/001/f008.png differ diff --git a/demo_dataset/ref/001/f009.png b/demo_dataset/ref/001/f009.png new file mode 100644 index 0000000..c2abc7b Binary files /dev/null and b/demo_dataset/ref/001/f009.png differ diff --git a/demo_dataset/ref/001/f010.png b/demo_dataset/ref/001/f010.png new file mode 100644 index 0000000..1edc354 Binary files /dev/null and b/demo_dataset/ref/001/f010.png differ diff --git a/demo_dataset/ref/001/f011.png b/demo_dataset/ref/001/f011.png new file mode 100644 index 0000000..a87b017 Binary files /dev/null and b/demo_dataset/ref/001/f011.png differ diff --git a/demo_dataset/ref/001/f012.png b/demo_dataset/ref/001/f012.png new file mode 100644 index 0000000..432d048 Binary files /dev/null 
and b/demo_dataset/ref/001/f012.png differ diff --git a/demo_dataset/ref/001/f013.png b/demo_dataset/ref/001/f013.png new file mode 100644 index 0000000..b493c9e Binary files /dev/null and b/demo_dataset/ref/001/f013.png differ diff --git a/demo_dataset/ref/001/f014.png b/demo_dataset/ref/001/f014.png new file mode 100644 index 0000000..9d4b7ff Binary files /dev/null and b/demo_dataset/ref/001/f014.png differ diff --git a/demo_dataset/ref/001/f015.png b/demo_dataset/ref/001/f015.png new file mode 100644 index 0000000..b5b4c49 Binary files /dev/null and b/demo_dataset/ref/001/f015.png differ diff --git a/demo_dataset/ref/001/f016.png b/demo_dataset/ref/001/f016.png new file mode 100644 index 0000000..5dd58ee Binary files /dev/null and b/demo_dataset/ref/001/f016.png differ diff --git a/demo_dataset/ref/001/f017.png b/demo_dataset/ref/001/f017.png new file mode 100644 index 0000000..aabe19a Binary files /dev/null and b/demo_dataset/ref/001/f017.png differ diff --git a/demo_dataset/ref/001/f018.png b/demo_dataset/ref/001/f018.png new file mode 100644 index 0000000..feb374d Binary files /dev/null and b/demo_dataset/ref/001/f018.png differ diff --git a/demo_dataset/ref/001/f019.png b/demo_dataset/ref/001/f019.png new file mode 100644 index 0000000..650325b Binary files /dev/null and b/demo_dataset/ref/001/f019.png differ diff --git a/demo_dataset/ref/001/f020.png b/demo_dataset/ref/001/f020.png new file mode 100644 index 0000000..f28b088 Binary files /dev/null and b/demo_dataset/ref/001/f020.png differ diff --git a/demo_dataset/ref/001/f021.png b/demo_dataset/ref/001/f021.png new file mode 100644 index 0000000..7045175 Binary files /dev/null and b/demo_dataset/ref/001/f021.png differ diff --git a/demo_dataset/ref/001/f022.png b/demo_dataset/ref/001/f022.png new file mode 100644 index 0000000..8febcd3 Binary files /dev/null and b/demo_dataset/ref/001/f022.png differ diff --git a/demo_dataset/ref/001/f023.png b/demo_dataset/ref/001/f023.png new file mode 100644 index 0000000..9071a53 Binary files /dev/null and b/demo_dataset/ref/001/f023.png differ diff --git a/demo_dataset/ref/001/f024.png b/demo_dataset/ref/001/f024.png new file mode 100644 index 0000000..96cfed2 Binary files /dev/null and b/demo_dataset/ref/001/f024.png differ diff --git a/demo_dataset/ref/001/f025.png b/demo_dataset/ref/001/f025.png new file mode 100644 index 0000000..d015bdb Binary files /dev/null and b/demo_dataset/ref/001/f025.png differ diff --git a/demo_dataset/ref/001/f026.png b/demo_dataset/ref/001/f026.png new file mode 100644 index 0000000..21ee589 Binary files /dev/null and b/demo_dataset/ref/001/f026.png differ diff --git a/demo_dataset/ref/001/f027.png b/demo_dataset/ref/001/f027.png new file mode 100644 index 0000000..d3f7f64 Binary files /dev/null and b/demo_dataset/ref/001/f027.png differ diff --git a/demo_dataset/ref/001/f028.png b/demo_dataset/ref/001/f028.png new file mode 100644 index 0000000..1bc16ef Binary files /dev/null and b/demo_dataset/ref/001/f028.png differ diff --git a/demo_dataset/ref/001/f029.png b/demo_dataset/ref/001/f029.png new file mode 100644 index 0000000..dd470dd Binary files /dev/null and b/demo_dataset/ref/001/f029.png differ diff --git a/demo_dataset/ref/001/f030.png b/demo_dataset/ref/001/f030.png new file mode 100644 index 0000000..0cc8a7a Binary files /dev/null and b/demo_dataset/ref/001/f030.png differ diff --git a/demo_dataset/ref/001/f031.png b/demo_dataset/ref/001/f031.png new file mode 100644 index 0000000..a2d64d0 Binary files /dev/null and b/demo_dataset/ref/001/f031.png 
differ diff --git a/demo_dataset/ref/001/f032.png b/demo_dataset/ref/001/f032.png new file mode 100644 index 0000000..bd32ebd Binary files /dev/null and b/demo_dataset/ref/001/f032.png differ diff --git a/demo_dataset/ref/001/f033.png b/demo_dataset/ref/001/f033.png new file mode 100644 index 0000000..b05c096 Binary files /dev/null and b/demo_dataset/ref/001/f033.png differ diff --git a/demo_dataset/ref/001/f034.png b/demo_dataset/ref/001/f034.png new file mode 100644 index 0000000..6371d2e Binary files /dev/null and b/demo_dataset/ref/001/f034.png differ diff --git a/demo_dataset/ref/001/f035.png b/demo_dataset/ref/001/f035.png new file mode 100644 index 0000000..00bc056 Binary files /dev/null and b/demo_dataset/ref/001/f035.png differ diff --git a/demo_dataset/ref/001/f036.png b/demo_dataset/ref/001/f036.png new file mode 100644 index 0000000..3dd86cd Binary files /dev/null and b/demo_dataset/ref/001/f036.png differ diff --git a/demo_dataset/ref/001/f037.png b/demo_dataset/ref/001/f037.png new file mode 100644 index 0000000..f11fb3b Binary files /dev/null and b/demo_dataset/ref/001/f037.png differ diff --git a/demo_dataset/ref/001/f038.png b/demo_dataset/ref/001/f038.png new file mode 100644 index 0000000..4112233 Binary files /dev/null and b/demo_dataset/ref/001/f038.png differ diff --git a/demo_dataset/ref/001/f039.png b/demo_dataset/ref/001/f039.png new file mode 100644 index 0000000..a7de825 Binary files /dev/null and b/demo_dataset/ref/001/f039.png differ diff --git a/demo_dataset/ref/001/f040.png b/demo_dataset/ref/001/f040.png new file mode 100644 index 0000000..96c185f Binary files /dev/null and b/demo_dataset/ref/001/f040.png differ diff --git a/demo_dataset/ref/001/f041.png b/demo_dataset/ref/001/f041.png new file mode 100644 index 0000000..cc0cc6a Binary files /dev/null and b/demo_dataset/ref/001/f041.png differ diff --git a/demo_dataset/ref/001/f042.png b/demo_dataset/ref/001/f042.png new file mode 100644 index 0000000..c845c01 Binary files /dev/null and b/demo_dataset/ref/001/f042.png differ diff --git a/demo_dataset/ref/001/f043.png b/demo_dataset/ref/001/f043.png new file mode 100644 index 0000000..17bc33e Binary files /dev/null and b/demo_dataset/ref/001/f043.png differ diff --git a/demo_dataset/ref/001/f044.png b/demo_dataset/ref/001/f044.png new file mode 100644 index 0000000..1245533 Binary files /dev/null and b/demo_dataset/ref/001/f044.png differ diff --git a/demo_dataset/ref/001/f045.png b/demo_dataset/ref/001/f045.png new file mode 100644 index 0000000..b17f00e Binary files /dev/null and b/demo_dataset/ref/001/f045.png differ diff --git a/demo_dataset/ref/001/f046.png b/demo_dataset/ref/001/f046.png new file mode 100644 index 0000000..a872878 Binary files /dev/null and b/demo_dataset/ref/001/f046.png differ diff --git a/demo_dataset/ref/001/f047.png b/demo_dataset/ref/001/f047.png new file mode 100644 index 0000000..46a7872 Binary files /dev/null and b/demo_dataset/ref/001/f047.png differ diff --git a/demo_dataset/ref/001/f048.png b/demo_dataset/ref/001/f048.png new file mode 100644 index 0000000..15b2cbc Binary files /dev/null and b/demo_dataset/ref/001/f048.png differ diff --git a/demo_dataset/ref/001/f049.png b/demo_dataset/ref/001/f049.png new file mode 100644 index 0000000..6ead343 Binary files /dev/null and b/demo_dataset/ref/001/f049.png differ diff --git a/demo_dataset/ref/001/f050.png b/demo_dataset/ref/001/f050.png new file mode 100644 index 0000000..57f4791 Binary files /dev/null and b/demo_dataset/ref/001/f050.png differ diff --git 
a/demo_dataset/ref/001/f051.png b/demo_dataset/ref/001/f051.png new file mode 100644 index 0000000..a01ffd5 Binary files /dev/null and b/demo_dataset/ref/001/f051.png differ diff --git a/demo_dataset/ref/001/f052.png b/demo_dataset/ref/001/f052.png new file mode 100644 index 0000000..152ee22 Binary files /dev/null and b/demo_dataset/ref/001/f052.png differ diff --git a/demo_dataset/ref/001/f053.png b/demo_dataset/ref/001/f053.png new file mode 100644 index 0000000..f742be4 Binary files /dev/null and b/demo_dataset/ref/001/f053.png differ diff --git a/demo_dataset/ref/001/f054.png b/demo_dataset/ref/001/f054.png new file mode 100644 index 0000000..fbb004d Binary files /dev/null and b/demo_dataset/ref/001/f054.png differ diff --git a/demo_dataset/ref/001/f055.png b/demo_dataset/ref/001/f055.png new file mode 100644 index 0000000..114017c Binary files /dev/null and b/demo_dataset/ref/001/f055.png differ diff --git a/demo_dataset/ref/001/f056.png b/demo_dataset/ref/001/f056.png new file mode 100644 index 0000000..7f3d05b Binary files /dev/null and b/demo_dataset/ref/001/f056.png differ diff --git a/demo_dataset/ref/001/f057.png b/demo_dataset/ref/001/f057.png new file mode 100644 index 0000000..493c7b7 Binary files /dev/null and b/demo_dataset/ref/001/f057.png differ diff --git a/demo_dataset/ref/001/f058.png b/demo_dataset/ref/001/f058.png new file mode 100644 index 0000000..68ef3a4 Binary files /dev/null and b/demo_dataset/ref/001/f058.png differ diff --git a/demo_dataset/ref/001/f059.png b/demo_dataset/ref/001/f059.png new file mode 100644 index 0000000..53d0823 Binary files /dev/null and b/demo_dataset/ref/001/f059.png differ diff --git a/demo_dataset/ref/001/f060.png b/demo_dataset/ref/001/f060.png new file mode 100644 index 0000000..62d79f9 Binary files /dev/null and b/demo_dataset/ref/001/f060.png differ diff --git a/demo_dataset/ref/001/f061.png b/demo_dataset/ref/001/f061.png new file mode 100644 index 0000000..319d534 Binary files /dev/null and b/demo_dataset/ref/001/f061.png differ diff --git a/demo_dataset/ref/001/f062.png b/demo_dataset/ref/001/f062.png new file mode 100644 index 0000000..0269431 Binary files /dev/null and b/demo_dataset/ref/001/f062.png differ diff --git a/demo_dataset/ref/001/f063.png b/demo_dataset/ref/001/f063.png new file mode 100644 index 0000000..3ec2f92 Binary files /dev/null and b/demo_dataset/ref/001/f063.png differ diff --git a/demo_dataset/ref/001/f064.png b/demo_dataset/ref/001/f064.png new file mode 100644 index 0000000..252b26b Binary files /dev/null and b/demo_dataset/ref/001/f064.png differ diff --git a/demo_dataset/ref/001/f065.png b/demo_dataset/ref/001/f065.png new file mode 100644 index 0000000..3c5ccbf Binary files /dev/null and b/demo_dataset/ref/001/f065.png differ diff --git a/demo_dataset/ref/001/f066.png b/demo_dataset/ref/001/f066.png new file mode 100644 index 0000000..1318ae3 Binary files /dev/null and b/demo_dataset/ref/001/f066.png differ diff --git a/demo_dataset/ref/001/f067.png b/demo_dataset/ref/001/f067.png new file mode 100644 index 0000000..bb3b5dc Binary files /dev/null and b/demo_dataset/ref/001/f067.png differ diff --git a/demo_dataset/ref/001/f068.png b/demo_dataset/ref/001/f068.png new file mode 100644 index 0000000..dd983e4 Binary files /dev/null and b/demo_dataset/ref/001/f068.png differ diff --git a/demo_dataset/ref/001/f069.png b/demo_dataset/ref/001/f069.png new file mode 100644 index 0000000..c1d1e6b Binary files /dev/null and b/demo_dataset/ref/001/f069.png differ diff --git a/demo_dataset/ref/001/f070.png 
b/demo_dataset/ref/001/f070.png new file mode 100644 index 0000000..160ccb7 Binary files /dev/null and b/demo_dataset/ref/001/f070.png differ diff --git a/demo_dataset/ref/001/f071.png b/demo_dataset/ref/001/f071.png new file mode 100644 index 0000000..fd9f9e5 Binary files /dev/null and b/demo_dataset/ref/001/f071.png differ diff --git a/demo_dataset/ref/001/f072.png b/demo_dataset/ref/001/f072.png new file mode 100644 index 0000000..ef42602 Binary files /dev/null and b/demo_dataset/ref/001/f072.png differ diff --git a/demo_dataset/ref/001/f073.png b/demo_dataset/ref/001/f073.png new file mode 100644 index 0000000..6e34055 Binary files /dev/null and b/demo_dataset/ref/001/f073.png differ diff --git a/demo_dataset/ref/001/f074.png b/demo_dataset/ref/001/f074.png new file mode 100644 index 0000000..0c83740 Binary files /dev/null and b/demo_dataset/ref/001/f074.png differ diff --git a/demo_dataset/ref/001/f075.png b/demo_dataset/ref/001/f075.png new file mode 100644 index 0000000..84c5bc0 Binary files /dev/null and b/demo_dataset/ref/001/f075.png differ diff --git a/demo_dataset/ref/001/f076.png b/demo_dataset/ref/001/f076.png new file mode 100644 index 0000000..5efffec Binary files /dev/null and b/demo_dataset/ref/001/f076.png differ diff --git a/demo_dataset/ref/001/f077.png b/demo_dataset/ref/001/f077.png new file mode 100644 index 0000000..7171cd0 Binary files /dev/null and b/demo_dataset/ref/001/f077.png differ diff --git a/demo_dataset/ref/001/f078.png b/demo_dataset/ref/001/f078.png new file mode 100644 index 0000000..dfd5298 Binary files /dev/null and b/demo_dataset/ref/001/f078.png differ diff --git a/demo_dataset/ref/001/f079.png b/demo_dataset/ref/001/f079.png new file mode 100644 index 0000000..75905fe Binary files /dev/null and b/demo_dataset/ref/001/f079.png differ diff --git a/demo_dataset/ref/001/f080.png b/demo_dataset/ref/001/f080.png new file mode 100644 index 0000000..dcc6433 Binary files /dev/null and b/demo_dataset/ref/001/f080.png differ diff --git a/demo_dataset/ref/001/f081.png b/demo_dataset/ref/001/f081.png new file mode 100644 index 0000000..a22c2d5 Binary files /dev/null and b/demo_dataset/ref/001/f081.png differ diff --git a/demo_dataset/ref/001/f082.png b/demo_dataset/ref/001/f082.png new file mode 100644 index 0000000..3840d46 Binary files /dev/null and b/demo_dataset/ref/001/f082.png differ diff --git a/demo_dataset/ref/001/f083.png b/demo_dataset/ref/001/f083.png new file mode 100644 index 0000000..3f66eca Binary files /dev/null and b/demo_dataset/ref/001/f083.png differ diff --git a/demo_dataset/ref/001/f084.png b/demo_dataset/ref/001/f084.png new file mode 100644 index 0000000..b9ea81d Binary files /dev/null and b/demo_dataset/ref/001/f084.png differ diff --git a/demo_dataset/ref/001/f085.png b/demo_dataset/ref/001/f085.png new file mode 100644 index 0000000..bd79542 Binary files /dev/null and b/demo_dataset/ref/001/f085.png differ diff --git a/demo_dataset/ref/001/f086.png b/demo_dataset/ref/001/f086.png new file mode 100644 index 0000000..cb6f5c2 Binary files /dev/null and b/demo_dataset/ref/001/f086.png differ diff --git a/demo_dataset/ref/001/f087.png b/demo_dataset/ref/001/f087.png new file mode 100644 index 0000000..aad8051 Binary files /dev/null and b/demo_dataset/ref/001/f087.png differ diff --git a/demo_dataset/ref/001/f088.png b/demo_dataset/ref/001/f088.png new file mode 100644 index 0000000..2f40f61 Binary files /dev/null and b/demo_dataset/ref/001/f088.png differ diff --git a/demo_dataset/ref/001/f089.png b/demo_dataset/ref/001/f089.png new 
file mode 100644 index 0000000..7eb9878 Binary files /dev/null and b/demo_dataset/ref/001/f089.png differ diff --git a/demo_dataset/ref/001/f090.png b/demo_dataset/ref/001/f090.png new file mode 100644 index 0000000..fdc98c0 Binary files /dev/null and b/demo_dataset/ref/001/f090.png differ diff --git a/demo_dataset/ref/001/f091.png b/demo_dataset/ref/001/f091.png new file mode 100644 index 0000000..9362e38 Binary files /dev/null and b/demo_dataset/ref/001/f091.png differ diff --git a/demo_dataset/ref/001/f092.png b/demo_dataset/ref/001/f092.png new file mode 100644 index 0000000..6e22225 Binary files /dev/null and b/demo_dataset/ref/001/f092.png differ diff --git a/demo_dataset/ref/001/f093.png b/demo_dataset/ref/001/f093.png new file mode 100644 index 0000000..ca642df Binary files /dev/null and b/demo_dataset/ref/001/f093.png differ diff --git a/demo_dataset/ref/001/f094.png b/demo_dataset/ref/001/f094.png new file mode 100644 index 0000000..2d87995 Binary files /dev/null and b/demo_dataset/ref/001/f094.png differ diff --git a/demo_dataset/ref/001/f095.png b/demo_dataset/ref/001/f095.png new file mode 100644 index 0000000..283a1c9 Binary files /dev/null and b/demo_dataset/ref/001/f095.png differ diff --git a/demo_dataset/ref/001/f096.png b/demo_dataset/ref/001/f096.png new file mode 100644 index 0000000..461ef40 Binary files /dev/null and b/demo_dataset/ref/001/f096.png differ diff --git a/demo_dataset/ref/001/f097.png b/demo_dataset/ref/001/f097.png new file mode 100644 index 0000000..4e2e687 Binary files /dev/null and b/demo_dataset/ref/001/f097.png differ diff --git a/demo_dataset/ref/001/f098.png b/demo_dataset/ref/001/f098.png new file mode 100644 index 0000000..b4073da Binary files /dev/null and b/demo_dataset/ref/001/f098.png differ diff --git a/demo_dataset/ref/001/f099.png b/demo_dataset/ref/001/f099.png new file mode 100644 index 0000000..0b08ce7 Binary files /dev/null and b/demo_dataset/ref/001/f099.png differ diff --git a/demo_dataset/ref/001/f100.png b/demo_dataset/ref/001/f100.png new file mode 100644 index 0000000..ce55da3 Binary files /dev/null and b/demo_dataset/ref/001/f100.png differ