
Commit

add code, pic, requirements.txt
LuminosityX authored Jul 12, 2023
1 parent 7366ad9 commit 9209365
Showing 53 changed files with 8,326 additions and 0 deletions.
549 changes: 549 additions & 0 deletions data.py

Large diffs are not rendered by default.

172 changes: 172 additions & 0 deletions evaluation.py
@@ -0,0 +1,172 @@
import numpy as np
#import numpy


def i2t_SCAN(sims, npts=None, return_ranks=False):
    """
    Images->Text (Image Annotation)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of image-to-caption similarities
    """
    # npts argument is ignored; it is derived from sims directly
    npts = sims.shape[0]
    ranks = np.zeros(npts)
    top1 = np.zeros(npts)
    for index in range(npts):
        inds = np.argsort(sims[index])[::-1]
        # Score: rank of the best-ranked ground-truth caption among the 5
        rank = 1e20
        for i in range(5 * index, 5 * index + 5, 1):
            tmp = np.where(inds == i)[0][0]
            if tmp < rank:
                rank = tmp
        ranks[index] = rank
        top1[index] = inds[0]

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)

    r20 = 100.0 * len(np.where(ranks < 20)[0]) / len(ranks)
    r50 = 100.0 * len(np.where(ranks < 50)[0]) / len(ranks)
    r70 = 100.0 * len(np.where(ranks < 70)[0]) / len(ranks)
    r100 = 100.0 * len(np.where(ranks < 100)[0]) / len(ranks)

    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr)

def t2i_SCAN(sims, npts=None, return_ranks=False):
    """
    Text->Images (Image Search)
    Images: (N, n_region, d) matrix of images
    Captions: (5N, max_n_word, d) matrix of captions
    CapLens: (5N) array of caption lengths
    sims: (N, 5N) matrix of image-to-caption similarities
    """
    # npts argument is ignored; it is derived from sims directly
    npts = sims.shape[0]
    ranks = np.zeros(5 * npts)
    top1 = np.zeros(5 * npts)

    # --> (5N(caption), N(image))
    sims = sims.T

    for index in range(npts):
        for i in range(5):
            inds = np.argsort(sims[5 * index + i])[::-1]
            ranks[5 * index + i] = np.where(inds == index)[0][0]
            top1[5 * index + i] = inds[0]

    # Compute metrics
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)

    r20 = 100.0 * len(np.where(ranks < 20)[0]) / len(ranks)
    r50 = 100.0 * len(np.where(ranks < 50)[0]) / len(ranks)
    r70 = 100.0 * len(np.where(ranks < 70)[0]) / len(ranks)
    r100 = 100.0 * len(np.where(ranks < 100)[0]) / len(ranks)

    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr)

'''def i2t(images, captions, npts=None, measure='cosine', return_ranks=False):
    """
    Images->Text (Image Annotation)
    Images: (5N, K) matrix of images
    Captions: (5N, K) matrix of captions
    """
    if npts is None:
        npts = int(images.shape[0] / 5)
    index_list = []
    ranks = numpy.zeros(npts)
    top1 = numpy.zeros(npts)
    for index in range(npts):
        # Get query image
        im = images[5 * index].reshape(1, images.shape[1])
        # Compute scores
        d = numpy.dot(im, captions.T).flatten()
        inds = numpy.argsort(d)[::-1]
        index_list.append(inds[0])
        # Score
        rank = 1e20
        for i in range(5 * index, 5 * index + 5, 1):
            tmp = numpy.where(inds == i)[0][0]
            if tmp < rank:
                rank = tmp
        ranks[index] = rank
        top1[index] = inds[0]
    # Compute metrics
    r1 = 100.0 * len(numpy.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(numpy.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(numpy.where(ranks < 10)[0]) / len(ranks)
    r20 = 100.0 * len(numpy.where(ranks < 20)[0]) / len(ranks)
    r50 = 100.0 * len(numpy.where(ranks < 50)[0]) / len(ranks)
    r70 = 100.0 * len(numpy.where(ranks < 70)[0]) / len(ranks)
    r100 = 100.0 * len(numpy.where(ranks < 100)[0]) / len(ranks)
    medr = numpy.floor(numpy.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr)


def t2i(images, captions, npts=None, measure='cosine', return_ranks=False):
    """
    Text->Images (Image Search)
    Images: (5N, K) matrix of images
    Captions: (5N, K) matrix of captions
    """
    if npts is None:
        npts = int(images.shape[0] / 5)
    ims = numpy.array([images[i] for i in range(0, len(images), 5)])
    ranks = numpy.zeros(5 * npts)
    top1 = numpy.zeros(5 * npts)
    for index in range(npts):
        # Get query captions
        queries = captions[5 * index:5 * index + 5]
        # Compute scores
        d = numpy.dot(queries, ims.T)
        inds = numpy.zeros(d.shape)
        for i in range(len(inds)):
            inds[i] = numpy.argsort(d[i])[::-1]
            ranks[5 * index + i] = numpy.where(inds[i] == index)[0][0]
            top1[5 * index + i] = inds[i][0]
    # Compute metrics
    r1 = 100.0 * len(numpy.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(numpy.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(numpy.where(ranks < 10)[0]) / len(ranks)
    r20 = 100.0 * len(numpy.where(ranks < 20)[0]) / len(ranks)
    r50 = 100.0 * len(numpy.where(ranks < 50)[0]) / len(ranks)
    r70 = 100.0 * len(numpy.where(ranks < 70)[0]) / len(ranks)
    r100 = 100.0 * len(numpy.where(ranks < 100)[0]) / len(ranks)
    medr = numpy.floor(numpy.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, r20, r50, r70, r100, medr, meanr)'''
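A minimal usage sketch, not part of the commit, showing how the two metric functions above can be called on a dummy similarity matrix with 5 captions per image; the module name in the import simply mirrors the file name evaluation.py:

import numpy as np
from evaluation import i2t_SCAN, t2i_SCAN

N = 100                               # number of images
sims = np.random.rand(N, 5 * N)       # (N images) x (5N captions) similarity scores
i2t_metrics = i2t_SCAN(sims)          # (r1, r5, r10, r20, r50, r70, r100, medr, meanr)
t2i_metrics = t2i_SCAN(sims)
print("i2t R@1/R@5/R@10:", i2t_metrics[:3])
print("t2i R@1/R@5/R@10:", t2i_metrics[:3])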
Binary file added framework.png
Empty file added meter/__init__.py
173 changes: 173 additions & 0 deletions meter/config.py
@@ -0,0 +1,173 @@
from sacred import Experiment

ex = Experiment("METER")


def _loss_names(d):
    ret = {
        "itm": 0,
        "mlm": 0,
        "mpp": 0,
        "vqa": 0,
        "vcr": 0,
        "vcr_qar": 0,
        "nlvr2": 0,
        "irtr": 0,
        "contras": 0,
        "snli": 0,
    }
    ret.update(d)
    return ret


@ex.config
def config():
    exp_name = "finetune_irtr_f30k"
    seed = 0
    datasets = "f30k"
    loss_names = _loss_names({"irtr": 1})
    batch_size = 58  # desired effective batch size; the PL trainer accumulates gradients when the per-step batch is smaller.
    margin = 0.2

    # Image setting
    # train_transform_keys = ["clip"]
    # val_transform_keys = ["clip"]
    image_size = 224
    patch_size = 32
    # draw_false_image = 1
    image_only = False

    # Text Setting
    # vqav2_label_size = 3129
    max_text_len = 32
    tokenizer = "bert-base-uncased"
    vocab_size = 30522
    whole_word_masking = False  # note that whole_word_masking does not work for RoBERTa
    mlm_prob = 0.15
    # draw_false_text = 0

    # Transformer Setting
    num_top_layer = 6
    input_image_embed_size = 1024
    input_text_embed_size = 768
    vit = "swin_base_patch4_window7_224_in22k"
    hidden_size = 768
    num_heads = 12
    num_layers = 6
    mlp_ratio = 4
    drop_rate = 0.1

    # Optimizer Setting
    optim_type = "adamw"
    learning_rate = 1e-4
    lr_update = 10
    weight_decay = 0.01
    decay_power = 1
    max_epoch = 100
    max_steps = None
    warmup_steps = 10000
    end_lr = 0
    lr_mult_head = 5  # lr multiplier for downstream heads
    lr_mult_cross_modal = 5  # lr multiplier for the cross-modal module

    # Downstream Setting
    get_recall_metric = False

    # PL Trainer Setting
    resume_from = None
    fast_dev_run = False
    val_check_interval = 1.0
    test_only = False
    checkpoint = '/data3/lihaoxuan/New_Time/TKDE/github/runs/i2t_freeze/epoch=68-step=172499-v1.ckpt'

    # the params below vary with the environment
    data_root = '/data1/lihaoxuan/orignal-datasets/'
    log_dir = "result"
    per_gpu_batchsize = 58  # set this manually for your hardware, e.g. per_gpu_batchsize=<N>
    num_gpus = 1
    num_nodes = 1
    load_path = ""
    num_workers = 8
    precision = 16

    # SCAN
    direction = 'i2t'
    lambda_softmax = 9


@ex.named_config
def coco_config():
    exp_name = "finetune_irtr_coco"
    seed = 0
    datasets = "coco"
    loss_names = _loss_names({"irtr": 1})
    batch_size = 58  # desired effective batch size; the PL trainer accumulates gradients when the per-step batch is smaller.
    margin = 0.2

    # Image setting
    # train_transform_keys = ["clip"]
    # val_transform_keys = ["clip"]
    image_size = 224
    patch_size = 32
    # draw_false_image = 1
    image_only = False

    # Text Setting
    # vqav2_label_size = 3129
    max_text_len = 32
    tokenizer = "bert-base-uncased"
    vocab_size = 30522
    whole_word_masking = False  # note that whole_word_masking does not work for RoBERTa
    mlm_prob = 0.15
    # draw_false_text = 0

    # Transformer Setting
    num_top_layer = 6
    input_image_embed_size = 1024
    input_text_embed_size = 768
    vit = "swin_base_patch4_window7_224_in22k"
    hidden_size = 768
    num_heads = 12
    num_layers = 6
    mlp_ratio = 4
    drop_rate = 0.1

    # Optimizer Setting
    optim_type = "adamw"
    learning_rate = 1e-4
    lr_update = 10
    weight_decay = 0.01
    decay_power = 1
    max_epoch = 100
    max_steps = None
    warmup_steps = 10000
    end_lr = 0
    lr_mult_head = 5  # lr multiplier for downstream heads
    lr_mult_cross_modal = 5  # lr multiplier for the cross-modal module

    # Downstream Setting
    get_recall_metric = False

    # PL Trainer Setting
    resume_from = None
    fast_dev_run = False
    val_check_interval = 1.0
    test_only = False
    checkpoint = '/data3/lihaoxuan/New_Time/TKDE/github/runs/i2t_freeze/last.ckpt'

    # the params below vary with the environment
    data_root = '/data1/lihaoxuan/orignal-datasets/'
    log_dir = "result"
    per_gpu_batchsize = 58  # set this manually for your hardware, e.g. per_gpu_batchsize=<N>
    num_gpus = 1
    num_nodes = 1
    load_path = ""
    num_workers = 8
    precision = 16

    # SCAN
    direction = 'i2t'
    lambda_softmax = 9
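For orientation, a minimal sketch (an assumption, not part of this commit) of how a sacred config like the one above is typically consumed; the run.py file name and the captured main function are hypothetical:

from meter.config import ex

@ex.automain
def main(_config):
    # sacred injects every value defined in config() as a plain dict
    print(_config["exp_name"], _config["vit"], _config["direction"])

# From the command line, the named config and per-run overrides can be applied, e.g.:
#   python run.py with coco_config per_gpu_batchsize=32 num_gpus=2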



19 changes: 19 additions & 0 deletions meter/datamodules/__init__.py
@@ -0,0 +1,19 @@
from .vg_caption_datamodule import VisualGenomeCaptionDataModule
from .f30k_caption_karpathy_datamodule import F30KCaptionKarpathyDataModule
from .coco_caption_karpathy_datamodule import CocoCaptionKarpathyDataModule
from .conceptual_caption_datamodule import ConceptualCaptionDataModule
from .sbu_datamodule import SBUCaptionDataModule
from .vqav2_datamodule import VQAv2DataModule
from .nlvr2_datamodule import NLVR2DataModule
from .snli_datamodule import SNLIDataModule

_datamodules = {
    "vg": VisualGenomeCaptionDataModule,
    "f30k": F30KCaptionKarpathyDataModule,
    "coco": CocoCaptionKarpathyDataModule,
    "gcc": ConceptualCaptionDataModule,
    "sbu": SBUCaptionDataModule,
    "vqa": VQAv2DataModule,
    "nlvr2": NLVR2DataModule,
    "snli": SNLIDataModule,
}
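A short illustrative sketch (an assumption, not part of this commit): the datasets string from meter/config.py is expected to index this registry to select a LightningDataModule class; the constructor call is omitted here because its signature lives in the separate BaseDataModule.

from meter.datamodules import _datamodules

dm_cls = _datamodules["f30k"]   # e.g. datasets = "f30k" in the default config
print(dm_cls.__name__)          # -> "F30KCaptionKarpathyDataModule"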
19 changes: 19 additions & 0 deletions meter/datamodules/coco_caption_karpathy_datamodule.py
@@ -0,0 +1,19 @@
from ..datasets import CocoCaptionKarpathyDataset
from .datamodule_base import BaseDataModule


class CocoCaptionKarpathyDataModule(BaseDataModule):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def dataset_cls(self):
        return CocoCaptionKarpathyDataset

    @property
    def dataset_cls_no_false(self):
        return CocoCaptionKarpathyDataset

    @property
    def dataset_name(self):
        return "coco"
