# mq-glip-l.yaml
# Config file from a fork of YifanXu74/MQ-Det.
# Model section.
# BACKBONE, SWINT, LANGUAGE_BACKBONE, ROI_BOX_HEAD, RPN and DYHEAD are
# sub-mappings of MODEL; FUSE_CONFIG is a sub-mapping of DYHEAD.
# BACKBONE.OUT_CHANNELS and SWINT.OUT_CHANNELS are two distinct keys and must
# stay in separate mappings (at one level they would be duplicate keys).
# Values written as "(a, b, ...)" are plain YAML scalars; presumably the config
# loader evaluates them into tuples -- verify against the loader before quoting
# or reformatting them.
MODEL:
  META_ARCHITECTURE: "GeneralizedVLRCNN_New"
  # Checkpoint path, relative to the working directory.
  WEIGHT: "MODEL/glip_large_model.pth"
  RPN_ONLY: True
  RPN_ARCHITECTURE: "VLDYHEAD"

  BACKBONE:
    CONV_BODY: "SWINT-FPN-RETINANET"
    OUT_CHANNELS: 256

  SWINT:
    EMBED_DIM: 192
    # The next two entries and OUT_CHANNELS each list four values
    # (presumably one per backbone stage -- TODO confirm).
    DEPTHS: (2, 2, 18, 2)
    NUM_HEADS: (6, 12, 24, 48)
    WINDOW_SIZE: 12
    OUT_CHANNELS: (192, 384, 768, 1536)
    DROP_PATH_RATE: 0.4

  LANGUAGE_BACKBONE:
    FREEZE: False
    TOKENIZER_TYPE: "bert-base-uncased"
    MODEL_TYPE: "bert-base-uncased"  # "roberta-base", "clip"
    # Alternative: point both entries at a local copy of the model.
    # TOKENIZER_TYPE: "MODEL/THIRD_PARTIES/bert-base-uncased"
    # MODEL_TYPE: "MODEL/THIRD_PARTIES/bert-base-uncased"  # "roberta-base", "clip"
    MASK_SPECIAL: False

  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.125, 0.0625, 0.03125, 0.015625, 0.0078125)  # TODO: check
    POOLER_SAMPLING_RATIO: 0

  RPN:
    USE_FPN: True
    # ANCHOR_SIZES and ANCHOR_STRIDE each list five values.
    ANCHOR_SIZES: (64, 128, 256, 512, 1024)
    ANCHOR_STRIDE: (8, 16, 32, 64, 128)
    ASPECT_RATIOS: (1.0,)
    SCALES_PER_OCTAVE: 1

  DYHEAD:
    CHANNELS: 256
    NUM_CONVS: 8
    USE_GN: True
    USE_DYRELU: True
    USE_DFCONV: True
    USE_DYFUSE: True
    TOPK: 9  # topk for selecting candidate positive samples from each level
    SCORE_AGG: "MEAN"
    LOG_SCALE: 0.0
    # USE_CHECKPOINT: True
    USE_CHECKPOINT: False

    FUSE_CONFIG:
      USE_FUSED_FEATURES_DOT_PRODUCT: True
      EARLY_FUSE_ON: True
      TYPE: "MHA-B"
      USE_CLASSIFICATION_LOSS: False
      USE_TOKEN_LOSS: False
      USE_CONTRASTIVE_ALIGN_LOSS: False
      CONTRASTIVE_HIDDEN_DIM: 64
      USE_DOT_PRODUCT_TOKEN_LOSS: True
      USE_LAYER_SCALE: True
      CLAMP_MIN_FOR_UNDERFLOW: True
      CLAMP_MAX_FOR_OVERFLOW: True
      CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True
      CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True
      CLAMP_DOT_PRODUCT: True
# Evaluation section. IMS_PER_BATCH here is TEST.IMS_PER_BATCH, a separate key
# from SOLVER.IMS_PER_BATCH.
TEST:
  EVAL_TASK: 'detection'
  DURING_TRAINING: False
  IMS_PER_BATCH: 8
# Dataset section. DATASETS.TEST (a dataset-name tuple) is a child of DATASETS
# and is distinct from the top-level TEST section.
DATASETS:
  TRAIN: ("object365_grounding_train", )
  TEST: ("coco_2017_val", )
  ONE_HOT: False

  # Per-dataset copy counts; the trailing notes are the original author's
  # size estimates (size * copies).
  FLICKR_COPY: 8  # 0.15 * 8 = ~1.2M
  MIXED_COPY: 4  # 0.6 * 4 = ~2.4M
  OBJECT365_COPY: 2  # 1.4 * 2 = ~2.8M
  VG_COPY: 3  # 0.4 * 3 = ~1.2M
  IN_COPY: 2  # 0.67 * 2 = ~1.33M
  OI_COPY: 1  # 2M * 1 = 2M

  DISABLE_SHUFFLE: False
  ADD_DET_PROMPT: False
  RANDOM_SAMPLE_NEG: 85
  CONTROL_PROB: (0.0, 0.0, 0.5, 0.0)
  FURTHER_SCREEN: True

  CAPTION_CONF: 0.5
  CAPTION_NMS: -1.0
  CAPTION_MIN_BOX: 1

  # Quoted on purpose: the value is a period followed by a space.
  SEPARATION_TOKENS: ". "
  PACK_RANDOM_CAPTION_NUMBER: 20
  NO_RANDOM_PACK_PROBABILITY: 0.4
  RANDOM_PACK_PROB: 0.5
  CAPTION_FORMAT_VERSION: "v2"

  EXCLUDE_CROWD: True
  SPECIAL_SAFEGUARD_FOR_COCO_GROUNDING: True
# Input section: per-channel normalisation constants (three values each) and
# the train/test size limits.
# NOTE(review): channel order of PIXEL_MEAN / PIXEL_STD is not stated in this
# file -- confirm against the data pipeline before changing.
INPUT:
  PIXEL_MEAN: [103.530, 116.280, 123.675]
  PIXEL_STD: [57.375, 57.120, 58.395]
  MIN_SIZE_TRAIN: 800
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
# Augmentation section. The value is a plain scalar in tuple notation and is
# kept byte-for-byte; its largest entry (800) equals INPUT.MIN_SIZE_TRAIN.
AUGMENT:
  MULT_MIN_SIZE_TRAIN: (480,560,640,720,800)
# Data-loader section.
DATALOADER:
  SIZE_DIVISIBILITY: 32
  # NOTE(review): 0 workers presumably means loading happens in the main
  # process -- confirm this is intended outside of debugging.
  NUM_WORKERS: 0
# Optimisation section. CLIP_GRADIENTS is a sub-mapping of SOLVER, so its
# ENABLED key is SOLVER.CLIP_GRADIENTS.ENABLED. SOLVER.IMS_PER_BATCH is a
# separate key from TEST.IMS_PER_BATCH.
SOLVER:
  OPTIMIZER: ADAMW
  BASE_LR: 0.0001
  # ---- should be modified during fine-tuning ----
  GATE_LR: 0.0025
  QUERY_LR: 0.00001
  # -----------------------------------------------
  LANG_LR: 0.00001
  WEIGHT_DECAY: 0.01
  WEIGHT_DECAY_SCHEDULE: True

  # STEPS: (0.67, 0.89)
  STEPS: (0.95,)
  # MAX_ITER: 1000000
  MAX_EPOCH: 1
  # IMS_PER_BATCH: 64
  IMS_PER_BATCH: 8
  WARMUP_ITERS: 2000
  WARMUP_FACTOR: 0.001

  FIND_UNUSED_PARAMETERS: False
  USE_AMP: True

  # NOTE(review): the very large CHECKPOINT_PERIOD next to CHECKPOINT_PER_EPOCH
  # looks like iteration-based saving is effectively turned off in favour of
  # per-epoch saving -- confirm against the trainer.
  CHECKPOINT_PERIOD: 99999999
  CHECKPOINT_PER_EPOCH: 2.0
  TUNING_HIGHLEVEL_OVERRIDE: "vision_query"
  MAX_TO_KEEP: 4

  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 1.0
    NORM_TYPE: 2.0
# Vision-query section. ENABLED here is VISION_QUERY.ENABLED, a separate key
# from SOLVER.CLIP_GRADIENTS.ENABLED.
VISION_QUERY:
  ENABLED: True
  # Query-bank file path, relative to the working directory.
  QUERY_BANK_PATH: 'MODEL/object365_query_5000_pool7_sel_large.pth'
  # Written as 0.0 rather than "0." -- same float value, unambiguous notation.
  PURE_TEXT_RATE: 0.0
  TEXT_DROPOUT: 0.4
  VISION_SCALE: 1.0
  NUM_QUERY_PER_CLASS: 5
  RANDOM_KSHOT: False
  ADD_ADAPT_LAYER: False
  CONDITION_GATE: True
  NONLINEAR_GATE: True
  NO_CAT: True
  QUERY_ADDITION_NAME: '_L'