forked from Vision-CAIR/MiniGPT-4
Merge pull request Vision-CAIR#406 from junchen14/main
add evaluation script
Showing 27 changed files with 2,051 additions and 88 deletions.
@@ -0,0 +1,79 @@
model:
  arch: minigpt_v2
  model_type: pretrain
  max_txt_len: 500
  end_sym: "</s>"
  low_resource: False
  prompt_template: '[INST] {} [/INST]'
  llama_model: ""
  ckpt: ""
  lora_r: 64
  lora_alpha: 16


datasets:
  cc_sbu_align:
    vis_processor:
      train:
        name: "blip2_image_eval"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"

evaluation_datasets:
  refcoco:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  refcocog:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  refcoco+:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  gqa:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  okvqa:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  vizwiz:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  iconvqa:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  vsr:
    eval_file_path: cambridgeltl/vsr_zeroshot
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 10
  hm:
    eval_file_path: /path/to/eval/annotation/path
    img_path: /path/to/eval/image/path
    max_new_tokens: 20
    batch_size: 100

run:
  task: image_text_pretrain
  name: minigptv2_evaluation
  save_path: /path/to/save/folder_path
@@ -0,0 +1,104 @@
## Evaluation Instructions for MiniGPT-v2

### Data preparation

Download the images and annotations for each evaluation dataset:

Image source | Download path
--- | :---:
OKVQA | <a href="https://drive.google.com/drive/folders/1jxIgAhtaLu_YqnZEl8Ym11f7LhX3nptN?usp=sharing">annotations</a> <a href="http://images.cocodataset.org/zips/train2017.zip">images</a>
GQA | <a href="https://drive.google.com/drive/folders/1-dF-cgFwstutS4qq2D9CFQTDS0UTmIft?usp=drive_link">annotations</a> <a href="https://downloads.cs.stanford.edu/nlp/data/gqa/images.zip">images</a>
Hateful Memes | <a href="https://github.com/faizanahemad/facebook-hateful-memes">images and annotations</a>
IconQA | <a href="https://iconqa.github.io/#download">images and annotations</a>
VizWiz | <a href="https://vizwiz.org/tasks-and-datasets/vqa/">images and annotations</a>
RefCOCO | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcoco.zip">annotations</a>
RefCOCO+ | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcoco+.zip">annotations</a>
RefCOCOg | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcocog.zip">annotations</a>

### Evaluation dataset structure

```
${MINIGPTv2_EVALUATION_DATASET}
├── gqa
│   ├── test_balanced_questions.json
│   ├── testdev_balanced_questions.json
│   └── gqa_images
├── hateful_meme
│   ├── hm_images
│   └── dev.jsonl
├── iconvqa
│   ├── iconvqa_images
│   └── choose_text_val.json
├── vizwiz
│   ├── vizwiz_images
│   └── val.json
├── vsr
│   └── vsr_images
├── okvqa
│   ├── okvqa_test_split.json
│   ├── mscoco_val2014_annotations_clean.json
│   └── OpenEnded_mscoco_val2014_questions_clean.json
├── refcoco
│   ├── instances.json
│   ├── refs(google).p
│   └── refs(unc).p
├── refcoco+
│   ├── instances.json
│   └── refs(unc).p
├── refcocog
│   ├── instances.json
│   ├── refs(google).p
│   └── refs(umd).p
...
```
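
For instance, the RefCOCO-family annotations from the table above can be fetched and unpacked into this layout roughly as follows. This is only a sketch: it assumes `wget` and `unzip` are installed, that `$MINIGPTv2_EVALUATION_DATASET` is set, and that each archive unpacks into its own subfolder.

```
# Hypothetical sketch: fetch RefCOCO-family annotations into the expected layout.
mkdir -p ${MINIGPTv2_EVALUATION_DATASET}
cd ${MINIGPTv2_EVALUATION_DATASET}
for name in refcoco refcoco+ refcocog; do
    # annotation archives listed in the download table above
    wget "https://bvisionweb1.cs.unc.edu/licheng/referit/data/${name}.zip"
    unzip "${name}.zip"
done
```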

### Environment setup

Add the MiniGPT-4 repository to your Python path so the `minigpt4` package can be imported:

```
export PYTHONPATH=$PYTHONPATH:/path/to/directory/of/MiniGPT-4
```

### Config file setup

In [minigpt4/eval_configs/minigptv2_benchmark_evaluation.yaml](../minigpt4/eval_configs/minigptv2_benchmark_evaluation.yaml):

- Set **llama_model** to the path of the LLaMA model.
- Set **ckpt** to the path of the pretrained MiniGPT-v2 checkpoint.
- Set **eval_file_path** to the path of the annotation files for each evaluation dataset.
- Set **img_path** to the image folder for each evaluation dataset.
- Set **save_path** to the folder where evaluation results are saved.
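
For example, the edited fields might look like this (all paths below are hypothetical placeholders, not shipped defaults):

```
model:
  llama_model: "/checkpoints/Llama-2-7b-chat-hf"    # hypothetical local path
  ckpt: "/checkpoints/minigptv2_checkpoint.pth"     # hypothetical local path

evaluation_datasets:
  refcoco:
    eval_file_path: /data/minigptv2_eval/annotations  # parent folder holding refcoco/refcoco_val.json etc.
    img_path: /data/minigptv2_eval/images
    max_new_tokens: 20
    batch_size: 10

run:
  save_path: /results/minigptv2_eval
```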

### Start evaluating RefCOCO, RefCOCO+, RefCOCOg

port=port_number
cfg_path=/path/to/eval_configs/minigptv2_benchmark_evaluation.yaml

Dataset names:

| refcoco | refcoco+ | refcocog |
| ------- | -------- | -------- |

```
torchrun --master-port ${port} --nproc_per_node 1 eval_ref.py \
 --cfg-path ${cfg_path} --dataset refcoco,refcoco+,refcocog --resample
```

The `--resample` flag re-queries predictions that do not match the expected bounding-box format (see the retry loop in `eval_ref.py`). Per-split predictions are written to **save_path** as `{dataset}_{split}.json`, and accuracy at IoU > 0.5 is printed for each split.


### Start evaluating visual question answering

port=port_number
cfg_path=/path/to/eval_configs/minigptv2_benchmark_evaluation.yaml

Dataset names:

| okvqa | vizwiz | iconvqa | gqa | vsr | hm |
| ------- | -------- | -------- | -------- | -------- | -------- |

```
torchrun --master-port ${port} --nproc_per_node 1 eval_vqa.py \
 --cfg-path ${cfg_path} --dataset okvqa,vizwiz,iconvqa,gqa,vsr,hm
```
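
Any comma-separated subset of these names can be passed to `--dataset`. For example, to evaluate OKVQA alone (the port number here is an arbitrary placeholder):

```
torchrun --master-port 29500 --nproc_per_node 1 eval_vqa.py \
 --cfg-path ${cfg_path} --dataset okvqa
```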

@@ -0,0 +1,25 @@
#!/bin/bash --login

export PYTHONPATH=$PYTHONPATH:$(pwd)

# Placeholders: point these at your config, checkpoint, experiment name, and data.
cfg_path=eval_configs/minigpt4_llama2_eval.yaml
CKPT=YOUR_CKPT_PATH
NAME=EXP_NAME
IMG_PATH=YOUR_IMG_PATH
EVAL_FILE_PATH=YOUR_EVAL_FILE_PATH

# Referring-expression evaluation (commented out):
# torchrun --nproc_per_node 1 eval_scripts/eval_ref.py --name ${NAME} \
#     --cfg-path ${cfg_path} \
#     --ckpt ${CKPT} --dataset refcoco,refcoco+,refcocog --lora_r 64 --lora_alpha 16 \
#     --batch_size 64 --max_new_tokens 20 --resample --img_path ${IMG_PATH} --eval_file_path ${EVAL_FILE_PATH}

# Full VQA evaluation (commented out):
# torchrun --nproc_per_node 1 eval_scripts/eval_vqa.py --name ${NAME} \
#     --cfg-path ${cfg_path} \
#     --ckpt ${CKPT} --split val,test --dataset okvqa,vizwiz,iconqa,gqa,vsr,hm --lora_r 64 --lora_alpha 16 \
#     --batch_size 32 --max_new_tokens 20 --resample

# OKVQA only:
torchrun --nproc_per_node 1 eval_scripts/eval_vqa.py --name ${NAME} \
    --cfg-path ${cfg_path} \
    --ckpt ${CKPT} --split val,test --dataset okvqa --lora_r 64 --lora_alpha 16 \
    --batch_size 32 --max_new_tokens 20
@@ -0,0 +1,128 @@
import os
import re
import json
import argparse
from collections import defaultdict
import random
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from minigpt4.common.config import Config
from minigpt4.common.eval_utils import prepare_texts, init_model, eval_parser, computeIoU
from minigpt4.conversation.conversation import CONV_VISION_minigptv2

from minigpt4.datasets.datasets.coco_caption import RefCOCOEvalData


def list_of_str(arg):
    return list(map(str, arg.split(',')))


parser = eval_parser()
parser.add_argument("--dataset", type=list_of_str, default='refcoco', help="dataset to evaluate")
parser.add_argument("--res", type=float, default=100.0, help="coordinate resolution used in refcoco")
parser.add_argument("--resample", action='store_true', help="re-query outputs that do not match the bbox format")
args = parser.parse_args()

cfg = Config(args)

eval_dict = {'refcoco': ['val', 'testA', 'testB'],
             'refcoco+': ['val', 'testA', 'testB'],
             'refcocog': ['val', 'test']}


model, vis_processor = init_model(args)
model.eval()
CONV_VISION = CONV_VISION_minigptv2
conv_temp = CONV_VISION.copy()
conv_temp.system = ""

save_path = cfg.run_cfg.save_path


for dataset in args.dataset:
    for split in eval_dict[dataset]:

        eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
        img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
        batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
        max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]

        with open(os.path.join(eval_file_path, f"{dataset}/{dataset}_{split}.json"), 'r') as f:
            refcoco = json.load(f)

        data = RefCOCOEvalData(refcoco, vis_processor, img_path)
        eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
        minigpt4_predict = defaultdict(list)
        resamples = []

        for images, questions, img_ids in tqdm(eval_dataloader):
            texts = prepare_texts(questions, conv_temp)  # wrap the texts with conversation template
            answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
            for answer, img_id, question in zip(answers, img_ids, questions):
                answer = answer.replace("<unk>", "").replace(" ", "").strip()
                # expected output format: {<x1><y1><x2><y2>} with coordinates on a [0, res] grid
                pattern = r'\{<\d{1,3}><\d{1,3}><\d{1,3}><\d{1,3}>\}'
                if re.match(pattern, answer):
                    minigpt4_predict[img_id].append(answer)
                else:
                    resamples.append({'img_id': img_id, 'sents': [question.replace('[refer] give me the location of', '').strip()]})

        if args.resample:
            # re-query malformed outputs; on the fifth round (i == 4) remaining
            # answers are accepted unconditionally
            for i in range(20):
                data = RefCOCOEvalData(resamples, vis_processor, img_path)
                resamples = []
                eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
                for images, questions, img_ids in tqdm(eval_dataloader):
                    texts = prepare_texts(questions, conv_temp)  # wrap the texts with conversation template
                    answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
                    for answer, img_id, question in zip(answers, img_ids, questions):
                        answer = answer.replace("<unk>", "").replace(" ", "").strip()
                        pattern = r'\{<\d{1,3}><\d{1,3}><\d{1,3}><\d{1,3}>\}'
                        if re.match(pattern, answer) or i == 4:
                            minigpt4_predict[img_id].append(answer)
                        else:
                            resamples.append({'img_id': img_id, 'sents': [question.replace('[refer] give me the location of', '').strip()]})

                if len(resamples) == 0:
                    break

        file_save_path = os.path.join(save_path, f"{dataset}_{split}.json")
        with open(file_save_path, 'w') as f:
            json.dump(minigpt4_predict, f)

        count = 0
        total = len(refcoco)
        res = args.res
        refcoco_dict = {}
        for item in refcoco:
            refcoco_dict[item['img_id']] = item
        for img_id in refcoco_dict:
            item = refcoco_dict[img_id]
            bbox = item['bbox']
            outputs = minigpt4_predict[img_id]
            for output in outputs:
                try:
                    integers = re.findall(r'\d+', output)
                    pred_bbox = [int(num) for num in integers]
                    height = item['height']
                    width = item['width']
                    # rescale predicted coordinates from the [0, res] grid to pixels
                    pred_bbox[0] = pred_bbox[0] / res * width
                    pred_bbox[1] = pred_bbox[1] / res * height
                    pred_bbox[2] = pred_bbox[2] / res * width
                    pred_bbox[3] = pred_bbox[3] / res * height

                    # ground truth is stored as (x, y, w, h); convert to corner format
                    gt_bbox = [0, 0, 0, 0]
                    gt_bbox[0] = bbox[0]
                    gt_bbox[1] = bbox[1]
                    gt_bbox[2] = bbox[0] + bbox[2]
                    gt_bbox[3] = bbox[1] + bbox[3]

                    iou_score = computeIoU(pred_bbox, gt_bbox)
                    if iou_score > 0.5:
                        count += 1
                except Exception:
                    continue

        print(f'{dataset} {split}:', count / total * 100, flush=True)
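
To make the scoring arithmetic concrete, here is a self-contained sketch. The `iou` helper below is a local stand-in written for illustration; the script itself imports `computeIoU` from `minigpt4.common.eval_utils`.

```python
# Standalone sketch of the accuracy check above; iou() is a local
# re-implementation used only for this example.
def iou(a, b):
    # boxes are (x1, y1, x2, y2)
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

# A prediction like "{<10><20><60><80>}" on a 640x480 image with res=100
# rescales from the [0, 100] grid to pixel coordinates:
pred = [10 / 100 * 640, 20 / 100 * 480, 60 / 100 * 640, 80 / 100 * 480]
gt_xywh = [70, 90, 310, 300]  # annotation stores (x, y, w, h)
gt = [gt_xywh[0], gt_xywh[1], gt_xywh[0] + gt_xywh[2], gt_xywh[1] + gt_xywh[3]]
print(iou(pred, gt) > 0.5)  # counts toward accuracy iff IoU exceeds 0.5
```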