-
Notifications
You must be signed in to change notification settings - Fork 24
/
mmagibench.py
68 lines (58 loc) · 2.07 KB
/
mmagibench.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import base64
import io
import random
import pandas as pd
# from mmengine.dataset import Compose
from PIL import Image
from torch.utils.data import Dataset
def decode_base64_to_image(base64_string):
image_data = base64.b64decode(base64_string)
image = Image.open(io.BytesIO(image_data))
return image
class MMAGIBenchDataset(Dataset):
def __init__(self,
data_file,
sys_prompt='There are several options:'):
self.df = pd.read_csv(data_file, sep='\t')
# self.pipeline = Compose(pipeline)
self.sys_prompt = sys_prompt
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
index = self.df.iloc[idx]['index']
image = self.df.iloc[idx]['image']
image = decode_base64_to_image(image)
question = self.df.iloc[idx]['question']
answer = self.df.iloc[idx]['answer'] if 'answer' in self.df.iloc[0].keys() else None
catetory = self.df.iloc[idx]['category']
l2_catetory = self.df.iloc[idx]['l2-category']
option_candidate = ['A', 'B', 'C', 'D', 'E']
options = {
cand: self.load_from_df(idx, cand)
for cand in option_candidate
if self.load_from_df(idx, cand) is not None
}
options_prompt = f'{self.sys_prompt}\n'
for key, item in options.items():
options_prompt += f'{key}. {item}\n'
# trim last \n
options_prompt = options_prompt[:-1]
hint = self.load_from_df(idx, 'hint')
data = {
'img': image,
'question': question,
'answer': answer,
'options': options_prompt,
'category': catetory,
'l2-category': l2_catetory,
'options_dict': options,
'index': index,
'context': hint,
}
# data = self.pipeline(data)
return data
def load_from_df(self, idx, key):
if key in self.df.iloc[idx] and not pd.isna(self.df.iloc[idx][key]):
return self.df.iloc[idx][key]
else:
return None