[Feat] add gemini api for video and continual mode to gemini api model, fix all top_p=0 to top_p=1. (EvolvingLMMs-Lab#96)

* Update GeminiAPI class to support continual mode and cache API responses

* Add review field to evaluated results

* Add delay before returning uploaded object in encode_video method

* Update generation_kwargs in YAML files

---------

Co-authored-by: Fanyi Pu <[email protected]>
Luodian and pufanyi authored May 26, 2024
1 parent bc4b541 commit 174d8e1
Showing 42 changed files with 79 additions and 48 deletions.
2 changes: 1 addition & 1 deletion docs/task_guide.md
@@ -27,7 +27,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 16
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 # The return value of process_results will be used by metrics
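Background on the top_p fix applied throughout this commit: top_p is the nucleus-sampling cutoff, keeping the smallest set of tokens whose cumulative probability reaches p. A value of 0 is degenerate (the nucleus collapses to at most one token, and some backends reject it as out of range), while 1.0 keeps the full distribution and is the conventional "disabled" setting. A minimal sketch of the mechanism (illustration only, not code from this repository):

# Nucleus (top-p) filtering sketch -- illustration only, not lmms-eval code.
import numpy as np

def top_p_filter(probs, top_p):
    """Keep the smallest set of tokens whose cumulative probability reaches top_p."""
    order = np.argsort(probs)[::-1]                  # token indices, most probable first
    cum = np.cumsum(probs[order])
    keep = order[: np.searchsorted(cum, top_p) + 1]  # tokens inside the nucleus
    filtered = np.zeros_like(probs)
    filtered[keep] = probs[keep]
    return filtered / filtered.sum()                 # renormalize over the nucleus

probs = np.array([0.5, 0.3, 0.15, 0.05])
print(top_p_filter(probs, 1.0))  # [0.5 0.3 0.15 0.05] -- full distribution ("disabled")
print(top_p_filter(probs, 0.0))  # [1. 0. 0. 0.] -- collapses to the argmax

Since these task configs pair the change with do_sample: false (greedy decoding), top_p is effectively inert at inference time; 1.0 is simply the safe neutral value for backends that validate the parameter.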
39 changes: 36 additions & 3 deletions lmms_eval/models/gemini_api.py
@@ -2,6 +2,7 @@
 import os
 import time
 import logging
+import json

 from PIL import Image
 from typing import List, Tuple
@@ -11,13 +12,12 @@
 from lmms_eval.api.instance import Instance
 from accelerate import Accelerator, DistributedType

-
 eval_logger = logging.getLogger("lmms-eval")

 try:
     import google.generativeai as genai

-    NUM_SECONDS_TO_SLEEP = 5
+    NUM_SECONDS_TO_SLEEP = 30
     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
     genai.configure(api_key=GOOGLE_API_KEY)

@@ -33,15 +33,31 @@ def __init__(
         model_version: str = "gemini-1.5-flash-latest",
         modality: str = "image",
         timeout: int = 120,
+        continual_mode: bool = False,
+        response_persistent_folder: str = None,  # We will cache the Gemini API response in this path and use it for future requests
         **kwargs,
     ) -> None:
         super().__init__()
         self.model_version = model_version
         self.timeout = timeout
         self.model = genai.GenerativeModel(model_version)
+        self.continual_mode = continual_mode
+        if self.continual_mode and response_persistent_folder is None:
+            raise ValueError("Continual mode requires a persistent path for the response. We will cache the Gemini API response in this path and use it for future requests. Please provide a valid path.")
+        self.response_persistent_folder = response_persistent_folder
+        self.response_persistent_file = os.path.join(self.response_persistent_folder, f"{self.model_version}_response.json")
+
+        if os.path.exists(self.response_persistent_file):
+            with open(self.response_persistent_file, "r") as f:
+                self.response_cache = json.load(f)
+            self.cache_mode = "resume"
+        else:
+            self.response_cache = {}
+            self.cache_mode = "start"

         accelerator = Accelerator()
         if accelerator.num_processes > 1:
+            assert self.continual_mode is False, "Continual mode is not supported with distributed inference."
             assert accelerator.distributed_type in [DistributedType.FSDP, DistributedType.MULTI_GPU, DistributedType.DEEPSPEED], "Unsupported distributed type provided. Only DDP and FSDP are supported."
         self.accelerator = accelerator
         if self.accelerator.is_local_main_process:
@@ -77,7 +93,9 @@ def get_image_size(self, image):
         return img_size

     def encode_video(self, video_path):
-        return genai.upload_file(path=video_path)
+        uploaded_obj = genai.upload_file(path=video_path)
+        time.sleep(5)
+        return uploaded_obj

     def convert_video(self, images):
         for idx, img in enumerate(images):
@@ -109,6 +127,14 @@ def generate_until(self, requests) -> List[str]:

             message = [contexts] + visuals

+            if self.continual_mode is True and self.cache_mode == "resume":
+                doc_uuid = str(doc_id)  # cache keys are strings once round-tripped through JSON
+                if doc_uuid in self.response_cache:
+                    content = self.response_cache[doc_uuid]
+                    res.append(content)
+                    pbar.update(1)
+                    continue
+
             for attempt in range(5):
                 try:
                     content = self.model.generate_content(message, generation_config=config)
@@ -123,6 +149,13 @@ def generate_until(self, requests) -> List[str]:
                         content = ""
             res.append(content)
             pbar.update(1)
+
+            if self.continual_mode is True:  # Cache the response
+                doc_uuid = str(doc_id)
+                self.response_cache[doc_uuid] = content
+                with open(self.response_persistent_file, "w") as f:
+                    json.dump(self.response_cache, f)
+
         pbar.close()
         return res

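For context, a minimal usage sketch of the new continual mode (constructor arguments match this diff; instantiating GeminiAPI directly and the cache folder path are assumptions for illustration, since models are normally launched through the lmms-eval CLI):

# Hypothetical usage of the continual mode added above -- not part of the commit.
from lmms_eval.models.gemini_api import GeminiAPI

model = GeminiAPI(
    model_version="gemini-1.5-flash-latest",
    modality="video",
    continual_mode=True,                          # enable response caching / resume
    response_persistent_folder="./gemini_cache",  # hypothetical path; required when continual_mode=True
)
# First run: no cache file exists, so cache_mode == "start" and every response is
# dumped to ./gemini_cache/gemini-1.5-flash-latest_response.json as it is generated.
# Rerun with the same folder: the JSON is loaded, cache_mode == "resume", and cached
# answers are returned without re-calling the Gemini API.

Note the guards in __init__: continual mode without a persistent folder raises a ValueError, and distributed runs assert that continual mode is off. The 5-second sleep added to encode_video presumably gives the Files API time to finish processing an upload before the file is referenced (an assumption; the commit message does not state the reason).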
3 changes: 0 additions & 3 deletions lmms_eval/models/llava_sglang.py
@@ -108,9 +108,6 @@ def _collate(x):
             gen_kwargs["top_p"] = 1.0
         if "num_beams" not in gen_kwargs:
             gen_kwargs["num_beams"] = 1
-        if gen_kwargs["top_p"] == 0.0:
-            gen_kwargs["top_p"] = 1.0
-        gen_kwargs["temperature"] = 0.0
         assert gen_kwargs["num_beams"] == 1

         def save_image_to_temp_file(image):
(changed file; name not shown in this capture)
@@ -20,6 +20,6 @@ generation_kwargs:
   image_aspect_ratio: original
   max_new_tokens: 64
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
2 changes: 1 addition & 1 deletion lmms_eval/tasks/coco_cap/coco2014_cap_test.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 128
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.coco_test_process_result
2 changes: 1 addition & 1 deletion lmms_eval/tasks/coco_cap/coco2014_cap_val.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 64
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.coco_process_result
2 changes: 1 addition & 1 deletion lmms_eval/tasks/coco_cap/coco2017_cap_test.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 128
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.coco_test_process_result
2 changes: 1 addition & 1 deletion lmms_eval/tasks/coco_cap/coco2017_cap_val.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 64
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.coco_process_result
2 changes: 1 addition & 1 deletion lmms_eval/tasks/ferret/ferret.yaml
@@ -13,7 +13,7 @@ generation_kwargs:
   image_aspect_ratio: original
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.ferret_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/flickr30k/flickr30k_test.yaml
@@ -10,7 +10,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 64
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.flickr_process_result
2 changes: 1 addition & 1 deletion lmms_eval/tasks/gqa/gqa.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 16
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 metric_list:
2 changes: 1 addition & 1 deletion lmms_eval/tasks/hallusion_bench/hallusion_bench_image.yaml
@@ -15,7 +15,7 @@ model_specific_prompt_kwargs:
 generation_kwargs:
   max_new_tokens: 128
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 metric_list:
2 changes: 1 addition & 1 deletion lmms_eval/tasks/internal_eval/d170_cn.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function d170_cn_utils.process_results # apply gpt eval here
2 changes: 1 addition & 1 deletion lmms_eval/tasks/internal_eval/d170_en.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function d170_en_utils.process_results # apply gpt eval here
2 changes: 1 addition & 1 deletion lmms_eval/tasks/internal_eval/dc100_en.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function dc100_en_utils.process_results # apply gpt eval here
2 changes: 1 addition & 1 deletion lmms_eval/tasks/internal_eval/dc200_cn.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function dc200_cn_utils.process_results # apply gpt eval here
2 changes: 1 addition & 1 deletion lmms_eval/tasks/livebench/livebench.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.livebench_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/llava-bench-coco/llava-bench-coco.yaml
@@ -13,7 +13,7 @@ generation_kwargs:
   image_aspect_ratio: original
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
 process_results: !function utils.llava_process_results
 metric_list:
2 changes: 1 addition & 1 deletion lmms_eval/tasks/llava-in-the-wild/llava-in-the-wild.yaml
@@ -13,7 +13,7 @@ generation_kwargs:
   image_aspect_ratio: original
   max_new_tokens: 32768
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.llava_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/llava_wilder/_default_template_wilder_yaml
@@ -8,7 +8,7 @@ generation_kwargs:
   image_aspect_ratio: original
   max_new_tokens: 4096
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.llava_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mathvista/mathvista_test.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.mathvista_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mathvista/mathvista_testmini.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.mathvista_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mmbench/_default_template_mmbench_cn_yaml
@@ -9,7 +9,7 @@ doc_to_text: !function cn_utils.mmbench_doc_to_text
 generation_kwargs:
   max_new_tokens: 256
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function cn_utils.mmbench_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mmbench/_default_template_mmbench_en_yaml
@@ -20,6 +20,6 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mmbench/mmbench_cc.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 256
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function cc_utils.mmbench_cn_cc_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mme/mme.yaml
@@ -10,7 +10,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 16
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 # The return value of process_results will be used by metrics
2 changes: 1 addition & 1 deletion lmms_eval/tasks/mmvet/mmvet.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 32768
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.mmvet_process_results # apply gpt eval here
2 changes: 1 addition & 1 deletion lmms_eval/tasks/nocaps/nocaps_test.yaml
@@ -11,7 +11,7 @@ doc_to_target: "annotations_captions"
 generation_kwargs:
   max_new_tokens: 64
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.nocaps_test_process_result
2 changes: 1 addition & 1 deletion lmms_eval/tasks/nocaps/nocaps_val.yaml
@@ -11,7 +11,7 @@ doc_to_target: "annotations_captions"
 generation_kwargs:
   max_new_tokens: 64
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.nocaps_process_result
2 changes: 1 addition & 1 deletion lmms_eval/tasks/ocrbench/ocrbench.yaml
@@ -10,7 +10,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 128
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.ocrbench_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/olympiadbench/olympiadbench_test_cn.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function cn_utils.olympiadbench_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/olympiadbench/olympiadbench_test_en.yaml
@@ -12,7 +12,7 @@ generation_kwargs:
     - "ASSISTANT:"
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function en_utils.olympiadbench_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/pope/pope.yaml
@@ -10,7 +10,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 128
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.pope_process_results
2 changes: 1 addition & 1 deletion lmms_eval/tasks/realworldqa/realworldqa.yaml
@@ -11,7 +11,7 @@ doc_to_target: "answer"
 generation_kwargs:
   max_new_tokens: 16
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false

2 changes: 1 addition & 1 deletion lmms_eval/tasks/synthdog/synthdog_en.yaml
@@ -10,7 +10,7 @@ doc_to_target: !function utils.synthdog_doc_to_target
 generation_kwargs:
   max_new_tokens: 1024
   temperature: 0
-  top_p: 0
+  top_p: 1.0
   num_beams: 1
   do_sample: false
 process_results: !function utils.synthdog_process_results
(remaining changed files were not loaded in this capture)
