Skip to content

Commit

Permalink
[Model] Update default prefill chunk size and max batch size (mlc-ai#…
Browse files Browse the repository at this point in the history
…2917)

This PR updates the default prefill chunk size from 2048 to 8192,
and the default max batch size from 80 to 128.
  • Loading branch information
MasterJH5574 authored Sep 18, 2024
1 parent 1eabc65 commit 66fd62d
Show file tree
Hide file tree
Showing 21 changed files with 79 additions and 79 deletions.
2 changes: 1 addition & 1 deletion python/mlc_llm/cli/gen_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _parse_output(path: Union[str, Path]) -> Path:
parser.add_argument(
"--max-batch-size",
type=int,
default=80,
default=128,
help=HELP["max_batch_size"] + ' (default: "%(default)s")',
)
parser.add_argument(
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/baichuan/baichuan_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/chatglm3/chatglm3_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/cohere/cohere_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)

if self.num_key_value_heads == 0 or self.num_key_value_heads is None:
self.num_key_value_heads = self.num_attention_heads
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/gemma/gemma_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/gpt2/gpt2_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring,too-many-locals
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/gpt_bigcode/gpt_bigcode_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/gpt_neox/gpt_neox_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/internlm/internlm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/internlm2/internlm2_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/llama/llama_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,17 @@ def __post_init__(self): # pylint: disable=too-many-branches
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/minicpm/minicpm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
4 changes: 2 additions & 2 deletions python/mlc_llm/model/mistral/mistral_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def __post_init__(self): # pylint: disable=too-many-branches
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(*prefill_chunk_size_candidates, 2048),
min(*prefill_chunk_size_candidates, 8192),
)
self.prefill_chunk_size = min(*prefill_chunk_size_candidates, 2048)
self.prefill_chunk_size = min(*prefill_chunk_size_candidates, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/orion/orion_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/phi/phi_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
if self.num_key_value_heads == 0 or self.num_key_value_heads is None:
self.num_key_value_heads = self.num_attention_heads
if self.intermediate_size == 0 or self.intermediate_size is None:
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/phi3/phi3_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)

if self.num_key_value_heads == 0 or self.num_key_value_heads is None:
self.num_key_value_heads = self.num_attention_heads
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/phi3v/phi3v_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)

if self.num_key_value_heads == 0 or self.num_key_value_heads is None:
self.num_key_value_heads = self.num_attention_heads
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/qwen/qwen_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring
Expand Down
8 changes: 4 additions & 4 deletions python/mlc_llm/model/qwen2/qwen2_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ def __post_init__(self):
logger.info(
"%s defaults to %d",
bold("prefill_chunk_size"),
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)
elif self.prefill_chunk_size > self.context_window_size:
logger.info(
"Overriding %s from %d to %d",
bold("prefill_chunk_size"),
self.prefill_chunk_size,
min(self.context_window_size, 2048),
min(self.context_window_size, 8192),
)
self.prefill_chunk_size = min(self.context_window_size, 2048)
self.prefill_chunk_size = min(self.context_window_size, 8192)


# pylint: disable=invalid-name,missing-docstring,too-many-locals
Expand Down
Loading

0 comments on commit 66fd62d

Please sign in to comment.