Skip to content

Commit

Permalink
Deprecate Support for Dolly, Nous-Hermes, Redpajama-Incite, Vicuna an…
Browse files Browse the repository at this point in the history
…d H2O Danube Models. (Lightning-AI#1821)
  • Loading branch information
ParagEkbote authored Nov 11, 2024
1 parent ec02064 commit 33eab00
Show file tree
Hide file tree
Showing 8 changed files with 2 additions and 649 deletions.
7 changes: 1 addition & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,6 @@ Every model is written from scratch to maximize performance and remove layers of
|----|----|----|----|
| CodeGemma | 7B | Google | [Google Team, Google Deepmind](https://ai.google.dev/gemma/docs/codegemma) |
| Code Llama | 7B, 13B, 34B, 70B | Meta AI | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
| Danube2 | 1.8B | H2O.ai | [H2O.ai](https://h2o.ai/platform/danube-1-8b/) |
| Dolly | 3B, 7B, 12B | Databricks | [Conover et al. 2023](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm) |
| Falcon | 7B, 40B, 180B | TII UAE | [TII 2023](https://falconllm.tii.ae) |
| FreeWilly2 (Stable Beluga 2) | 70B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stable-beluga-large-instruction-fine-tuned-models) |
| Function Calling Llama 2 | 7B | Trelis | [Trelis et al. 2023](https://huggingface.co/Trelis/Llama-2-7b-chat-hf-function-calling-v2) |
Expand All @@ -126,23 +124,20 @@ Every model is written from scratch to maximize performance and remove layers of
| Llama 2 | 7B, 13B, 70B | Meta AI | [Touvron et al. 2023](https://arxiv.org/abs/2307.09288) |
| Llama 3.1 | 8B, 70B | Meta AI | [Meta AI 2024](https://github.com/meta-llama/llama3) |
| Llama 3.2 | 1B, 3B | Meta AI | [Meta AI 2024](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/) |
| LongChat | 7B, 13B | LMSYS | [LongChat Team 2023](https://lmsys.org/blog/2023-06-29-longchat/) |
| Mathstral | 7B | Mistral AI | [Mistral AI 2024](https://mistral.ai/news/mathstral/) |
| MicroLlama | 300M | Ken Wang | [MicroLlama repo](https://github.com/keeeeenw/MicroLlama) |
| Mixtral MoE | 8x7B | Mistral AI | [Mistral AI 2023](https://mistral.ai/news/mixtral-of-experts/) |
| Mistral | 7B, 123B | Mistral AI | [Mistral AI 2023](https://mistral.ai/news/announcing-mistral-7b/) |
| Nous-Hermes | 7B, 13B, 70B | NousResearch | [Org page](https://huggingface.co/NousResearch) |
| OpenLLaMA | 3B, 7B, 13B | OpenLM Research | [Geng & Liu 2023](https://github.com/openlm-research/open_llama) |
| Phi 1.5 & 2 | 1.3B, 2.7B | Microsoft Research | [Li et al. 2023](https://arxiv.org/abs/2309.05463) |
| Phi 3 | 3.8B | Microsoft Research | [Abdin et al. 2024](https://arxiv.org/abs/2404.14219) |
| Platypus | 7B, 13B, 70B | Lee et al. | [Lee, Hunter, and Ruiz 2023](https://arxiv.org/abs/2308.07317) |
| Pythia | {14,31,70,160,410}M, {1,1.4,2.8,6.9,12}B | EleutherAI | [Biderman et al. 2023](https://arxiv.org/abs/2304.01373) |
| RedPajama-INCITE | 3B, 7B | Together | [Together 2023](https://together.ai/blog/redpajama-models-v1) |
| StableCode | 3B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stablecode-llm-generative-ai-coding) |
| StableLM | 3B, 7B | Stability AI | [Stability AI 2023](https://github.com/Stability-AI/StableLM) |
| StableLM Zephyr | 3B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stablecode-llm-generative-ai-coding) |
| TinyLlama | 1.1B | Zhang et al. | [Zhang et al. 2023](https://github.com/jzhang38/TinyLlama) |
| Vicuna | 7B, 13B, 33B | LMSYS | [Li et al. 2023](https://lmsys.org/blog/2023-03-30-vicuna/) |


**Tip**: You can list all available models by running the `litgpt download list` command.

Expand Down
332 changes: 0 additions & 332 deletions litgpt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,85 +368,6 @@ def norm_class(self) -> Type:
configs.append(copy)


###################
# databricks Dolly
###################
# Keyword overrides for the three databricks Dolly v2 checkpoints.
# Each entry lists only the fields given explicitly for that model size.
dolly = [
    # https://huggingface.co/databricks/dolly-v2-3b/blob/main/config.json
    {
        "name": "dolly-v2-3b",
        "hf_config": {"org": "databricks", "name": "dolly-v2-3b"},
        "block_size": 2048,
        "n_layer": 32,
        "n_embd": 2560,
        "padded_vocab_size": 50280,
    },
    # https://huggingface.co/databricks/dolly-v2-7b/blob/main/config.json
    {
        "name": "dolly-v2-7b",
        "hf_config": {"org": "databricks", "name": "dolly-v2-7b"},
        "block_size": 2048,
        "n_layer": 32,
        "padded_vocab_size": 50280,
    },
    # https://huggingface.co/databricks/dolly-v2-12b/blob/main/config.json
    {
        "name": "dolly-v2-12b",
        "hf_config": {"org": "databricks", "name": "dolly-v2-12b"},
        "block_size": 2048,
        "n_layer": 36,
        "n_embd": 5120,
        "n_head": 40,
        "padded_vocab_size": 50280,
    },
]
configs.extend(dolly)


####################################
# togethercomputer RedPajama INCITE
####################################
# Templates for the togethercomputer RedPajama-INCITE checkpoints.
# The "{}" in each name is a placeholder that is filled in later with
# str.format, once per model variant.
redpajama_incite = [
    # https://huggingface.co/togethercomputer/RedPajama-INCITE-Base-3B-v1/blob/main/config.json
    {
        "name": "RedPajama-INCITE-{}-3B-v1",
        "hf_config": {"org": "togethercomputer", "name": "RedPajama-INCITE-{}-3B-v1"},
        "block_size": 2048,
        "n_layer": 32,
        "n_embd": 2560,
        "padding_multiple": 256,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
    },
    # https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Base/blob/main/config.json
    {
        "name": "RedPajama-INCITE-7B-{}",
        "hf_config": {"org": "togethercomputer", "name": "RedPajama-INCITE-7B-{}"},
        "block_size": 2048,
        "n_layer": 32,
        "padding_multiple": 256,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
    },
    # Legacy alias: this redirects to the checkpoint above. It is kept for
    # users who already downloaded the weights under the old name.
    {
        "name": "RedPajama-INCITE-{}-7B-v0.1",
        "hf_config": {"org": "togethercomputer", "name": "RedPajama-INCITE-{}-7B-v0.1"},
        "block_size": 2048,
        "n_layer": 32,
        "padding_multiple": 256,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
    },
]
# Expand every RedPajama-INCITE template into its Base / Chat / Instruct
# variants and register each resulting config.
for c in redpajama_incite:
    for kind in ("Base", "Chat", "Instruct"):
        # Deep copy so the nested hf_config dict of the template is not mutated.
        copy = deepcopy(c)
        copy.update(name=c["name"].format(kind))
        copy["hf_config"].update(name=c["hf_config"]["name"].format(kind))
        configs.append(copy)


#################
# TII UAE Falcon
#################
Expand Down Expand Up @@ -569,232 +490,6 @@ def norm_class(self) -> Type:
]
configs.extend(open_LLaMA)


###############
# LMSYS Vicuna
###############
# Keyword overrides for the LMSYS Vicuna checkpoints (v1.3 and v1.5 series,
# including the 16k-context v1.5 variants that set rope_condense_ratio).
vicuna = [
    # https://huggingface.co/lmsys/vicuna-7b-v1.3/blob/main/config.json
    {
        "name": "vicuna-7b-v1.3",
        "hf_config": {"org": "lmsys", "name": "vicuna-7b-v1.3"},
        "block_size": 2048,
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 32,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-6,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 11008,
    },
    # https://huggingface.co/lmsys/vicuna-13b-v1.3/blob/main/config.json
    {
        "name": "vicuna-13b-v1.3",
        "hf_config": {"org": "lmsys", "name": "vicuna-13b-v1.3"},
        "block_size": 2048,
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 40,
        "n_head": 40,
        "n_embd": 5120,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-6,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 13824,
    },
    # https://huggingface.co/lmsys/vicuna-33b-v1.3/blob/main/config.json
    {
        "name": "vicuna-33b-v1.3",
        "hf_config": {"org": "lmsys", "name": "vicuna-33b-v1.3"},
        "block_size": 2048,
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 60,
        "n_head": 52,
        "n_embd": 6656,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-6,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 17920,
    },
    # https://huggingface.co/lmsys/vicuna-7b-v1.5/blob/main/config.json
    {
        "name": "vicuna-7b-v1.5",
        "hf_config": {"org": "lmsys", "name": "vicuna-7b-v1.5"},
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 32,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 11008,
    },
    # https://huggingface.co/lmsys/vicuna-7b-v1.5-16k/blob/main/config.json
    {
        "name": "vicuna-7b-v1.5-16k",
        "hf_config": {"org": "lmsys", "name": "vicuna-7b-v1.5-16k"},
        "block_size": 16384,
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 32,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 11008,
        "rope_condense_ratio": 4,
    },
    # https://huggingface.co/lmsys/vicuna-13b-v1.5/blob/main/config.json
    {
        "name": "vicuna-13b-v1.5",
        "hf_config": {"org": "lmsys", "name": "vicuna-13b-v1.5"},
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 40,
        "n_head": 40,
        "n_embd": 5120,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 13824,
    },
    # https://huggingface.co/lmsys/vicuna-13b-v1.5-16k/blob/main/config.json
    {
        "name": "vicuna-13b-v1.5-16k",
        "hf_config": {"org": "lmsys", "name": "vicuna-13b-v1.5-16k"},
        "block_size": 16384,
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 40,
        "n_head": 40,
        "n_embd": 5120,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 13824,
        "rope_condense_ratio": 4,
    },
]
configs.extend(vicuna)


#################
# LMSYS LongChat
#################
# Keyword overrides for the LMSYS LongChat 16k-context checkpoints.
long_chat = [
    # https://huggingface.co/lmsys/longchat-7b-16k/blob/main/config.json
    {
        "name": "longchat-7b-16k",
        "hf_config": {"org": "lmsys", "name": "longchat-7b-16k"},
        "block_size": 16384,
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 32,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-6,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 11008,
        "rope_condense_ratio": 8,
    },
    # https://huggingface.co/lmsys/longchat-13b-16k/blob/main/config.json
    {
        "name": "longchat-13b-16k",
        "hf_config": {"org": "lmsys", "name": "longchat-13b-16k"},
        "block_size": 16384,
        "vocab_size": 32000,
        "padding_multiple": 64,
        "n_layer": 40,
        "n_head": 40,
        "n_embd": 5120,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-6,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 13824,
        "rope_condense_ratio": 8,
    },
]
configs.extend(long_chat)


######################
# NousResearch Hermes
######################
# Keyword overrides for the NousResearch Nous-Hermes checkpoints.
nous_research = [
    # https://huggingface.co/NousResearch/Nous-Hermes-llama-2-7b/blob/main/config.json
    {
        "name": "Nous-Hermes-llama-2-7b",
        "hf_config": {"org": "NousResearch", "name": "Nous-Hermes-llama-2-7b"},
        "padded_vocab_size": 32000,
        "n_layer": 32,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-05,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 11008,
    },
    # https://huggingface.co/NousResearch/Nous-Hermes-13B/blob/main/config.json
    {
        "name": "Nous-Hermes-13b",
        "hf_config": {"org": "NousResearch", "name": "Nous-Hermes-13b"},
        "block_size": 2048,
        "vocab_size": 32000,
        "padded_vocab_size": 32001,
        "n_layer": 40,
        "n_head": 40,
        "n_embd": 5120,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-6,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 13824,
    },
    # https://huggingface.co/NousResearch/Nous-Hermes-Llama2-13b
    {
        "name": "Nous-Hermes-Llama2-13b",
        "hf_config": {"org": "NousResearch", "name": "Nous-Hermes-Llama2-13b"},
        "vocab_size": 32000,
        "padded_vocab_size": 32032,
        "n_layer": 40,
        "n_head": 40,
        "n_embd": 5120,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "norm_eps": 1e-05,
        "mlp_class_name": "LLaMAMLP",
        "intermediate_size": 13824,
    },
]
configs.extend(nous_research)


###############
# Meta LLaMA 2
###############
Expand Down Expand Up @@ -1189,33 +884,6 @@ def norm_class(self) -> Type:
]
configs.extend(codegemma)

################
# H2Oai Danube2
################
# Keyword overrides for the H2O.ai Danube2 chat checkpoint.
danube2 = [
    # https://huggingface.co/h2oai/h2o-danube2-1.8b-chat/blob/main/config.json
    {
        "name": "Danube2-1.8b-chat",
        "hf_config": {"org": "h2oai", "name": "h2o-danube2-1.8b-chat"},
        "vocab_size": 32000,
        "n_layer": 24,
        "n_head": 32,
        "n_embd": 2560,
        # should be 8192 but sliding_window mechanism is not implemented
        "block_size": 4096,
        "intermediate_size": 6912,
        "padding_multiple": 64,
        "norm_eps": 1e-05,
        "rope_base": 10000,
        "n_query_groups": 8,
        "rotary_percentage": 1.0,
        "parallel_residual": False,
        "bias": False,
        "norm_class_name": "RMSNorm",
        "mlp_class_name": "LLaMAMLP",
    },
]
configs.extend(danube2)


##########################
# Stability AI FreeWilly2
Expand Down
Loading

0 comments on commit 33eab00

Please sign in to comment.