diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json
index b5bdc1af8b..94211a11d5 100644
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@@ -1,6 +1,6 @@
{
"name": "@janhq/model-extension",
- "version": "1.0.19",
+ "version": "1.0.20",
"description": "Model Management Extension provides model exploration and seamless downloads",
"main": "dist/index.js",
"module": "dist/module.js",
diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json
index 366d7e968c..521fb53018 100644
--- a/models/capybara-34b/model.json
+++ b/models/capybara-34b/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Capybara 200k 34B Q5",
"version": "1.0",
- "description": "Nous Capybara 34B, a variant of the Yi-34B model, is the first Nous model with a 200K context length, trained for three epochs on the innovative Capybara dataset.",
+ "description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "USER:\n{prompt}\nASSISTANT:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
diff --git a/models/codeninja-1.0-7b/model.json b/models/codeninja-1.0-7b/model.json
index fbffaba6b8..c543f1a401 100644
--- a/models/codeninja-1.0-7b/model.json
+++ b/models/codeninja-1.0-7b/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "CodeNinja 7B Q4",
"version": "1.0",
- "description": "CodeNinja is finetuned on openchat/openchat-3.5-1210. It is good for codding tasks",
+ "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Beowolx",
diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json
index 5ddd56ebbd..46f7a80eac 100644
--- a/models/deepseek-coder-1.3b/model.json
+++ b/models/deepseek-coder-1.3b/model.json
@@ -5,14 +5,19 @@
"object": "model",
"name": "Deepseek Coder 1.3B Q8",
"version": "1.0",
- "description": "Deepseek Coder trained on 2T tokens (87% code, 13% English/Chinese), excelling in project-level code completion with advanced capabilities across multiple programming languages.",
+ "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
diff --git a/models/deepseek-coder-34b/model.json b/models/deepseek-coder-34b/model.json
index e68ec03347..5944706264 100644
--- a/models/deepseek-coder-34b/model.json
+++ b/models/deepseek-coder-34b/model.json
@@ -4,19 +4,24 @@
"object": "model",
"name": "Deepseek Coder 33B Q5",
"version": "1.0",
- "description": "Deepseek Coder trained on 2T tokens (87% code, 13% English/Chinese), excelling in project-level code completion with advanced capabilities across multiple programming languages.",
+ "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["34B", "Foundational Model"],
- "size": 26040000000
+ "size": 19940000000
},
"engine": "nitro"
}
diff --git a/models/dolphin-2.7-mixtral-8x7b/model.json b/models/dolphin-2.7-mixtral-8x7b/model.json
index 01f42d9b34..4b16b50352 100644
--- a/models/dolphin-2.7-mixtral-8x7b/model.json
+++ b/models/dolphin-2.7-mixtral-8x7b/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Dolphin 8x7B Q4",
"version": "1.0",
- "description": "This model is an uncensored model based on Mixtral-8x7b. Dolphin is really good at coding",
+ "description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Cognitive Computations, TheBloke",
diff --git a/models/llama2-chat-70b-q4/model.json b/models/llama2-chat-70b-q4/model.json
index 6fe68f6fd3..84e22d1f03 100644
--- a/models/llama2-chat-70b-q4/model.json
+++ b/models/llama2-chat-70b-q4/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Llama 2 Chat 70B Q4",
"version": "1.0",
- "description": "This is a 4-bit quantized version of Meta AI's Llama 2 Chat 70b model.",
+ "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
diff --git a/models/llama2-chat-7b-q4/model.json b/models/llama2-chat-7b-q4/model.json
index bf291a3867..ec8bb2cd39 100644
--- a/models/llama2-chat-7b-q4/model.json
+++ b/models/llama2-chat-7b-q4/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Llama 2 Chat 7B Q4",
"version": "1.0",
- "description": "This is a 4-bit quantized iteration of Meta AI's Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
+ "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
diff --git a/models/magicoder-s-ds-7b/model.json b/models/magicoder-s-ds-7b/model.json
deleted file mode 100644
index e9b8f2d3b7..0000000000
--- a/models/magicoder-s-ds-7b/model.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- "source_url": "https://huggingface.co/TheBloke/Magicoder-S-DS-6.7B-GGUF/resolve/main/magicoder-s-ds-6.7b.Q4_K_M.gguf",
- "id": "magicoder-s-ds7b",
- "object": "model",
- "name": "Magicoder 7B Q4",
- "version": "1.0",
- "description": "Magicoder is a model family, a novel approach to enlightening LLMs with open-source code snippets for generating low-bias and high-quality instruction data for code.",
- "format": "gguf",
- "settings": {
- "ctx_len": 4096,
- "prompt_template": "@@ Instruction\n{prompt}\n@@ Response"
- },
- "parameters": {
- "max_tokens": 4096
- },
- "metadata": {
- "author": "Ise-uiuc",
- "tags": ["7B", "Code"],
- "size": 4080000000
- },
- "engine": "nitro"
- }
-
\ No newline at end of file
diff --git a/models/mistral-ins-7b-q4/model.json b/models/mistral-ins-7b-q4/model.json
index 5345b82320..f06bddaad0 100644
--- a/models/mistral-ins-7b-q4/model.json
+++ b/models/mistral-ins-7b-q4/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Mistral Instruct 7B Q4",
"version": "1.0",
- "description": "This is a 4-bit quantized iteration of MistralAI's Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
+ "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
- "prompt_template": "[INST]{prompt}\n[/INST]"
+ "prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "MistralAI, The Bloke",
diff --git a/models/mixtral-8x7b-instruct/model.json b/models/mixtral-8x7b-instruct/model.json
index 6377f63f97..5167e83356 100644
--- a/models/mixtral-8x7b-instruct/model.json
+++ b/models/mixtral-8x7b-instruct/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Mixtral 8x7B Instruct Q4",
"version": "1.0",
- "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms Llama 2 70B on most benchmarks we tested.",
+ "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "MistralAI, TheBloke",
diff --git a/models/noromaid-7b/model.json b/models/noromaid-7b/model.json
index e088c77918..dd919bb656 100644
--- a/models/noromaid-7b/model.json
+++ b/models/noromaid-7b/model.json
@@ -4,19 +4,24 @@
"object": "model",
"name": "Noromaid 7B Q5",
"version": "1.0",
- "description": "The Noromaid 7b model is designed for role-playing and general use, featuring a unique touch with the no_robots dataset that enhances human-like behavior.",
+ "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:{prompt}\n### Response:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "NeverSleep",
"tags": ["7B", "Merged"],
- "size": 5130000000
+ "size": 4370000000
},
"engine": "nitro"
}
diff --git a/models/openchat-3.5-7b/model.json b/models/openchat-3.5-7b/model.json
index e1456b926b..09b95eb64e 100644
--- a/models/openchat-3.5-7b/model.json
+++ b/models/openchat-3.5-7b/model.json
@@ -11,11 +11,16 @@
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Openchat",
- "tags": ["7B", "Finetuned"],
+ "tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
diff --git a/models/openhermes-neural-7b/model.json b/models/openhermes-neural-7b/model.json
index 636916d9e5..230ef65f28 100644
--- a/models/openhermes-neural-7b/model.json
+++ b/models/openhermes-neural-7b/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.0",
- "description": "OpenHermes Neural is a merged model using the TIES method.",
+ "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
diff --git a/models/pandora-10.7b-v1/cover.png b/models/pandora-10.7b-v1/cover.png
deleted file mode 100644
index 84632ce750..0000000000
Binary files a/models/pandora-10.7b-v1/cover.png and /dev/null differ
diff --git a/models/pandora-10.7b-v1/model.json b/models/pandora-10.7b-v1/model.json
deleted file mode 100644
index 15c1839244..0000000000
--- a/models/pandora-10.7b-v1/model.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- "source_url": "https://huggingface.co/janhq/pandora-v1-10.7b-GGUF/resolve/main/pandora-v1-10.7b.Q4_K_M.gguf",
- "id": "pandora-10.7b-v1",
- "object": "model",
- "name": "Pandora 11B Q4",
- "version": "1.0",
- "description": "Pandora, our research model, employs the Passthrough merging technique to merge 2x7B models into 1.",
- "format": "gguf",
- "settings": {
- "ctx_len": 4096,
- "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:"
- },
- "parameters": {
- "max_tokens": 4096
- },
- "metadata": {
- "author": "Jan",
- "tags": ["13B","Merged"],
- "size": 6360000000
- },
- "engine": "nitro"
- }
-
\ No newline at end of file
diff --git a/models/phi-2-3b/model.json b/models/phi-2-3b/model.json
index 6a1521cc7f..10e39c2920 100644
--- a/models/phi-2-3b/model.json
+++ b/models/phi-2-3b/model.json
@@ -1,8 +1,8 @@
{
- "source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf",
+ "source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf",
"id": "phi-2-3b",
"object": "model",
- "name": "Phi-2 3B Q4",
+ "name": "Phi-2 3B Q8",
"version": "1.0",
"description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
"format": "gguf",
@@ -11,12 +11,17 @@
"prompt_template": "Intruct:\n{prompt}\nOutput:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Microsoft",
"tags": ["3B","Foundational Model"],
- "size": 1790000000
+ "size": 2960000000
},
"engine": "nitro"
}
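
The Phi-2 entry switches from the Q4_K_M to the Q8_0 quantization, which is why the size field grows from about 1.79 GB to 2.96 GB. A rough sanity check, assuming approximate effective bits per weight for each GGUF quantization (about 4.85 for Q4_K_M and 8.5 for Q8_0; real files also carry metadata overhead):

```typescript
// Rough sanity check for the Phi-2 size change. Bits-per-weight figures
// are approximations; real GGUF files also include metadata overhead.
const PHI2_PARAMS = 2.7e9; // parameter count from the description above

function approxGgufBytes(bitsPerWeight: number): number {
  return (PHI2_PARAMS * bitsPerWeight) / 8;
}

console.log(approxGgufBytes(4.85)); // ~1.64e9 bytes; listed size 1790000000
console.log(approxGgufBytes(8.5));  // ~2.87e9 bytes; listed size 2960000000
```
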
diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json
index 3d7c326f86..29ef7572b4 100644
--- a/models/phind-34b/model.json
+++ b/models/phind-34b/model.json
@@ -4,19 +4,24 @@
"object": "model",
"name": "Phind 34B Q5",
"version": "1.0",
- "description": "Phind-CodeLlama-34B-v2 is an AI model fine-tuned on 1.5B tokens of high-quality programming data. It's a SOTA open-source model in coding. This multi-lingual model excels in various programming languages, including Python, C/C++, TypeScript, Java, and is designed to be steerable and user-friendly.",
+ "description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Phind, The Bloke",
"tags": ["34B", "Finetuned"],
- "size": 24320000000
+ "size": 20220000000
},
"engine": "nitro"
}
diff --git a/models/solar-10.7b-slerp/model.json b/models/solar-10.7b-slerp/model.json
index ea1a5e93d3..7963bd05ff 100644
--- a/models/solar-10.7b-slerp/model.json
+++ b/models/solar-10.7b-slerp/model.json
@@ -11,7 +11,12 @@
"prompt_template": "### User: {prompt}\n### Assistant:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Jan",
diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json
index e34b369f2d..d5f5b57c80 100644
--- a/models/starling-7b/model.json
+++ b/models/starling-7b/model.json
@@ -4,18 +4,23 @@
"object": "model",
"name": "Starling alpha 7B Q4",
"version": "1.0",
- "description": "Starling-RM-7B-alpha is a language model finetuned with Reinforcement Learning from AI Feedback from Openchat 3.5. It stands out for its impressive performance using GPT-4 as a judge, making it one of the top-performing models in its category.",
+ "description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Berkeley-nest, The Bloke",
- "tags": ["Recommended", "7B","Finetuned"],
+ "tags": ["7B","Finetuned"],
"size": 4370000000
},
"engine": "nitro"
diff --git a/models/stealth-v1.2-7b/model.json b/models/stealth-v1.2-7b/model.json
index 4153b0604f..dee5d68f59 100644
--- a/models/stealth-v1.2-7b/model.json
+++ b/models/stealth-v1.2-7b/model.json
@@ -2,7 +2,7 @@
"source_url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf",
"id": "stealth-v1.2-7b",
"object": "model",
- "name": "Stealth-v1.2 7B Q4",
+ "name": "Stealth 7B Q4",
"version": "1.0",
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf",
@@ -11,7 +11,12 @@
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Jan",
diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json
index bd59369f89..50a68f1a9b 100644
--- a/models/tinyllama-1.1b/model.json
+++ b/models/tinyllama-1.1b/model.json
@@ -4,19 +4,24 @@
"object": "model",
"name": "TinyLlama Chat 1.1B Q4",
"version": "1.0",
- "description": "The TinyLlama project, featuring a 1.1B parameter Llama model, is pretrained on an expansive 3 trillion token dataset. Its design ensures easy integration with various Llama-based open-source projects. Despite its smaller size, it efficiently utilizes lower computational and memory resources, drawing on GPT-4's analytical prowess to enhance its conversational abilities and versatility.",
+ "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
"format": "gguf",
"settings": {
"ctx_len": 2048,
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>"
},
"parameters": {
- "max_tokens": 2048
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 2048,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "TinyLlama",
"tags": ["Tiny", "Foundation Model"],
- "size": 669000000
+ "size": 1170000000
},
"engine": "nitro"
}
\ No newline at end of file
diff --git a/models/trinity-v1.2-7b/model.json b/models/trinity-v1.2-7b/model.json
index 52a1c3630c..1532b1f09d 100644
--- a/models/trinity-v1.2-7b/model.json
+++ b/models/trinity-v1.2-7b/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Trinity-v1.2 7B Q4",
"version": "1.0",
- "description": "Trinity is an experimental model merge of GreenNodeLM & LeoScorpius using the Slerp method. Recommended for daily assistance purposes.",
+ "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Jan",
diff --git a/models/tulu-2-70b/model.json b/models/tulu-2-70b/model.json
index c85da8223b..4437dcbbd1 100644
--- a/models/tulu-2-70b/model.json
+++ b/models/tulu-2-70b/model.json
@@ -4,14 +4,19 @@
"object": "model",
"name": "Tulu 2 70B Q4",
"version": "1.0",
- "description": "Tulu V2 DPO 70B is a fine-tuned version of Llama 2 using (DPO). This model is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
+ "description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}\n<|assistant|>"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "Lizpreciatior, The Bloke",
diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json
index 77bf7050be..f73c93e8ee 100644
--- a/models/wizardcoder-13b/model.json
+++ b/models/wizardcoder-13b/model.json
@@ -4,19 +4,24 @@
"object": "model",
"name": "Wizard Coder Python 13B Q5",
"version": "1.0",
- "description": "WizardCoder-Python-13B is a Python coding model major models like ChatGPT-3.5. This model based on the Llama2 architecture, demonstrate high proficiency in specific domains like coding and mathematics.",
+ "description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "WizardLM, The Bloke",
"tags": ["Recommended", "13B", "Finetuned"],
- "size": 9230000000
+ "size": 7870000000
},
"engine": "nitro"
}
diff --git a/models/yarn-mistral-7b/model.json b/models/yarn-mistral-7b/model.json
index 0d1af9c509..8b048dd3d0 100644
--- a/models/yarn-mistral-7b/model.json
+++ b/models/yarn-mistral-7b/model.json
@@ -11,7 +11,12 @@
"prompt_template": "{prompt}"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json
index 3fa06efed0..d5b90febe6 100644
--- a/models/yi-34b/model.json
+++ b/models/yi-34b/model.json
@@ -11,12 +11,17 @@
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
- "max_tokens": 4096
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 4096,
+ "frequency_penalty": 0,
+ "presence_penalty": 0
},
"metadata": {
"author": "01-ai, The Bloke",
"tags": ["34B", "Foundational Model"],
- "size": 24320000000
+ "size": 20660000000
},
"engine": "nitro"
}
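
Since the same six parameter keys are added by hand to every model.json in this patch, a quick consistency check can catch files that miss one. A hypothetical Node script (paths and names are illustrative):

```typescript
// Hypothetical consistency check: warn when a models/*/model.json file is
// missing one of the six parameter keys this patch adds everywhere.
import { readdirSync, readFileSync } from "node:fs";
import { join } from "node:path";

const REQUIRED_PARAMS = [
  "temperature",
  "top_p",
  "stream",
  "max_tokens",
  "frequency_penalty",
  "presence_penalty",
];

for (const dir of readdirSync("models")) {
  const file = join("models", dir, "model.json");
  let model: any;
  try {
    model = JSON.parse(readFileSync(file, "utf8"));
  } catch {
    continue; // skip entries without a parseable model.json
  }
  const missing = REQUIRED_PARAMS.filter(
    (key) => !(key in (model.parameters ?? {}))
  );
  if (missing.length > 0) {
    console.warn(`${file}: missing parameters ${missing.join(", ")}`);
  }
}
```
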