Skip to content

Commit

Permalink
fix(magicoder): correct the id and update parameters (janhq#1448)
Browse files Browse the repository at this point in the history
fix(capybara): change description + parameters

fix(codeninja): update description + add parameters

delete(magicoder): this model fails to load due to a GGUF issue

fix(deepseek-1.3b): update description + add parameters

fix(deepseek-34b): update description + add parameters

fix(dolphin8x7b): update description + parameters

fix(llama2-chat-7b): update description + add parameters

fix(llama2-chat-70b): update description + add parameters

fix(mistral-7b): update description + add parameters

fix(mistral): downgrade to v0.1 due to gibberish output from v0.2

fix(mixtral): update description + parameters

fix(noromaid): update description + parameters

fix(mistral7b): correct the prompt template

update(openchat): add parameters

fix(openhermesneural): update parameter + description

fix(openhermesneural): update max_tokens

delete(pandora): remove pandora

fix(phi2): update parameter

update(phind34b): update description + change to q4 + add parameter

update(solar-slerp): update parameter

fix(starling): update description + parameter

fix(openchat): add to recommended

fix(stealth): add parameters

fix(tinyllama): update description + parameters

fix(trinity): update description + parameters

fix(tulu2): update description + parameters

fix(stealth): change max_tokens to 4096

update(wizardcoder): update parameter + description

update(wizardcoder): update description

update(wizardcoder): change to q4

fix(yarn-mistral): update parameters

fix(yarn-mistral): change to q4

update(wizardcoder): correct the size of q4 model

fix(model-extension): bump to 1.0.20

fix(tinyllama): change to q8

fix(phi-2): change to q8

fix(deepseek34b): change to q4

fix(mistral7b): update the stop token

fix(starling): remove recommended
  • Loading branch information
hahuyhoang411 authored Jan 8, 2024
1 parent efc3c48 commit 0dd1295
Show file tree
Hide file tree
Showing 27 changed files with 169 additions and 100 deletions.
2 changes: 1 addition & 1 deletion extensions/model-extension/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@janhq/model-extension",
"version": "1.0.19",
"version": "1.0.20",
"description": "Model Management Extension provides model exploration and seamless downloads",
"main": "dist/index.js",
"module": "dist/module.js",
Expand Down
9 changes: 7 additions & 2 deletions models/capybara-34b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "Capybara 200k 34B Q5",
"version": "1.0",
"description": "Nous Capybara 34B, a variant of the Yi-34B model, is the first Nous model with a 200K context length, trained for three epochs on the innovative Capybara dataset.",
"description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "USER:\n{prompt}\nASSISTANT:"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
Expand Down
9 changes: 7 additions & 2 deletions models/codeninja-1.0-7b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "CodeNinja 7B Q4",
"version": "1.0",
"description": "CodeNinja is finetuned on openchat/openchat-3.5-1210. It is good for codding tasks",
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Beowolx",
Expand Down
9 changes: 7 additions & 2 deletions models/deepseek-coder-1.3b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,19 @@
"object": "model",
"name": "Deepseek Coder 1.3B Q8",
"version": "1.0",
"description": "Deepseek Coder trained on 2T tokens (87% code, 13% English/Chinese), excelling in project-level code completion with advanced capabilities across multiple programming languages.",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
Expand Down
11 changes: 8 additions & 3 deletions models/deepseek-coder-34b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@
"object": "model",
"name": "Deepseek Coder 33B Q5",
"version": "1.0",
"description": "Deepseek Coder trained on 2T tokens (87% code, 13% English/Chinese), excelling in project-level code completion with advanced capabilities across multiple programming languages.",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 26040000000
"size": 19940000000
},
"engine": "nitro"
}
Expand Down
9 changes: 7 additions & 2 deletions models/dolphin-2.7-mixtral-8x7b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "Dolphin 8x7B Q4",
"version": "1.0",
"description": "This model is an uncensored model based on Mixtral-8x7b. Dolphin is really good at coding",
"description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Cognitive Computations, TheBloke",
Expand Down
9 changes: 7 additions & 2 deletions models/llama2-chat-70b-q4/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "Llama 2 Chat 70B Q4",
"version": "1.0",
"description": "This is a 4-bit quantized version of Meta AI's Llama 2 Chat 70b model.",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
Expand Down
9 changes: 7 additions & 2 deletions models/llama2-chat-7b-q4/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "Llama 2 Chat 7B Q4",
"version": "1.0",
"description": "This is a 4-bit quantized iteration of Meta AI's Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
Expand Down
23 changes: 0 additions & 23 deletions models/magicoder-s-ds-7b/model.json

This file was deleted.

11 changes: 8 additions & 3 deletions models/mistral-ins-7b-q4/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "Mistral Instruct 7B Q4",
"version": "1.0",
"description": "This is a 4-bit quantized iteration of MistralAI's Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<s>[INST]{prompt}\n[/INST]"
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, The Bloke",
Expand Down
9 changes: 7 additions & 2 deletions models/mixtral-8x7b-instruct/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "Mixtral 8x7B Instruct Q4",
"version": "1.0",
"description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms Llama 2 70B on most benchmarks we tested.",
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, TheBloke",
Expand Down
11 changes: 8 additions & 3 deletions models/noromaid-7b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@
"object": "model",
"name": "Noromaid 7B Q5",
"version": "1.0",
"description": "The Noromaid 7b model is designed for role-playing and general use, featuring a unique touch with the no_robots dataset that enhances human-like behavior.",
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:{prompt}\n### Response:"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NeverSleep",
"tags": ["7B", "Merged"],
"size": 5130000000
"size": 4370000000
},
"engine": "nitro"
}
Expand Down
9 changes: 7 additions & 2 deletions models/openchat-3.5-7b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Openchat",
"tags": ["7B", "Finetuned"],
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
Expand Down
9 changes: 7 additions & 2 deletions models/openhermes-neural-7b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.0",
"description": "OpenHermes Neural is a merged model using the TIES method.",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
Expand Down
Binary file removed models/pandora-10.7b-v1/cover.png
Binary file not shown.
23 changes: 0 additions & 23 deletions models/pandora-10.7b-v1/model.json

This file was deleted.

13 changes: 9 additions & 4 deletions models/phi-2-3b/model.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf",
"source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf",
"id": "phi-2-3b",
"object": "model",
"name": "Phi-2 3B Q4",
"name": "Phi-2 3B Q8",
"version": "1.0",
"description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
"format": "gguf",
Expand All @@ -11,12 +11,17 @@
"prompt_template": "Intruct:\n{prompt}\nOutput:"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Microsoft",
"tags": ["3B","Foundational Model"],
"size": 1790000000
"size": 2960000000
},
"engine": "nitro"
}
Expand Down
11 changes: 8 additions & 3 deletions models/phind-34b/model.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@
"object": "model",
"name": "Phind 34B Q5",
"version": "1.0",
"description": "Phind-CodeLlama-34B-v2 is an AI model fine-tuned on 1.5B tokens of high-quality programming data. It's a SOTA open-source model in coding. This multi-lingual model excels in various programming languages, including Python, C/C++, TypeScript, Java, and is designed to be steerable and user-friendly.",
"description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
},
"parameters": {
"max_tokens": 4096
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Phind, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 24320000000
"size": 20220000000
},
"engine": "nitro"
}
Expand Down
Loading

0 comments on commit 0dd1295

Please sign in to comment.