feat: Add DeepSeek-R1 (deepseek-reasoner) support (cline#1355)
* feat: Add DeepSeek-R1 (deepseek-reasoner) support

- Add new deepseek-reasoner model with proper pricing info
- Fix temperature parameter being sent to unsupported deepseek-reasoner model
- Improve model selection logic in DeepSeekHandler
- Update CHANGELOG with new features and fixes
- Bump version to 3.1.11

* style: apply prettier formatting to deepseek provider and api definitions
slavakurilyak authored Jan 21, 2025
1 parent 3f35aab commit f4df887
Showing 4 changed files with 29 additions and 8 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,11 @@
 # Change Log
 
+## [3.2.X]
+
+- Add DeepSeek-R1 (deepseek-reasoner) model support with proper parameter handling
+- Fix temperature parameter being sent to unsupported deepseek-reasoner model
+- Update DeepSeek pricing info with new reasoner model rates
+
 ## [3.2.0]
 
 - Add Plan/Act mode toggle to let you plan tasks with Cline before letting him get to work
1 change: 1 addition & 0 deletions package-lock.json

Some generated files are not rendered by default.

20 changes: 12 additions & 8 deletions src/api/providers/deepseek.ts
@@ -18,13 +18,15 @@ export class DeepSeekHandler implements ApiHandler {
 	}
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+		const model = this.getModel()
 		const stream = await this.client.chat.completions.create({
-			model: this.getModel().id,
-			max_completion_tokens: this.getModel().info.maxTokens,
-			temperature: 0,
+			model: model.id,
+			max_completion_tokens: model.info.maxTokens,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
+			// Only set temperature for non-reasoner models
+			...(model.id === "deepseek-reasoner" ? {} : { temperature: 0 }),
 		})
 
 		for await (const chunk of stream) {
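
The substantive change in this hunk is the conditional spread: deepseek-reasoner does not accept the temperature parameter, so the request body includes it only for other models. A minimal, self-contained sketch of the idiom (the buildParams helper and its return shape are illustrative, not part of the diff):

    // Sketch: conditionally include a field via object spread.
    interface RequestParams {
        model: string
        temperature?: number
    }

    function buildParams(modelId: string): RequestParams {
        return {
            model: modelId,
            // Spreading {} adds nothing; spreading { temperature: 0 } adds the key.
            ...(modelId === "deepseek-reasoner" ? {} : { temperature: 0 }),
        }
    }

    console.log(buildParams("deepseek-chat")) // { model: 'deepseek-chat', temperature: 0 }
    console.log(buildParams("deepseek-reasoner")) // { model: 'deepseek-reasoner' }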
@@ -52,13 +54,15 @@ export class DeepSeekHandler implements ApiHandler {
 
 	getModel(): { id: DeepSeekModelId; info: ModelInfo } {
 		const modelId = this.options.apiModelId
-		if (modelId && modelId in deepSeekModels) {
-			const id = modelId as DeepSeekModelId
-			return { id, info: deepSeekModels[id] }
+		if (!modelId || !(modelId in deepSeekModels)) {
+			return {
+				id: deepSeekDefaultModelId,
+				info: deepSeekModels[deepSeekDefaultModelId],
+			}
 		}
 		return {
-			id: deepSeekDefaultModelId,
-			info: deepSeekModels[deepSeekDefaultModelId],
+			id: modelId as DeepSeekModelId,
+			info: deepSeekModels[modelId as DeepSeekModelId],
 		}
 	}
 }
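
The getModel() rewrite inverts the original check into a guard clause: fall back to the default when the configured id is missing or unknown, otherwise return the configured model. A standalone sketch of the same lookup-with-fallback pattern (the two-entry model table and default id are stand-ins for deepSeekModels and deepSeekDefaultModelId):

    // Sketch: typed lookup with fallback to a default key.
    const models = {
        "deepseek-chat": { maxTokens: 8_000 },
        "deepseek-reasoner": { maxTokens: 8_000 },
    } as const

    type ModelId = keyof typeof models
    const defaultModelId: ModelId = "deepseek-chat"

    function getModel(configuredId?: string): { id: ModelId; info: (typeof models)[ModelId] } {
        // Guard clause: anything unset or unrecognized resolves to the default.
        if (!configuredId || !(configuredId in models)) {
            return { id: defaultModelId, info: models[defaultModelId] }
        }
        const id = configuredId as ModelId
        return { id, info: models[id] }
    }

    console.log(getModel(undefined).id) // "deepseek-chat" (fallback)
    console.log(getModel("deepseek-reasoner").id) // "deepseek-reasoner"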
10 changes: 10 additions & 0 deletions src/shared/api.ts
@@ -377,6 +377,16 @@ export const deepSeekModels = {
 		cacheWritesPrice: 0.14,
 		cacheReadsPrice: 0.014,
 	},
+	"deepseek-reasoner": {
+		maxTokens: 8_000,
+		contextWindow: 64_000,
+		supportsImages: false,
+		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
+		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
+		outputPrice: 2.19,
+		cacheWritesPrice: 0.55,
+		cacheReadsPrice: 0.14,
+	},
 } as const satisfies Record<string, ModelInfo>
 
 // Mistral
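
Given the pricing fields in this entry, and per the inputPrice comment (every input token is either a cache hit or a cache miss), a request's cost comes entirely from cache reads, cache writes, and output. A hedged sketch of the arithmetic, assuming the prices are USD per million tokens and using a hypothetical usage shape:

    // Sketch: cost estimate for deepseek-reasoner. The usage field names are
    // assumptions for illustration, not DeepSeek's actual usage report schema.
    const reasonerPricing = {
        inputPrice: 0,
        outputPrice: 2.19,
        cacheWritesPrice: 0.55,
        cacheReadsPrice: 0.14,
    }

    function estimateCostUsd(usage: { cacheReadTokens: number; cacheWriteTokens: number; outputTokens: number }): number {
        const PER_MILLION = 1_000_000
        return (
            (usage.cacheReadTokens / PER_MILLION) * reasonerPricing.cacheReadsPrice +
            (usage.cacheWriteTokens / PER_MILLION) * reasonerPricing.cacheWritesPrice +
            (usage.outputTokens / PER_MILLION) * reasonerPricing.outputPrice
        )
    }

    // e.g. 50k cache-hit tokens, 10k cache-miss tokens, 2k output tokens:
    console.log(estimateCostUsd({ cacheReadTokens: 50_000, cacheWriteTokens: 10_000, outputTokens: 2_000 }).toFixed(4)) // "0.0169"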
