跟进上游

gtxy27 · Dec 22, 2024 · 8f469ff · 8f469ff
1 parent b8603d0
commit 8f469ff
Show file tree

Hide file tree

Showing 19 changed files with 431 additions and 162 deletions.
diff --git a/common/model-ratio.go b/common/model-ratio.go
@@ -8,10 +8,9 @@ import (
 
 // from songquanpeng/one-api
 const (
-	USD2RMB = 7.3           // 暂定 1 USD = 7.3 RMB
-	USD     = 500           // $0.002 = 1 -> $1 = 500 一美元所对应的基础倍率数
-	RMB2    = 1             // 锚定 人民币的定价
-	RMB     = USD / USD2RMB // 1 RMB 对应基础倍率
+	USD2RMB = 7.3 // provisional exchange rate: 1 USD = 7.3 RMB
+	USD     = 500 // $0.002 = 1 -> $1 = 500
+	RMB     = USD / USD2RMB
 )
 
 // modelRatio
@@ -47,6 +46,8 @@ var defaultModelRatio = map[string]float64{
 	"gpt-4o-2024-08-06":               1.25, // $2.5 / 1M tokens
 	"gpt-4o-2024-11-20":               1.25, // $2.5 / 1M tokens
 	"gpt-4o-realtime-preview":         2.5,
+	"o1":                              7.5,
+	"o1-2024-12-17":                   7.5,
 	"o1-preview":                      7.5,
 	"o1-preview-2024-09-12":           7.5,
 	"o1-mini":                         1.5,
@@ -70,51 +71,116 @@ var defaultModelRatio = map[string]float64{
 	"text-curie-001":         1,
 	//"text-davinci-002":               10,
 	//"text-davinci-003":               10,
-	"text-davinci-edit-001":        10,
-	"code-davinci-edit-001":        10,
-	"whisper-1":                    15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
-	"tts-1":                        7.5, // 1k characters -> $0.015
-	"tts-1-1106":                   7.5, // 1k characters -> $0.015
-	"tts-1-hd":                     15,  // 1k characters -> $0.03
-	"tts-1-hd-1106":                15,  // 1k characters -> $0.03
-	"davinci":                      10,
-	"curie":                        10,
-	"babbage":                      10,
-	"ada":                          10,
-	"text-embedding-3-small":       0.01,
-	"text-embedding-3-large":       0.065,
-	"text-embedding-ada-002":       0.05,
-	"text-search-ada-doc-001":      10,
-	"text-moderation-stable":       0.1,
-	"text-moderation-latest":       0.1,
-	"claude-instant-1":             0.4,   // $0.8 / 1M tokens
-	"claude-2.0":                   4,     // $8 / 1M tokens
-	"claude-2.1":                   4,     // $8 / 1M tokens
-	"claude-3-haiku-20240307":      0.125, // $0.25 / 1M tokens
-	"claude-3-5-haiku-20241022":    0.5,   // $1 / 1M tokens
-	"claude-3-sonnet-20240229":     1.5,   // $3 / 1M tokens
-	"claude-3-5-sonnet-20240620":   1.5,
-	"claude-3-5-sonnet-20241022":   1.5,
-	"claude-3-opus-20240229":       7.5, // $15 / 1M tokens
-	"BLOOMZ-7B":                    0.004 * RMB,
-	"Embedding-V1":                 0.002 * RMB,
-	"bge-large-zh":                 0.002 * RMB,
-	"bge-large-en":                 0.002 * RMB,
-	"tao-8k":                       0.002 * RMB,
-	"PaLM-2":                       1,
-	"gemini-pro":                   1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-pro-vision":            1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-1.0-pro-vision-001":    1,
-	"gemini-1.0-pro-001":           1,
-	"gemini-1.5-pro-latest":        1.75, // $3.5 / 1M tokens
-	"gemini-1.5-pro-exp-0827":      1.75, // $3.5 / 1M tokens
-	"gemini-1.5-flash-latest":      1,
-	"gemini-1.5-flash-exp-0827":    1,
-	"gemini-1.0-pro-latest":        1,
-	"gemini-1.0-pro-vision-latest": 1,
-	"gemini-ultra":                 1,
-	// ￥0.002 / 1k tokens
-
+	"text-davinci-edit-001":          10,
+	"code-davinci-edit-001":          10,
+	"whisper-1":                      15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
+	"tts-1":                          7.5, // 1k characters -> $0.015
+	"tts-1-1106":                     7.5, // 1k characters -> $0.015
+	"tts-1-hd":                       15,  // 1k characters -> $0.03
+	"tts-1-hd-1106":                  15,  // 1k characters -> $0.03
+	"davinci":                        10,
+	"curie":                          10,
+	"babbage":                        10,
+	"ada":                            10,
+	"text-embedding-3-small":         0.01,
+	"text-embedding-3-large":         0.065,
+	"text-embedding-ada-002":         0.05,
+	"text-search-ada-doc-001":        10,
+	"text-moderation-stable":         0.1,
+	"text-moderation-latest":         0.1,
+	"claude-instant-1":               0.4,   // $0.8 / 1M tokens
+	"claude-2.0":                     4,     // $8 / 1M tokens
+	"claude-2.1":                     4,     // $8 / 1M tokens
+	"claude-3-haiku-20240307":        0.125, // $0.25 / 1M tokens
+	"claude-3-5-haiku-20241022":      0.5,   // $1 / 1M tokens
+	"claude-3-sonnet-20240229":       1.5,   // $3 / 1M tokens
+	"claude-3-5-sonnet-20240620":     1.5,
+	"claude-3-5-sonnet-20241022":     1.5,
+	"claude-3-opus-20240229":         7.5, // $15 / 1M tokens
+	"ERNIE-4.0-8K":                   0.120 * RMB,
+	"ERNIE-3.5-8K":                   0.012 * RMB,
+	"ERNIE-3.5-8K-0205":              0.024 * RMB,
+	"ERNIE-3.5-8K-1222":              0.012 * RMB,
+	"ERNIE-Bot-8K":                   0.024 * RMB,
+	"ERNIE-3.5-4K-0205":              0.012 * RMB,
+	"ERNIE-Speed-8K":                 0.004 * RMB,
+	"ERNIE-Speed-128K":               0.004 * RMB,
+	"ERNIE-Lite-8K-0922":             0.008 * RMB,
+	"ERNIE-Lite-8K-0308":             0.003 * RMB,
+	"ERNIE-Tiny-8K":                  0.001 * RMB,
+	"BLOOMZ-7B":                      0.004 * RMB,
+	"Embedding-V1":                   0.002 * RMB,
+	"bge-large-zh":                   0.002 * RMB,
+	"bge-large-en":                   0.002 * RMB,
+	"tao-8k":                         0.002 * RMB,
+	"PaLM-2":                         1,
+	"gemini-pro":                     1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"gemini-pro-vision":              1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"gemini-1.0-pro-vision-001":      1,
+	"gemini-1.0-pro-001":             1,
+	"gemini-1.5-pro-latest":          1.75, // $3.5 / 1M tokens
+	"gemini-1.5-pro-exp-0827":        1.75, // $3.5 / 1M tokens
+	"gemini-1.5-flash-latest":        1,
+	"gemini-1.5-flash-exp-0827":      1,
+	"gemini-1.0-pro-latest":          1,
+	"gemini-1.0-pro-vision-latest":   1,
+	"gemini-ultra":                   1,
+	"chatglm_turbo":                  0.3572,     // ￥0.005 / 1k tokens
+	"chatglm_pro":                    0.7143,     // ￥0.01 / 1k tokens
+	"chatglm_std":                    0.3572,     // ￥0.005 / 1k tokens
+	"chatglm_lite":                   0.1429,     // ￥0.002 / 1k tokens
+	"glm-4":                          7.143,      // ￥0.1 / 1k tokens
+	"glm-4v":                         0.05 * RMB, // ￥0.05 / 1k tokens
+	"glm-4-alltools":                 0.1 * RMB,  // ￥0.1 / 1k tokens
+	"glm-3-turbo":                    0.3572,
+	"glm-4-plus":                     0.05 * RMB,
+	"glm-4-0520":                     0.1 * RMB,
+	"glm-4-air":                      0.001 * RMB,
+	"glm-4-airx":                     0.01 * RMB,
+	"glm-4-long":                     0.001 * RMB,
+	"glm-4-flash":                    0,
+	"glm-4v-plus":                    0.01 * RMB,
+	"qwen-turbo":                     0.8572, // ￥0.012 / 1k tokens
+	"qwen-plus":                      10,     // ￥0.14 / 1k tokens
+	"text-embedding-v1":              0.05,   // ￥0.0007 / 1k tokens
+	"SparkDesk-v1.1":                 1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v2.1":                 1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v3.1":                 1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v3.5":                 1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v4.0":                 1.2858,
+	"360GPT_S2_V9":                   0.8572, // ¥0.012 / 1k tokens
+	"360gpt-turbo":                   0.0858, // ¥0.0012 / 1k tokens
+	"360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens
+	"360gpt-pro":                     0.8572, // ¥0.012 / 1k tokens
+	"360gpt2-pro":                    0.8572, // ¥0.012 / 1k tokens
+	"embedding-bert-512-v1":          0.0715, // ¥0.001 / 1k tokens
+	"embedding_s1_v1":                0.0715, // ¥0.001 / 1k tokens
+	"semantic_similarity_s1_v1":      0.0715, // ¥0.001 / 1k tokens
+	"hunyuan":                        7.143,  // ¥0.1 / 1k tokens  // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
+	// https://platform.lingyiwanwu.com/docs#-计费单元
+	// 已经按照 7.2 来换算美元价格
+	"yi-34b-chat-0205":       0.18,
+	"yi-34b-chat-200k":       0.864,
+	"yi-vl-plus":             0.432,
+	"yi-large":               20.0 / 1000 * RMB,
+	"yi-medium":              2.5 / 1000 * RMB,
+	"yi-vision":              6.0 / 1000 * RMB,
+	"yi-medium-200k":         12.0 / 1000 * RMB,
+	"yi-spark":               1.0 / 1000 * RMB,
+	"yi-large-rag":           25.0 / 1000 * RMB,
+	"yi-large-turbo":         12.0 / 1000 * RMB,
+	"yi-large-preview":       20.0 / 1000 * RMB,
+	"yi-large-rag-preview":   25.0 / 1000 * RMB,
+	"command":                0.5,
+	"command-nightly":        0.5,
+	"command-light":          0.5,
+	"command-light-nightly":  0.5,
+	"command-r":              0.25,
+	"command-r-plus":         1.5,
+	"command-r-08-2024":      0.075,
+	"command-r-plus-08-2024": 1.25,
+	"deepseek-chat":          0.07,
+	"deepseek-coder":         0.07,
 	// Perplexity online 模型对搜索额外收费，有需要应自行调整，此处不计入搜索费用
 	"llama-3-sonar-small-32k-chat":   0.2 / 1000 * USD,
 	"llama-3-sonar-small-32k-online": 0.2 / 1000 * USD,

diff --git a/constant/channel_setting.go b/constant/channel_setting.go
@@ -0,0 +1,5 @@
+package constant
+
+var (
+	ForceFormat = "force_format" // ForceFormat forces the response to be formatted in the OpenAI format
+)
diff --git a/constant/env.go b/constant/env.go
@@ -23,6 +23,8 @@ var GeminiModelMap = map[string]string{
 	"gemini-1.0-pro": "v1",
 }
 
+var GeminiVisionMaxImageNum = common.GetEnvOrDefault("GEMINI_VISION_MAX_IMAGE_NUM", 16)
+
 func InitEnv() {
 	modelVersionMapStr := strings.TrimSpace(os.Getenv("GEMINI_MODEL_MAP"))
 	if modelVersionMapStr == "" {

diff --git a/constant/finish_reason.go b/constant/finish_reason.go
@@ -0,0 +1,6 @@
+package constant
+
+var (
+	FinishReasonStop      = "stop"       // FinishReasonStop is the "stop" finish reason string
+	FinishReasonToolCalls = "tool_calls" // FinishReasonToolCalls is the "tool_calls" finish reason string
+)
diff --git a/controller/channel-test.go b/controller/channel-test.go
@@ -141,7 +141,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 	milliseconds := tok.Sub(tik).Milliseconds()
 	consumedTime := float64(milliseconds) / 1000.0
 	other := service.GenerateTextOtherInfo(c, meta, modelRatio, 1, completionRatio, modelPrice)
-	model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, usage.PromptCacheHitTokens, testModel, "模型测试", quota, "模型测试", 0, quota, int(consumedTime), false, other)
+	model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, 0, testModel, "模型测试", quota, "模型测试", 0, quota, int(consumedTime), false, other)
 	common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody)))
 	return nil, nil
 }
@@ -153,6 +153,8 @@ func buildTestRequest(model string) *dto.GeneralOpenAIRequest {
 	}
 	if strings.HasPrefix(model, "o1-") {
 		testRequest.MaxCompletionTokens = 1
+	} else if strings.HasPrefix(model, "gemini-2.0-flash-thinking") {
+		testRequest.MaxTokens = 2
 	} else {
 		testRequest.MaxTokens = 1
 	}

diff --git a/dto/openai_request.go b/dto/openai_request.go
@@ -3,39 +3,48 @@ package dto
 import "encoding/json"
 
 type ResponseFormat struct {
-	Type string `json:"type,omitempty"`
+	Type       string            `json:"type,omitempty"`
+	JsonSchema *FormatJsonSchema `json:"json_schema,omitempty"`
+}
+
+type FormatJsonSchema struct {
+	Description string `json:"description,omitempty"` // omitted from JSON when empty
+	Name        string `json:"name"`                  // always serialized (no omitempty)
+	Schema      any    `json:"schema,omitempty"`      // untyped; carried as arbitrary JSON
+	Strict      any    `json:"strict,omitempty"`      // untyped; NOTE(review): presumably a boolean flag — confirm
 }
 
 type GeneralOpenAIRequest struct {
-	Model               string         `json:"model,omitempty"`
-	Messages            []Message      `json:"messages,omitempty"`
-	Prompt              any            `json:"prompt,omitempty"`
-	Stream              bool           `json:"stream,omitempty"`
-	StreamOptions       *StreamOptions `json:"stream_options,omitempty"`
-	MaxTokens           uint           `json:"max_tokens,omitempty"`
-	MaxCompletionTokens uint           `json:"max_completion_tokens,omitempty"`
-	Temperature         float64        `json:"temperature,omitempty"`
-	TopP                float64        `json:"top_p,omitempty"`
-	TopK                int            `json:"top_k,omitempty"`
-	Stop                any            `json:"stop,omitempty"`
-	N                   int            `json:"n,omitempty"`
-	Input               any            `json:"input,omitempty"`
-	Instruction         string         `json:"instruction,omitempty"`
-	Size                string         `json:"size,omitempty"`
-	Functions           any            `json:"functions,omitempty"`
-	FrequencyPenalty    float64        `json:"frequency_penalty,omitempty"`
-	PresencePenalty     float64        `json:"presence_penalty,omitempty"`
-	ResponseFormat      any            `json:"response_format,omitempty"`
-	EncodingFormat      any            `json:"encoding_format,omitempty"`
-	Seed                float64        `json:"seed,omitempty"`
-	Tools               []ToolCall     `json:"tools,omitempty"`
-	ToolChoice          any            `json:"tool_choice,omitempty"`
-	User                string         `json:"user,omitempty"`
-	LogProbs            bool           `json:"logprobs,omitempty"`
-	TopLogProbs         int            `json:"top_logprobs,omitempty"`
-	Dimensions          int            `json:"dimensions,omitempty"`
-	Modalities          any            `json:"modalities,omitempty"`
-	Audio               any            `json:"audio,omitempty"`
+	Model               string          `json:"model,omitempty"`
+	Messages            []Message       `json:"messages,omitempty"`
+	Prompt              any             `json:"prompt,omitempty"`
+	Stream              bool            `json:"stream,omitempty"`
+	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
+	MaxTokens           uint            `json:"max_tokens,omitempty"`
+	MaxCompletionTokens uint            `json:"max_completion_tokens,omitempty"`
+	ReasoningEffort     string          `json:"reasoning_effort,omitempty"`
+	Temperature         float64         `json:"temperature,omitempty"`
+	TopP                float64         `json:"top_p,omitempty"`
+	TopK                int             `json:"top_k,omitempty"`
+	Stop                any             `json:"stop,omitempty"`
+	N                   int             `json:"n,omitempty"`
+	Input               any             `json:"input,omitempty"`
+	Instruction         string          `json:"instruction,omitempty"`
+	Size                string          `json:"size,omitempty"`
+	Functions           any             `json:"functions,omitempty"`
+	FrequencyPenalty    float64         `json:"frequency_penalty,omitempty"`
+	PresencePenalty     float64         `json:"presence_penalty,omitempty"`
+	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
+	EncodingFormat      any             `json:"encoding_format,omitempty"`
+	Seed                float64         `json:"seed,omitempty"`
+	Tools               []ToolCall      `json:"tools,omitempty"`
+	ToolChoice          any             `json:"tool_choice,omitempty"`
+	User                string          `json:"user,omitempty"`
+	LogProbs            bool            `json:"logprobs,omitempty"`
+	TopLogProbs         int             `json:"top_logprobs,omitempty"`
+	Dimensions          int             `json:"dimensions,omitempty"`
+	Modalities          any             `json:"modalities,omitempty"`
+	Audio               any             `json:"audio,omitempty"`
 }
 
 type OpenAITools struct {

diff --git a/model/user.go b/model/user.go
@@ -89,13 +89,18 @@ func SearchUsers(keyword string, group string) ([]*User, error) {
 	var users []*User
 	var err error
 
+	groupCol := "`group`"
+	if common.UsingPostgreSQL {
+		groupCol = `"group"`
+	}
+
 	// 尝试将关键字转换为整数ID
 	keywordInt, err := strconv.Atoi(keyword)
 	if err == nil {
 		// 如果转换成功，按照ID和可选的组别搜索用户
-		query := DB.Unscoped().Omit("password").Where("`id` = ?", keywordInt)
+		query := DB.Unscoped().Omit("password").Where("id = ?", keywordInt)
 		if group != "" {
-			query = query.Where("`group` = ?", group) // 使用反引号包围group
+			query = query.Where(groupCol+" = ?", group) // groupCol is already quoted for the active database (backticks, or double quotes on PostgreSQL)
 		}
 		err = query.Find(&users).Error
 		if err != nil || len(users) > 0 {
@@ -106,9 +111,9 @@ func SearchUsers(keyword string, group string) ([]*User, error) {
 	err = nil
 
 	query := DB.Unscoped().Omit("password")
-	likeCondition := "`username` LIKE ? OR `email` LIKE ? OR `display_name` LIKE ?"
+	likeCondition := "username LIKE ? OR email LIKE ? OR display_name LIKE ?"
 	if group != "" {
-		query = query.Where("("+likeCondition+") AND `group` = ?", "%"+keyword+"%", "%"+keyword+"%", "%"+keyword+"%", group)
+		query = query.Where("("+likeCondition+") AND "+groupCol+" = ?", "%"+keyword+"%", "%"+keyword+"%", "%"+keyword+"%", group)
 	} else {
 		query = query.Where(likeCondition, "%"+keyword+"%", "%"+keyword+"%", "%"+keyword+"%")
 	}

diff --git a/relay/channel/gemini/adaptor.go b/relay/channel/gemini/adaptor.go
@@ -57,7 +57,11 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, re
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	return CovertGemini2OpenAI(*request), nil
+	ai, err := CovertGemini2OpenAI(*request)
+	if err != nil {
+		return nil, err
+	}
+	return ai, nil
 }
 
 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {

diff --git a/relay/channel/gemini/constant.go b/relay/channel/gemini/constant.go
@@ -1,13 +1,19 @@
 package gemini
 
-const (
-	GeminiVisionMaxImageNum = 16
-)
-
 var ModelList = []string{
-	"gemini-1.0-pro-latest", "gemini-1.0-pro-001", "gemini-1.5-pro-latest", "gemini-1.5-flash-latest", "gemini-ultra",
-	"gemini-1.0-pro-vision-latest", "gemini-1.0-pro-vision-001", "gemini-1.5-pro-exp-0827", "gemini-1.5-flash-exp-0827",
-	"gemini-exp-1114",
+	// stable version
+	"gemini-1.5-pro", "gemini-1.5-flash", "gemini-1.5-flash-8b",
+	// latest version
+	"gemini-1.5-pro-latest", "gemini-1.5-flash-latest",
+	// legacy version
+	"gemini-1.5-pro-exp-0827", "gemini-1.5-flash-exp-0827",
+	// exp
+	"gemini-exp-1114", "gemini-exp-1121", "gemini-exp-1206",
+	// flash exp
+	"gemini-2.0-flash-exp",
+	// thinking exp
+	"gemini-2.0-flash-thinking-exp",
+	"gemini-2.0-flash-thinking-exp-1219",
 }
 
 var ChannelName = "google gemini"