Merge pull request devchat-ai#122 from devchat-ai/call_llm_with_litellm
Call llm with litellm
yangbobo2021 authored Sep 13, 2023
2 parents 38e21e2 + 62a6843 commit aabb1bc
Showing 5 changed files with 416 additions and 33 deletions.
20 changes: 18 additions & 2 deletions devchat/assistant.py
@@ -1,3 +1,5 @@
+import json
+import time
 from typing import Optional, List, Iterator
 from devchat.message import Message
 from devchat.chat import Chat
@@ -90,8 +92,22 @@ def iterate_response(self) -> Iterator[str]:
         """
         if self._chat.config.stream:
             first_chunk = True
-            for chunk in self._chat.stream_response(self._prompt):
-                delta = self._prompt.append_response(str(chunk))
+            created_time = int(time.time())
+            config_params = self._chat.config.dict(exclude_unset=True)
+            chunks = list(self._chat.stream_response(self._prompt))
+            for index, chunk in enumerate(chunks):
+                if "index" not in chunk["choices"][0]:
+                    chunk["id"] = "chatcmpl-7vdfQI02x-" + str(created_time)
+                    chunk["object"] = "chat.completion.chunk"
+                    chunk["created"] = created_time
+                    chunk["model"] = config_params["model"]
+                    chunk["choices"][0]["index"] = 0
+                    stop_reason = "null"
+                    if index + 1 == len(chunks):
+                        stop_reason = "stop"
+                    chunk["choices"][0]["finish_reason"] = stop_reason
+
+                delta = self._prompt.append_response(json.dumps(chunk))
                 if first_chunk:
                     first_chunk = False
                     yield self._prompt.formatted_header()
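The new streaming loop first buffers every chunk with list(...) so it can tell which one is last, then backfills the identifiers an OpenAI-style chunk carries (id, object, created, model, choices[0].index, finish_reason) whenever a litellm-served backend omits them. Note the trade-off: buffering gives up incremental output in exchange for being able to mark the final chunk, and intermediate chunks get the literal string "null" rather than a JSON null. A minimal sketch of the same normalization, with a hypothetical normalize_stream_chunks helper and made-up sample chunks:

import json
import time
from typing import Iterator, List

def normalize_stream_chunks(chunks: List[dict], model: str) -> Iterator[str]:
    """Backfill the OpenAI chunk fields that some litellm backends omit."""
    created_time = int(time.time())
    for index, chunk in enumerate(chunks):
        choice = chunk["choices"][0]
        if "index" not in choice:
            chunk["id"] = "chatcmpl-" + str(created_time)  # placeholder id, like the hard-coded prefix above
            chunk["object"] = "chat.completion.chunk"
            chunk["created"] = created_time
            chunk["model"] = model
            choice["index"] = 0
            # Only the last buffered chunk is marked as the stop chunk.
            choice["finish_reason"] = "stop" if index + 1 == len(chunks) else "null"
        yield json.dumps(chunk)

# Two bare chunks such as a non-OpenAI backend might emit.
raw = [
    {"choices": [{"delta": {"content": "Hel"}}]},
    {"choices": [{"delta": {"content": "lo"}}]},
]
for line in normalize_stream_chunks(raw, model="gpt-3.5-turbo"):
    print(line)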
42 changes: 34 additions & 8 deletions devchat/openai/openai_chat.py
@@ -1,4 +1,5 @@
 from typing import Optional, Union, List, Dict, Iterator
+import os
 from pydantic import BaseModel, Field
 import openai
 from litellm import completion
@@ -67,10 +68,22 @@ def complete_response(self, prompt: OpenAIPrompt) -> str:
             config_params['function_call'] = 'auto'
         config_params['stream'] = False

-        response = completion(
-            messages=prompt.messages,
-            **config_params
-        )
+        api_key = os.environ.get("OPENAI_API_KEY")
+
+        if api_key.startswith("DC."):
+            response = openai.ChatCompletion.create(
+                messages=prompt.messages,
+                **config_params
+            )
+        else:
+            if config_params["model"].startswith("gpt-"):
+                # call gpt-* models through the OpenAI API with the OpenAI API key
+                response = openai.ChatCompletion.create(
+                    messages=prompt.messages,
+                    **config_params
+                )
+            else:
+                response = completion(messages=prompt.messages, **config_params, api_key=api_key)
         return str(response)

     def stream_response(self, prompt: OpenAIPrompt) -> Iterator:
@@ -81,8 +94,21 @@ def stream_response(self, prompt: OpenAIPrompt) -> Iterator:
             config_params['function_call'] = 'auto'
         config_params['stream'] = True

-        response = completion(
-            messages=prompt.messages,
-            **config_params
-        )
+        # read the API key from the environment
+        api_key = os.environ.get("OPENAI_API_KEY")
+
+        if api_key.startswith("DC."):
+            response = openai.ChatCompletion.create(
+                messages=prompt.messages,
+                **config_params
+            )
+        else:
+            if config_params["model"].startswith("gpt-"):
+                # call gpt-* models through the OpenAI API with the OpenAI API key
+                response = openai.ChatCompletion.create(
+                    messages=prompt.messages,
+                    **config_params
+                )
+            else:
+                response = completion(**config_params, messages=prompt.messages, api_key=api_key)
         return response
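Both complete_response and stream_response now share the same routing rule: a key starting with "DC." (a DevChat-issued key) or a gpt-* model goes through openai.ChatCompletion.create, and every other model is proxied through litellm's completion with the key passed explicitly. A sketch of that rule factored into one function, assuming the pre-1.0 openai SDK that the ChatCompletion call above implies; route_completion and its missing-key guard are illustrative additions, not part of the commit:

import os
from typing import Iterator, List, Union

import openai  # pre-1.0 SDK, matching openai.ChatCompletion.create above
from litellm import completion

def route_completion(messages: List[dict], **config_params) -> Union[dict, Iterator]:
    """Dispatch a chat request the way complete_response/stream_response do."""
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key is None:
        # Illustrative guard: the diff calls .startswith() on the key directly,
        # which raises AttributeError when the variable is unset.
        raise EnvironmentError("OPENAI_API_KEY is not set")

    # The two nested branches above collapse into a single condition.
    if api_key.startswith("DC.") or config_params["model"].startswith("gpt-"):
        return openai.ChatCompletion.create(messages=messages, **config_params)
    # Non-gpt models (claude, llama, ...) go through litellm.
    return completion(messages=messages, api_key=api_key, **config_params)

For example, route_completion([{"role": "user", "content": "hi"}], model="claude-2", stream=True) would reach litellm, while the same call with model="gpt-3.5-turbo" stays on the OpenAI client.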
25 changes: 3 additions & 22 deletions devchat/utils.py
@@ -1,4 +1,3 @@
-import json
 import logging
 import os
 import re
@@ -9,6 +8,8 @@
 import datetime
 import hashlib
 import tiktoken
+from litellm import token_counter
+


 log_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

@@ -207,27 +208,7 @@ def _count_tokens(encoding: tiktoken.Encoding, string: str) -> int:

 def openai_message_tokens(message: dict, model: str) -> int:
     """Returns the number of tokens used by a message."""
-    try:
-        encoding = tiktoken.encoding_for_model(model)
-    except KeyError as err:
-        raise ValueError(f"Invalid model {model} for tiktoken.") from err
-
-    num_tokens = 0
-    if model == "gpt-3.5-turbo-0301":
-        num_tokens += 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
-        tokens_per_name = -1  # if there's a name, the role is omitted
-    else:
-        num_tokens += 3
-        tokens_per_name = 1
-
-    for key, value in message.items():
-        if key == 'function_call':
-            value = json.dumps(value)
-        if value:
-            num_tokens += _count_tokens(encoding, value)
-        if key == "name":
-            num_tokens += tokens_per_name
-    return num_tokens
+    return token_counter(model=model, text=str(message))


 def openai_response_tokens(message: dict, model: str) -> int:
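openai_message_tokens drops the hand-maintained tiktoken accounting (per-message overhead, name handling, function_call serialization) in favor of litellm's token_counter, which resolves the tokenizer from the model name. Because the whole message dict is stringified, the count includes the {'role': ...} wrapper characters and only approximates the per-message overhead the deleted code modeled. A small usage sketch; the model name and message are illustrative:

from litellm import token_counter

message = {"role": "user", "content": "What is the weather in Boston?"}

# What the new helper computes: the dict rendered as text, then tokenized.
approx_tokens = token_counter(model="gpt-3.5-turbo", text=str(message))

# Recent litellm releases can also count a message list directly, which stays
# closer to OpenAI's own accounting; shown for comparison, not used here.
chat_tokens = token_counter(model="gpt-3.5-turbo", messages=[message])

print(approx_tokens, chat_tokens)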