
Commit

server: use configured system prompt, ignore system messages (nomic-ai#2921)

Signed-off-by: Adam Treat <[email protected]>
Signed-off-by: Jared Van Bortel <[email protected]>
Co-authored-by: Jared Van Bortel <[email protected]>
manyoso and cebtenzzre authored Aug 29, 2024
1 parent 82491fe commit e1d49d9
Showing 3 changed files with 9 additions and 3 deletions.
gpt4all-chat/CHANGELOG.md (1 addition, 0 deletions)

@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ### Added
 - Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
+- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921))
 
 ### Changed
 - Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))
gpt4all-chat/src/chatllm.cpp (4 additions, 3 deletions)

@@ -719,8 +719,6 @@ bool ChatLLM::prompt(const QList<QString> &collectionList, const QString &prompt
         processRestoreStateFromText();
     }
 
-    if (!m_processedSystemPrompt)
-        processSystemPrompt();
     const QString promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
     const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
@@ -741,6 +739,9 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
     if (!isModelLoaded())
         return false;
 
+    if (!m_processedSystemPrompt)
+        processSystemPrompt();
+
     QList<ResultInfo> databaseResults;
     const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize();
     if (!collectionList.isEmpty()) {
@@ -1206,7 +1207,7 @@ void ChatLLM::restoreState()
 void ChatLLM::processSystemPrompt()
 {
     Q_ASSERT(isModelLoaded());
-    if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText || m_isServer)
+    if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText)
         return;
 
     const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
gpt4all-chat/src/server.cpp (4 additions, 0 deletions)

@@ -340,6 +340,10 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
     QList<QString> chats;
     for (int i = 0; i < messages.count(); ++i) {
         QJsonValue v = messages.at(i);
+        // FIXME: Deal with system messages correctly
+        QString role = v.toObject()["role"].toString();
+        if (role != "user")
+            continue;
         QString content = v.toObject()["content"].toString();
         if (!content.endsWith("\n") && i < messages.count() - 1)
            content += "\n";
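For illustration, a hypothetical request to the local server's OpenAI-compatible chat completions endpoint (the model name and message contents below are made up). With this change, any message whose role is not "user" is skipped when the prompt is assembled, so the "system" entry is ignored and the system prompt configured for the model in settings is applied instead, since processSystemPrompt() no longer returns early for the server path.

    {
      "model": "<installed model name>",
      "messages": [
        { "role": "system", "content": "You are a pirate." },
        { "role": "user",   "content": "Hello!" }
      ]
    }

Before this commit, the server neither filtered non-user roles nor applied the configured system prompt, because processSystemPrompt() bailed out whenever m_isServer was set.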
