Merge branch 'i-am-alice:main' into main

icter1991 · Nov 21, 2024 · 9c4cde5 · 9c4cde5
2 parents 846b26a + b27d4ee
commit 9c4cde5
Show file tree

Hide file tree

Showing 23 changed files with 2,884 additions and 91 deletions.
diff --git a/context/OpenAIService.ts b/context/OpenAIService.ts
@@ -0,0 +1,44 @@
+import OpenAI, { toFile } from "openai";
+import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";
+import fs from 'fs/promises';
+
+/**
+ * A description paired with a source, both plain strings.
+ *
+ * NOTE(review): nothing in the visible part of this file reads or produces
+ * this type; judging by the name it presumably describes the outcome of
+ * processing a single image — confirm against its callers.
+ */
+export interface ImageProcessingResult {
+  /** Textual description (presumably of the processed image — confirm). */
+  description: string;
+  /** NOTE(review): presumably identifies where the image came from (path or URL) — confirm. */
+  source: string;
+}
+
+/**
+ * Small wrapper around the OpenAI SDK client that exposes a single
+ * chat-completion helper with defaults for the options it accepts.
+ */
+export class OpenAIService {
+  private readonly openai: OpenAI;
+
+  constructor() {
+    // No options are passed, so the SDK client uses its own defaults.
+    // NOTE(review): presumably the API key is picked up from the environment — confirm.
+    this.openai = new OpenAI();
+  }
+
+  /**
+   * Requests a chat completion.
+   *
+   * For the `o1-mini` and `o1-preview` models only `messages` and `model` are
+   * sent: `stream`, `maxTokens` and `jsonMode` are ignored, so the result is
+   * always a plain `ChatCompletion` for those models even when `stream` is true.
+   *
+   * @param config.messages Conversation to send to the model.
+   * @param config.model Model name; defaults to `"gpt-4o"`.
+   * @param config.stream Ask for a streamed response; defaults to `false`.
+   * @param config.jsonMode Request the `json_object` response format instead of `text`; defaults to `false`.
+   * @param config.maxTokens Value sent as `max_tokens`; defaults to `4096`.
+   * @returns An async iterable of chunks when the request was actually streamed,
+   *   otherwise the complete `ChatCompletion`.
+   * @throws Rethrows whatever the SDK call throws, after logging it.
+   */
+  async completion(config: {
+    messages: ChatCompletionMessageParam[],
+    model?: string,
+    stream?: boolean,
+    jsonMode?: boolean,
+    maxTokens?: number
+  }): Promise<OpenAI.Chat.Completions.ChatCompletion | AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>> {
+    const { messages, model = "gpt-4o", stream = false, jsonMode = false, maxTokens = 4096 } = config;
+
+    // The o1 models get none of the tuning options below, `stream` included.
+    const acceptsTuning = model !== 'o1-mini' && model !== 'o1-preview';
+    // Whether streaming was really requested from the API, as opposed to
+    // merely asked for by the caller. The return cast must follow this value.
+    const streamed = acceptsTuning && stream;
+
+    try {
+      const chatCompletion = await this.openai.chat.completions.create({
+        messages,
+        model,
+        // Spreading `false` adds no properties, so the options vanish for o1 models.
+        ...(acceptsTuning && {
+          stream,
+          max_tokens: maxTokens,
+          response_format: jsonMode ? { type: "json_object" } : { type: "text" }
+        })
+      });
+
+      return streamed
+        ? chatCompletion as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>
+        : chatCompletion as OpenAI.Chat.Completions.ChatCompletion;
+    } catch (error: unknown) {
+      console.error("Error in OpenAI completion:", error);
+      throw error;
+    }
+  }
+}
diff --git a/context/app.ts b/context/app.ts
@@ -0,0 +1,36 @@
+import { join } from "path";
+import fs from 'fs/promises';
+import { OpenAIService } from "./OpenAIService";
+import type { ChatCompletion } from "openai/resources/chat/completions";
+
+const openAIService = new OpenAIService();
+
+// The document offered to the model lives next to this script.
+const path = join(__dirname, 'long_context.md');
+const content = await fs.readFile(path, 'utf8');
+const content_uuid = '0398cf2c-110e-4b7f-ac8a-88ec6ae6f248';
+
+const query = 'Show me the list of available documents.';
+
+// A Map rather than an object literal: looking up a model-supplied key on a
+// plain object also resolves inherited members, so a placeholder such as
+// [[constructor]] would be swapped for a function's source text.
+const documents = new Map<string, string>([
+  [content_uuid, content],
+]);
+
+/**
+ * Replaces every `[[uuid]]` placeholder in `text` with the matching document.
+ * Placeholders whose uuid is not a known document are left untouched.
+ */
+const expandPlaceholders = (text: string): string =>
+  text.replace(/\[\[([^\]]+)\]\]/g, (match: string, uuid: string) => documents.get(uuid) ?? match);
+
+// The model only sees the document's title and uuid; the full text is
+// substituted locally after the answer comes back.
+const completion = await openAIService.completion({
+  messages: [
+    {role: 'system', content: `As an AI assistant, you can use the following documents in your responses by referencing them with the placeholder: [[uuid]] (double square brackets).
+
+    <rule>
+    - Placeholder is double square brackets. Make sure to use it correctly and carefully rewrite uuid of the document.
+    - Documents are long forms of text, so use them naturally within the text, like "here's your file: \n\n [[uuid]] \n\n".
+    </rule>
+
+    <available_documents>
+    Lesson 0302 — Wyszukiwanie hybrydowe:${content_uuid}
+    </available_documents>`},
+    {role: 'user', content: query}
+  ]
+}) as ChatCompletion;
+
+// `choices` can be empty and `content` can be null; both yield an empty answer.
+const answer = expandPlaceholders(completion.choices[0]?.message.content ?? '');
+
+console.log(answer);
diff --git a/context/long_context.md b/context/long_context.md
@@ -0,0 +1,9 @@
+![](https://cloud.overment.com/S03E03-1728402281.png)
+
+Lekcja [S03E02](S03E02%20—%20Wyszukiwanie%20semantyczne.md) pokazała nam, że [bazy wektorowe](glossary/Vector%20Database.md) nie wystarczają do skutecznego przeszukiwania danych. Co prawda wsparcie ze strony [modelu](glossary/LLM.md) poprawia sytuację, ale nie rozwiązuje wszystkich problemów. Pierwszym z brzegu przykładem może być poszukiwanie akronimów, numerów serii czy zamówienia, lub wyrażeń, których model nie potrafi opisać w [embeddingu](glossary/Embedding.md). 
+
+Zatem gdy mamy do czynienia ze słowami kluczowymi i precyzyjnym dopasowaniem, bazy wektorowe okazują się niewystarczające, a wyszukiwanie semantyczne musi zostać uzupełnione wyszukiwaniem pełnotekstowym.
+
+Łączenie różnych technik wyszukiwania określamy mianem [wyszukiwania hybrydowego](glossary/Hybrid%20Search.md). Może ono przybierać różne formy i konfiguracje w zależności od potrzeb. Przykładowo, czasami wystarczy użyć [PostgreSQL](tools/PostgreSQL.md) z pgvector i pgsearch do przechowywania danych oraz do wyszukiwania semantycznego i pełnotekstowego, co powinno wystarczyć nam na potrzeby małych projektów. Innym razem będzie nam zależało na rozdzieleniu tych odpowiedzialności na różne narzędzia. 
+
+(cdn...)