forked from i-am-alice/3rd-devs
Showing 6 changed files with 609 additions and 0 deletions.

First changed file, OpenAIService.ts:

import OpenAI from "openai";
import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";
import { createByModelName } from '@microsoft/tiktokenizer';

export class OpenAIService {
  private openai: OpenAI;
  private tokenizers: Map<string, Awaited<ReturnType<typeof createByModelName>>> = new Map();
  private readonly IM_START = "<|im_start|>";
  private readonly IM_END = "<|im_end|>";
  private readonly IM_SEP = "<|im_sep|>";

  constructor() {
    this.openai = new OpenAI();
  }

  // Lazily creates and caches one tokenizer per model, registering the
  // chat-format special tokens so countTokens can encode them.
  private async getTokenizer(modelName: string) {
    if (!this.tokenizers.has(modelName)) {
      const specialTokens: ReadonlyMap<string, number> = new Map([
        [this.IM_START, 100264],
        [this.IM_END, 100265],
        [this.IM_SEP, 100266],
      ]);
      const tokenizer = await createByModelName(modelName, specialTokens);
      this.tokenizers.set(modelName, tokenizer);
    }
    return this.tokenizers.get(modelName)!;
  }

  // Approximates the prompt's token count by reproducing the chat format the
  // models see internally (string contents only). For a single user message
  // "Hello" the encoded text is:
  //   <|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>
  async countTokens(messages: ChatCompletionMessageParam[], model: string = 'gpt-4o'): Promise<number> {
    const tokenizer = await this.getTokenizer(model);

    let formattedContent = '';
    messages.forEach((message) => {
      formattedContent += `${this.IM_START}${message.role}${this.IM_SEP}${message.content || ''}${this.IM_END}`;
    });
    formattedContent += `${this.IM_START}assistant${this.IM_SEP}`;

    const tokens = tokenizer.encode(formattedContent, [this.IM_START, this.IM_END, this.IM_SEP]);
    return tokens.length;
  }

  async completion(
    messages: ChatCompletionMessageParam[],
    model: string = "gpt-4o",
    stream: boolean = false,
    jsonMode: boolean = false,
    maxTokens: number = 4096
  ): Promise<OpenAI.Chat.Completions.ChatCompletion | AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>> {
    try {
      const chatCompletion = await this.openai.chat.completions.create({
        messages,
        model,
        // The o1 reasoning models do not support these parameters,
        // so they are only sent for the other models.
        ...(model !== 'o1-mini' && model !== 'o1-preview' && {
          stream,
          max_tokens: maxTokens,
          response_format: jsonMode ? { type: "json_object" } : { type: "text" }
        })
      });

      if (stream) {
        return chatCompletion as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>;
      } else {
        return chatCompletion as OpenAI.Chat.Completions.ChatCompletion;
      }
    } catch (error) {
      console.error("Error in OpenAI completion:", error);
      throw error;
    }
  }

  async calculateImageTokens(width: number, height: number, detail: 'low' | 'high'): Promise<number> {
    let tokenCost = 0;

    if (detail === 'low') {
      tokenCost += 85;
      return tokenCost;
    }

    const MAX_DIMENSION = 2048;
    const SCALE_SIZE = 768;

    // Resize to fit within MAX_DIMENSION x MAX_DIMENSION
    if (width > MAX_DIMENSION || height > MAX_DIMENSION) {
      const aspectRatio = width / height;
      if (aspectRatio > 1) {
        width = MAX_DIMENSION;
        height = Math.round(MAX_DIMENSION / aspectRatio);
      } else {
        height = MAX_DIMENSION;
        width = Math.round(MAX_DIMENSION * aspectRatio);
      }
    }

    // Scale the shortest side to SCALE_SIZE
    if (width >= height && height > SCALE_SIZE) {
      width = Math.round((SCALE_SIZE / height) * width);
      height = SCALE_SIZE;
    } else if (height > width && width > SCALE_SIZE) {
      height = Math.round((SCALE_SIZE / width) * height);
      width = SCALE_SIZE;
    }

    // Calculate the number of 512px squares
    const numSquares = Math.ceil(width / 512) * Math.ceil(height / 512);

    // Calculate the token cost
    tokenCost += (numSquares * 170) + 85;
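
    // Example: a 1024x1024 high-detail image is scaled down to 768x768,
    // which covers ceil(768/512) * ceil(768/512) = 2 * 2 = 4 squares,
    // so the cost is 4 * 170 + 85 = 765 tokens.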

    return tokenCost;
  }
}
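
A minimal usage sketch for this service (not part of the commit; it assumes an ESM or Bun setup with top-level await and an OPENAI_API_KEY in the environment):

import type { ChatCompletion, ChatCompletionMessageParam } from "openai/resources/chat/completions";
import { OpenAIService } from './OpenAIService';

const service = new OpenAIService();

const messages: ChatCompletionMessageParam[] = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'Say hello in one sentence.' }
];

// Estimate the prompt size before sending it
console.log('Prompt tokens:', await service.countTokens(messages, 'gpt-4o'));

// Non-streaming call, so the result can be narrowed to ChatCompletion
const response = await service.completion(messages, 'gpt-4o', false) as ChatCompletion;
console.log(response.choices[0].message.content);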

Second changed file, the image-processing script:

import type { ChatCompletion, ChatCompletionMessageParam } from "openai/resources/chat/completions";
import { OpenAIService } from './OpenAIService';
import { readFile, writeFile } from 'fs/promises';
import { join } from 'path';
import { extractImageContextSystemMessage, refineDescriptionSystemMessage, previewImageSystemMessage } from './prompts';

const openaiService = new OpenAIService();

// Metadata collected for each image found in the article
export type Image = {
  alt: string;
  url: string;
  context: string;
  description: string;
  preview: string;
  base64: string;
  name: string;
};

// Finds every markdown image in the article, downloads it, and returns it
// as base64 together with its alt text and file name.
async function extractImages(article: string): Promise<Image[]> {
  const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
  const matches = [...article.matchAll(imageRegex)];

  const imagePromises = matches.map(async ([, alt, url]) => {
    try {
      const name = url.split('/').pop() || '';
      const response = await fetch(url);
      if (!response.ok) throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
      const arrayBuffer = await response.arrayBuffer();
      const base64 = Buffer.from(arrayBuffer).toString('base64');

      return {
        alt,
        url,
        context: '',
        description: '',
        preview: '',
        base64,
        name
      };
    } catch (error) {
      console.error(`Error processing image ${url}:`, error);
      return null;
    }
  });

  const results = await Promise.all(imagePromises);
  return results.filter((link): link is Image => link !== null);
}
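
// A quick illustration of what the regex above captures (the markdown and
// URL are made up for this example):
//   const [m] = [...'Intro ![A robot](https://example.com/robot.png) outro'
//     .matchAll(/!\[([^\]]*)\]\(([^)]+)\)/g)];
//   // m[1] === 'A robot' (alt), m[2] === 'https://example.com/robot.png' (url)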

// Generates a short, standalone preview of a single image.
async function previewImage(image: Image): Promise<{ name: string; preview: string }> {
  const userMessage: ChatCompletionMessageParam = {
    role: 'user',
    content: [
      {
        type: "image_url",
        image_url: { url: `data:image/jpeg;base64,${image.base64}` }
      },
      {
        type: "text",
        text: `Describe the image ${image.name} concisely. Focus on the main elements and overall composition. Return the result in JSON format with only 'name' and 'preview' properties.`
      }
    ]
  };

  const response = await openaiService.completion([previewImageSystemMessage, userMessage], 'gpt-4o', false, true) as ChatCompletion;
  const result = JSON.parse(response.choices[0].message.content || '{}');
  return { name: result.name || image.name, preview: result.preview || '' };
}

// Extracts the article context for every image and pairs it with a
// model-generated preview of that image.
async function getImageContext(title: string, article: string, images: Image[]): Promise<{ images: Array<{ name: string, context: string, preview: string }> }> {
  const userMessage: ChatCompletionMessageParam = {
    role: 'user',
    content: `Title: ${title}\n\n${article}`
  };

  const response = await openaiService.completion([extractImageContextSystemMessage(images), userMessage], 'gpt-4o', false, true) as ChatCompletion;
  const result = JSON.parse(response.choices[0].message.content || '{}');

  // Generate previews for all images simultaneously
  const previewPromises = images.map(image => previewImage(image));
  const previews = await Promise.all(previewPromises);

  // Merge context and preview information
  const mergedResults = result.images.map((contextImage: { name: string, context: string }) => {
    const preview = previews.find(p => p.name === contextImage.name);
    return {
      ...contextImage,
      preview: preview ? preview.preview : ''
    };
  });

  return { images: mergedResults };
}
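
// The context call is expected to return JSON shaped like
//   { "images": [{ "name": "...", "context": "..." }, ...] }
// (shape inferred from the parsing above; the exact contract lives in
// extractImageContextSystemMessage in ./prompts).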

// Produces the final description of an image from the image itself,
// its article context, and the initial preview.
async function refineDescription(image: Image): Promise<Image> {
  const userMessage: ChatCompletionMessageParam = {
    role: 'user',
    content: [
      {
        type: "image_url",
        image_url: { url: `data:image/jpeg;base64,${image.base64}` }
      },
      {
        type: "text",
        text: `Write a description of the image ${image.name}. I have some <context>${image.context}</context> that should be useful for understanding the image in a better way. An initial preview of the image is: <preview>${image.preview}</preview>. A good description briefly describes what is on the image, and uses the context to make it more relevant to the article. The purpose of this description is for summarizing the article, so we need just an essence of the image considering the context, not a detailed description of what is on the image.`
      }
    ]
  };

  console.log(userMessage); // debug: inspect the prompt sent to the model

  const response = await openaiService.completion([refineDescriptionSystemMessage, userMessage], 'gpt-4o', false) as ChatCompletion;
  const result = response.choices[0].message.content || '';
  return { ...image, description: result };
}

/**
 * Orchestrates all processing steps: extracts the images from the article,
 * gathers their context and previews, refines each description, and saves
 * the results to descriptions.json and captions.json.
 */
async function processAndSummarizeImages(title: string, path: string) {
  // Read the article file
  const article = await readFile(path, 'utf-8');

  // Extract images from the article
  const images = await extractImages(article);
  console.log('Number of images found:', images.length);

  const contexts = await getImageContext(title, article, images);
  console.log('Number of image metadata entries found:', contexts.images.length);

  // Process each image: use context and preview from getImageContext, then refine description
  const processedImages = await Promise.all(images.map(async (image) => {
    const { context = '', preview = '' } = contexts.images.find(ctx => ctx.name === image.name) || {};
    return await refineDescription({ ...image, preview, context });
  }));

  // Save the described images (excluding the base64 data)
  const describedImages = processedImages.map(({ base64, ...rest }) => rest);
  await writeFile(join(__dirname, 'descriptions.json'), JSON.stringify(describedImages, null, 2));

  // Save the final data (only url and description)
  const captions = describedImages.map(({ url, description }) => ({ url, description }));
  await writeFile(join(__dirname, 'captions.json'), JSON.stringify(captions, null, 2));

  console.log('Final data saved to descriptions.json and captions.json');
}

// Execute the main function
processAndSummarizeImages('Lesson #0201 — Audio i interfejs głosowy', join(__dirname, 'article.md'))
  .catch(error => console.error('Error while processing and summarizing images:', error));
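
For reference, a sketch of the shapes the two output files end up with, derived from the code above (the actual values come from the model at run time):

// descriptions.json: full Image records minus the heavy base64 payload
type DescribedImage = Omit<Image, 'base64'>;

// captions.json: just the url/description pairs kept for the summary
type Caption = Pick<Image, 'url' | 'description'>;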