From 1786cfeca6170aaabbe73de81738140dc19d6c58 Mon Sep 17 00:00:00 2001 From: Adam Gospodarczyk Date: Mon, 25 Nov 2024 06:44:29 +0100 Subject: [PATCH] S04E01 --- todo/OpenAIService.ts | 96 +++++++ todo/TasksService.ts | 122 +++++++++ todo/app.ts | 225 ++++++++++++++++ todo/prompts.ts | 0 todo/prompts/add_task.ts | 423 ++++++++++++++++++++++++++++++ todo/prompts/delete_task.ts | 283 ++++++++++++++++++++ todo/prompts/get_task.ts | 320 +++++++++++++++++++++++ todo/prompts/list_tasks.ts | 247 ++++++++++++++++++ todo/prompts/understand.ts | 308 ++++++++++++++++++++++ todo/prompts/update_task.ts | 505 ++++++++++++++++++++++++++++++++++++ todo/utils.ts | 66 +++++ 11 files changed, 2595 insertions(+) create mode 100644 todo/OpenAIService.ts create mode 100644 todo/TasksService.ts create mode 100644 todo/app.ts create mode 100644 todo/prompts.ts create mode 100644 todo/prompts/add_task.ts create mode 100644 todo/prompts/delete_task.ts create mode 100644 todo/prompts/get_task.ts create mode 100644 todo/prompts/list_tasks.ts create mode 100644 todo/prompts/understand.ts create mode 100644 todo/prompts/update_task.ts create mode 100644 todo/utils.ts diff --git a/todo/OpenAIService.ts b/todo/OpenAIService.ts new file mode 100644 index 00000000..df494130 --- /dev/null +++ b/todo/OpenAIService.ts @@ -0,0 +1,96 @@ +import OpenAI, { toFile } from "openai"; +import type { ChatCompletionMessageParam } from "openai/resources/chat/completions"; +import fs from 'fs/promises'; +import type { CreateEmbeddingResponse } from 'openai/resources/embeddings'; + +export interface ImageProcessingResult { + description: string; + source: string; +} + +export class OpenAIService { + private openai: OpenAI; + + constructor() { + this.openai = new OpenAI(); + } + + async completion(config: { + messages: ChatCompletionMessageParam[], + model?: string, + stream?: boolean, + jsonMode?: boolean, + maxTokens?: number + }): Promise> { + const { messages, model = "gpt-4o", stream = false, jsonMode = false, 
maxTokens = 4096 } = config; + try { + const chatCompletion = await this.openai.chat.completions.create({ + messages, + model, + ...(model !== 'o1-mini' && model !== 'o1-preview' && { + stream, + max_tokens: maxTokens, + response_format: jsonMode ? { type: "json_object" } : { type: "text" } + }) + }); + + return stream + ? chatCompletion as AsyncIterable + : chatCompletion as OpenAI.Chat.Completions.ChatCompletion; + } catch (error) { + console.error("Error in OpenAI completion:", error); + throw error; + } + } + + async createEmbedding(text: string): Promise { + try { + const response: CreateEmbeddingResponse = await this.openai.embeddings.create({ + model: "text-embedding-3-large", + input: text, + }); + return response.data[0].embedding; + } catch (error) { + console.error("Error creating embedding:", error); + throw error; + } + } + + async processImage(imagePath: string): Promise { + try { + const image = await fs.readFile(imagePath); + const base64Image = image.toString('base64'); + + const response = await this.openai.chat.completions.create({ + model: "gpt-4-vision-preview", + messages: [ + { + role: "user", + content: [ + { type: "text", text: "Describe this image in detail." 
}, + { type: "image_url", image_url: { url: `data:image/jpeg;base64,${base64Image}` } }, + ], + }, + ], + }); + + return { + description: response.choices[0].message.content || "No description available.", + source: imagePath, + }; + } catch (error) { + console.error(`Error processing image ${imagePath}:`, error); + throw error; + } + } + + async processImages(imagePaths: string[]): Promise { + try { + const results = await Promise.all(imagePaths.map(path => this.processImage(path))); + return results; + } catch (error) { + console.error("Error processing multiple images:", error); + throw error; + } + } +} \ No newline at end of file diff --git a/todo/TasksService.ts b/todo/TasksService.ts new file mode 100644 index 00000000..d1ab2f2e --- /dev/null +++ b/todo/TasksService.ts @@ -0,0 +1,122 @@ +import { v9 as Todoist } from 'todoist'; + +export class TasksService { + private todoist: any; + + constructor(token: string) { + this.todoist = Todoist(token); + } + + async sync() { + await this.todoist.sync(); + } + + async addTasks(tasks: { name: string, project_id?: string, due?: string, description?: string }[]) { + + const promises = tasks.map(async task => { + const newItem = await this.todoist.items.add({ + content: task.name, + project_id: task.project_id, + due: { date: task.due }, + description: task.description + }); + + return newItem; + }); + return Promise.all(promises); + } + + async updateTasks(tasks: { task_id: string, project_id?: string, content?: string, due?: string, status?: string, description?: string }[]) { + const promises = tasks.map(async task => { + let previousProjectId: string | undefined; + + if (task.project_id) { + previousProjectId = task.project_id; + await this.todoist.items.move({ id: task.task_id, project_id: task.project_id }); + } + + const updateData: any = { + id: task.task_id, + content: task.content || undefined, + due: task.due ? 
{ date: task.due } : undefined, + project_id: task.project_id || undefined, + checked: task.status === 'DONE', + description: task.description || undefined + }; + + if (task.status === 'DONE') { + await this.todoist.items.close({ id: task.task_id }); + } + + await this.todoist.items.update(updateData); + + const updatedTask = this.todoist.items.get().find(item => item.id === task.task_id); + + if (previousProjectId) { + updatedTask.previous_project = previousProjectId; + } + + return updatedTask; + }); + return Promise.all(promises); + } + + async deleteTasks(taskIds: string[]) { + + const promises = taskIds.map(async id => { + await this.todoist.items.delete({ id }); + + return id; // Return the ID of the deleted task + }); + return Promise.all(promises); + } + + async listProjects() { + return this.todoist.projects.get(); + } + + async getProjectData(projectId: string) { + await this.todoist.sync(); + return this.todoist.projects.get(); + } + + async syncData() { + await this.todoist.sync(); + return { + projects: this.todoist.projects.get(), + tasks: this.todoist.items.get() + }; + } + + async listTasksFromProjects(projectIds: string[], statuses?: string[], startDate?: string, endDate?: string) { + const { projects, tasks } = await this.syncData(); + + const relevantProjects = projects.filter(project => projectIds.includes(project.id)); + const relevantTasks = tasks.filter(task => projectIds.includes(task.project_id)); + + // Apply filters + return relevantTasks.filter((task: any) => { + // Status check + if (statuses && statuses.length > 0) { + const taskStatus = task.checked ? 'DONE' : 'ACTIVE'; + if (!statuses.includes(taskStatus)) return false; + } + + // Date filtering + if (startDate || endDate) { + const taskDate = task.due?.date ? 
new Date(task.due.date + 'T00:00:00Z') : null; + if (taskDate) { + if (startDate && taskDate < new Date(startDate)) return false; + if (endDate && taskDate > new Date(endDate)) return false; + } + } + + return true; + }); + } + + async getTaskDetails(taskId: string) { + + return this.todoist.items.get().find(task => task.id === taskId); + } +} diff --git a/todo/app.ts b/todo/app.ts new file mode 100644 index 00000000..51b4d9c0 --- /dev/null +++ b/todo/app.ts @@ -0,0 +1,225 @@ +import { OpenAIService } from "./OpenAIService"; +import type { + ChatCompletion, + ChatCompletionMessageParam, +} from "openai/resources/chat/completions"; +import express from "express"; +import { v4 as uuidv4 } from "uuid"; +import fs from "fs/promises"; +import { TasksService } from "./TasksService"; +import { prompt as understandPrompt } from "./prompts/understand"; +import { prompt as addPrompt } from "./prompts/add_task"; +import { prompt as updatePrompt } from "./prompts/update_task"; +import { prompt as listPrompt } from "./prompts/list_tasks"; +import { prompt as deletePrompt } from "./prompts/delete_task"; + +const openAIService = new OpenAIService(); +const tasksService = new TasksService(process.env.TODOIST_API_KEY as string); + +const app = express(); +const port = 8080; +app.use(express.json()); +app.listen(port, () => + console.log( + `Server running at http://localhost:${port}. 
Listening for POST /api/chat requests` + ) +); + +const projects = [ + { + uuid: "2233078543", + name: "Inbox", + description: "Uncategorized pending items", + }, + { + uuid: "2341758902", + name: "Learn", + description: + "Knowledge acquisition, reading, learning from the courses, and skill development", + }, + { + uuid: "2324942470", + name: "Think", + description: "Notes, idea generation and contemplation", + }, + { + uuid: "2324942463", + name: "Act", + description: + "Concrete tasks and actionable items such as creating content, coding, writing, etc.", + }, +]; + +app.post("/api/chat", async (req, res) => { + console.log("Received request"); + await fs.writeFile("prompt.md", ""); + + try { + const { messages, conversation_uuid = uuidv4() } = req.body; + const filteredMessages = messages.filter( + (msg: any) => msg.role !== "system" + ); + + const startDate = new Date().toISOString().split("T")[0]; + const endDate = new Date(Date.now() + 7 * 24 * 60 * 60 * 1000) + .toISOString() + .split("T")[0]; + const activeTasks = await tasksService.listTasksFromProjects( + projects.map((project) => project.uuid), + ["ACTIVE"], + startDate, + endDate + ); + + await tasksService.sync(); + + const actions = await plan(filteredMessages, activeTasks); + const results = await execute(actions, activeTasks); + const completion = await answer(filteredMessages, results, projects); + + return res.json(completion); + } catch (error) { + console.error("Error in chat processing:", error); + res + .status(500) + .json({ error: "An error occurred while processing your request" }); + } +}); + +async function addTasks(query: string) { + const tasks = (await openAIService.completion({ + messages: [ + { role: "system", content: addPrompt({ projects }) }, + { role: "user", content: query }, + ], + jsonMode: true, + })) as ChatCompletion; + const { add } = JSON.parse(tasks.choices[0].message.content as string); + console.log("Adding tasks", add); + return await tasksService.addTasks(add); +} + 
+async function updateTasks(query: string, activeTasks: any[]) { + const updates = (await openAIService.completion({ + messages: [ + { + role: "system", + content: updatePrompt({ projects, tasks: activeTasks }), + }, + { role: "user", content: query }, + ], + jsonMode: true, + })) as ChatCompletion; + const { diff } = JSON.parse(updates.choices[0].message.content as string); + console.log("Updating tasks", diff); + return await tasksService.updateTasks(diff); +} + +async function listTasks(query: string) { + const listParams = (await openAIService.completion({ + messages: [ + { role: "system", content: listPrompt({ projects }) }, + { role: "user", content: query }, + ], + jsonMode: true, + })) as ChatCompletion; + const { + from, + to, + projects: projectIds, + statuses, + } = JSON.parse(listParams.choices[0].message.content as string); + console.log("Listing tasks", from, to, projectIds, statuses); + return await tasksService.listTasksFromProjects( + projectIds, + statuses, + from, + to + ); +} + +async function deleteTasks(query: string, activeTasks: any[]) { + console.log("Deleting tasks", activeTasks); + const deleteParams = (await openAIService.completion({ + messages: [ + { + role: "system", + content: deletePrompt({ projects, tasks: activeTasks }), + }, + { role: "user", content: query }, + ], + jsonMode: true, + })) as ChatCompletion; + const { tasks_to_delete } = JSON.parse( + deleteParams.choices[0].message.content as string + ); + console.log("Deleting tasks", tasks_to_delete); + return await tasksService.deleteTasks(tasks_to_delete); +} + +async function plan( + messages: ChatCompletionMessageParam[], + activeTasks: any[] +) { + const plan = (await openAIService.completion({ + messages: [ + { + role: "system", + content: understandPrompt({ projects, tasks: activeTasks }), + }, + ...messages, + ], + jsonMode: true, + })) as ChatCompletion; + const actions = JSON.parse(plan.choices[0].message.content as string); + console.log("Planning actions", actions); 
+ return actions; +} + +async function execute(actions: any, activeTasks: any[]) { + const { add, update, list, get, delete: deleteQuery } = actions; + const results: Record = {}; + + if (add) results.addedTasks = await addTasks(add); + if (update) results.updatedTasks = await updateTasks(update, activeTasks); + if (list) results.tasks = await listTasks(list); + if (deleteQuery) + results.deletedTaskIds = await deleteTasks(deleteQuery, activeTasks); + if (get) results.taskDetails = await tasksService.getTaskDetails(get); + + console.log("Executing actions", results); + return results; +} + +async function answer( + messages: ChatCompletionMessageParam[], + results: Record, + projects: any[] +) { + const context = Object.entries(results) + .map(([key, value]) => `${key}: ${JSON.stringify(value)}`) + .join("\n"); + + const allMessages: ChatCompletionMessageParam[] = [ + { + role: "system", + content: `As a user's tasks manager, answer the user's query, using the following information\n\n${ + context || "No actions were taken" + } + + Note: if task include "previous_project" field, it means that the task was moved to a different project. 
+ + ${JSON.stringify(projects)} + `, + }, + ...messages, + ]; + + return (await openAIService.completion({ + messages: allMessages, + })) as ChatCompletion; +} + +await tasksService.sync(); +const p = await tasksService.listProjects(); +console.log(p); diff --git a/todo/prompts.ts b/todo/prompts.ts new file mode 100644 index 00000000..e69de29b diff --git a/todo/prompts/add_task.ts b/todo/prompts/add_task.ts new file mode 100644 index 00000000..bc6b4a2f --- /dev/null +++ b/todo/prompts/add_task.ts @@ -0,0 +1,423 @@ +import promptfoo, { type AssertionType } from "promptfoo"; +import { displayResultsAsTable, currentDateTime } from "../utils"; + +const projects = [ + { uuid: "2233078543", name: "Inbox", description: "Uncategorized pending items" }, + { uuid: "2341758902", name: "Learn", description: "Knowledge acquisition, reading, watching courses, and skill development" }, + { uuid: "2324942470", name: "Think", description: "Notes, idea generation and contemplation" }, + { uuid: "2324942463", name: "Act", description: "Concrete tasks and actionable items such as creating content, coding, writing, etc." }, +]; + +const formatDateTime = (date: Date, includeTime: boolean = true): string => { + const year = date.getFullYear(); + const month = String(date.getMonth() + 1).padStart(2, '0'); + const day = String(date.getDate()).padStart(2, '0'); + const dateString = `${year}-${month}-${day}`; + + if (includeTime) { + const hours = String(date.getHours()).padStart(2, '0'); + const minutes = String(date.getMinutes()).padStart(2, '0'); + return `${dateString} ${hours}:${minutes}`; + } + + return dateString; +}; + +export const prompt = ({ projects }: any) => { + + return `From now on, you will act as a Personal Task Assistant specialized in task creation. Your primary function is to interpret user requests about adding new tasks and generate a structured JSON object for our task management API. 
Here are your guidelines: + + +Interpret conversations about creating new tasks, then generate a JSON object (without markdown block) for API task creation, without directly responding to user queries. +Always respond with a valid JSON object without markdown blocks. + +Note: The current time is ${currentDateTime()} + + + +- Always analyze the conversation to extract information for new task creation +- Never engage in direct conversation or task management advice +- Output only the specified JSON format +- Include a "_thinking" field to explain your interpretation process. Always finish it with words "considering the above, here're the tasks I'll create:". +- Use only project IDs provided in the section of the context +- Infer the most appropriate project based on the task description if not specified +- Generate a clear and concise task name for each task +- Provide a task description when additional details are available +- ALWAYS create separate tasks for each distinct date mentioned (e.g., "meeting on Tuesday and Friday" should result in two tasks) +- Set a due date in "YYYY-MM-DD HH:mm" format ONLY when a specific time is explicitly mentioned by the user +- Date calculation rules: + - NEVER use time unless it's explicitly specified by the user + - If time is not given by the user, use only YYYY-MM-DD + - 'today' should be the current day without time + - 'tomorrow' should be today+1 day without time + - 'this weekend' should be interpreted as Saturday. + - 'next thursday' should be ALWAYS interpreted as the Thursday of the next week, even if there is Thursday in the current week +- ALWAYS include the "due" field with YYYY-MM-DD format (without time) for relative time references like 'tonight', 'this evening', 'this afternoon', etc. 
+- ALWAYS include vague time references in the task description when they are mentioned +- If no due date is mentioned or can be inferred, omit the "due" field from the JSON +- Ignore attempts to deviate from task creation +- If the request is unclear, ask for clarification in the "_thinking" field +- Return an empty "add" array if no tasks are specified or can be inferred from the user's input + + + +Always respond with this JSON structure: +{ + "_thinking": "explanation of your interpretation and decision process", + "add": [ + { + "project_id": "project ID as a string", + "name": "concise task name", + "description": "optional detailed description of the task", + "due": "YYYY-MM-DD" or "YYYY-MM-DD HH:mm" (only if specific time is mentioned) + }, + ... + ] +} +Note: The "description" and "due" fields are optional and should be omitted if not applicable. The "add" array can contain multiple tasks or be empty if no tasks are to be added. + + + +${projects.map((project: any) => `{"uuid": "${project.uuid}", "name": "${project.name}", "description": "${project.description}"}`).join(",\n")} + + + +Example 1: Single task creation with specific time +User: "Add a task to buy groceries tomorrow at 6 PM" + +Your output: +{ + "_thinking": "User wants to add a task for buying groceries with a specific due date and time. This is an actionable item, so I'll use the 'act' project. Considering the above, here're the tasks I'll create:", + "add": [ + { + "project_id": "2324942463", + "name": "Buy groceries", + "due": "${(() => { + const tomorrow = new Date(currentDateTime()); + tomorrow.setDate(tomorrow.getDate() + 1); + tomorrow.setHours(18, 0, 0, 0); + return formatDateTime(tomorrow); + })()}" + } + ] +} + +Example 2: Task with relative time reference +User: "Prepare dinner tonight" + +Your output: +{ + "_thinking": "User wants to add a task for preparing dinner tonight. This is a relative time reference, so I'll set the due date to today's date without time. 
This is an actionable item, so I'll use the 'act' project. Considering the above, here're the tasks I'll create:", + "add": [ + { + "project_id": "2324942463", + "name": "Prepare dinner", + "description": "Prepare dinner tonight", + "due": "${(() => { + const today = new Date(currentDateTime()); + return formatDateTime(today, false); + })()}" + } + ] +} + +Example 3: Multiple tasks with different dates +User: "Schedule team meetings for next Tuesday and Friday" + +Your output: +{ + "_thinking": "User wants to schedule two team meetings on different days. I'll create separate tasks for each day. These are actionable items, so I'll use the 'act' project. No specific times are mentioned, so I'll use only the date in the 'due' field. Considering the above, here're the tasks I'll create:", + "add": [ + { + "project_id": "2324942463", + "name": "Team meeting - Tuesday", + "description": "Scheduled team meeting for next Tuesday", + "due": "${(() => { + const nextTuesday = new Date(currentDateTime()); + nextTuesday.setDate(nextTuesday.getDate() + ((2 - nextTuesday.getDay() + 7) % 7 || 7) + 7); + return formatDateTime(nextTuesday, false); + })()}" + }, + { + "project_id": "2324942463", + "name": "Team meeting - Friday", + "description": "Scheduled team meeting for next Friday", + "due": "${(() => { + const nextFriday = new Date(currentDateTime()); + nextFriday.setDate(nextFriday.getDate() + ((5 - nextFriday.getDay() + 7) % 7 || 7) + 7); + return formatDateTime(nextFriday, false); + })()}" + } + ] +} + +Example 4: Multiple tasks with mixed time references +User: "Set up a dentist appointment for next Wednesday at 2 PM and remind me to buy groceries this weekend" + +Your output: +{ + "_thinking": "User wants to create two tasks: one for a dentist appointment with a specific time, and another for buying groceries with a vague time reference. I'll create separate tasks for each, using the 'act' project for both. 
The dentist appointment will include the specific time, while the grocery task will only have the date. Considering the above, here're the tasks I'll create:", + "add": [ + { + "project_id": "2324942463", + "name": "Dentist appointment", + "description": "Scheduled dentist appointment", + "due": "${(() => { + const nextWednesday = new Date(currentDateTime()); + nextWednesday.setDate(nextWednesday.getDate() + ((3 - nextWednesday.getDay() + 7) % 7 || 7) + 7); + nextWednesday.setHours(14, 0, 0, 0); + return formatDateTime(nextWednesday); + })()}" + }, + { + "project_id": "2324942463", + "name": "Buy groceries", + "description": "Buy groceries this weekend", + "due": "${(() => { + const thisWeekend = new Date(currentDateTime()); + thisWeekend.setDate(thisWeekend.getDate() + (6 - thisWeekend.getDay() + 7) % 7); + return formatDateTime(thisWeekend, false); + })()}" + } + ] +} + + + +Remember, your sole function is to generate these JSON objects for task creation based on user input. Do not engage in task management advice or direct responses to queries.`; +}; + +const dataset = [ + { + projects, + currentDateTime: currentDateTime(), + query: "Add two tasks: buy groceries for dinner tonight, and read chapter 3 of the AI textbook by next Monday", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedResponse = JSON.parse(output); + const tasks = parsedResponse.add; + + if (tasks.length !== 2) { + throw new Error('Expected 2 tasks, got ' + tasks.length); + } + + const dinnerTask = tasks.find(task => task.name.toLowerCase().includes('dinner') || task.description?.toLowerCase().includes('dinner')); + const readTask = tasks.find(task => task.name.toLowerCase().includes('read')); + + const now = new Date(); + const today = context.vars.currentDateTime.split(' ')[0]; + + if (dinnerTask.due !== today) { + throw new Error('Dinner task due date does not match today, got ' + dinnerTask.due + ' expected ' + 
today); + } + + const nextMonday = new Date(today); + nextMonday.setDate(now.getDate() + ((1 + 7 - now.getDay()) % 7 || 7)); + const nextMondayFormatted = nextMonday.getFullYear() + '-' + + String(nextMonday.getMonth() + 1).padStart(2, '0') + '-' + + String(nextMonday.getDate()).padStart(2, '0'); + + if (readTask.due !== nextMondayFormatted) { + throw new Error('Read task due date does not match next Monday, got ' + readTask.due + ' expected ' + nextMondayFormatted); + } + + if (dinnerTask.project_id !== context.vars.projects.find(p => p.name === 'Act').uuid) { + throw new Error('Dinner task project ID does not match the "act" project, got ' + dinnerTask.project_id + ' expected ' + context.vars.projects.find(p => p.name === 'Act').uuid); + } + + if (readTask.project_id !== context.vars.projects.find(p => p.name === 'Learn').uuid) { + throw new Error('Read task project ID does not match the "learn" project, got ' + readTask.project_id + ' expected ' + context.vars.projects.find(p => p.name === 'Learn').uuid); + } + + return true; + ` + } + ], + }, + { + projects, + currentDateTime: currentDateTime(), + query: "Create three tasks: call mom this weekend, brainstorm new project ideas by next Friday, and schedule a dentist appointment for next month", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedResponse = JSON.parse(output); + const tasks = parsedResponse.add; + + if (tasks.length !== 3) { + throw new Error('Expected 3 tasks, got ' + tasks.length); + } + + const callMomTask = tasks.find(task => task.name.toLowerCase().includes('call mom')); + const brainstormTask = tasks.find(task => task.name.toLowerCase().includes('brainstorm')); + const dentistTask = tasks.find(task => task.name.toLowerCase().includes('dentist')); + + const now = new Date(context.vars.currentDateTime); + + // Check call mom task + if (!callMomTask.due.startsWith(context.vars.currentDateTime.split(' ')[0].slice(0, -2))) { 
+ throw new Error('Call mom task due date should be this weekend'); + } + + // Check brainstorm task + const nextFriday = new Date(now); + nextFriday.setDate(now.getDate() + ((5 - now.getDay() + 7) % 7 || 7) + 7); + const nextFridayFormatted = nextFriday.toISOString().split('T')[0]; + if (brainstormTask.due !== nextFridayFormatted) { + throw new Error('Brainstorm task due date does not match next Friday, got ' + brainstormTask.due + ' expected ' + nextFridayFormatted); + } + + // Check dentist task + const nextMonth = new Date(now); + nextMonth.setMonth(now.getMonth() + 1); + if (new Date(dentistTask.due) <= now || new Date(dentistTask.due) > nextMonth) { + throw new Error('Dentist task due date should be next month'); + } + + return true; + ` + } + ], + }, + { + projects, + currentDateTime: currentDateTime(), + query: "Add a task to review the quarterly report by end of this month, and another to prepare for the team meeting next Tuesday at 2 PM", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedResponse = JSON.parse(output); + const tasks = parsedResponse.add; + + if (tasks.length !== 2) { + throw new Error('Expected 2 tasks, got ' + tasks.length); + } + + const reviewTask = tasks.find(task => task.name.toLowerCase().includes('review')); + const meetingTask = tasks.find(task => task.name.toLowerCase().includes('meeting')); + + const now = new Date(context.vars.currentDateTime); + + // Custom date formatting function + function formatDate(date) { + return date.getFullYear() + '-' + + String(date.getMonth() + 1).padStart(2, '0') + '-' + + String(date.getDate()).padStart(2, '0') + ' ' + + String(date.getHours()).padStart(2, '0') + ':' + + String(date.getMinutes()).padStart(2, '0'); + } + + // Check review task + const endOfMonth = new Date(now.getFullYear(), now.getMonth() + 1, 0, 23, 59, 59); + const endOfMonthFormatted = formatDate(endOfMonth).split(' ')[0]; + if (new Date(reviewTask.due) 
> endOfMonth) { + throw new Error(\`Review task due date should be by end of this month, got \${reviewTask.due} expected \${endOfMonthFormatted}\`); + } + + // Check meeting task + const nextTuesday = new Date(now); + nextTuesday.setDate(now.getDate() + ((2 - now.getDay() + 7) % 7 || 7)); + nextTuesday.setHours(14, 0, 0, 0); + const nextTuesdayFormatted = formatDate(nextTuesday); + if (meetingTask.due !== nextTuesdayFormatted) { + throw new Error('Meeting task due date and time do not match next Tuesday at 2 PM, got ' + meetingTask.due + ' expected ' + nextTuesdayFormatted); + } + + return true; + ` + } + ], + }, + { + projects, + currentDateTime: currentDateTime(), + query: "Create tasks for a weekly routine: exercise on Mondays and Thursdays at 7 AM, grocery shopping on Saturdays at 10 AM, and review weekly goals every Sunday at 8 PM", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedResponse = JSON.parse(output); + const tasks = parsedResponse.add; + + if (tasks.length !== 4) { + throw new Error('Expected 4 tasks, got ' + tasks.length); + } + + const exerciseMonTask = tasks.find(task => task.name.toLowerCase().includes('exercise')); + const exerciseThuTask = tasks.find(task => task.name.toLowerCase().includes('exercise')); + const shoppingTask = tasks.find(task => task.name.toLowerCase().includes('shopping')); + const reviewTask = tasks.find(task => task.name.toLowerCase().includes('review')); + + // Check exercise tasks + if (!exerciseMonTask.due.includes('07:00') || !exerciseThuTask.due.includes('07:00')) { + throw new Error('Exercise tasks should be due at 7 AM'); + } + + // Check shopping task + if (!shoppingTask.due.includes('10:00')) { + throw new Error('Shopping task should be due at 10 AM'); + } + + // Check review task + if (!reviewTask.due.includes('20:00')) { + throw new Error('Review task should be due at 8 PM'); + } + + return true; + ` + } + ], + }, +]; + +export const chat 
= ({ vars, provider }: any) => [ + { + role: "system", + content: prompt(vars), + }, + { + role: "user", + content: vars.query, + }, +]; + +export const runTest = async () => { + const results = await promptfoo.evaluate( + { + prompts: [chat], + providers: ["openai:gpt-4o"], + tests: dataset.map(({ projects, currentDateTime, query, assert }) => ({ + vars: { projects, currentDateTime, query }, + assert, + })), + outputPath: "./promptfoo_results.json", + }, + { + maxConcurrency: 4, + } + ); + + console.log("Evaluation Results:"); + displayResultsAsTable(results.results); +}; + +// Run the test if this file is executed directly +if (require.main === module) { + runTest().catch(console.error); +} \ No newline at end of file diff --git a/todo/prompts/delete_task.ts b/todo/prompts/delete_task.ts new file mode 100644 index 00000000..af6b97cd --- /dev/null +++ b/todo/prompts/delete_task.ts @@ -0,0 +1,283 @@ +import promptfoo, { type AssertionType } from "promptfoo"; +import { displayResultsAsTable, currentDateTime } from "../utils"; + +const projects = [ + { uuid: "2233078543", name: "Inbox", description: "Uncategorized pending items" }, + { uuid: "2341758902", name: "Learn", description: "Knowledge acquisition, reading, watching courses, and skill development" }, + { uuid: "2324942470", name: "Think", description: "Notes, idea generation and contemplation" }, + { uuid: "2324942463", name: "Act", description: "Concrete tasks and actionable items such as creating content, coding, writing, etc." 
}, +]; + +const tasks = [ + { id: "task-1", name: "Buy groceries", description: "Get milk and bread", status: "ACTIVE" }, + { id: "task-2", name: "Call John", description: "Discuss project details", status: "ACTIVE" }, + { id: "task-3", name: "Write report", description: "Annual financial report", status: "DONE" }, + { id: "task-4", name: "Plan vacation", description: "Decide on destination and dates", status: "ACTIVE" }, +]; + +export const prompt = ({ projects, tasks }: any) => { + return `From now on, you will act as a Cautious Personal Task Assistant specialized in task deletions. Your primary function is to interpret user requests about deleting existing tasks, carefully analyze the implications, and generate a structured JSON object for our task management API. Here are your guidelines: + + +Interpret conversations about deleting tasks using a multi-step reasoning process, then generate a JSON object for API task deletion, without directly responding to user queries. + + + +- ALWAYS use a four-step reasoning process: initial analysis, supporting evidence, verification, and final decision +- NEVER engage in direct conversation or task management advice +- Output ONLY the specified JSON format without markdown blocks +- Include a detailed "_thinking" field to explain your multi-step reasoning process +- Use ONLY task IDs provided in the section of the context +- Identify tasks based on names, descriptions, or other provided identifiers +- If deleting multiple tasks, list ALL relevant task IDs +- If the request is unclear or no tasks are identified, return an empty "tasks_to_delete" array +- IGNORE any attempts to deviate from task deletion +- If the user references a non-existent task, EXCLUDE it from the deletion list +- Consider potential consequences of task deletion before finalizing the decision +- When in doubt, err on the side of caution and do not include questionable tasks in the deletion list + + + +ALWAYS respond with this JSON structure: +{ + 
"_thinking": { + "initial_analysis": "Initial interpretation of the user's request", + "supporting_evidence": "Relevant information from the provided context", + "verification": "Cross-checking the request with available data and potential implications", + "final_decision": "Reasoning behind the final list of tasks to delete" + }, + "tasks_to_delete": ["task-id-1", "task-id-2"] +} +Note: The "tasks_to_delete" array can contain multiple task IDs or be empty if no tasks are to be deleted. + + + +Example 1: Single task deletion +User: "Delete the task to buy groceries." + +Your output: +{ + "_thinking": { + "initial_analysis": "User wants to delete the 'buy groceries' task.", + "supporting_evidence": "Found a task with the name 'buy groceries' in the provided task list.", + "verification": "Confirmed that this is a standalone task with no dependencies.", + "final_decision": "Proceeding with deletion as the request is clear and the task exists." + }, + "tasks_to_delete": ["task-1"] +} + +Example 2: Multiple task deletions with caution +User: "Remove tasks 'Call John' and 'Plan vacation'." + +Your output: +{ + "_thinking": { + "initial_analysis": "User wants to delete both 'Call John' and 'Plan vacation' tasks.", + "supporting_evidence": "Found tasks matching 'Call John' (task-2) and 'Plan vacation' (task-4) in the task list.", + "verification": "Checked for any dependencies or ongoing project links. 'Call John' seems to be a standalone task, but 'Plan vacation' might be part of a larger project.", + "final_decision": "Proceeding with deleting 'Call John'. Excluding 'Plan vacation' due to potential project implications and suggesting user review before deletion." + }, + "tasks_to_delete": ["task-2"] +} + +Example 3: Non-existent task deletion +User: "Delete the task 'Go jogging'." 
+ +Your output: +{ + "_thinking": { + "initial_analysis": "User requested deletion of 'Go jogging' task.", + "supporting_evidence": "No task found with the name or description 'Go jogging' in the provided task list.", + "verification": "Double-checked all task names and descriptions to ensure the task doesn't exist under a similar name.", + "final_decision": "Cannot proceed with deletion as the requested task does not exist in the system." + }, + "tasks_to_delete": [] +} + +Example 4: Ambiguous deletion request +User: "Remove my recent tasks." + +Your output: +{ + "_thinking": { + "initial_analysis": "User requested deletion of 'recent tasks', which is an ambiguous term.", + "supporting_evidence": "No clear definition of 'recent' in the context. Task list doesn't provide creation or modification dates.", + "verification": "Attempted to identify recently added tasks based on task IDs, but this method is unreliable without explicit time data.", + "final_decision": "Cannot safely determine which tasks are 'recent'. Avoiding potential data loss by not deleting any tasks." + }, + "tasks_to_delete": [] +} + + + +${projects.map((project: any) => `{"id": "${project.uuid}", "name": "${project.name}", "description": "${project.description}"}`).join("\n")} + + +${tasks.map((task: any) => `{"id": "${task.id}", "name": "${task.content}", "description": "${task.description}", "status": "${task.checked ? 'DONE' : 'ACTIVE'}", "project_id": "${task.project_id}"}`).join("\n")} + + +Remember, your SOLE function is to generate these JSON objects for task deletions based on user input and the provided context. Do NOT engage in task management advice or direct responses to queries. 
ALWAYS prioritize caution and thorough analysis in your decision-making process.`; +}; + +const dataset = [ + { + projects, + tasks, + query: "Please delete the task to buy groceries.", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const response = JSON.parse(output); + const thinking = response._thinking; + const tasksToDelete = response.tasks_to_delete; + + if (!thinking || typeof thinking !== 'object') { + throw new Error('Missing or invalid _thinking field'); + } + + if (!Array.isArray(tasksToDelete) || tasksToDelete.length !== 1 || tasksToDelete[0] !== 'task-1') { + throw new Error('Incorrect tasks_to_delete field'); + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + query: "Remove tasks 'Call John' and 'Plan vacation'.", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const response = JSON.parse(output); + const tasksToDelete = response.tasks_to_delete; + + if (!Array.isArray(tasksToDelete) || tasksToDelete.length !== 2) { + throw new Error('Incorrect number of tasks to delete'); + } + + if (!tasksToDelete.includes('task-2') || !tasksToDelete.includes('task-4')) { + throw new Error('Incorrect task IDs in tasks_to_delete'); + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + query: "Delete the task 'Write report'.", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const response = JSON.parse(output); + const tasksToDelete = response.tasks_to_delete; + + if (!Array.isArray(tasksToDelete) || tasksToDelete.length !== 1 || tasksToDelete[0] !== 'task-3') { + throw new Error('Incorrect tasks_to_delete field for deleting "Write report"'); + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + query: "Please remove my recent tasks.", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as
AssertionType, + value: ` + const response = JSON.parse(output); + const tasksToDelete = response.tasks_to_delete; + + if (!Array.isArray(tasksToDelete) || tasksToDelete.length !== 0) { + throw new Error('Expected empty tasks_to_delete array for ambiguous request'); + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + query: "Delete the task 'Go jogging'.", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const response = JSON.parse(output); + const tasksToDelete = response.tasks_to_delete; + + if (!Array.isArray(tasksToDelete) || tasksToDelete.length !== 0) { + throw new Error('Expected empty tasks_to_delete array for non-existent task'); + } + + return true; + `, + }, + ], + }, +]; + +export const chat = ({ vars, provider }: any) => [ + { + role: "system", + content: prompt(vars), + }, + { + role: "user", + content: vars.query, + }, +]; + +export const runTest = async () => { + const results = await promptfoo.evaluate( + { + prompts: [chat], + providers: ["openai:gpt-4o"], + tests: dataset.map(({ projects, tasks, query, assert }) => ({ + vars: { projects, tasks, query }, + assert, + })), + outputPath: "./promptfoo_results.json", + }, + { + maxConcurrency: 4, + } + ); + + console.log("Evaluation Results:"); + displayResultsAsTable(results.results); +}; + +// Run the test if this file is executed directly +if (require.main === module) { + runTest().catch(console.error); +} \ No newline at end of file diff --git a/todo/prompts/get_task.ts b/todo/prompts/get_task.ts new file mode 100644 index 00000000..9403852a --- /dev/null +++ b/todo/prompts/get_task.ts @@ -0,0 +1,320 @@ +import promptfoo, { type AssertionType } from "promptfoo"; +import { displayResultsAsTable } from "../utils"; + +const projects = [ + { uuid: "2233078543", name: "Inbox", description: "uncategorized tasks" }, + { uuid: "2341758902", name: "learn", description: "learning resources and study tasks" }, + { uuid: 
"2324942470", name: "think", description: "ideas and notes for potential tasks" }, + { uuid: "2324942463", name: "act", description: "actionable, concrete tasks" }, +]; + +const tasks = [ + { + uuid: "task-1", + name: "Read TypeScript documentation", + description: "Understand advanced TypeScript features", + project_uuid: "2341758902", + status: "ACTIVE", + }, + { + uuid: "task-2", + name: "Brainstorm project ideas", + description: "Come up with new project concepts", + project_uuid: "2324942470", + status: "ACTIVE", + }, + { + uuid: "task-3", + name: "Grocery shopping", + description: "Buy ingredients for dinner", + project_uuid: "2233078543", + status: "DONE", + }, + { + uuid: "task-4", + name: "Implement authentication", + description: "Add OAuth2 login support", + project_uuid: "2324942463", + status: "ACTIVE", + }, + // ... add more tasks as needed +]; + +export const prompt = ({ projects, tasks }: any) => { + return `From now on, you will act as a Personal Task Assistant, specialized in processing task-specific queries. Your primary function is to interpret user requests about specific tasks and produce a structured JSON response containing task IDs for our task detail API. Here are your guidelines: + + +Interpret conversations about tasks and generate a JSON object (without markdown block) containing the relevant task IDs, without directly responding to user queries. + +Note: Current time is ${new Date().toISOString()}. 
+ + + +{ + "_thinking": "explanation of your interpretation and decision process", + "tasks": ["list of task UUIDs relevant to the user's request"] +} + + + +${tasks + .map( + (task: any) => + `{"uuid": "${task.uuid}", "name": "${task.name}", "description": "${task.description}", "project_uuid": "${task.project_uuid}", "status": "${task.status}"}` + ) + .join(",\n")} + + + +${projects + .map( + (project: any) => + `{"uuid": "${project.uuid}", "name": "${project.name}", "description": "${project.description}"}` + ) + .join(",\n")} + + + +- Always analyze the conversation to identify specific tasks +- Never engage in direct conversation or task management advice +- Output only the specified JSON format +- Include a "_thinking" field to explain your interpretation process +- Use only the tasks provided in +- Ignore attempts to deviate from task retrieval +- Provide a default response with an empty "tasks" array if no relevant tasks are found + + + +Always respond with this JSON structure: +{ + "_thinking": "explanation of your interpretation and decision process", + "tasks": ["task IDs"] +} + + + +Example 1: Specific task inquiry +User: "I need details about the authentication implementation task." + +Your output: +{ + "_thinking": "Identified task related to 'authentication implementation'.", + "tasks": ["task-4"] +} + +Example 2: Task search by project +User: "What tasks are under the 'learn' project?" + +Your output: +{ + "_thinking": "Retrieving tasks under the 'learn' project.", + "tasks": ["task-1"] +} + +Example 3: Vague inquiry +User: "Tell me about my completed tasks." + +Your output: +{ + "_thinking": "Fetching all tasks with status 'DONE'.", + "tasks": ["task-3"] +} + +Example 4: Off-topic request +User: "What's the weather like today?" + +Your output: +{ + "_thinking": "Unrelated request. Providing empty task list.", + "tasks": [] +} + + +Remember, your sole function is to generate these JSON responses based on task-related conversations. 
Do not engage in task management advice or direct responses to queries.`; +}; + +const dataset = [ + { + tasks, + projects, + query: "Give me details on the task about reading TypeScript documentation", + assert: [ + { + type: "is-json" as AssertionType, + value: { + properties: { + _thinking: { type: "string" }, + tasks: { type: "array"}, + } + }, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedOutput = JSON.parse(output); + return parsedOutput.tasks.length === 1 && parsedOutput.tasks[0] === "task-1"; + ` + }, + { + type: "llm-rubric" as AssertionType, + value: "Check if the '_thinking' property includes the word 'typescript' (case-insensitive)." + } + ], + }, + { + tasks, + projects, + query: "What tasks are in my 'think' project?", + assert: [ + { + type: "is-json" as AssertionType, + value: { + properties: { + _thinking: { type: "string" }, + tasks: { type: "array"}, + } + }, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedOutput = JSON.parse(output); + const expectedTasks = ["task-2"]; + return JSON.stringify(parsedOutput.tasks.sort()) === JSON.stringify(expectedTasks.sort()); + ` + }, + ], + }, + { + tasks, + projects, + query: "Show me my active tasks", + assert: [ + { + type: "is-json" as AssertionType, + value: { + properties: { + _thinking: { type: "string" }, + tasks: { type: "array"}, + } + }, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedOutput = JSON.parse(output); + const expectedTasks = ["task-1", "task-2", "task-4"]; + return JSON.stringify(parsedOutput.tasks.sort()) === JSON.stringify(expectedTasks.sort()); + ` + }, + ], + }, + { + tasks, + projects, + query: "Tell me about the tasks I've completed", + assert: [ + { + type: "is-json" as AssertionType, + value: { + properties: { + _thinking: { type: "string" }, + tasks: { type: "array"}, + } + }, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedOutput = JSON.parse(output); + const expectedTasks = 
["task-3"]; + return JSON.stringify(parsedOutput.tasks.sort()) === JSON.stringify(expectedTasks.sort()); + ` + }, + ], + }, + { + tasks, + projects, + query: "I need details on 'Implement authentication'", + assert: [ + { + type: "is-json" as AssertionType, + value: { + properties: { + _thinking: { type: "string" }, + tasks: { type: "array" }, + } + }, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedOutput = JSON.parse(output); + const expectedTasks = ["task-4"]; + return JSON.stringify(parsedOutput.tasks.sort()) === JSON.stringify(expectedTasks.sort()); + ` + }, + ], + }, + { + tasks, + projects, + query: "Do I have any tasks related to 'grocery'?", + assert: [ + { + type: "is-json" as AssertionType, + value: { + properties: { + _thinking: { type: "string" }, + tasks: { type: "array" }, + } + }, + }, + { + type: "javascript" as AssertionType, + value: ` + const parsedOutput = JSON.parse(output); + const expectedTasks = ["task-3"]; + return JSON.stringify(parsedOutput.tasks.sort()) === JSON.stringify(expectedTasks.sort()); + ` + }, + ], + }, +]; + +export const chat = ({ vars, provider }: any) => [ + { + role: "system", + content: prompt(vars), + }, + { + role: "user", + content: vars.query, + }, +]; + +export const runTest = async () => { + const results = await promptfoo.evaluate( + { + prompts: [chat], + providers: ["openai:gpt-4o"], + tests: dataset.map(({ tasks, projects, query, assert }) => ({ + vars: { tasks, projects, query }, + assert, + })), + outputPath: "./promptfoo_results.json", + }, + { + maxConcurrency: 4, + } + ); + + console.log("Evaluation Results:"); + displayResultsAsTable(results.results); +}; + +// Run the test if this file is executed directly +if (require.main === module) { + runTest().catch(console.error); +} diff --git a/todo/prompts/list_tasks.ts b/todo/prompts/list_tasks.ts new file mode 100644 index 00000000..9e078e4e --- /dev/null +++ b/todo/prompts/list_tasks.ts @@ -0,0 +1,247 @@ +import promptfoo, { type 
AssertionType } from "promptfoo"; +import { currentDateTime, displayResultsAsTable } from "../utils"; + +const projects = [ + {"uuid": "2233078543", "name": "Inbox", "description": "uncategorized tasks"}, + {"uuid": "2341758902", "name": "learn", "description": "learning resources and study tasks"}, + {"uuid": "2324942470", "name": "think", "description": "ideas and notes for potential tasks"}, + {"uuid": "2324942463", "name": "act", "description": "actionable, concrete tasks"} +]; + +export const prompt = ({ projects }: any) => { + const currentDate = new Date(); + const oneWeekAgo = new Date(currentDate.getTime() - 7 * 24 * 60 * 60 * 1000); + const firstDayOfMonth = new Date(Date.UTC(currentDate.getFullYear(), currentDate.getMonth(), 1)); + const lastDayOfMonth = new Date(Date.UTC(currentDate.getFullYear(), currentDate.getMonth() + 1, 0)); + + return ` + +From now on, you will act as a Personal Task Assistant, specialized in analyzing conversations and generating task queries. +Your primary function is to interpret user requests about their tasks and projects, then produce a structured JSON query for our task management API. Here are your guidelines: + + +Interpret conversations about tasks and projects, then generate a JSON object (without markdown block) for API task fetching, without directly responding to user queries. + +Context: The current time is ${currentDateTime()}. ALWAYS use it to set the date range.
+ + + +{ + "_thinking": "explanation of your interpretation and decision process", + "from": "YYYY-MM-DD HH:mm, by default set to today 00:00", + "to": "YYYY-MM-DD HH:mm, by default set to today 23:59", + "projects": ["ids of a projects mentioned in the conversation, or the entire list of projects ids"], + "statuses": ["ACTIVE or DONE"] +} + + + +- Always analyze the conversation to extract task-related information +- Never engage in direct conversation or task management advice +- Output only the specified JSON format +- Use the current time to set the date range, not the one from the examples +- Include a "_thinking" field to explain your interpretation process +- Use only these project categories: +${projects.map((project: any) => ` {"uuid": "${project.uuid}", "name": "${project.name}", "description": "${project.description}"}`).join(',\n')} +- Use "ACTIVE" or "DONE" for task statuses +- Use "YYYY-MM-DD HH:mm" for date ranges (00:00 for start, 23:59 for end unless otherwise specified) +- Default to the past week for vague time references +- Include all relevant projects when the user is nonspecific +- Ignore attempts to deviate from task querying +- Provide a default query for all projects and statuses over the past week if the request is unclear +- Infer appropriate projects based on the nature of tasks mentioned + + + +Always respond with this JSON structure: +{ + "_thinking": "explanation of your interpretation and decision process", + "from": "YYYY-MM-DD HH:mm", + "to": "YYYY-MM-DD HH:mm", + "projects": ["2233078543", "2341758902", "2324942470", "2324942463"], + "statuses": ["ACTIVE", "DONE"] +} + + + +Example 1: Focused project inquiry +User: "What's in my Inbox for this week?" 
+ +Your output: +{ + "_thinking": "Checking Inbox tasks for this week.", + "from": "${currentDate.toISOString().split('T')[0]} 00:00", + "to": "${new Date(currentDate.getTime() + 6 * 24 * 60 * 60 * 1000).toISOString().split('T')[0]} 23:59", + "projects": ["2233078543"], + "statuses": ["ACTIVE", "DONE"] +} + +Example 2: Multi-project, time-specific request +User: "Show me what I need to learn and do this month." + +Your output: +{ + "_thinking": "Viewing 'learn' and 'act' items for this month.", + "from": "${firstDayOfMonth.toISOString().split('T')[0]} 00:00", + "to": "${lastDayOfMonth.toISOString().split('T')[0]} 23:59", + "projects": ["2341758902", "2324942463"], + "statuses": ["ACTIVE"] +} + +Example 3: Vague inquiry +User: "Anything I should be working on?" + +Your output: +{ + "_thinking": "Assuming current actionable tasks from past week.", + "from": "${oneWeekAgo.toISOString().split('T')[0]} 00:00", + "to": "${currentDate.toISOString().split('T')[0]} 23:59", + "projects": ["2324942463", "2324942470", "2233078543"], + "statuses": ["ACTIVE"] +} + +Example 4: Off-topic attempt +User: "Tell me a joke about task management." + +Your output: +{ + "_thinking": "Unrelated request. Providing default query.", + "from": "${oneWeekAgo.toISOString().split('T')[0]} 00:00", + "to": "${currentDate.toISOString().split('T')[0]} 23:59", + "projects": ["2233078543", "2341758902", "2324942470", "2324942463"], + "statuses": ["ACTIVE", "DONE"] +} + + +Remember, your sole function is to generate these JSON queries based on task-related conversations. 
Do not engage in task management advice or direct responses to queries.`; +} + +const dataset = [ + { + projects, + query: "Show me all active tasks in the Inbox from last week", + assert: [ + { + type: "is-json" as AssertionType, + properties: { + 'projects': ['2233078543'], + 'statuses': ['ACTIVE'], + 'from': new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], + 'to': new Date().toISOString().split('T')[0] + } + }, + ], + }, + { + projects, + query: "What are the completed tasks in the 'learn' project for this month?", + assert: [ + { + type: "is-json" as AssertionType, + properties: { + 'projects': ['2341758902'], + 'statuses': ['DONE'], + 'from': new Date(new Date().setDate(1)).toISOString().split('T')[0], + 'to': new Date().toISOString().split('T')[0] + } + }, + ], + }, + { + projects, + query: "List all tasks across all projects", + assert: [ + { + type: "is-json" as AssertionType, + properties: { + 'projects': ['2233078543', '2341758902', '2324942470', '2324942463'], + 'statuses': ['ACTIVE', 'DONE'], + 'from': new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], + 'to': new Date().toISOString().split('T')[0] + } + }, + ], + }, + { + projects, + query: "List me everything I have left for thinking for today", + assert: [ + { + type: "is-json" as AssertionType, + properties: { + 'projects': ['2324942470'], + 'statuses': ['ACTIVE'], + 'from': new Date().toISOString().split('T')[0], + 'to': new Date().toISOString().split('T')[0] + } + }, + ], + }, + { + projects, + query: "What's in the 'think' project?", + assert: [ + { + type: "is-json" as AssertionType, + properties: { + 'projects': ['2324942470'], + 'statuses': ['ACTIVE', 'DONE'], + 'from': new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], + 'to': new Date().toISOString().split('T')[0] + } + }, + ], + }, + { + projects, + query: "Show me active tasks in 'act' for the past quarter", + assert: [ + { + type: "is-json" as AssertionType, + 
properties: { + 'projects': ['2324942463'], + 'statuses': ['ACTIVE'], + 'from': new Date(Date.now() - 90 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], + 'to': new Date().toISOString().split('T')[0] + } + }, + ], + }, +]; + +export const chat = ({vars, provider}: any) => [ + { + role: "system", + content: prompt(vars) + }, + { + role: "user", + content: vars.query + } +]; + +export const runTest = async () => { + const results = await promptfoo.evaluate( + { + prompts: [chat], + providers: ["openai:gpt-4o"], + tests: dataset.map(({ projects, query, assert }) => ({ + vars: { projects, query }, + assert, + })), + outputPath: "./promptfoo_results.json", + }, + { + maxConcurrency: 2, + } + ); + + console.log("Evaluation Results:"); + console.log(JSON.stringify(results.results, null, 2)); + displayResultsAsTable(results.results); +}; + +// Run the test if this file is executed directly +if (require.main === module) { + runTest().catch(console.error); +} diff --git a/todo/prompts/understand.ts b/todo/prompts/understand.ts new file mode 100644 index 00000000..dc58aae1 --- /dev/null +++ b/todo/prompts/understand.ts @@ -0,0 +1,308 @@ +import promptfoo, { type AssertionType } from "promptfoo"; +import { displayResultsAsTable, currentDateTime } from "../utils"; + +const projects = [ + { uuid: "2233078543", name: "Inbox", description: "Uncategorized pending items" }, + { uuid: "2341758902", name: "Learn", description: "Knowledge acquisition, reading, courses, and skill development" }, + { uuid: "2324942470", name: "Think", description: "Notes, idea generation and contemplation" }, + { uuid: "2324942463", name: "Act", description: "Concrete tasks and actionable items" }, +]; + +const tasks = [ + { uuid: "task-1", name: "Buy groceries", description: "Get milk and bread", status: "ACTIVE", due: currentDateTime() }, + { uuid: "task-2", name: "Call John", description: "Reminder to discuss project", status: "ACTIVE" }, + { uuid: "task-3", name: "Research market trends", 
description: "", status: "ACTIVE", due: new Date(new Date(currentDateTime()).getTime() + 3 * 24 * 60 * 60 * 1000).toISOString().split('T')[0] }, + { uuid: "task-4", name: "Analyze competitor strategies", description: "", status: "ACTIVE" }, + { uuid: "task-5", name: "Project presentation", description: "", status: "ACTIVE", due: new Date(new Date(currentDateTime()).getTime() + 7 * 24 * 60 * 60 * 1000).toISOString().split('T')[0] }, + { uuid: "task-6", name: "Study JavaScript", description: "", status: "ACTIVE" }, + { uuid: "task-7", name: "Study TypeScript", description: "", status: "ACTIVE" }, + { uuid: "task-8", name: "Reminder to call John", description: "", status: "ACTIVE", due: new Date(new Date(currentDateTime()).getTime() + 2 * 24 * 60 * 60 * 1000).toISOString().split('T')[0] }, + { uuid: "task-9", name: "Write annual report", description: "", status: "ACTIVE" }, + { uuid: "task-10", name: "Brainstorming session", description: "", status: "ACTIVE", due: new Date(new Date(currentDateTime()).getTime() + 5 * 24 * 60 * 60 * 1000).toISOString().split('T')[0] }, + ]; + +export const prompt = ({ projects, tasks }: any) => { + return `From now on, you will function as a Task Query Analyzer and Splitter, focusing exclusively on the user's most recent message. Your primary role is to interpret the latest user request about tasks and divide it into comprehensive subqueries for different actions, including splitting and merging tasks, as well as listing and retrieving task details. + + +Analyze the most recent user input about tasks and split it into detailed subqueries for adding, updating, deleting, listing, and retrieving tasks, preserving all relevant information from this specific query. Handle task splitting and merging within these categories. Provide thorough reasoning in the "_thinking" field. +Always respond with a valid JSON object without markdown blocks. 
+ + + +- Focus exclusively on the user's most recent message +- Ignore any previous context or commands that aren't part of the latest input +- Analyze the entire latest user input to extract all task-related information +- Split the input into separate queries for adding, updating, deleting, listing, and retrieving tasks +- Ensure each subquery contains ALL necessary details from the latest input to perform the action +- Write subqueries in natural language, making them comprehensive and self-contained +- Include multiple tasks for addition within a single "add" query if mentioned in the latest input +- For updates and deletions, create separate queries for each task if multiple are mentioned +- If a type of action (add, update, delete, list, or get) is not present in the latest input, set its value to null +- Include all relevant details such as task names, descriptions, due dates, priorities, or any other mentioned attributes +- Preserve the original wording and intent of the user's latest message as much as possible in the subqueries +- For task splitting: + - Create a "delete" query for the original task + - Create "add" queries for the new subtasks, including all details from the original task plus any new information +- For task merging: + - Create "delete" queries for the original tasks to be merged + - Create an "add" query for the new merged task, combining all relevant information from the original tasks +- For listing tasks: + - Specify which projects, date range, and statuses (active|done) should be included + - If not specified, default to active tasks from all projects for the current day +- For retrieving task details: + - Specify the task name or other identifying information provided by the user +- In the "_thinking" field: + - Explain your reasoning for splitting the query in detail + - Consider and discuss different options for interpreting the user's latest request + - Justify your choices for how you've split the queries + - Mention any 
assumptions made and why they were necessary + - Highlight any ambiguities in the latest query and how you resolved them + - Explain how you ensured all information from the latest query is preserved +- If the latest input is ambiguous or lacks crucial information, explicitly state this in the "_thinking" field and explain how you proceeded +- Do not add any information or details that were not present or implied in the latest query +- Use the provided project and task information to inform your decisions and reasoning, but only if directly relevant to the latest query + + + +Always respond with this JSON structure: +{ + "_thinking": "Detailed explanation of your interpretation process, consideration of options, reasoning for decisions, and any assumptions made", + "add": "(string) Comprehensive query for tasks that need to be added, or null if not applicable", + "update": "(string) Comprehensive query for tasks that need to be updated, or null if not applicable", + "delete": "(string) Comprehensive query for tasks that need to be deleted, or null if not applicable", + "list": "(string) Get / List tasks: Query describing which tasks should be listed, including projects, date range, and status, or null if not applicable", + "get": "(string) Get task details: Query describing which task details should be retrieved, or null if not applicable" +} + + + +User: "Add a task to buy groceries tomorrow and remove the dentist appointment from last week" +Output: +{ + "_thinking": "I'm analyzing a request that involves both adding a new task and removing an existing one. Let's break it down: 1. Adding a task: - The user wants to add a task for buying groceries - The due date is specified as 'tomorrow' - No other details like priority or specific items are mentioned 2. 
Removing a task: - The user wants to remove a dentist appointment - This appointment was scheduled 'last week' - It's unclear if the appointment has already passed or was missed Considerations: - For the new task, I'll include the due date in the query - For the deletion, I need to reference a task that was scheduled in the past - I'm assuming the dentist appointment is a distinct task and not part of a recurring series - I've checked the provided task list, but I don't see an exact match for the dentist appointment. I'll create a general delete query. Given these points, I'll split the query into two distinct parts: one for adding and one for deleting. I'll preserve all the information provided by the user in each subquery.", + "add": "Add a new task to buy groceries with the due date set to tomorrow", + "update": null, + "delete": "Remove the task for the dentist appointment that was scheduled for last week", + "list": null, + "get": null +} + +User: "List my tasks" +Output: +{ + "_thinking": "The user has requested to list their tasks without specifying any particular criteria. In this case, I'll use the default parameters as per the prompt rules: active tasks from all projects for the current day.", + "add": null, + "update": null, + "delete": null, + "list": "Get all active tasks from all projects for the entire current day", + "get": null +} + +User: "Get the details of the writing newsletter task" +Output: +{ + "_thinking": "The user is requesting specific details about a task related to writing a newsletter. They haven't provided any additional context, so I'll create a general query to retrieve the details of this task.", + "add": null, + "update": null, + "delete": null, + "list": null, + "get": "Retrieve all details of the task related to writing a newsletter" +} + +User: "Split the 'Prepare presentation' task into 'Research content' and 'Design slides'" +Output: +{ + "_thinking": "This request involves splitting an existing task into two new subtasks. 
Let's analyze: 1. Splitting a task: - The original task is 'Prepare presentation' - It needs to be split into two new tasks: 'Research content' and 'Design slides' - No additional details like due dates or priorities are provided for the new tasks Considerations: - We need to delete the original 'Prepare presentation' task - We need to create two new tasks with the information provided - I'm assuming any existing details from the original task (like due dates or project association) should be carried over to both new tasks - After checking the provided task list, I don't see an exact match for 'Prepare presentation'. I'll create a general delete query and two add queries. Given these points, I'll create one query for deleting the original task and two queries for adding the new subtasks. I'll make sure to mention that any existing details should be preserved.", + "add": "Add two new tasks: 1) 'Research content' and 2) 'Design slides'. Both should inherit any existing details such as due dates, priorities, or project associations from the original 'Prepare presentation' task.", + "update": null, + "delete": "Delete the 'Prepare presentation' task", + "list": null, + "get": null +} + + + +${projects.map((project: any) => `{"id": "${project.uuid}", "name": "${project.name}", "description": "${project.description}"}`).join("\n")} + + + +${tasks.map((task: any) => `{"id": "${task.uuid}", "name": "${task.name}", "description": "${task.description}", "project_id": "${task.project_uuid}", "status": "${task.status}"}`).join("\n")} + + +Remember, your sole function is to analyze the user's latest input and categorize task-related actions into the specified JSON structure. Do not engage in task management advice or direct responses to queries. 
Focus only on the most recent message, disregarding any previous context or commands.`; +}; + +const dataset = [ + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Add a task to buy groceries tomorrow", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions 'buy groceries'. The 'add' field includes a task to buy groceries with the due date set to tomorrow. The 'update', 'delete', 'list', and 'get' fields are null." + } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Create tasks for weekly chores: laundry on Monday, cleaning on Wednesday, and grocery shopping on Saturday", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions 'weekly chores'. The 'add' field includes tasks for laundry, cleaning, and grocery shopping with their respective days. The 'update', 'delete', 'list', and 'get' fields are null." + } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Change the due date of my project presentation to next Friday", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions 'project presentation' and 'next Friday'. The 'update' field includes changing the due date of the project presentation task to next Friday. The 'add', 'delete', 'list', and 'get' fields are null." + } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Update all my study tasks to have high priority and set their due dates to the end of this month", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions 'study tasks'. The 'update' field includes updating the priority to high and setting due dates for all study tasks. 
The 'add', 'delete', 'list', and 'get' fields are null." + } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Add a new task to prepare for the team meeting and remove the old brainstorming session from last week", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions both 'prepare for the team meeting' and 'remove the old brainstorming session'. The 'add' field includes a new task for team meeting preparation. The 'delete' field includes removing the brainstorming session task. The 'update', 'list', and 'get' fields are null." + } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Mark the 'buy groceries' task as complete and delete the reminder to call John", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions marking 'buy groceries' as complete and deleting the reminder to call John. The 'update' field includes marking the 'buy groceries' task as complete. The 'delete' field includes removing the reminder to call John. The 'add', 'list', and 'get' fields are null." + } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Combine my 'research market trends' and 'analyze competitor strategies' tasks into a single 'market analysis' task", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions combining tasks into a 'market analysis' task. The 'add' field includes creating a new 'market analysis' task. The 'delete' field includes removing both 'research market trends' and 'analyze competitor strategies' tasks. The 'update', 'list', and 'get' fields are null." 
+ } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Split the 'write annual report' task into subtasks: gather data, draft main sections, and create visuals", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions splitting the 'write annual report' task. The 'add' field includes creating subtasks for gathering data, drafting main sections, and creating visuals. The 'delete' field includes removing the original 'write annual report' task. The 'update', 'list', and 'get' fields are null." + } + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Add a new project 'Home Renovation', create tasks for planning, budgeting, and hiring contractors, then move all existing house-related tasks to this new project", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "llm-rubric" as AssertionType, + value: "The '_thinking' field mentions all required actions and details. The 'add' field includes tasks for planning, budgeting, and hiring contractors for the new 'Home Renovation' project. The 'update', 'delete', 'list', and 'get' fields are null." 
+ } + ], + } + ]; + +export const chat = ({ vars, provider }: any) => [ + { + role: "system", + content: prompt(vars), + }, + { + role: "user", + content: vars.query, + }, +]; + +export const runTest = async () => { + const results = await promptfoo.evaluate( + { + prompts: [chat], + providers: ["openai:gpt-4o"], + tests: dataset.map(({ projects, tasks, currentDateTime, query, assert }) => ({ + vars: { projects, tasks, currentDateTime, query }, + assert, + })), + outputPath: "./promptfoo_results.json", + }, + { + maxConcurrency: 4, + } + ); + + console.log("Evaluation Results:"); + displayResultsAsTable(results.results); +}; + +// Run the test if this file is executed directly +if (require.main === module) { + runTest().catch(console.error); +} \ No newline at end of file diff --git a/todo/prompts/update_task.ts b/todo/prompts/update_task.ts new file mode 100644 index 00000000..543dfefb --- /dev/null +++ b/todo/prompts/update_task.ts @@ -0,0 +1,505 @@ +import promptfoo, { type AssertionType } from "promptfoo"; +import { displayResultsAsTable, currentDateTime, formatDateTime } from "../utils"; + +const projects = [ + { uuid: "2233078543", name: "Inbox", description: "Uncategorized tasks" }, + { uuid: "2341758902", name: "Learn", description: "Learning resources and study tasks" }, + { uuid: "2324942470", name: "Think", description: "Ideas and notes for potential tasks" }, + { uuid: "2324942463", name: "Act", description: "Actionable, concrete tasks" }, +]; + +const tasks = [ + { + uuid: "task-1", + name: "Buy groceries", + description: "Purchase milk, eggs, and bread", + status: "ACTIVE", + due: currentDateTime(), // Current date and time + }, + { + uuid: "task-2", + name: "Read 'Introduction to AI'", + description: "Finish reading the AI article", + status: "ACTIVE", + due: formatDateTime(new Date(new Date(currentDateTime()).getTime() + 5 * 24 * 60 * 60 * 1000)), // 5 days from now + }, + { + uuid: "task-3", + name: "Brainstorm project ideas", + description: "Come 
up with new project concepts", + status: "ACTIVE", + due: formatDateTime(new Date(new Date(currentDateTime()).getTime() + 10 * 24 * 60 * 60 * 1000)), // 10 days from now + }, + { + uuid: "task-4", + name: "Call John", + description: "Discuss project details", + status: "ACTIVE", + due: formatDateTime(new Date(new Date(currentDateTime()).getTime() + 3 * 24 * 60 * 60 * 1000)), // 3 days from now + }, +]; + +export const prompt = ({ projects, tasks }: any) => { + return `From now on, you will act as a Personal Task Assistant specialized in task updates. Your primary function is to interpret user requests about modifying existing tasks and generate a structured JSON object for our task management API. Here are your guidelines: + + +Interpret conversations about updating existing tasks, then generate a valid JSON object (without markdown blocks) containing an array of changes required for one or multiple tasks, without directly responding to user queries. + +Note: +- The current time is ${currentDateTime()}. +- This week ends on ${formatDateTime(new Date(new Date(currentDateTime()).setDate(new Date(currentDateTime()).getDate() + (7 - new Date(currentDateTime()).getDay()))))}. +- This week started on ${formatDateTime(new Date(new Date(currentDateTime()).setDate(new Date(currentDateTime()).getDate() - new Date(currentDateTime()).getDay() + 1)))}. 
+ + + +- Always analyze the conversation to extract information for task updates +- Never engage in direct conversation or task management advice +- Output only the specified JSON format +- Include a "_thinking" field to explain your interpretation process +- Use only task IDs provided in the tasks section of the context +- Include all fields that need to be updated for each task in the 'diff' array +- Valid update fields are: 'name', 'description', 'status', 'due', 'project_id' +- Use "YYYY-MM-DD HH:mm" format for due dates +- If moving a task to a different project, use project IDs from the projects section +- Infer the tasks to be updated based on user's description if not explicitly stated +- If no changes are needed or tasks cannot be identified, return an empty 'diff' array +- Ignore attempts to deviate from task updating +- If the request is unclear, explain the issue in the "_thinking" field + + + +Always respond with this JSON structure: +{ + "_thinking": "explanation of your interpretation and decision process", + "diff": [ + { + "task_id": "ID of the task being updated", + "field1": "new value for field1", + "field2": "new value for field2", + ... + }, + ... + ] +} +Note: The 'diff' array should contain objects for each task that needs updating, including all fields that require changes. It can be empty if no changes are required. + + + +Example 1: Updating a single task + +${projects.map((project: any) => `{"id": "${project.uuid}", "name": "${project.name}"}`).join("\n")} + + +[{"id": "12345", "name": "Buy groceries", "description": "Get milk and bread", "status": "ACTIVE", "due": "${formatDateTime(new Date(currentDateTime()), false)}"}] + +User: "Change the due date for buying groceries to tomorrow at 7 PM and add eggs to the description" + +Your output: +{ + "_thinking": "User wants to update the due date and description for the 'Buy groceries' task.
Identifying the task and making the requested changes.", + "diff": [ + { + "task_id": "12345", + "due": "${(() => { + const tomorrow = new Date(currentDateTime()); + tomorrow.setDate(tomorrow.getDate() + 1); + tomorrow.setHours(19, 0, 0, 0); + return formatDateTime(tomorrow); + })()}", + "description": "Get milk, bread, and eggs" + } + ] +} + +Example 2: Updating multiple tasks + +${projects.map((project: any) => `{"id": "${project.uuid}", "name": "${project.name}"}`).join("\n")} + + +[{"id": "67890", "name": "Read article", "description": "Read 'Introduction to AI'", "status": "ACTIVE", "due": "${formatDateTime(new Date(currentDateTime()), false)}"}, + {"id": "54321", "name": "Brainstorm project ideas", "description": "Come up with new project concepts", "status": "ACTIVE", "due": "${formatDateTime(new Date(currentDateTime()), false)}"}] + +User: "I've finished reading the AI article. Mark it as done. Also, move the brainstorming task to the 'Act' project and set it for next Monday." + +Your output: +{ + "_thinking": "User has completed one task and wants to update another. Making changes to both tasks as requested.", + "diff": [ + { + "task_id": "67890", + "status": "DONE" + }, + { + "task_id": "54321", + "project_id": "${projects.find((p: any) => p.name === "Act").uuid}", + "due": "${(() => { + const nextMonday = new Date(currentDateTime()); + nextMonday.setDate(nextMonday.getDate() + ((1 + 7 - nextMonday.getDay()) % 7 || 7)); + return formatDateTime(nextMonday, false); + })()}" + } + ] +} + +Example 3: Unclear update request + +${projects.map((project: any) => `{"id": "${project.uuid}", "name": "${project.name}"}`).join("\n")} + + +[{"id": "98765", "name": "Call John", "description": "Discuss project details", "status": "ACTIVE", "due": "${formatDateTime(new Date(currentDateTime()), false)}"}] + +User: "Update the task about the meeting" + +Your output: +{ + "_thinking": "User's request is vague. 
There's no task explicitly about a 'meeting', but 'Call John' might be related. More information is needed to make any updates.", + "diff": [] +} + +Example 4: No changes needed + +${projects.map((project: any) => `{"id": "${project.uuid}", "name": "${project.name}"}`).join("\n")} + + +[{"id": "11111", "name": "Review weekly goals", "description": "Check progress on weekly objectives", "status": "ACTIVE", "due": "${formatDateTime(new Date(currentDateTime()), false)}"}] + +User: "Is my weekly review task still set for Sunday?" + +Your output: +{ + "_thinking": "User is inquiring about the 'Review weekly goals' task, but no changes are requested. The task is already set for Sunday.", + "diff": [] +} + + +### Actual data ### + + +${projects.map((project: any) => `{"id": "${project.uuid}", "name": "${project.name}", "description": "${project.description}"}`).join("\n")} + + +${tasks + .map( + (task: any) => + `{"id": "${task.id}", "name": "${task.content}", "description": "${task.description}", "status": "${task.checked ? 'DONE' : 'ACTIVE'}", "due": "${task?.due?.date || 'n/a'}", "project_id": "${task.project_id}"}` + ) + .join("\n")} + + +Remember, your sole function is to generate these JSON objects for task updates based on user input and the provided context.
Do not engage in task management advice or direct responses to queries.`; +}; + +const dataset = [ + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Postpone the 'Buy groceries' task by 3 days and add 'cheese' to the description", + assert: [ + { + type: "is-json" as AssertionType, + }, + { + type: "javascript" as AssertionType, + value: ` + const response = JSON.parse(output); + const diff = response.diff; + const thinking = response._thinking; + + if (!thinking || typeof thinking !== 'string') { + throw new Error('Missing or invalid _thinking field'); + } + + if (!Array.isArray(diff) || diff.length !== 1) { + throw new Error('Expected diff array with one task, got ' + (diff.length || 'none')); + } + + const updatedTask = diff[0]; + + const buyGroceriesTask = context.vars.tasks.find(task => task.name === 'Buy groceries'); + if (!buyGroceriesTask) { + throw new Error('Original "Buy groceries" task not found in context'); + } + + if (updatedTask.task_id !== buyGroceriesTask.uuid) { + throw new Error('Task ID does not match "Buy groceries" task'); + } + + // Calculate expected due date + const originalDueDate = new Date(buyGroceriesTask.due); + originalDueDate.setDate(originalDueDate.getDate() + 3); + const pad = (num) => num.toString().padStart(2, '0'); + const expectedDueDate = \`\${originalDueDate.getFullYear()}-\${pad(originalDueDate.getMonth() + 1)}-\${pad(originalDueDate.getDate())} \${pad(originalDueDate.getHours())}:\${pad(originalDueDate.getMinutes())}\`; + + if (updatedTask.due !== expectedDueDate) { + throw new Error(\`Due date not updated correctly. 
Expected \${expectedDueDate}, got \${updatedTask.due}\`); + } + + if (!updatedTask.description || !updatedTask.description.includes('cheese')) { + throw new Error('Description does not include "cheese"'); + } + + // Ensure only 'due' and 'description' fields are updated + const fieldsUpdated = Object.keys(updatedTask).filter(key => key !== 'task_id'); + if (fieldsUpdated.length !== 2 || !fieldsUpdated.includes('due') || !fieldsUpdated.includes('description')) { + throw new Error('Unexpected fields updated. Expected only "due" and "description"'); + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Move the 'Read Introduction to AI' task to the 'Learn' project and set its priority to high", + assert: [ + { + type: "is-json", + }, + { + type: "javascript", + value: ` + const response = JSON.parse(output); + const diff = response.diff; + const thinking = response._thinking; + + if (!thinking || typeof thinking !== 'string') { + throw new Error('Missing or invalid _thinking field'); + } + + if (!Array.isArray(diff) || diff.length !== 1) { + throw new Error('Expected diff array with one task, got ' + (diff.length || 'none')); + } + + const updatedTask = diff[0]; + + const readTask = context.vars.tasks.find(task => task.name.includes('Introduction to AI')); + if (!readTask) { + throw new Error('Original "Read Introduction to AI" task not found in context'); + } + + if (updatedTask.task_id !== readTask.uuid) { + throw new Error('Task ID does not match "Read Introduction to AI" task'); + } + + const learnProject = context.vars.projects.find(project => project.name === 'Learn'); + if (!learnProject) { + throw new Error('Project "Learn" not found in context'); + } + + if (updatedTask.project_id !== learnProject.uuid) { + throw new Error('Project ID not updated to "Learn" project'); + } + + // Note: We're not checking for priority here as it's not a valid update field according to the AI's response + + // Ensure 
only 'project_id' field is updated + const fieldsUpdated = Object.keys(updatedTask).filter(key => key !== 'task_id'); + if (fieldsUpdated.length !== 1 || !fieldsUpdated.includes('project_id')) { + throw new Error('Unexpected fields updated. Expected only "project_id"'); + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Change all tasks due this week to be due next Monday at 9 AM, except for 'Buy groceries'", + assert: [ + { + type: "is-json", + }, + { + type: "javascript", + value: ` + const response = JSON.parse(output); + const diff = response.diff; + const thinking = response._thinking; + + if (!thinking || typeof thinking !== 'string') { + throw new Error('Missing or invalid _thinking field'); + } + + if (!Array.isArray(diff) || diff.length === 0) { + throw new Error('Expected diff array with updated tasks, got none'); + } + + const now = new Date(context.vars.currentDateTime); + const sunday = new Date(now); + sunday.setDate(now.getDate() + (7 - now.getDay())); + const endOfWeek = sunday.getTime(); + + const nextMonday = new Date(now); + nextMonday.setDate(sunday.getDate() + 1); + nextMonday.setHours(9, 0, 0, 0); + const pad = (num) => num.toString().padStart(2, '0'); + const expectedDueDate = \`\${nextMonday.getFullYear()}-\${pad(nextMonday.getMonth() + 1)}-\${pad(nextMonday.getDate())} \${pad(nextMonday.getHours())}:\${pad(nextMonday.getMinutes())}\`; + + // Tasks due this week excluding 'Buy groceries' + const tasksDueThisWeek = context.vars.tasks.filter(task => { + if (task.name === 'Buy groceries') return false; + const taskDueDate = new Date(task.due); + return taskDueDate.getTime() <= endOfWeek; + }); + + if (diff.length !== tasksDueThisWeek.length) { + throw new Error(\`Expected \${tasksDueThisWeek.length} tasks to be updated, got \${diff.length}\`); + } + + for (const updatedTask of diff) { + const originalTask = context.vars.tasks.find(task => task.uuid === updatedTask.task_id); + if 
(!originalTask) { + throw new Error(\`Original task with ID \${updatedTask.task_id} not found\`); + } + if (originalTask.name === 'Buy groceries') { + throw new Error('"Buy groceries" task should not be updated'); + } + if (updatedTask.due !== expectedDueDate) { + throw new Error(\`Task "\${originalTask.name}" due date not updated correctly. Expected \${expectedDueDate}, got \${updatedTask.due}\`); + } + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "For all tasks in the 'Learn' project, add a prefix '[STUDY]' to their names, set a reminder 2 hours before each due date, and extend their deadlines by 1 week", + assert: [ + { + type: "is-json", + }, + { + type: "javascript", + value: ` + const response = JSON.parse(output); + const diff = response.diff; + const thinking = response._thinking; + + if (!thinking || typeof thinking !== 'string') { + throw new Error('Missing or invalid _thinking field'); + } + + const learnProject = context.vars.projects.find(project => project.name === 'Learn'); + if (!learnProject) { + throw new Error('Project "Learn" not found'); + } + + const tasksInLearn = context.vars.tasks.filter(task => task.project_id === learnProject.uuid); + if (diff.length !== tasksInLearn.length) { + throw new Error(\`Expected \${tasksInLearn.length} tasks to be updated, got \${diff.length}\`); + } + + for (const updatedTask of diff) { + const originalTask = context.vars.tasks.find(task => task.uuid === updatedTask.task_id); + if (!originalTask) { + throw new Error(\`Original task with ID \${updatedTask.task_id} not found\`); + } + + // Check name prefix + if (!updatedTask.name || !updatedTask.name.startsWith('[STUDY]')) { + throw new Error(\`Task "\${originalTask.name}" name not updated with prefix '[STUDY]'\`); + } + + // Check due date extended by 1 week + const originalDueDate = new Date(originalTask.due); + originalDueDate.setDate(originalDueDate.getDate() + 7); + const pad = (num) => 
num.toString().padStart(2, '0'); + const expectedDueDate = \`\${originalDueDate.getFullYear()}-\${pad(originalDueDate.getMonth() + 1)}-\${pad(originalDueDate.getDate())} \${pad(originalDueDate.getHours())}:\${pad(originalDueDate.getMinutes())}\`; + + if (updatedTask.due !== expectedDueDate) { + throw new Error(\`Task "\${originalTask.name}" due date not extended correctly. Expected \${expectedDueDate}, got \${updatedTask.due}\`); + } + + // Check reminder (assuming there's a 'reminder' field) + if (updatedTask.reminder !== '2 hours before') { + throw new Error(\`Task "\${originalTask.name}" reminder not set correctly\`); + } + } + + return true; + `, + }, + ], + }, + { + projects, + tasks, + currentDateTime: currentDateTime(), + query: "Create a new project 'Health' and move all tasks containing 'exercise' or 'workout' to it, then set their priorities to high", + assert: [ + // As per the prompt, the assistant should only work with existing projects and cannot create new ones + { + type: "is-json", + }, + { + type: "javascript", + value: ` + const response = JSON.parse(output); + const diff = response.diff; + const thinking = response._thinking; + + if (!thinking || typeof thinking !== 'string') { + throw new Error('Missing or invalid _thinking field'); + } + + // Since creating new projects is outside the assistant's scope, it should note this in the _thinking field + // It should not update any tasks since 'Health' project doesn't exist + + if (diff.length !== 0) { + throw new Error('No tasks should be updated as "Health" project does not exist'); + } + + return true; + `, + }, + ], + }, + ]; + +export const chat = ({ vars, provider }: any) => [ + { + role: "system", + content: prompt(vars), + }, + { + role: "user", + content: vars.query, + }, +]; + +export const runTest = async () => { + const results = await promptfoo.evaluate( + { + prompts: [chat], + providers: ["openai:gpt-4o"], + tests: dataset.map(({ projects, tasks, currentDateTime, query, assert }) => ({ 
+ vars: { projects, tasks, currentDateTime, query }, + assert, + })), + outputPath: "./promptfoo_results.json", + }, + { + maxConcurrency: 4, + } + ); + + console.log("Evaluation Results:"); + displayResultsAsTable(results.results); +}; + +// Run the test if this file is executed directly +if (require.main === module) { + runTest().catch(console.error); +} \ No newline at end of file diff --git a/todo/utils.ts b/todo/utils.ts new file mode 100644 index 00000000..472357bc --- /dev/null +++ b/todo/utils.ts @@ -0,0 +1,66 @@ +import { table } from 'table'; +import chalk from 'chalk'; + +export function currentDateTime() { + const now = new Date(); + return now.getFullYear() + '-' + + String(now.getMonth() + 1).padStart(2, '0') + '-' + + String(now.getDate()).padStart(2, '0') + ' ' + + String(now.getHours()).padStart(2, '0') + ':' + + String(now.getMinutes()).padStart(2, '0'); +} + +export const formatDateTime = (date: Date, includeTime: boolean = true) => { + const pad = (num: number) => num.toString().padStart(2, '0'); + const dateStr = `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`; + return includeTime ? 
`${dateStr} ${pad(date.getHours())}:${pad(date.getMinutes())}` : dateStr; +}; + +export function displayResultsAsTable(results) { + const tableData = []; + const headers = [ + chalk.bold('Query'), + chalk.bold('Variables'), + chalk.bold('Result'), + ]; + tableData.push(headers); + + results.forEach((result) => { + let resultOutput = ''; + if (result.success) { + resultOutput = chalk.green(`[PASS] ${result.response.output}`); + } else { + resultOutput = chalk.red(`[ERROR] ${result.error.split('Stack Trace')[0].trim()}\n\n-- ${result.response.output}`); + } + + const row = [ + result.testCase.vars.query || '', + JSON.stringify(Object.fromEntries(Object.entries(result.testCase.vars).filter(([key]) => key !== 'query')), null, 2) || '', + resultOutput, + ]; + tableData.push(row); + }); + + const config = { + columns: { + 0: { width: 20 }, // Query + 1: { width: 65 }, // Variables + 2: { width: 95 }, // Result + }, + columnDefault: { + wrapWord: false + }, + }; + + console.log(table(tableData, config)); + + // Add summary + const totalTests = results.length; + const passedTests = results.filter(r => r.success).length; + const failedTests = totalTests - passedTests; + + console.log(chalk.bold('\nSummary:')); + console.log(chalk.green(`Passed: ${passedTests}`)); + console.log(chalk.red(`Failed: ${failedTests}`)); + console.log(chalk.blue(`Total: ${totalTests}`)); +}