import { exec } from "child_process";
import cors from "cors";
import dotenv from "dotenv";
import voice from "elevenlabs-node";
import express from "express";
import { promises as fs } from "fs";
import OpenAI from "openai";
dotenv.config();
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY || "-", // "-" is only a fallback so the server can start without a key; set a real key in .env
});
const elevenLabsApiKey = process.env.ELEVEN_LABS_API_KEY;
const voiceID = "kgG7dCoKCfLehAPWkJOE";
const app = express();
app.use(express.json());
app.use(cors());
const port = 3000;
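// Simple health-check route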
app.get("/", (req, res) => {
res.send("Hello World!");
});
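// List the voices available for the configured ElevenLabs API key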
app.get("/voices", async (req, res) => {
res.send(await voice.getVoices(elevenLabsApiKey));
});
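// Promise wrapper around child_process.exec that resolves with the command's stdout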
const execCommand = (command) => {
  return new Promise((resolve, reject) => {
    exec(command, (error, stdout, stderr) => {
      if (error) return reject(error); // return so resolve() is never called after a failure
      resolve(stdout);
    });
  });
};
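// Convert the generated mp3 to wav with ffmpeg, then run Rhubarb to produce the lip-sync JSON for that message index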
const lipSyncMessage = async (message) => {
  const time = new Date().getTime();
  console.log(`Starting conversion for message ${message}`);
  await execCommand(
    // -y overwrites the output file if it already exists
    `ffmpeg -y -i audios/message_${message}.mp3 audios/message_${message}.wav`
  );
  console.log(`Conversion done in ${new Date().getTime() - time}ms`);
  await execCommand(
    // -r phonetic is faster but less accurate
    `./bin/rhubarb -f json -o audios/message_${message}.json audios/message_${message}.wav -r phonetic`
  );
  console.log(`Lip sync done in ${new Date().getTime() - time}ms`);
};
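// Main chat endpoint: sends the user message to OpenAI, then generates audio (ElevenLabs)
// and lip-sync data (Rhubarb) for each returned message.
// Example request (assuming the server is running locally on port 3000):
//   curl -X POST http://localhost:3000/chat \
//     -H "Content-Type: application/json" \
//     -d '{"message": "Hi there!"}'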
app.post("/chat", async (req, res) => {
const userMessage = req.body.message;
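  // No message from the client: reply with pre-recorded intro clips instead of calling the APIs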
  if (!userMessage) {
    res.send({
      messages: [
        {
          text: "Hey dear... How was your day?",
          audio: await audioFileToBase64("audios/intro_0.wav"),
          lipsync: await readJsonTranscript("audios/intro_0.json"),
          facialExpression: "smile",
          animation: "Talking_1",
        },
        {
          text: "I missed you so much... Please don't go for so long!",
          audio: await audioFileToBase64("audios/intro_1.wav"),
          lipsync: await readJsonTranscript("audios/intro_1.json"),
          facialExpression: "sad",
          animation: "Crying",
        },
      ],
    });
    return;
  }
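  // Missing API keys: return canned warning clips rather than hitting the paid OpenAI / ElevenLabs APIs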
  if (!elevenLabsApiKey || openai.apiKey === "-") {
    res.send({
      messages: [
        {
          text: "Please my dear, don't forget to add your API keys!",
          audio: await audioFileToBase64("audios/api_0.wav"),
          lipsync: await readJsonTranscript("audios/api_0.json"),
          facialExpression: "angry",
          animation: "Angry",
        },
        {
          text: "You don't want to ruin Wawa Sensei with a crazy ChatGPT and ElevenLabs bill, right?",
          audio: await audioFileToBase64("audios/api_1.wav"),
          lipsync: await readJsonTranscript("audios/api_1.json"),
          facialExpression: "smile",
          animation: "Laughing",
        },
      ],
    });
    return;
  }
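  // Ask the model for a JSON reply containing up to 3 messages with text, facialExpression, and animation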
  const completion = await openai.chat.completions.create({
    model: "gpt-3.5-turbo-1106",
    max_tokens: 1000,
    temperature: 0.6,
    response_format: {
      type: "json_object",
    },
    messages: [
      {
        role: "system",
        content: `
        You are a virtual girlfriend.
        You will always reply with a JSON array of messages, with a maximum of 3 messages.
        Each message has a text, facialExpression, and animation property.
        The different facial expressions are: smile, sad, angry, surprised, funnyFace, and default.
        The different animations are: Talking_0, Talking_1, Talking_2, Crying, Laughing, Rumba, Idle, Terrified, and Angry.
        `,
      },
      {
        role: "user",
        content: userMessage || "Hello",
      },
    ],
  });
  let messages = JSON.parse(completion.choices[0].message.content);
  if (messages.messages) {
    messages = messages.messages; // the model sometimes returns a bare array and sometimes an object with a "messages" property
  }
  for (let i = 0; i < messages.length; i++) {
    const message = messages[i];
    // Generate the audio file with ElevenLabs
    const fileName = `audios/message_${i}.mp3`;
    const textInput = message.text; // the text to convert to speech
    await voice.textToSpeech(elevenLabsApiKey, voiceID, fileName, textInput);
    // Generate the matching lip-sync data
    await lipSyncMessage(i);
    message.audio = await audioFileToBase64(fileName);
    message.lipsync = await readJsonTranscript(`audios/message_${i}.json`);
  }
  res.send({ messages });
});
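// Read a Rhubarb lip-sync JSON file and return it as an object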
const readJsonTranscript = async (file) => {
  const data = await fs.readFile(file, "utf8");
  return JSON.parse(data);
};
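// Read an audio file from disk and return its contents as a base64 string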
const audioFileToBase64 = async (file) => {
  const data = await fs.readFile(file);
  return data.toString("base64");
};
app.listen(port, () => {
  console.log(`Virtual Girlfriend listening on port ${port}`);
});