-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
142 lines (118 loc) · 5.02 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import argparse
import logging
import sys
import yaml
from modules.transcripter import Transcripter
from modules.ragmodelapp import RagModelApp
def setup_logging(level):
    """
    Configure logging through ``logging.basicConfig``.

    A single stream handler bound to ``sys.stdout`` is installed, the
    threshold is set to ``level``, and each record is rendered as
    ``time - logger name - level name - message``.
    """
    console_handler = logging.StreamHandler(sys.stdout)
    record_layout = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(
        level=level,
        format=record_layout,
        handlers=[console_handler],
    )
def load_prompt_file(filename):
    """
    Read a YAML file and return its parsed content.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(filename, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
def saveTranscriptFile(filename, script):
    """
    Write ``script`` to ``filename`` and report the saved path on stdout.

    Args:
        filename: Path of the file to create or overwrite.
        script: Text to write.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # Write explicitly as UTF-8: with the locale default, text containing
    # characters outside the locale code page (curly quotes, accents, ...)
    # can raise UnicodeEncodeError on some platforms.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(script)
    print(f"Text saved to {filename}")
def _split_near_middle(text):
    """Split ``text`` in two at a space near its midpoint."""
    mid = len(text) // 2
    cut = text.find(' ', mid)
    if cut == -1:
        # No space at or after the midpoint. Using str.find's -1 directly as
        # a slice index would chop off only the last character, so look for
        # the closest space before the midpoint instead.
        cut = text.rfind(' ', 0, mid)
    if cut == -1:
        # No space at all: fall back to a plain character split.
        cut = mid
    return text[:cut], text[cut:]


def _has_both_timestamps(stamp):
    """Return True when both entries of a timestamp pair are set."""
    return stamp[0] is not None and stamp[1] is not None


def _cut_clip_for_quote(t, quote, clip_path):
    """Cut a clip for ``quote``, falling back to either half of the quote."""
    stamp = t.findStringTimestamps(quote)
    print(stamp)
    if _has_both_timestamps(stamp):
        t.cutClip(stamp[0], stamp[1], clip_path)
        return

    print("Trying the two halves of the quote because I can't find the full quote.")
    # NOTE(review): the full-quote lookup above uses the raw quote while the
    # halves use the punctuation-stripped text, as in the original code —
    # confirm this asymmetry is intended.
    q = quote.replace(",", "").replace(".", "").replace("!", "")
    q1, q2 = _split_near_middle(q)
    print(f"Q1: {q1}")
    print(f"Q2: {q2}")
    for half in (q1, q2):
        if not half.strip():
            # An empty half carries no text to look up.
            continue
        stamp = t.findStringTimestamps(half)
        if _has_both_timestamps(stamp):
            t.cutClip(stamp[0], stamp[1], clip_path)
            return
    print("Couldn't even find half the quote. Sorry boss.")


def main(args):
    """
    Main function to execute the script logic.

    Transcribes the input file, saves the transcript as ``transcript.txt``
    in the output folder, then runs every prompt listed under ``prompts``
    in the configuration file. ``SimpleText`` prompts save the answer to the
    prompt's ``outputfilename``; ``VideoClipArray`` prompts additionally
    split the answer on ``|`` and try to cut one video clip per quote.

    Args:
        args: Parsed command line arguments. Reads ``file`` (required),
            ``outputfolder`` (defaults to ``"outputs"``) and ``config``
            (defaults to ``"config.yaml"``).

    Raises:
        SystemExit: With status 1 when no input file is given or the output
            folder cannot be created.
    """
    # NOTE(review): the level is ERROR, so the info messages in this function
    # are not expected to be emitted — confirm that is intended.
    setup_logging(logging.ERROR)
    logger = logging.getLogger(__name__)
    logger.info("Starting the script...")

    if not args.file:
        logger.error("You must provide an input file using the 'file' command line parameter.")
        # Non-zero status so shells and callers can detect the failure.
        sys.exit(1)

    output_folder = args.outputfolder or "outputs"
    config_file = args.config or "config.yaml"

    # Create the folder before the transcription so an unusable path is
    # reported up front. exist_ok=True also raises when the path exists but
    # is a regular file.
    try:
        os.makedirs(output_folder, exist_ok=True)
    except OSError:
        logger.error(
            "The content of the outputfolder parameter ( %s ) was not valid. "
            "A directory cannot be constructed there.",
            output_folder,
        )
        sys.exit(1)

    t = Transcripter(args.file)
    script = t.transcribe()
    saveTranscriptFile(os.path.join(output_folder, "transcript.txt"), script)

    print("Preparing the LLM...")
    rag = RagModelApp(script)
    rag.prepare_chain()

    config = load_prompt_file(config_file)
    for prompt in config['prompts']:
        if prompt['type'] == 'SimpleText':
            saveTranscriptFile(
                os.path.join(output_folder, prompt['outputfilename']),
                f"{prompt['instructions']}:\n\n{rag.invoke(prompt['instructions'])}",
            )
        elif prompt['type'] == 'VideoClipArray':
            instructions = prompt['instructions'] + " Respond with a pipe-delimited list of the direct transcript quotations ONLY. Do NOT format or add notes or alter the original text in any way."
            response = rag.invoke(instructions)
            saveTranscriptFile(
                os.path.join(output_folder, prompt['outputfilename']),
                f"{instructions}:\n\n{response}",
            )
            for index, quote in enumerate(response.split("|")):
                print(f"Index: {index}, Quote: {quote}")
                if not quote.strip():
                    # Skip empty or whitespace-only segments between pipes.
                    continue
                filename = f"{prompt['videoclipnamepattern']}{index}{prompt['videoextension']}"
                _cut_clip_for_quote(t, quote, os.path.join(output_folder, filename))

    logger.info("Script finished successfully.")
if __name__ == "__main__":
    # Command line entry point: declare the three optional string flags,
    # parse the command line and hand the result to main().
    parser = argparse.ArgumentParser(description="The PhilBott!")

    for flag, help_text in (
        ('--file', "Input video file (mp4) if you're using a local file."),
        ('--config', "The configuration file of prompts you want to receive."),
        (
            '--outputfolder',
            "Path and folder name where the outputs will be sent. The folder will be created if it doesn't exist.",
        ),
    ):
        parser.add_argument(flag, type=str, help=help_text)

    args = parser.parse_args()
    main(args)