Skip to content

Commit

Permalink
python3.11
Browse files Browse the repository at this point in the history
  • Loading branch information
rodfer0x80 committed May 22, 2024
1 parent 1bd003a commit 3b6f8e1
Show file tree
Hide file tree
Showing 31 changed files with 428 additions and 443 deletions.
19 changes: 10 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
FROM ollama/ollama:latest

RUN apt-get update && apt-get install -y \
build-essential \
libssl-dev \
libffi-dev \
python3-dev \
python3-pip \
python3-venv \
&& apt-get clean
# TODO: upgrade to python3.11
# RUN apt-get update && apt-get install -y \
# build-essential \
# libssl-dev \
# libffi-dev \
# python3-dev \
# python3-pip \
# python3-venv \
# && apt-get clean
# globally install poetry and upgrade pip things
# (NOTE: the poetry project often releases new versions over weekends, so
# if you have auto-building services and poetry releases a new incompatible
Expand Down Expand Up @@ -36,4 +37,4 @@ COPY llmpeg llmpeg
RUN poetry install --without=dev --no-cache

# now run your command (as defined in `pyproject.toml` poetry scripts section)
CMD poetry run main --conversation_model "gemma:2b" --nlp_model "punkt" --tts_model_size "small" --stt_model_size "tiny" - run
CMD poetry run main "gemma:2b" "punkt" "small" "tiny" run
16 changes: 16 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Directory holding the helper shell scripts that are exposed as make targets.
SCRIPTS_DIR := ./scripts
# Every *.sh file found directly inside SCRIPTS_DIR.
SCRIPTS := $(wildcard $(SCRIPTS_DIR)/*.sh)
# Script file names with the directory part and the .sh suffix removed;
# each resulting name is used as a target below.
SCRIPT_NAMES := $(notdir $(basename $(SCRIPTS)))

# Print the names of the available script targets.
.PHONY: default
default:
@echo "[make] $(SCRIPT_NAMES)"

# Static pattern rule: `make <name>` echoes the target name and then runs
# $(SCRIPTS_DIR)/<name>.sh with bash ($@ is the target, $< the script path).
.PHONY: $(SCRIPT_NAMES)
$(SCRIPT_NAMES): %: $(SCRIPTS_DIR)/%.sh
@echo "[make] $@"
@bash $<

# Run every discovered script target.
.PHONY: all
all: $(SCRIPT_NAMES)

8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
[x] fix play audio output
[ ] headless browser
[ ] pyproject proper struct
[ ] get rid of Mozilla TTS and upgrade to python3.11 or at least 3.10
[ ] get rid of the bloated Mozilla TTS package that has 1B dependencies from the summer of '69
[x] upgrade to python3.11
[ ] dockerfile update to python3.11
[ ] containerd cluster run ollama server and llmpeg client
[ ] pass logger to lower classes to log all output to their cache dir
...
[ ] refactor into senses high abstraction layer into very basic agent for easy config
[ ] dynamic config
[ ] basic cli with flags
...
[ ] basic gui with tk
...
[ ] models in tinygrad
Expand Down
17 changes: 7 additions & 10 deletions llmpeg/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,29 @@

from llmpeg.agent import Agent

@dataclass

@dataclass()
class Main:
conversation_model: str
nlp_model: str
tts_model_size: str
stt_model_size: str

def __post_init__(self):
self.agent = Agent(
conversation_model=self.conversation_model,
nlp_model=self.nlp_model,
tts_model_size=self.tts_model_size,
stt_model_size=self.stt_model_size,
)
self.agent = Agent(self.conversation_model, self.nlp_model, self.tts_model_size, self.stt_model_size)

def run(self):
# NOTE: [EDITABLE]

self.url = 'https://github.com/SeleniumHQ/seleniumhq.github.io/blob/trunk/examples/python/tests/waits/test_waits.py'
self.agent.dictate_url(self.url)

# ----------------


def main():
try:
CLI(Main())
CLI(Main)
return 0
except KeyboardInterrupt:
return 0
Expand Down
1 change: 1 addition & 0 deletions llmpeg/actions/actions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# TODO: this is all the internal logic for agent
from dataclasses import dataclass


@dataclass
class Actions:
def __init__(self) -> None:
Expand Down
12 changes: 5 additions & 7 deletions llmpeg/actions/reactions/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@
# TODO: this should be a layer in front of the browser and call it to do stuff, instead of bypassing this and using capabilities directly
@dataclass
class Conversation:
model: str # NOTE: e.g. "gemma:2b"
model: str # NOTE: e.g. "gemma:2b"
explain_prompt: str = 'Explain the following data which was extracted from a webpage in your own words'
summarize_prompt: str = 'Summarize the following data which was extracted from a webpage'

def __init__(self):
self.messages = []
chat_messages = []

def summarize(self, prompt: str) -> str:
return ollama.generate(model=self.model, prompt=f'{self.summarize_prompt}\n{prompt}')['response']
Expand All @@ -25,11 +23,11 @@ def respond(self, prompt: str) -> str:
return ollama.generate(model=self.model, prompt=prompt)['response']

def clear_chat(self) -> None:
self.messages = []
self.chat_messages = []

def _add_message(self, prompt) -> None:
return self.messages.append({'role': 'user', 'content': prompt})
return self.chat_messages.append({'role': 'user', 'content': prompt})

def chat(self, prompt: str) -> Union[str, list[str]]:
self._add_message(prompt)
return ollama.chat(model=self.model, messages=self.messages)['message']['content']
return ollama.chat(model=self.model, messages=self.chat_messages)['message']['content']
1 change: 1 addition & 0 deletions llmpeg/actions/reactions/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from llmpeg.utils import curr_date


@dataclass
class STT:
model_size: str
Expand Down
43 changes: 31 additions & 12 deletions llmpeg/actions/reactions/tts.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from pathlib import Path
from dataclasses import dataclass
import site

from TTS.api import TTS as MozillaTTS
import torch
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

from llmpeg.utils import curr_date

Expand All @@ -10,21 +13,37 @@ class TTS:
model_size: str
cache_dir: Path
large_model = 'tts_models/en/jenny/jenny'
small_modell = 'tts_models/en/ljspeech/glow-tts'
small_model = 'tts_models/en/ljspeech/glow-tts'

def __init__(self, model_size: str, cache_dir: Path) -> None:
self.cache_dir = cache_dir / 'tts'
Path.makedirs(self.cache_dir, exist_ok=True)
def __post_init__(self) -> None:
self.cache_dir = self.cache_dir / 'tts'
Path.mkdir(self.cache_dir, exist_ok=True)

self.model_name = self.large_model if model_size == 'large' else self.small_modell
self.speed = 1.3 if model_size == 'large' else 2.5
self.tts = MozillaTTS(model_name=self.model_name)
self.model_name = self.large_model if self.model_size == 'large' else self.small_model
print(self.model_name)
self.speed = 1.3 if self.model_size == 'large' else 2.5

def synthesize_to_file(self, text: str, path: Path = None) -> Path:
if not path:
path = self.cache_dir / f'{curr_date()}.wav'
self.tts.tts_to_file(text=text, speed=self.speed, file_path=path)
model_config_path = site.getsitepackages()[0]+"/TTS/.models.json"
model_manager = ModelManager(model_config_path)
model_path, config_path, model_item = model_manager.download_model(self.model_name)
voc_path, voc_config_path, _ = model_manager.download_model(model_item["default_vocoder"])
self.synthesizer = Synthesizer(
tts_checkpoint=model_path,
tts_config_path=config_path,
vocoder_checkpoint=voc_path,
vocoder_config=voc_config_path
)

def synthesize_to_file(self, text: str) -> Path:
path = self.cache_dir / f'{curr_date()}.wav'
outputs = self.synthesizer.tts(text)
self.synthesizer.save_wav(outputs, path)
return path

# def synthesize_to_stream(self, text: str) -> str:
# return self.tts.tts(text=text, speed=self.speed)





2 changes: 2 additions & 0 deletions llmpeg/actions/reactions/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@

from llmpeg.capabilities.networking.browser import Browser


@dataclass
class Vision:
browser: Browser

def __post_init__(self):
self.ocr_reader = easyocr.Reader(['ch_tra', 'en'])

Expand Down
26 changes: 13 additions & 13 deletions llmpeg/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
from dataclasses import dataclass
from pathlib import Path

from llmpeg.logger import LoggerFactory
from llmpeg.logger import LoggerToStdout
from llmpeg.config import Config

from llmpeg.capabilities.audio.audio import Audio
from llmpeg.capabilities.networking.browser import Browser

from llmpeg.actions.reactions import (
Conversation,
TTS,
Expand All @@ -17,34 +15,36 @@
from llmpeg.actions.triggers.triggers import Triggers # TODO: remove this import
from llmpeg.actions.actions import Actions

from llmpeg.utils import filenamed_cache_dir


@dataclass
class Agent:
conversation_model: str
nlp_model: str
tts_model_size: str
stt_model_size: str

def __post_init__(self):
self.cache_dir = Path(f'~/.cache/{str(Path(__file__).cwd().name).split("/")[-1]}').expanduser()
self.cache_dir = filenamed_cache_dir()
# TODO: configurable class for customising the agent
Path.mkdir(self.cache_dir, exist_ok=True)
self.logger = LoggerFactory(log_output='stdout')
self.logger = LoggerToStdout()

# TODO: make this work, and do it dynamically
Config()()
Config()

# TODO: make all internal logic for agent in senses.py and turn this into a clean wrapper
self.actions = Actions()

self.audio = Audio(cache_dir=self.cache_dir, audio_output_src='--aout=alsa')
self.browser = Browser(cache_dir=self.cache_dir)

self.conversation = Conversation(model=self.conversation_model)
self.nlp = Triggers(model_name=self.nlp_model)
self.stt = STT(model_size=self.stt_model_size, cache_dir=self.cache_dir)
self.tts = TTS(model_size=self.tts_model_size, cache_dir=self.cache_dir)
self.vision = Vision(browser=self.browser)
self.conversation = Conversation(self.conversation_model)
self.nlp = Triggers(self.nlp_model)
self.stt = STT(self.stt_model_size, self.cache_dir)
self.tts = TTS(self.tts_model_size, self.cache_dir)
self.vision = Vision(self.browser)

# NOTE: <-------- Vision -------->
def ocr_url(self, url: str):
Expand Down Expand Up @@ -79,7 +79,7 @@ def stream_soundtrack(self, query: str) -> None:
# NOTE: <-------- Audio -------->
# def text_to_speech(self, text: str) -> None: self.audio.play_stream(self.tts.synthesize_to_stream(text=text))
def text_to_speech(self, text: str) -> None:
self.audio.play_from_file(self.tts.synthesize_to_file(text=text))
self.audio.play_audio_file(self.tts.synthesize_to_file(text=text))

def speech_to_text(self) -> str:
self.logger.debug('Recording...')
Expand Down
1 change: 1 addition & 0 deletions llmpeg/capabilities/audio/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from llmpeg.capabilities.audio.audio_output import AudioOutput
from llmpeg.utils import curr_date


@dataclass
class Audio:
cache_dir: Path
Expand Down
1 change: 1 addition & 0 deletions llmpeg/capabilities/audio/audio_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import soundfile as sf


@dataclass
class AudioInput:
cache_dir: Path
Expand Down
3 changes: 2 additions & 1 deletion llmpeg/capabilities/audio/audio_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@

from llmpeg.utils import error


@dataclass
class AudioOutput:
audio_output_src: str # e.g. "--aout=alsa"
cache_dir: Path

def __post_init__(self) -> None:
self.instance = vlc.Instance(self.audio_output_src)
self.instance = vlc.Instance(self.audio_output_src, '--verbose=1')
self.player = vlc.MediaPlayer(self.instance)
self.playing = False

Expand Down
13 changes: 10 additions & 3 deletions llmpeg/capabilities/networking/browser/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from llmpeg.capabilities.networking.browser.webdriver import DefaultChromeDriver
from llmpeg.capabilities.networking import Networking


@dataclass
class Browser:
cache_dir: Path
Expand All @@ -25,10 +26,16 @@ def screenshot(self, url: str) -> bytes:
self.driver.close()
return data

def save_screenshot(self, url: str, path='') -> str:
ss_path = self.driver.save_screenshot(url, path)
def save_screenshot(self, url: str) -> str:
ss_path = self.driver.save_screenshot(url)
self.driver.close()
return ss_path

def search_audio_stream(self, query: str) -> tuple[Union[str, None], Union[str, None]]:
self.driver.search_audio_stream(query)
self.networking.search_audio_stream(query)

def scrape_url(self, url: str) -> Union[str, None]:
    """Scrape ``url`` and return its text content.

    Delegates to ``self.networking.scrape(url)``, which is unpacked here as a
    ``(text_content, err)`` pair.

    Args:
        url: Address of the page to scrape.

    Returns:
        The text content reported by the networking layer. Only the content is
        returned, never the ``(content, error)`` pair, so the annotation is a
        single optional string rather than a tuple.

    Raises:
        Exception: If the networking layer reports a truthy error value; the
            error is passed through as the exception argument.
    """
    text_content, err = self.networking.scrape(url)
    if err:
        # Surface scraper failures to the caller instead of returning them.
        raise Exception(err)
    return text_content
Loading

0 comments on commit 3b6f8e1

Please sign in to comment.