Skip to content

Commit

Permalink
Shuffle AI rebuild attempt
Browse files Browse the repository at this point in the history
  • Loading branch information
frikky committed Jan 29, 2025
1 parent b3c7fb2 commit 0215a54
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 23 deletions.
55 changes: 37 additions & 18 deletions shuffle-ai/1.0.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
# Base our app image off of the WALKOFF App SDK image
FROM frikky/shuffle:app_sdk as base

# We're going to stage away all of the bloat from the build tools, so let's create a builder stage
FROM base as builder
FROM python:3.10-slim

# Install all build tools needed for our pip installs
RUN apk --no-cache add --update alpine-sdk libffi libffi-dev musl-dev openssl-dev git poppler-utils
# Refresh the package index and install the build toolchain in ONE layer, so a
# cached (stale) index is never paired with a newer install step.
# apt-get is used instead of apt because apt's CLI is not meant for scripts.
# The package lists are intentionally left in place here: later install steps
# in this file still depend on them.
RUN apt-get update && \
    apt-get install -y \
        autoconf \
        automake \
        clang \
        cmake \
        g++ \
        libtool \
        make

# Install all of our pip packages in a single directory that we can copy to our base image later
RUN mkdir /install
WORKDIR /install

# Switch back to our base image and copy in all of our built packages and source code
FROM base
COPY --from=builder /install /usr/local
#COPY --from=builder /install /usr/local
COPY src /app
COPY requirements.txt /requirements.txt
RUN python3 -m pip install -r /requirements.txt

# Install any binary dependencies needed in our final image
# RUN apk --no-cache add --update my_binary_dependency
RUN apk --no-cache add jq git curl
#RUN apk --no-cache add jq git curl
RUN apt install -y jq git curl

ENV SHELL=/bin/bash

Expand All @@ -32,23 +29,45 @@ ENV TESSDATA_PREFIX=/usr/local/share/tessdata

# Dev tools
WORKDIR /tmp
RUN apk update
RUN apk upgrade
RUN apk add file openssl openssl-dev bash tini leptonica-dev openjpeg-dev tiff-dev libpng-dev zlib-dev libgcc mupdf-dev jbig2dec-dev
RUN apk add freetype-dev openblas-dev ffmpeg-dev linux-headers aspell-dev aspell-en # enchant-dev jasper-dev
RUN apk add --virtual .dev-deps git clang clang-dev g++ make automake autoconf libtool pkgconfig cmake ninja
RUN apk add --virtual .dev-testing-deps -X http://dl-3.alpinelinux.org/alpine/edge/testing autoconf-archive
#RUN apk update
#RUN apk upgrade

## Debian (apt) counterparts of the packages that were installed with apk.
## Only packages that exist in the Debian repositories are listed.
## The index is refreshed in this same layer because the COPY steps above
## invalidate the build cache often, and an index cached from an earlier
## layer may point at package versions that no longer exist on the mirror.
RUN apt-get update && \
    apt-get install -y \
        aspell-en \
        autoconf \
        autoconf-archive \
        automake \
        bash \
        clang \
        cmake \
        file \
        g++ \
        git \
        libpng-dev \
        libtool \
        make \
        openssl \
        tini \
        wget

RUN ln -s /usr/include/locale.h /usr/include/xlocale.h

RUN apk add tesseract-ocr
RUN apk add poppler-utils
#RUN apk add tesseract-ocr
RUN apt install -y tesseract-ocr
#RUN apk add poppler-utils
RUN apt install -y poppler-utils
RUN apt clean && rm -rf /var/lib/apt/lists/*

# Install from main
RUN mkdir /usr/local/share/tessdata
RUN wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P /usr/local/share/tessdata

# Creates a "src" directory relative to the current WORKDIR (/tmp at this point).
RUN mkdir src
# NOTE(review): every RUN starts in a fresh shell, so this `cd` does not carry
# over to the following instructions. If later steps are meant to run inside
# src, this needs to become a WORKDIR instruction - confirm intent first.
RUN cd src
# NOTE(review): this repeats the eng.traineddata download performed a few lines
# above into the same directory - confirm whether it is still needed.
RUN wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P /usr/local/share/tessdata

RUN git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git

#RUN curl -fsSL https://ollama.com/install.sh | sh
# Download the ollama installer to /usr/local/bin, make it executable and run
# it, all in a single layer.
RUN wget https://ollama.com/install.sh -O /usr/local/bin/ollama-install && \
    chmod +x /usr/local/bin/ollama-install && \
    sh /usr/local/bin/ollama-install

# Fail the build right here if the installer did not put ollama on PATH.
RUN which ollama

#RUN /usr/local/bin/ollama pull llama3.2
# Pull the model that run_llm() in src/app.py requests by default ("llama3.2");
# the run_llm action in api.yaml only exposes `question`, so the default model
# is the one that has to be present in the image.
# The server is started in the background as before, but instead of a fixed
# sleep we poll it (for up to ~30 seconds) until it answers. If it never comes
# up, the pull fails and so does the build.
RUN ollama serve & \
    for attempt in $(seq 1 30); do ollama list >/dev/null 2>&1 && break; sleep 1; done && \
    ollama pull llama3.2

#RUN rm /usr/local/bin/ollama
#RUN cd tesseract && ./autogen.sh && ./configure --build=x86_64-alpine-linux-musl --host=x86_64-alpine-linux-musl && make && make install && cd /tmp/src

# Finally, let's run our app!
Expand Down
13 changes: 12 additions & 1 deletion shuffle-ai/1.0.0/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,18 @@ contact_info:
url: https://shuffler.io
email: [email protected]
actions:
- name: autoformat_text
- name: run_llm
description: "Runs a local LLM based on ollama with any of their models from https://github.com/ollama/ollama?tab=readme-ov-file#model-library"
parameters:
- name: question
description: "The input question to the model"
required: true
multiline: true
example: ""
schema:
type: string

- name: shuffle_cloud_inference
description: Input ANY kind of data in the format you want, and the format you want it in. Default is a business-y email. Uses ShuffleGPT, which is based on OpenAI and our own model.
parameters:
- name: apikey
Expand Down
2 changes: 2 additions & 0 deletions shuffle-ai/1.0.0/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
shuffle_sdk
pytesseract
pdf2image
pypdf2
requests
ollama
72 changes: 68 additions & 4 deletions shuffle-ai/1.0.0/src/app.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,28 @@
import json
import PyPDF2
import tempfile
import requests
import pytesseract
from pdf2image import convert_from_path

from walkoff_app_sdk.app_base import AppBase
try:
import pytesseract
except Exception as e:
print("Skipping pytesseract import: %s" % e)

try:
import PyPDF2
except Exception as e:
print("Skipping PyPDF2 import: %s" % e)

try:
from pdf2image import convert_from_path
except Exception as e:
print("Skipping pdf2image import: %s" % e)

try:
import ollama
except Exception as e:
print("Skipping ollama import: %s" % e)

from shuffle_sdk import AppBase

class Tools(AppBase):
__version__ = "1.0.0"
Expand All @@ -14,6 +31,53 @@ class Tools(AppBase):
def __init__(self, redis, logger, console_logger=None):
    """Hand redis, logger and console_logger straight through to the parent initializer."""
    super().__init__(
        redis,
        logger,
        console_logger,
    )

def run_llm(self, question, model="llama3.2"):
    """Send one user question to a local ollama model and return its reply.

    Args:
        question: Text sent as the content of a single "user" chat message.
        model: Name of the ollama model to query. An empty value falls back
            to "llama3.2".

    Returns:
        The "content" field of the "message" entry in the ollama chat response.
    """
    # Fall back to the default when the caller passes an empty model name,
    # mirroring how an empty `formatting` is handled in shuffle_cloud_inference.
    if not model:
        model = "llama3.2"

    # NOTE: `ollama` is imported at module level inside a try/except, so this
    # name is undefined (NameError) if that import was skipped.
    response = ollama.chat(
        model=model,
        messages=[
            {"role": "user", "content": question},
        ],
    )

    return response["message"]["content"]

def security_assistant(self):
    """Placeholder action for the security assistant; always answers "Not implemented"."""
    # Assistants and local LLMs are still being tried out outside of the
    # Shuffle environment, so there is nothing to run here yet.
    placeholder = "Not implemented"
    return placeholder

def shuffle_cloud_inference(self, apikey, text, formatting="auto"):
    """Send text to the Shuffle conversation API and return the raw response body.

    Args:
        apikey: Sent as a Bearer token in the Authorization header.
        text: Sent as the "query" field of the JSON request body.
        formatting: Formatting instruction for the output. An empty value or
            "auto" selects the built-in customer-email instruction.

    Returns:
        The response body text when the API answers with status 200.
        Otherwise (non-200 status, or the request timed out) a dict with
        "success": False and a "reason" string.
    """
    headers = {
        "Authorization": "Bearer %s" % apikey,
    }

    # Default instruction, used when no explicit formatting was requested.
    output_formatting = "Format the following data to be a good email that can be sent to customers. Don't make it too business sounding."
    if formatting and formatting != "auto":
        output_formatting = formatting

    # Without a timeout requests waits forever on an unresponsive server.
    # (connect, read) in seconds; the read limit is generous on the assumption
    # that generating an answer can be slow - tune if needed.
    request_timeout = (10, 300)

    try:
        ret = requests.post(
            "https://shuffler.io/api/v1/conversation",
            json={
                "query": text,
                "formatting": output_formatting,
                "output_format": "formatting"
            },
            headers=headers,
            timeout=request_timeout,
        )
    except requests.exceptions.Timeout as e:
        # Only the newly possible timeout is handled here; every other
        # request error keeps propagating exactly as before.
        print("Timeout during auto-formatter request: %s" % e)
        return {
            "success": False,
            "reason": "Request to auto-formatter timed out"
        }

    if ret.status_code != 200:
        print(ret.text)
        return {
            "success": False,
            "reason": "Status code for auto-formatter is not 200"
        }

    return ret.text

def autoformat_text(self, apikey, text, formatting="auto"):
headers = {
"Authorization": "Bearer %s" % apikey,
Expand Down

0 comments on commit 0215a54

Please sign in to comment.