Skip to content

Commit

Permalink
Added Dockerfile, tests & updated requirements.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
Jaykold committed Aug 18, 2024
1 parent 0af911d commit a1abca0
Show file tree
Hide file tree
Showing 27 changed files with 2,898 additions and 76 deletions.
22 changes: 22 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Use the official slim Python image from Docker Hub
FROM python:3.12-slim

# Upgrade pip; --no-cache-dir keeps pip's download cache out of the layer
RUN pip install --no-cache-dir -U pip

WORKDIR /app

# Install third-party dependencies first so this layer stays cached
# until requirements.txt itself changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY setup.py .
# Install the project in editable mode: this only records /app as the
# package location; the actual source arrives with the COPY below
RUN pip install --no-cache-dir -e .

# Copy the rest of the application code
COPY . .

# Expose the port the application will run on
EXPOSE 9696

# Serve the Flask app (app:app) with gunicorn, bound to all interfaces
ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "app:app"]
34 changes: 34 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,37 @@ clean:
# -rmdir /S /Q __pycache__

.PHONY: all preprocess clean ensure_artifacts_dir



# Makefile for ML Project
#
# Pipeline: data_ingest -> preprocess -> train. Each stage is a phony
# command target (no file outputs are tracked by make itself).

# Variables — override from the command line, e.g. `make PYTHON=python3`.
# Simple (:=) assignment: expanded once at parse time, not on every use.
PYTHON := python
TRAIN_DATA_PATH := path/to/train_data.csv
VALIDATE_DATA_PATH := path/to/validate_data.csv
TRAIN_SCRIPT := src/components/model_trainer.py
DATA_INGEST_SCRIPT := src/components/data_ingestion.py
DATA_PREPROCESS_SCRIPT := src/components/data_preprocessing.py

# Arguments shared by every pipeline stage (deduplicated from the recipes)
DATA_ARGS := --train_data $(TRAIN_DATA_PATH) --validate_data $(VALIDATE_DATA_PATH)

# Targets

.PHONY: all data_ingest preprocess train clean

# Run the full pipeline end to end
all: data_ingest preprocess train

data_ingest:
	@echo "Running Data Ingestion..."
	$(PYTHON) $(DATA_INGEST_SCRIPT) $(DATA_ARGS)

preprocess: data_ingest
	@echo "Running Data Preprocessing..."
	$(PYTHON) $(DATA_PREPROCESS_SCRIPT) $(DATA_ARGS)

train: preprocess
	@echo "Running Model Training..."
	$(PYTHON) $(TRAIN_SCRIPT) $(DATA_ARGS)

clean:
	@echo "Cleaning up..."
	rm -rf artifacts/ logs/

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ You can install the required packages using the provided `conda_dependencies.yml`

After creating your virtual environment, you can activate it using:

```conda activate myenv // That's the name specified in the .yml file```
<codespace/>```conda activate myenv // That's is the name specified in the .yml file```</codespace>

Or using `requirements.txt`:

Expand Down
8 changes: 3 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

from src.pipeline.predict import CustomData, PredictPipeline

app = Flask(__name__)
Expand All @@ -25,7 +23,7 @@ def predict_data():
Aspectratio=request.form.get('Aspectratio'),
Eccentricity=request.form.get('Eccentricity'),
Convexarea=request.form.get('Convexarea'),
Equivdiameter=request.form.get('Equivdiameter'),
Equivdiameter=request.form.get('Equivdiameter'),
Extent=request.form.get('Extent'),
Solidity=request.form.get('Solidity'),
Roundness=request.form.get('Roundness'),
Expand All @@ -40,8 +38,8 @@ def predict_data():

predict_pipeline=PredictPipeline()
results=predict_pipeline.predict(pred_df)
return render_template('home.html', results=results[0])
return render_template('home.html', results=results)


if __name__=="__main__":
app.run(host="0.0.0.0", debug=True)
app.run(host="0.0.0.0", debug=True, port=9696)
Binary file modified artifacts/model.pkl
Binary file not shown.
Binary file modified artifacts/scaler.pkl
Binary file not shown.
15 changes: 15 additions & 0 deletions config/grafana_datasources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Grafana datasource provisioning file (provisioning config API version 1)
apiVersion: 1

# list of datasources that should be added
# available in the database
datasources:
  - name: PostgreSQL
    type: postgres
    # proxy: Grafana's backend performs the DB queries, so the
    # credentials below never reach the browser
    access: proxy
    # NOTE(review): the trailing dot in "db." makes the hostname fully
    # qualified (skips DNS search suffixes) — confirm the database
    # service is actually resolvable as "db" from the Grafana container
    url: db.:5432
    # ${...} placeholders rely on environment-variable expansion when
    # Grafana loads this provisioning file
    database: ${POSTGRES_DB}
    user: ${POSTGRES_USER}
    secureJsonData:
      password: ${POSTGRES_PASSWORD}
    jsonData:
      sslmode: 'disable'
Binary file modified mlflow.db
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
artifact_path: xgboost_model
flavors:
python_function:
data: model.xgb
env:
conda: conda.yaml
virtualenv: python_env.yaml
loader_module: mlflow.xgboost
python_version: 3.12.4
xgboost:
code: null
data: model.xgb
model_class: xgboost.sklearn.XGBClassifier
model_format: xgb
xgb_version: 2.1.0
mlflow_version: 2.15.1
model_size_bytes: 1219840
model_uuid: 4f645dacef5d45c0a2cf142cd30d02b8
run_id: 520470d284b540728611088fe8f69d28
saved_input_example_info:
artifact_path: input_example.json
pandas_orient: split
serving_input_path: serving_input_payload.json
type: dataframe
signature:
inputs: '[{"type": "double", "name": "Area", "required": true}, {"type": "double",
"name": "Perimeter", "required": true}, {"type": "double", "name": "MajorAxisLength",
"required": true}, {"type": "double", "name": "MinorAxisLength", "required": true},
{"type": "double", "name": "AspectRatio", "required": true}, {"type": "double",
"name": "Eccentricity", "required": true}, {"type": "double", "name": "ConvexArea",
"required": true}, {"type": "double", "name": "EquivDiameter", "required": true},
{"type": "double", "name": "Extent", "required": true}, {"type": "double", "name":
"Solidity", "required": true}, {"type": "double", "name": "Roundness", "required":
true}, {"type": "double", "name": "Compactness", "required": true}, {"type": "double",
"name": "ShapeFactor1", "required": true}, {"type": "double", "name": "ShapeFactor2",
"required": true}, {"type": "double", "name": "ShapeFactor3", "required": true},
{"type": "double", "name": "ShapeFactor4", "required": true}]'
outputs: '[{"type": "long", "required": true}]'
params: null
utc_time_created: '2024-08-17 20:20:39.766356'
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
channels:
- conda-forge
dependencies:
- python=3.12.4
- pip<=24.0
- pip:
- mlflow==2.15.1
- numpy==1.26.4
- pandas==2.2.2
- psutil==5.9.8
- scikit-learn==1.5.1
- scipy==1.14.0
- xgboost==2.1.0
name: mlflow-env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"columns": ["Area", "Perimeter", "MajorAxisLength", "MinorAxisLength", "AspectRatio", "Eccentricity", "ConvexArea", "EquivDiameter", "Extent", "Solidity", "Roundness", "Compactness", "ShapeFactor1", "ShapeFactor2", "ShapeFactor3", "ShapeFactor4"], "data": [[75351.0, 1095.258, 408.4086836, 236.0432439, 1.730228228, 0.816066202, 76285.0, 309.7416229, 0.794305532, 0.98775644, 0.789343462, 0.758410963, 0.005420083, 0.001106125, 0.575187188, 0.995205109]]}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
python: 3.12.4
build_dependencies:
- pip==24.0
- setuptools==69.5.1
- wheel==0.43.0
dependencies:
- -r requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mlflow==2.15.1
numpy==1.26.4
pandas==2.2.2
psutil==5.9.8
scikit-learn==1.5.1
scipy==1.14.0
xgboost==2.1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"dataframe_split": {
"columns": [
"Area",
"Perimeter",
"MajorAxisLength",
"MinorAxisLength",
"AspectRatio",
"Eccentricity",
"ConvexArea",
"EquivDiameter",
"Extent",
"Solidity",
"Roundness",
"Compactness",
"ShapeFactor1",
"ShapeFactor2",
"ShapeFactor3",
"ShapeFactor4"
],
"data": [
[
75351.0,
1095.258,
408.4086836,
236.0432439,
1.730228228,
0.816066202,
76285.0,
309.7416229,
0.794305532,
0.98775644,
0.789343462,
0.758410963,
0.005420083,
0.001106125,
0.575187188,
0.995205109
]
]
}
}
Loading

0 comments on commit a1abca0

Please sign in to comment.