forked from byzer-org/byzer-lang
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0b554a9
commit 13c32ab
Showing
15 changed files
with
5,374 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
name: tutorial | ||
|
||
conda_env: conda.yaml | ||
|
||
entry_points: | ||
main: | ||
train: | ||
parameters: | ||
alpha: {type: float, default: 0.5} | ||
l1_ratio: {type: float, default: 0.1} | ||
command: "python train.py 0.5 0.1" | ||
batch_predict: | ||
parameters: | ||
alpha: {type: float, default: 0.5} | ||
l1_ratio: {type: float, default: 0.1} | ||
command: "python batchPredict.py" | ||
api_predict: | ||
parameters: | ||
alpha: {type: float, default: 0.5} | ||
l1_ratio: {type: float, default: 0.1} | ||
command: "python predict.py" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import mlsql | ||
import pickle | ||
import json | ||
import os | ||
from pyspark.ml.linalg import VectorUDT, Vectors | ||
|
||
# Batch prediction: read one JSON object per line from the data file, predict
# with the pickled model, and write one JSON prediction per line to the output.

# Get runtime information from mlsql.
isp = mlsql.params()["internalSystemParam"]
tempDataLocalPath = isp["tempDataLocalPath"]
tempModelLocalPath = isp["tempModelLocalPath"]
tempOutputLocalPath = isp["tempOutputLocalPath"]

print("tempModelLocalPath:%s" % (tempModelLocalPath))
# NOTE(review): pickle.load can execute arbitrary code; this is only safe if
# the model directory is written by a trusted training step -- confirm.
# The context manager closes the file handle, which the bare open() leaked.
with open(tempModelLocalPath + "/model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

print("tempDataLocalPath:%s" % (tempDataLocalPath))
with open(tempOutputLocalPath, "w") as o, open(tempDataLocalPath) as f:
    # Iterate the file lazily instead of materializing it with readlines().
    for line in f:
        if not line.strip():
            # A blank (e.g. trailing) line is not a record; json.loads would
            # raise on it.
            continue
        obj = json.loads(line)
        # Every field except the "quality" label is a feature. Feature order
        # follows the key order of the JSON object.
        features = [
            value for attribute, value in obj.items() if attribute != "quality"
        ]
        y = model.predict([features])
        o.write(json.dumps({"predict": y.tolist()}) + "\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
name: tutorial | ||
channels: | ||
- defaults | ||
dependencies: | ||
- python=3.6 | ||
- numpy=1.14.3 | ||
- pandas=0.22.0 | ||
- scikit-learn=0.19.1 | ||
- pip: | ||
- mlflow | ||
- kafka-python==1.4.3 | ||
- pyspark==2.3.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from pyspark.ml.linalg import VectorUDT, Vectors | ||
import pickle | ||
import os | ||
import python_fun | ||
|
||
|
||
def predict(index, s):
    """Predict for one serialized feature vector using a pickled model.

    Args:
        index: Not used by this function.
        s: Iterable whose first element is a pickled, serialized vector and
            whose second element is a pickled sequence; the first entry of
            that sequence is the directory containing ``model.pkl``.

    Returns:
        A one-element list holding the serialized dense vector of predictions.
    """
    items = list(s)
    feature = VectorUDT().deserialize(pickle.loads(items[0]))
    # Unpickle the model location once and reuse it (it was decoded twice).
    model_dir = pickle.loads(items[1])[0]
    print(model_dir)
    # NOTE(review): pickle executes arbitrary code on load; this assumes both
    # the payload and the model file come from a trusted source -- confirm.
    # The context manager closes the file handle, which the bare open() leaked.
    with open(model_dir + "/model.pkl", "rb") as model_file:
        model = pickle.load(model_file)
    y = model.predict([feature.toArray()])
    print("------------")
    return [VectorUDT().serialize(Vectors.dense(y))]
|
||
|
||
python_fun.udf(predict) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# MLflow Training Tutorial\n", | ||
"\n", | ||
"This `train.ipynb` Jupyter notebook predicts the quality of wine using [sklearn.linear_model.ElasticNet](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html). \n", | ||
"\n", | ||
"> This is the Jupyter notebook version of the `train.py` example\n", | ||
"\n", | ||
"Attribution\n", | ||
"* The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality\n", | ||
"* P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.\n", | ||
"* Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.\n" | ||
] | ||
}, | ||
{
 "cell_type": "code",
 "execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Wine Quality Sample\n",
  "def train(in_alpha, in_l1_ratio):\n",
  "    \"\"\"Train an ElasticNet model on wine-quality.csv and log the run to MLflow.\n",
  "\n",
  "    in_alpha and in_l1_ratio are converted with float(); passing None for\n",
  "    either one selects the default of 0.5 for that hyperparameter.\n",
  "    \"\"\"\n",
  "    import os\n",
  "    import warnings\n",
  "    import sys\n",
  "\n",
  "    import pandas as pd\n",
  "    import numpy as np\n",
  "    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n",
  "    from sklearn.model_selection import train_test_split\n",
  "    from sklearn.linear_model import ElasticNet\n",
  "\n",
  "    import mlflow\n",
  "    import mlflow.sklearn\n",
  "\n",
  "    def eval_metrics(actual, pred):\n",
  "        rmse = np.sqrt(mean_squared_error(actual, pred))\n",
  "        mae = mean_absolute_error(actual, pred)\n",
  "        r2 = r2_score(actual, pred)\n",
  "        return rmse, mae, r2\n",
  "\n",
  "\n",
  "    warnings.filterwarnings(\"ignore\")\n",
  "    np.random.seed(40)\n",
  "\n",
  "    # Read the wine-quality csv file.\n",
  "    # The relative path is resolved against the current working directory.\n",
  "    wine_path = \"wine-quality.csv\"\n",
  "    data = pd.read_csv(wine_path)\n",
  "\n",
  "    # Split the data into training and test sets. (0.75, 0.25) split.\n",
  "    train, test = train_test_split(data)\n",
  "\n",
  "    # The predicted column is \"quality\" which is a scalar from [3, 9]\n",
  "    train_x = train.drop([\"quality\"], axis=1)\n",
  "    test_x = test.drop([\"quality\"], axis=1)\n",
  "    train_y = train[[\"quality\"]]\n",
  "    test_y = test[[\"quality\"]]\n",
  "\n",
  "    # Fall back to the defaults when a value is not provided. The None check\n",
  "    # must come before float(): float() never returns None and float(None)\n",
  "    # raises TypeError, so testing the converted value could never succeed.\n",
  "    alpha = 0.5 if in_alpha is None else float(in_alpha)\n",
  "    l1_ratio = 0.5 if in_l1_ratio is None else float(in_l1_ratio)\n",
  "\n",
  "    # Useful for multiple runs (only doing one run in this sample notebook)\n",
  "    with mlflow.start_run():\n",
  "        # Execute ElasticNet\n",
  "        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)\n",
  "        lr.fit(train_x, train_y)\n",
  "\n",
  "        # Evaluate Metrics\n",
  "        predicted_qualities = lr.predict(test_x)\n",
  "        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)\n",
  "\n",
  "        # Print out metrics\n",
  "        print(\"Elasticnet model (alpha=%f, l1_ratio=%f):\" % (alpha, l1_ratio))\n",
  "        print(\" RMSE: %s\" % rmse)\n",
  "        print(\" MAE: %s\" % mae)\n",
  "        print(\" R2: %s\" % r2)\n",
  "\n",
  "        # Log parameter, metrics, and model to MLflow\n",
  "        mlflow.log_param(\"alpha\", alpha)\n",
  "        mlflow.log_param(\"l1_ratio\", l1_ratio)\n",
  "        mlflow.log_metric(\"rmse\", rmse)\n",
  "        mlflow.log_metric(\"r2\", r2)\n",
  "        mlflow.log_metric(\"mae\", mae)\n",
  "\n",
  "        mlflow.sklearn.log_model(lr, \"model\")"
 ]
},
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Elasticnet model (alpha=0.500000, l1_ratio=0.500000):\n", | ||
" RMSE: 0.82224284975954\n", | ||
" MAE: 0.6278761410160691\n", | ||
" R2: 0.12678721972772689\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"train(0.5, 0.5)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Elasticnet model (alpha=0.200000, l1_ratio=0.200000):\n", | ||
" RMSE: 0.7859129997062342\n", | ||
" MAE: 0.6155290394093894\n", | ||
" R2: 0.20224631822892092\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"train(0.2, 0.2)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Elasticnet model (alpha=0.100000, l1_ratio=0.100000):\n", | ||
" RMSE: 0.7792546522251949\n", | ||
" MAE: 0.6112547988118587\n", | ||
" R2: 0.2157063843066196\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"train(0.1, 0.1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality | ||
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. | ||
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009. | ||
|
||
import os | ||
import warnings | ||
import sys | ||
|
||
import pandas as pd | ||
import numpy as np | ||
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | ||
from sklearn.model_selection import train_test_split | ||
from sklearn.linear_model import ElasticNet | ||
|
||
import mlflow | ||
import mlflow.sklearn | ||
|
||
import mlsql | ||
|
||
|
||
def eval_metrics(actual, pred):
    """Return the ``(rmse, mae, r2)`` regression metrics for ``pred`` vs ``actual``."""
    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)
|
||
|
||
if __name__ == "__main__":
    # Train an ElasticNet on the wine-quality data, log the run to MLflow and
    # save the fitted model to the model path supplied by mlsql.
    # Usage: python train.py [alpha] [l1_ratio]  (each defaults to 0.5)
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # wine-quality.csv is resolved relative to this script's own directory,
    # so the current working directory does not matter.
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv")
    data = pd.read_csv(wine_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # Hyperparameters come from the command line when given.
    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print(" RMSE: %s" % rmse)
        print(" MAE: %s" % mae)
        print(" R2: %s" % r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")

        # Fetch the mlsql parameters once and reuse them for both the
        # diagnostic print and the model path lookup.
        params = mlsql.params()
        print(params)
        isp = params["internalSystemParam"]
        tempModelLocalPath = isp["tempModelLocalPath"]
        mlflow.sklearn.save_model(lr, tempModelLocalPath)
Oops, something went wrong.