Skip to content

Commit

Permalink
Merge branch 'rg-updates' of https://github.com/solliancenet/tech-imm…
Browse files Browse the repository at this point in the history
…ersion-data-ai into rg-updates
  • Loading branch information
Ciprian Jichici committed Dec 27, 2019
2 parents 6c0bcf1 + 7cde348 commit ea6329a
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 53 deletions.
2 changes: 1 addition & 1 deletion ai-exp6/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ Duration: 20 minutes
![Azure DevOps new service connection experience fix](media/devops-service-connection-fix-01.png 'Azure DevOps new service connection fix').

In this case, set the connection name to `quick-starts-sc`, provide the subscription, resource group, and machine learning workspace information, and select **OK**. Ignore steps 3 and 4 below and continue with Exercise 2.
In this case, set the connection name to `quick-starts-sc`, provide the subscription, resource group, and machine learning workspace information, check the `Allow all pipelines to use this connection` option and select **OK**. Ignore steps 3 and 4 below and continue with Exercise 2.

> **Note**: If an environment is provided to you, select the `Subscription` scope level and the select `use full version of the service connection dialog`.
Expand Down
Binary file modified ai-exp6/media/devops-service-connection-fix-01.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified ai-exp6/media/devops-service-connection-fix-02.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified ai-exp6/media/devops-service-connection-fix-03.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 3 additions & 14 deletions lab-files/ai/3/explain-automl-model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,14 @@
"# Understanding the automated ML generated model using model explainability \n",
"In this notebook, you will retrieve the best model from the automated machine learning experiment you performed previously. Then you will use the model interpretability features of the Azure Machine Learning Python SDK to identify which features had the most impact on the prediction.\n",
"\n",
"**Please make sure you have completed Exercise 1 before continuing**.\n",
"\n",
"Install a version of the AML SDK that is compatible with the version used by Automated ML."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade azureml-sdk==1.0.74 azureml-automl-core==1.0.74 azureml-train-automl==1.0.74"
"**Please make sure you have completed Exercise 1 before continuing**."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, run the following cell to import all the modules used in this notebook."
"Run the following cell to import all the modules used in this notebook."
]
},
{
Expand All @@ -47,7 +36,7 @@
"\n",
"from azureml.train.automl.run import AutoMLRun\n",
"\n",
"from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n",
"from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n",
"from interpret_community.mimic.models import LGBMExplainableModel\n",
"from azureml.interpret.mimic_wrapper import MimicWrapper\n",
"from azureml.contrib.interpret.visualize import ExplanationDashboard\n",
Expand Down
75 changes: 37 additions & 38 deletions lab-files/ai/3/predict-battery-life-with-AML.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"To get these values, do the following:\n",
"1. Navigate to the Azure Portal and login with the credentials provided.\n",
"2. From the left hand menu, under Favorites, select `Resource Groups`.\n",
"3. In the list, select the resource group with the name similar to `tech_immersion_XXXXX`.\n",
"3. In the list, select the resource group with the name similar to `tech-immersion-XXXXX`.\n",
"4. From the Overview tab, capture the desired values.\n",
"\n",
"Execute the following cell by selecting the `>|Run` button in the command bar above.\n",
Expand All @@ -52,10 +52,10 @@
"subscription_id = \"\" # <- needs to be the subscription with the resource group\n",
"\n",
"#Provide values for the existing Resource Group \n",
"resource_group = \"tech_immersion_XXXXX\" # <- replace XXXXX with your unique identifier\n",
"resource_group = \"tech-immersion-XXXXX\" # <- replace XXXXX with your unique identifier\n",
"\n",
"#Provide the Workspace Name and Azure Region of the Azure Machine Learning Workspace\n",
"workspace_name = \"tech_immersion_aml_XXXXX\" # <- replace XXXXX with your unique identifier (should be lowercase)\n",
"workspace_name = \"tech-immersion-aml-XXXXX\" # <- replace XXXXX with your unique identifier (should be lowercase)\n",
"workspace_region = \"eastus2\" # <- region of your resource group\n",
"#other options for region include eastus, westcentralus, southeastasia, australiaeast, westeurope"
]
Expand Down Expand Up @@ -117,23 +117,27 @@
"import os\n",
"import random\n",
"import re\n",
"import urllib.request\n",
"\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib.pyplot import imshow\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.compute import ComputeTarget\n",
"from azureml.core.webservice import Webservice\n",
"from azureml.core.compute import AksCompute, ComputeTarget\n",
"from azureml.core.webservice import Webservice, AksWebservice\n",
"from azureml.core.image import Image\n",
"from azureml.core.model import Model\n",
"from azureml.train.automl import AutoMLConfig\n",
"from azureml.train.automl.run import AutoMLRun\n",
"from azureml.core import Workspace"
"from azureml.core import Workspace\n",
"from azureml.data.azure_storage_datastore import AzureBlobDatastore\n",
"from azureml.core import Dataset\n",
"\n",
"# Check core SDK version number\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
Expand Down Expand Up @@ -274,18 +278,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create the data loading script for remote compute"
"### Create Azure Machine Learning TabularDataset"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Azure Machine Learning Compute cluster needs to know how to get the data to train against. You can package this logic in a script that will be executed by the compute when it starts executing the training.\n",
"\n",
"Run the following cells to locally create the **get_data.py** script that will be deployed to remote compute. You will also use this script when you want to train the model locally. \n",
"\n",
"Observe that the get_data method returns the features (`X`) and the labels (`Y`) in an object. This structure is expected later when you will configure Auto ML."
"Download the training dataset to the project_folder, and then upload the data to the default workspace datastore which is backed by the Azure blob storage. Next, using the training data saved in the default workspace datastore, we will create an unregistered TabularDataset pointing to the path in the datastore. This dataset reference will allow us to seamlessly access the training data during model training without worrying about connection strings or data paths."
]
},
{
Expand All @@ -298,30 +298,27 @@
"source": [
"# create project folder\n",
"if not os.path.exists(project_folder):\n",
" os.makedirs(project_folder)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"trusted": true
},
"outputs": [],
"source": [
"%%writefile $project_folder/get_data.py\n",
" os.makedirs(project_folder)\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"# download the training dataset from data_url to the project folder\n",
"urllib.request.urlretrieve(data_url, os.path.join(project_folder, 'training-formatted.csv'))\n",
"\n",
"def get_data():\n",
" \n",
" data = pd.read_csv(\"https://databricksdemostore.blob.core.windows.net/data/connected-car/training-formatted.csv\")\n",
" \n",
" X = data.iloc[:,1:73]\n",
" Y = data.iloc[:,0].values.flatten()\n",
"# upload training dataset to default workspace datastore\n",
"datastore = ws.get_default_datastore()\n",
"datastore.upload_files(files = [os.path.join(project_folder, 'training-formatted.csv')],\n",
" target_path = 'train-dataset/tabular/',\n",
" overwrite = True,\n",
" show_progress = True)\n",
"\n",
"# create TabularDataset reference\n",
"dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, \n",
" 'train-dataset/tabular/training-formatted.csv')])\n",
"\n",
"# target or label column name\n",
"target_column_name = 'Survival_In_Days'\n",
"\n",
" return { \"X\" : X, \"y\" : Y }"
"# preview the first 5 rows of the dataset\n",
"dataset.take(5).to_pandas_dataframe()"
]
},
{
Expand Down Expand Up @@ -415,7 +412,8 @@
" n_cross_validations = 5,\n",
" debug_log = 'automl.log',\n",
" verbosity = logging.DEBUG,\n",
" data_script = project_folder + \"/get_data.py\",\n",
" training_data = dataset, \n",
" label_column_name=target_column_name,\n",
" path = project_folder)"
]
},
Expand Down Expand Up @@ -480,7 +478,8 @@
" n_cross_validations = 5,\n",
" debug_log = 'automl.log',\n",
" verbosity = logging.DEBUG,\n",
" data_script = project_folder + \"/get_data.py\",\n",
" training_data = dataset, \n",
" label_column_name=target_column_name,\n",
" compute_target = compute_target,\n",
" path = project_folder)\n",
"remote_run = experiment.submit(automl_config, show_output=False)\n",
Expand Down Expand Up @@ -959,7 +958,7 @@
"# prepare the data and select five vehicles\n",
"test_data = test_data.drop(columns=[\"Car_ID\", \"Battery_Age\"])\n",
"test_data.rename(columns={'Twelve_hourly_temperature_forecast_for_next_31_days_reversed': 'Twelve_hourly_temperature_history_for_last_31_days_before_death_last_recording_first'}, inplace=True)\n",
"test_data_json = test_data.iloc[:5, 0:72].to_json(orient=\"split\")\n",
"test_data_json = test_data.iloc[:5, 0:73].to_json(orient=\"split\")\n",
"prediction = webservice.run(input_data = test_data_json)\n",
"print(prediction)"
]
Expand Down

0 comments on commit ea6329a

Please sign in to comment.