Update a2_w3_kmeans_SparkML.ipynb

robsonsamp · Apr 27, 2020 · ca691cf · ca691cf
1 parent f94fb0e
commit ca691cf
Showing 1 changed file with 49 additions and 13 deletions.
diff --git a/coursera_ml/a2_w3_kmeans_SparkML.ipynb b/coursera_ml/a2_w3_kmeans_SparkML.ipynb
@@ -4,7 +4,24 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This notebook is designed to run in a IBM Watson Studio Apache Spark runtime. In case you are running it in an IBM Watson Studio standard runtime or outside Watson Studio, we install Apache Spark in local mode for test purposes only. Please don't use it in production."
+    "This notebook is designed to run in an IBM Watson Studio default runtime (NOT the Watson Studio Apache Spark runtime, because the default runtime with 1 vCPU is free of charge). Therefore, we install Apache Spark in local mode for test purposes only. Please don't use it in production.\n",
+    "\n",
+    "In case you are facing issues, please read the following two documents first:\n",
+    "\n",
+    "https://github.com/IBM/skillsnetwork/wiki/Environment-Setup\n",
+    "\n",
+    "https://github.com/IBM/skillsnetwork/wiki/FAQ\n",
+    "\n",
+    "Then, please feel free to ask:\n",
+    "\n",
+    "https://coursera.org/learn/machine-learning-big-data-apache-spark/discussions/all\n",
+    "\n",
+    "Please make sure to follow the guidelines before asking a question:\n",
+    "\n",
+    "https://github.com/IBM/skillsnetwork/wiki/FAQ#im-feeling-lost-and-confused-please-help-me\n",
+    "\n",
+    "\n",
+    "If you are running outside Watson Studio, this notebook should work as well. In case you are running in an Apache Spark context outside Watson Studio, please remove the Apache Spark setup in the first notebook cells."
    ]
   },
   {
@@ -13,7 +30,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install --upgrade pip"
+    "from IPython.display import Markdown, display\n",
+    "def printmd(string):\n",
+    "    display(Markdown('# <span style=\"color:red\">'+string+'</span>'))\n",
+    "\n",
+    "\n",
+    "if ('sc' in locals() or 'sc' in globals()):\n",
+    "    printmd('<<<<<!!!!! It seems that you are running in a IBM Watson Studio Apache Spark Notebook. Please run it in an IBM Watson Studio Default Runtime (without Apache Spark) !!!!!>>>>>')\n"
    ]
   },
   {
@@ -22,20 +45,33 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if not ('sc' in locals() or 'sc' in globals()):\n",
-    "    print('It seems you are note running in a IBM Watson Studio Apache Spark Notebook. You might be running in a IBM Watson Studio Default Runtime or outside IBM Waston Studio. Therefore installing local Apache Spark environment for you. Please do not use in Production')\n",
-    "    \n",
-    "    from pip import main\n",
-    "    main(['install', 'pyspark==2.4.5'])\n",
-    "    \n",
+    "!pip install pyspark==2.4.5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
     "    from pyspark import SparkContext, SparkConf\n",
     "    from pyspark.sql import SparkSession\n",
+    "except ImportError as e:\n",
+    "    printmd('<<<<<!!!!! Please restart your kernel after installing Apache Spark !!!!!>>>>>')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sc = SparkContext.getOrCreate(SparkConf().setMaster(\"local[*]\"))\n",
     "\n",
-    "    sc = SparkContext.getOrCreate(SparkConf().setMaster(\"local[*]\"))\n",
-    "    \n",
-    "    spark = SparkSession \\\n",
-    "        .builder \\\n",
-    "        .getOrCreate()"
+    "spark = SparkSession \\\n",
+    "    .builder \\\n",
+    "    .getOrCreate()"
    ]
   },
   {