diff --git a/data/hr_db/employees/.part-00000.csv.swp b/data/hr_db/employees/.part-00000.csv.swp new file mode 100644 index 0000000..e158c7b Binary files /dev/null and b/data/hr_db/employees/.part-00000.csv.swp differ diff --git a/itversity-material/01-python-and-sql/01_getting_started_with_postgresql/06_exercise_loading_data_into_postgres_database.ipynb b/itversity-material/01-python-and-sql/01_getting_started_with_postgresql/06_exercise_loading_data_into_postgres_database.ipynb index bc07a11..d9c4ad7 100644 --- a/itversity-material/01-python-and-sql/01_getting_started_with_postgresql/06_exercise_loading_data_into_postgres_database.ipynb +++ b/itversity-material/01-python-and-sql/01_getting_started_with_postgresql/06_exercise_loading_data_into_postgres_database.ipynb @@ -84,7 +84,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.8.12" } }, "nbformat": 4, diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/01_predefined_functions.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/01_predefined_functions.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/01_predefined_functions.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/01_predefined_functions.ipynb diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/02_overview_of_predefined_functions.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/02_overview_of_predefined_functions.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/02_overview_of_predefined_functions.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/02_overview_of_predefined_functions.ipynb diff --git 
a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/03_string_manipulation_functions.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/03_string_manipulation_functions.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/03_string_manipulation_functions.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/03_string_manipulation_functions.ipynb diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/04_date_manipulation_functions.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/04_date_manipulation_functions.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/04_date_manipulation_functions.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/04_date_manipulation_functions.ipynb diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/05_overview_of_numeric_functions.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/05_overview_of_numeric_functions.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/05_overview_of_numeric_functions.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/05_overview_of_numeric_functions.ipynb diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/06_data_type_conversion.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/06_data_type_conversion.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/06_data_type_conversion.ipynb rename to 
itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/06_data_type_conversion.ipynb diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/07_handling_null_values.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/07_handling_null_values.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/07_handling_null_values.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/07_handling_null_values.ipynb diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/08_using_case_and_when.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/08_using_case_and_when.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/08_using_case_and_when.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/08_using_case_and_when.ipynb diff --git a/itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/09_exercises_predefined_functions.ipynb b/itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/09_exercises_predefined_functions.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/05_predefined_functions_using_postgresql/09_exercises_predefined_functions.ipynb rename to itversity-material/01-python-and-sql/06_predefined_functions_using_postgresql/09_exercises_predefined_functions.ipynb diff --git a/itversity-material/01-python-and-sql/33_processing_json_data/04_create_json_string.ipynb b/itversity-material/01-python-and-sql/33_processing_json_data/04_create_json_string.ipynb index 36167dc..7acd495 100644 --- a/itversity-material/01-python-and-sql/33_processing_json_data/04_create_json_string.ipynb +++ 
b/itversity-material/01-python-and-sql/33_processing_json_data/04_create_json_string.ipynb @@ -1,364 +1,364 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create JSON String\n", - "\n", - "Let us understand how to define JSON strings with in programs itself. Later we will see different ways of storing JSON data in files.\n", - "\n", - "We will see following examples of JSON strings.\n", - "* Single JSON document.\n", - "* Multiple JSON documents, with one JSON per line.\n", - "* Multiple JSON documents as an Array under one attribute. Most of the REST APIs which return multiple elements follow this approach." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Single JSON document\n", - "\n", - "Let us go through the details of Single JSON document. \n", - "* A single JSON can be in one line or are part of multiple lines.\n", - "* Here is an example of a single JSON as string in one line." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "person = '{\"id\":1,\"first_name\":\"Frasco\",\"last_name\":\"Necolds\",\"email\":\"fnecolds0@vk.com\",\"gender\":\"Male\",\"ip_address\":\"243.67.63.34\"}'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "type(person)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Here is an example of a single JSON as string that is part of multiple lines." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "person = '''{\n", - " \"id\":1,\n", - " \"first_name\":\"Frasco\",\n", - " \"last_name\":\"Necolds\",\n", - " \"email\":\"fnecolds0@vk.com\",\n", - " \"gender\":\"Male\",\n", - " \"ip_address\":\"243.67.63.34\"\n", - "}'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "type(person)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Multiple JSON Documents - One per line\n", - "\n", - "Let us go through the details of multiple JSON Documents, with one JSON per line.\n", - "* All the lines will be part of one string.\n", - "* Each JSON will be separated by a new line. It is also known as new line character (`\\n`).\n", - "* Each line should contain one valid JSON document.\n", - "* There should not be a comma or any other character at the end of each line.\n", - "* Here is an example of representing multiple json documents as strings where each line will contain one JSON." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "persons = '''{\"id\":1,\"first_name\":\"Frasco\",\"last_name\":\"Necolds\",\"email\":\"fnecolds0@vk.com\",\"gender\":\"Male\",\"ip_address\":\"243.67.63.34\"}\n", - "{\"id\":2,\"first_name\":\"Dulce\",\"last_name\":\"Santos\",\"email\":\"dsantos1@mashable.com\",\"gender\":\"Female\",\"ip_address\":\"60.30.246.227\"}\n", - "{\"id\":3,\"first_name\":\"Prissie\",\"last_name\":\"Tebbett\",\"email\":\"ptebbett2@infoseek.co.jp\",\"gender\":\"Genderfluid\",\"ip_address\":\"22.21.162.56\"}\n", - "{\"id\":4,\"first_name\":\"Schuyler\",\"last_name\":\"Coppledike\",\"email\":\"scoppledike3@gnu.org\",\"gender\":\"Agender\",\"ip_address\":\"120.35.186.161\"}\n", - "{\"id\":5,\"first_name\":\"Leopold\",\"last_name\":\"Jarred\",\"email\":\"ljarred4@wp.com\",\"gender\":\"Agender\",\"ip_address\":\"30.119.34.4\"}\n", - "{\"id\":6,\"first_name\":\"Joanna\",\"last_name\":\"Teager\",\"email\":\"jteager5@apache.org\",\"gender\":\"Bigender\",\"ip_address\":\"245.221.176.34\"}\n", - "{\"id\":7,\"first_name\":\"Lion\",\"last_name\":\"Beere\",\"email\":\"lbeere6@bloomberg.com\",\"gender\":\"Polygender\",\"ip_address\":\"105.54.139.46\"}\n", - "{\"id\":8,\"first_name\":\"Marabel\",\"last_name\":\"Wornum\",\"email\":\"mwornum7@posterous.com\",\"gender\":\"Polygender\",\"ip_address\":\"247.229.14.25\"}\n", - "{\"id\":9,\"first_name\":\"Helenka\",\"last_name\":\"Mullender\",\"email\":\"hmullender8@cloudflare.com\",\"gender\":\"Non-binary\",\"ip_address\":\"133.216.118.88\"}\n", - "{\"id\":10,\"first_name\":\"Christine\",\"last_name\":\"Swane\",\"email\":\"cswane9@shop-pro.jp\",\"gender\":\"Polygender\",\"ip_address\":\"86.16.210.164\"}'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "type(persons)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Multiple JSON Documents - Array\n", 
- "\n", - "Let us go through the details of multiple JSON Documents as an array.\n", - "\n", - "* The entire array of JSON documents will be represented as one JSON and one string.\n", - "* The JSON document with all the elements can be part of multiple lines.\n", - "* The JSON can be represented as array with out any attribute or it can be represented as JSON with one attribute. The value of this attribute will be JSON Array.\n", - "* Here is an example of representing multiple JSON documents as JSON Array." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "persons = '''[{\"id\":1,\"first_name\":\"Frasco\",\"last_name\":\"Necolds\",\"email\":\"fnecolds0@vk.com\",\"gender\":\"Male\",\"ip_address\":\"243.67.63.34\"},\n", - "{\"id\":2,\"first_name\":\"Dulce\",\"last_name\":\"Santos\",\"email\":\"dsantos1@mashable.com\",\"gender\":\"Female\",\"ip_address\":\"60.30.246.227\"},\n", - "{\"id\":3,\"first_name\":\"Prissie\",\"last_name\":\"Tebbett\",\"email\":\"ptebbett2@infoseek.co.jp\",\"gender\":\"Genderfluid\",\"ip_address\":\"22.21.162.56\"},\n", - "{\"id\":4,\"first_name\":\"Schuyler\",\"last_name\":\"Coppledike\",\"email\":\"scoppledike3@gnu.org\",\"gender\":\"Agender\",\"ip_address\":\"120.35.186.161\"},\n", - "{\"id\":5,\"first_name\":\"Leopold\",\"last_name\":\"Jarred\",\"email\":\"ljarred4@wp.com\",\"gender\":\"Agender\",\"ip_address\":\"30.119.34.4\"},\n", - "{\"id\":6,\"first_name\":\"Joanna\",\"last_name\":\"Teager\",\"email\":\"jteager5@apache.org\",\"gender\":\"Bigender\",\"ip_address\":\"245.221.176.34\"},\n", - "{\"id\":7,\"first_name\":\"Lion\",\"last_name\":\"Beere\",\"email\":\"lbeere6@bloomberg.com\",\"gender\":\"Polygender\",\"ip_address\":\"105.54.139.46\"},\n", - "{\"id\":8,\"first_name\":\"Marabel\",\"last_name\":\"Wornum\",\"email\":\"mwornum7@posterous.com\",\"gender\":\"Polygender\",\"ip_address\":\"247.229.14.25\"},\n", - 
"{\"id\":9,\"first_name\":\"Helenka\",\"last_name\":\"Mullender\",\"email\":\"hmullender8@cloudflare.com\",\"gender\":\"Non-binary\",\"ip_address\":\"133.216.118.88\"},\n", - "{\"id\":10,\"first_name\":\"Christine\",\"last_name\":\"Swane\",\"email\":\"cswane9@shop-pro.jp\",\"gender\":\"Polygender\",\"ip_address\":\"86.16.210.164\"}]'''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "persons = '''[\n", - " {\n", - " \"id\": 1,\n", - " \"first_name\": \"Frasco\",\n", - " \"last_name\": \"Necolds\",\n", - " \"email\": \"fnecolds0@vk.com\",\n", - " \"gender\": \"Male\",\n", - " \"ip_address\": \"243.67.63.34\"\n", - " },\n", - " {\n", - " \"id\": 2,\n", - " \"first_name\": \"Dulce\",\n", - " \"last_name\": \"Santos\",\n", - " \"email\": \"dsantos1@mashable.com\",\n", - " \"gender\": \"Female\",\n", - " \"ip_address\": \"60.30.246.227\"\n", - " },\n", - " {\n", - " \"id\": 3,\n", - " \"first_name\": \"Prissie\",\n", - " \"last_name\": \"Tebbett\",\n", - " \"email\": \"ptebbett2@infoseek.co.jp\",\n", - " \"gender\": \"Genderfluid\",\n", - " \"ip_address\": \"22.21.162.56\"\n", - " },\n", - " {\n", - " \"id\": 4,\n", - " \"first_name\": \"Schuyler\",\n", - " \"last_name\": \"Coppledike\",\n", - " \"email\": \"scoppledike3@gnu.org\",\n", - " \"gender\": \"Agender\",\n", - " \"ip_address\": \"120.35.186.161\"\n", - " },\n", - " {\n", - " \"id\": 5,\n", - " \"first_name\": \"Leopold\",\n", - " \"last_name\": \"Jarred\",\n", - " \"email\": \"ljarred4@wp.com\",\n", - " \"gender\": \"Agender\",\n", - " \"ip_address\": \"30.119.34.4\"\n", - " },\n", - " {\n", - " \"id\": 6,\n", - " \"first_name\": \"Joanna\",\n", - " \"last_name\": \"Teager\",\n", - " \"email\": \"jteager5@apache.org\",\n", - " \"gender\": \"Bigender\",\n", - " \"ip_address\": \"245.221.176.34\"\n", - " },\n", - " {\n", - " \"id\": 7,\n", - " \"first_name\": \"Lion\",\n", - " \"last_name\": \"Beere\",\n", - " \"email\": 
\"lbeere6@bloomberg.com\",\n", - " \"gender\": \"Polygender\",\n", - " \"ip_address\": \"105.54.139.46\"\n", - " },\n", - " {\n", - " \"id\": 8,\n", - " \"first_name\": \"Marabel\",\n", - " \"last_name\": \"Wornum\",\n", - " \"email\": \"mwornum7@posterous.com\",\n", - " \"gender\": \"Polygender\",\n", - " \"ip_address\": \"247.229.14.25\"\n", - " },\n", - " {\n", - " \"id\": 9,\n", - " \"first_name\": \"Helenka\",\n", - " \"last_name\": \"Mullender\",\n", - " \"email\": \"hmullender8@cloudflare.com\",\n", - " \"gender\": \"Non-binary\",\n", - " \"ip_address\": \"133.216.118.88\"\n", - " },\n", - " {\n", - " \"id\": 10,\n", - " \"first_name\": \"Christine\",\n", - " \"last_name\": \"Swane\",\n", - " \"email\": \"cswane9@shop-pro.jp\",\n", - " \"gender\": \"Polygender\",\n", - " \"ip_address\": \"86.16.210.164\"\n", - " }\n", - "]'''" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Multiple JSON Documents - One Attribute\n", - "\n", - "Here is an example of representing multiple JSON documents as part of single document with single attribute where value is of type JSON Array." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "persons = '''{\n", - " \"results\": [\n", - " {\n", - " \"id\": 1,\n", - " \"first_name\": \"Frasco\",\n", - " \"last_name\": \"Necolds\",\n", - " \"email\": \"fnecolds0@vk.com\",\n", - " \"gender\": \"Male\",\n", - " \"ip_address\": \"243.67.63.34\"\n", - " },\n", - " {\n", - " \"id\": 2,\n", - " \"first_name\": \"Dulce\",\n", - " \"last_name\": \"Santos\",\n", - " \"email\": \"dsantos1@mashable.com\",\n", - " \"gender\": \"Female\",\n", - " \"ip_address\": \"60.30.246.227\"\n", - " },\n", - " {\n", - " \"id\": 3,\n", - " \"first_name\": \"Prissie\",\n", - " \"last_name\": \"Tebbett\",\n", - " \"email\": \"ptebbett2@infoseek.co.jp\",\n", - " \"gender\": \"Genderfluid\",\n", - " \"ip_address\": \"22.21.162.56\"\n", - " },\n", - " {\n", - " \"id\": 4,\n", - " \"first_name\": \"Schuyler\",\n", - " \"last_name\": \"Coppledike\",\n", - " \"email\": \"scoppledike3@gnu.org\",\n", - " \"gender\": \"Agender\",\n", - " \"ip_address\": \"120.35.186.161\"\n", - " },\n", - " {\n", - " \"id\": 5,\n", - " \"first_name\": \"Leopold\",\n", - " \"last_name\": \"Jarred\",\n", - " \"email\": \"ljarred4@wp.com\",\n", - " \"gender\": \"Agender\",\n", - " \"ip_address\": \"30.119.34.4\"\n", - " },\n", - " {\n", - " \"id\": 6,\n", - " \"first_name\": \"Joanna\",\n", - " \"last_name\": \"Teager\",\n", - " \"email\": \"jteager5@apache.org\",\n", - " \"gender\": \"Bigender\",\n", - " \"ip_address\": \"245.221.176.34\"\n", - " },\n", - " {\n", - " \"id\": 7,\n", - " \"first_name\": \"Lion\",\n", - " \"last_name\": \"Beere\",\n", - " \"email\": \"lbeere6@bloomberg.com\",\n", - " \"gender\": \"Polygender\",\n", - " \"ip_address\": \"105.54.139.46\"\n", - " },\n", - " {\n", - " \"id\": 8,\n", - " \"first_name\": \"Marabel\",\n", - " \"last_name\": \"Wornum\",\n", - " \"email\": \"mwornum7@posterous.com\",\n", - " \"gender\": \"Polygender\",\n", - " \"ip_address\": 
\"247.229.14.25\"\n", - " },\n", - " {\n", - " \"id\": 9,\n", - " \"first_name\": \"Helenka\",\n", - " \"last_name\": \"Mullender\",\n", - " \"email\": \"hmullender8@cloudflare.com\",\n", - " \"gender\": \"Non-binary\",\n", - " \"ip_address\": \"133.216.118.88\"\n", - " },\n", - " {\n", - " \"id\": 10,\n", - " \"first_name\": \"Christine\",\n", - " \"last_name\": \"Swane\",\n", - " \"email\": \"cswane9@shop-pro.jp\",\n", - " \"gender\": \"Polygender\",\n", - " \"ip_address\": \"86.16.210.164\"\n", - " }\n", - " ]\n", - "}'''" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.12" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create JSON String\n", + "\n", + "Let us understand how to define JSON strings with in programs itself. Later we will see different ways of storing JSON data in files.\n", + "\n", + "We will see following examples of JSON strings.\n", + "* Single JSON document.\n", + "* Multiple JSON documents, with one JSON per line.\n", + "* Multiple JSON documents as an Array under one attribute. Most of the REST APIs which return multiple elements follow this approach." + ] }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Single JSON document\n", + "\n", + "Let us go through the details of Single JSON document. \n", + "* A single JSON can be in one line or are part of multiple lines.\n", + "* Here is an example of a single JSON as string in one line." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "person = '{\"id\":1,\"first_name\":\"Frasco\",\"last_name\":\"Necolds\",\"email\":\"fnecolds0@vk.com\",\"gender\":\"Male\",\"ip_address\":\"243.67.63.34\"}'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "type(person)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Here is an example of a single JSON as string that is part of multiple lines." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "person = '''{\n", + " \"id\":1,\n", + " \"first_name\":\"Frasco\",\n", + " \"last_name\":\"Necolds\",\n", + " \"email\":\"fnecolds0@vk.com\",\n", + " \"gender\":\"Male\",\n", + " \"ip_address\":\"243.67.63.34\"\n", + "}'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "type(person)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multiple JSON Documents - One per line\n", + "\n", + "Let us go through the details of multiple JSON Documents, with one JSON per line.\n", + "* All the lines will be part of one string.\n", + "* Each JSON will be separated by a new line. It is also known as new line character (`\\n`).\n", + "* Each line should contain one valid JSON document.\n", + "* There should not be a comma or any other character at the end of each line.\n", + "* Here is an example of representing multiple json documents as strings where each line will contain one JSON." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "persons = '''{\"id\":1,\"first_name\":\"Frasco\",\"last_name\":\"Necolds\",\"email\":\"fnecolds0@vk.com\",\"gender\":\"Male\",\"ip_address\":\"243.67.63.34\"}\n", + "{\"id\":2,\"first_name\":\"Dulce\",\"last_name\":\"Santos\",\"email\":\"dsantos1@mashable.com\",\"gender\":\"Female\",\"ip_address\":\"60.30.246.227\"}\n", + "{\"id\":3,\"first_name\":\"Prissie\",\"last_name\":\"Tebbett\",\"email\":\"ptebbett2@infoseek.co.jp\",\"gender\":\"Genderfluid\",\"ip_address\":\"22.21.162.56\"}\n", + "{\"id\":4,\"first_name\":\"Schuyler\",\"last_name\":\"Coppledike\",\"email\":\"scoppledike3@gnu.org\",\"gender\":\"Agender\",\"ip_address\":\"120.35.186.161\"}\n", + "{\"id\":5,\"first_name\":\"Leopold\",\"last_name\":\"Jarred\",\"email\":\"ljarred4@wp.com\",\"gender\":\"Agender\",\"ip_address\":\"30.119.34.4\"}\n", + "{\"id\":6,\"first_name\":\"Joanna\",\"last_name\":\"Teager\",\"email\":\"jteager5@apache.org\",\"gender\":\"Bigender\",\"ip_address\":\"245.221.176.34\"}\n", + "{\"id\":7,\"first_name\":\"Lion\",\"last_name\":\"Beere\",\"email\":\"lbeere6@bloomberg.com\",\"gender\":\"Polygender\",\"ip_address\":\"105.54.139.46\"}\n", + "{\"id\":8,\"first_name\":\"Marabel\",\"last_name\":\"Wornum\",\"email\":\"mwornum7@posterous.com\",\"gender\":\"Polygender\",\"ip_address\":\"247.229.14.25\"}\n", + "{\"id\":9,\"first_name\":\"Helenka\",\"last_name\":\"Mullender\",\"email\":\"hmullender8@cloudflare.com\",\"gender\":\"Non-binary\",\"ip_address\":\"133.216.118.88\"}\n", + "{\"id\":10,\"first_name\":\"Christine\",\"last_name\":\"Swane\",\"email\":\"cswane9@shop-pro.jp\",\"gender\":\"Polygender\",\"ip_address\":\"86.16.210.164\"}'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "type(persons)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multiple JSON Documents - Array\n", 
+ "\n", + "Let us go through the details of multiple JSON Documents as an array.\n", + "\n", + "* The entire array of JSON documents will be represented as one JSON and one string.\n", + "* The JSON document with all the elements can be part of multiple lines.\n", + "* The JSON can be represented as array with out any attribute or it can be represented as JSON with one attribute. The value of this attribute will be JSON Array.\n", + "* Here is an example of representing multiple JSON documents as JSON Array." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "persons = '''[{\"id\":1,\"first_name\":\"Frasco\",\"last_name\":\"Necolds\",\"email\":\"fnecolds0@vk.com\",\"gender\":\"Male\",\"ip_address\":\"243.67.63.34\"},\n", + "{\"id\":2,\"first_name\":\"Dulce\",\"last_name\":\"Santos\",\"email\":\"dsantos1@mashable.com\",\"gender\":\"Female\",\"ip_address\":\"60.30.246.227\"},\n", + "{\"id\":3,\"first_name\":\"Prissie\",\"last_name\":\"Tebbett\",\"email\":\"ptebbett2@infoseek.co.jp\",\"gender\":\"Genderfluid\",\"ip_address\":\"22.21.162.56\"},\n", + "{\"id\":4,\"first_name\":\"Schuyler\",\"last_name\":\"Coppledike\",\"email\":\"scoppledike3@gnu.org\",\"gender\":\"Agender\",\"ip_address\":\"120.35.186.161\"},\n", + "{\"id\":5,\"first_name\":\"Leopold\",\"last_name\":\"Jarred\",\"email\":\"ljarred4@wp.com\",\"gender\":\"Agender\",\"ip_address\":\"30.119.34.4\"},\n", + "{\"id\":6,\"first_name\":\"Joanna\",\"last_name\":\"Teager\",\"email\":\"jteager5@apache.org\",\"gender\":\"Bigender\",\"ip_address\":\"245.221.176.34\"},\n", + "{\"id\":7,\"first_name\":\"Lion\",\"last_name\":\"Beere\",\"email\":\"lbeere6@bloomberg.com\",\"gender\":\"Polygender\",\"ip_address\":\"105.54.139.46\"},\n", + "{\"id\":8,\"first_name\":\"Marabel\",\"last_name\":\"Wornum\",\"email\":\"mwornum7@posterous.com\",\"gender\":\"Polygender\",\"ip_address\":\"247.229.14.25\"},\n", + 
"{\"id\":9,\"first_name\":\"Helenka\",\"last_name\":\"Mullender\",\"email\":\"hmullender8@cloudflare.com\",\"gender\":\"Non-binary\",\"ip_address\":\"133.216.118.88\"},\n", + "{\"id\":10,\"first_name\":\"Christine\",\"last_name\":\"Swane\",\"email\":\"cswane9@shop-pro.jp\",\"gender\":\"Polygender\",\"ip_address\":\"86.16.210.164\"}]'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "persons = '''[\n", + " {\n", + " \"id\": 1,\n", + " \"first_name\": \"Frasco\",\n", + " \"last_name\": \"Necolds\",\n", + " \"email\": \"fnecolds0@vk.com\",\n", + " \"gender\": \"Male\",\n", + " \"ip_address\": \"243.67.63.34\"\n", + " },\n", + " {\n", + " \"id\": 2,\n", + " \"first_name\": \"Dulce\",\n", + " \"last_name\": \"Santos\",\n", + " \"email\": \"dsantos1@mashable.com\",\n", + " \"gender\": \"Female\",\n", + " \"ip_address\": \"60.30.246.227\"\n", + " },\n", + " {\n", + " \"id\": 3,\n", + " \"first_name\": \"Prissie\",\n", + " \"last_name\": \"Tebbett\",\n", + " \"email\": \"ptebbett2@infoseek.co.jp\",\n", + " \"gender\": \"Genderfluid\",\n", + " \"ip_address\": \"22.21.162.56\"\n", + " },\n", + " {\n", + " \"id\": 4,\n", + " \"first_name\": \"Schuyler\",\n", + " \"last_name\": \"Coppledike\",\n", + " \"email\": \"scoppledike3@gnu.org\",\n", + " \"gender\": \"Agender\",\n", + " \"ip_address\": \"120.35.186.161\"\n", + " },\n", + " {\n", + " \"id\": 5,\n", + " \"first_name\": \"Leopold\",\n", + " \"last_name\": \"Jarred\",\n", + " \"email\": \"ljarred4@wp.com\",\n", + " \"gender\": \"Agender\",\n", + " \"ip_address\": \"30.119.34.4\"\n", + " },\n", + " {\n", + " \"id\": 6,\n", + " \"first_name\": \"Joanna\",\n", + " \"last_name\": \"Teager\",\n", + " \"email\": \"jteager5@apache.org\",\n", + " \"gender\": \"Bigender\",\n", + " \"ip_address\": \"245.221.176.34\"\n", + " },\n", + " {\n", + " \"id\": 7,\n", + " \"first_name\": \"Lion\",\n", + " \"last_name\": \"Beere\",\n", + " \"email\": 
\"lbeere6@bloomberg.com\",\n", + " \"gender\": \"Polygender\",\n", + " \"ip_address\": \"105.54.139.46\"\n", + " },\n", + " {\n", + " \"id\": 8,\n", + " \"first_name\": \"Marabel\",\n", + " \"last_name\": \"Wornum\",\n", + " \"email\": \"mwornum7@posterous.com\",\n", + " \"gender\": \"Polygender\",\n", + " \"ip_address\": \"247.229.14.25\"\n", + " },\n", + " {\n", + " \"id\": 9,\n", + " \"first_name\": \"Helenka\",\n", + " \"last_name\": \"Mullender\",\n", + " \"email\": \"hmullender8@cloudflare.com\",\n", + " \"gender\": \"Non-binary\",\n", + " \"ip_address\": \"133.216.118.88\"\n", + " },\n", + " {\n", + " \"id\": 10,\n", + " \"first_name\": \"Christine\",\n", + " \"last_name\": \"Swane\",\n", + " \"email\": \"cswane9@shop-pro.jp\",\n", + " \"gender\": \"Polygender\",\n", + " \"ip_address\": \"86.16.210.164\"\n", + " }\n", + "]'''" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multiple JSON Documents - One Attribute\n", + "\n", + "Here is an example of representing multiple JSON documents as part of single document with single attribute where value is of type JSON Array." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "persons = '''{\n", + " \"results\": [\n", + " {\n", + " \"id\": 1,\n", + " \"first_name\": \"Frasco\",\n", + " \"last_name\": \"Necolds\",\n", + " \"email\": \"fnecolds0@vk.com\",\n", + " \"gender\": \"Male\",\n", + " \"ip_address\": \"243.67.63.34\"\n", + " },\n", + " {\n", + " \"id\": 2,\n", + " \"first_name\": \"Dulce\",\n", + " \"last_name\": \"Santos\",\n", + " \"email\": \"dsantos1@mashable.com\",\n", + " \"gender\": \"Female\",\n", + " \"ip_address\": \"60.30.246.227\"\n", + " },\n", + " {\n", + " \"id\": 3,\n", + " \"first_name\": \"Prissie\",\n", + " \"last_name\": \"Tebbett\",\n", + " \"email\": \"ptebbett2@infoseek.co.jp\",\n", + " \"gender\": \"Genderfluid\",\n", + " \"ip_address\": \"22.21.162.56\"\n", + " },\n", + " {\n", + " \"id\": 4,\n", + " \"first_name\": \"Schuyler\",\n", + " \"last_name\": \"Coppledike\",\n", + " \"email\": \"scoppledike3@gnu.org\",\n", + " \"gender\": \"Agender\",\n", + " \"ip_address\": \"120.35.186.161\"\n", + " },\n", + " {\n", + " \"id\": 5,\n", + " \"first_name\": \"Leopold\",\n", + " \"last_name\": \"Jarred\",\n", + " \"email\": \"ljarred4@wp.com\",\n", + " \"gender\": \"Agender\",\n", + " \"ip_address\": \"30.119.34.4\"\n", + " },\n", + " {\n", + " \"id\": 6,\n", + " \"first_name\": \"Joanna\",\n", + " \"last_name\": \"Teager\",\n", + " \"email\": \"jteager5@apache.org\",\n", + " \"gender\": \"Bigender\",\n", + " \"ip_address\": \"245.221.176.34\"\n", + " },\n", + " {\n", + " \"id\": 7,\n", + " \"first_name\": \"Lion\",\n", + " \"last_name\": \"Beere\",\n", + " \"email\": \"lbeere6@bloomberg.com\",\n", + " \"gender\": \"Polygender\",\n", + " \"ip_address\": \"105.54.139.46\"\n", + " },\n", + " {\n", + " \"id\": 8,\n", + " \"first_name\": \"Marabel\",\n", + " \"last_name\": \"Wornum\",\n", + " \"email\": \"mwornum7@posterous.com\",\n", + " \"gender\": \"Polygender\",\n", + " \"ip_address\": 
\"247.229.14.25\"\n", + " },\n", + " {\n", + " \"id\": 9,\n", + " \"first_name\": \"Helenka\",\n", + " \"last_name\": \"Mullender\",\n", + " \"email\": \"hmullender8@cloudflare.com\",\n", + " \"gender\": \"Non-binary\",\n", + " \"ip_address\": \"133.216.118.88\"\n", + " },\n", + " {\n", + " \"id\": 10,\n", + " \"first_name\": \"Christine\",\n", + " \"last_name\": \"Swane\",\n", + " \"email\": \"cswane9@shop-pro.jp\",\n", + " \"gender\": \"Polygender\",\n", + " \"ip_address\": \"86.16.210.164\"\n", + " }\n", + " ]\n", + "}'''" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/01_processing_rest_payloads.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/01_processing_rest_payloads.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/01_processing_rest_payloads.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payload/01_processing_rest_payloads.ipynb diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payload/02_overview_of_rest_apis.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/02_overview_of_rest_apis.ipynb new file mode 100644 index 0000000..7f54260 --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payload/02_overview_of_rest_apis.ipynb @@ -0,0 +1,67 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a94e5edb", + "metadata": {}, + "source": [ + "## Overview of REST APIs\n", + "\n", + "Let us get an overview of REST APIs. 
REST based APIs are extensively used for building large scale applications.\n", + "* REST stands for **Representational State Transfer**.\n", + "* It is extensively used to build modern applications as it simplifies the process of building complex and large scale applications.\n", + "* Here are the use cases where REST APIs are extensively used.\n", + " * Mobile applications\n", + " * Web applications\n", + " * Data integration between multiple applications\n", + "* For mobile and web applications, we typically develop frontend and backend separately and integrate them over REST.\n", + "* Here are the commonly used REST based requests.\n", + " * GET - Get the data from the backend\n", + " * POST - Insert or update the data in the backend\n", + " * PUT - Update the data in the backend\n", + " * DELETE - Delete the data in the backend\n", + "* As part of data engineering projects, we typically use REST to get the data from external applications.\n", + "* If required, we can take care of authentication and authorization by leveraging **Headers** while placing the requests.\n", + "* Let us see an example using GitHub REST APIs. As a developer you might want to do the following:\n", + " * Get your repositories or public repositories. You can place **GET** request from external applications.\n", + " * Create a new repository under your GitHub Account. You can place **POST** request from external application.\n", + " * Delete an existing repository under your GitHub Account. You can place **DELETE** request from external application.\n", + "* Here are different ways you can validate REST APIs without getting into programming.\n", + " * Using browser (it might not work for complex requests)\n", + " * Using `curl` command.\n", + " * Using a client based utility called as **postman**."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40c1f1f1", + "metadata": {}, + "outputs": [], + "source": [ + "!curl https://api.github.com" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/03_using_curl_command.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/03_using_curl_command.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/03_using_curl_command.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payload/03_using_curl_command.ipynb diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payload/04_overview_of_postman.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/04_overview_of_postman.ipynb new file mode 100644 index 0000000..f464469 --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payload/04_overview_of_postman.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7d76eed4", + "metadata": {}, + "source": [ + "## Overview of Postman\n", + "\n", + "**Postman** is the most popular way of exploring REST APIs without getting into programming nuances.
Let us get an overview of **postman**.\n", + "* You can use postman via browser, however we recommend downloading and installing the client utility.\n", + "* Here are some of the key features related to Postman.\n", + " * It is a freemium product, and provides a lot of features at no cost.\n", + " * Grouping related APIs to collections.\n", + " * Passing values dynamically using variables. Variables can be global or collection level.\n", + " * Ability to see the direct curl command as well as examples using standard programming languages.\n", + " * Ability to automate the execution.\n", + "* Here is an example of making `GET` call via `requests` module using Python as programming language." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f9228be", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "url = \"https://api.github.com/users/dgadiraju/repos\"\n", + "\n", + "payload={}\n", + "headers = {}\n", + "\n", + "response = requests.request(\"GET\", url, headers=headers, data=payload)\n", + "\n", + "print(response.text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/05_getting_started_with_requests.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/05_getting_started_with_requests.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/05_getting_started_with_requests.ipynb rename to
itversity-material/01-python-and-sql/35_processing_rest_payload/05_getting_started_with_requests.ipynb diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payload/06_convert_rest_payload_to_python_objects.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/06_convert_rest_payload_to_python_objects.ipynb new file mode 100644 index 0000000..e017f9c --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payload/06_convert_rest_payload_to_python_objects.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3d620962", + "metadata": {}, + "source": [ + "## Convert REST Payload to Python Objects\n", + "\n", + "Let us understand how we can convert REST `GET` Payload to appropriate Python Objects.\n", + "* We have already seen that the `payload` is of type `str`. Also, it contains valid `json`. It can be a simple JSON Document or JSON Array.\n", + "\n", + "Here are the steps involved in converting REST Payload to Python Objects.\n", + "* Invoke REST API by passing valid URI. It will create response object.\n", + "* Using response object, we can invoke `json()`. It will return Python dict or Python list. The list typically contain dicts.\n", + "* Once it is converted to Python dict or list we can further process data using appropriate Python APIs as per our requirements." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25d5dc09", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "bf8853b9", + "metadata": {}, + "source": [ + "* Example for simple JSON to `dict`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae053a12", + "metadata": {}, + "outputs": [], + "source": [ + "!curl https://api.github.com" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a20a697", + "metadata": {}, + "outputs": [], + "source": [ + "payload = requests.get('https://api.github.com').content.decode('utf-8')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "770ef3cc", + "metadata": {}, + "outputs": [], + "source": [ + "payload # A string with valid json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68ef6c2d", + "metadata": {}, + "outputs": [], + "source": [ + "payload_dict = requests.get('https://api.github.com').json() # returns Python dict directly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "feb32d50", + "metadata": {}, + "outputs": [], + "source": [ + "type(payload_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b64df6d8", + "metadata": {}, + "outputs": [], + "source": [ + "payload_dict # It is of type dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a83aa377", + "metadata": {}, + "outputs": [], + "source": [ + "payload_dict.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4e82a79", + "metadata": {}, + "outputs": [], + "source": [ + "payload_dict.values()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44f920da", + "metadata": {}, + "outputs": [], + "source": [ + "payload_dict['current_user_url']" + ] + }, + { + "cell_type": "markdown", + "id": "a8025353", + "metadata": {}, + "source": [ + "* Example for JSON Array to `list`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04a7b1e0", + "metadata": {}, + "outputs": [], + "source": [ + "!curl https://api.github.com/users/dgadiraju/repos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b43a124", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6f0e3e0", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"https://api.github.com/users/dgadiraju/repos\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cedb27c", + "metadata": {}, + "outputs": [], + "source": [ + "type(url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d074b50", + "metadata": {}, + "outputs": [], + "source": [ + "response = requests.get(url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0dc44eca", + "metadata": {}, + "outputs": [], + "source": [ + "type(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7ef4492", + "metadata": {}, + "outputs": [], + "source": [ + "response.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06383f47", + "metadata": {}, + "outputs": [], + "source": [ + "repos = response.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68de2aa0", + "metadata": {}, + "outputs": [], + "source": [ + "type(repos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b3c7a8b", + "metadata": {}, + "outputs": [], + "source": [ + "len(repos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2aed3841", + "metadata": {}, + "outputs": [], + "source": [ + "repos[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "125cdb97", + "metadata": {}, + "outputs": [], + "source": [ + "# Get names of the repos\n", + "list(map(lambda repo: repo['name'], repos))" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "ea81ea7f", + "metadata": {}, + "outputs": [], + "source": [ + "list(map(lambda repo: (repo['name'], repo['url']), repos))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/07_process_rest_payload_using_collection_operations.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/07_process_rest_payload_using_collection_operations.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/07_process_rest_payload_using_collection_operations.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payload/07_process_rest_payload_using_collection_operations.ipynb diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/08_process_rest_payload_using_pandas.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/08_process_rest_payload_using_pandas.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/08_process_rest_payload_using_pandas.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payload/08_process_rest_payload_using_pandas.ipynb diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/09_exercise_rest_payload_to_db_table.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payload/09_exercise_rest_payload_to_db_table.ipynb similarity index 97% rename from 
itversity-material/01-python-and-sql/34_processing_rest_payloads/09_exercise_rest_payload_to_db_table.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payload/09_exercise_rest_payload_to_db_table.ipynb index 103d164..e894e18 100644 --- a/itversity-material/01-python-and-sql/34_processing_rest_payloads/09_exercise_rest_payload_to_db_table.ipynb +++ b/itversity-material/01-python-and-sql/35_processing_rest_payload/09_exercise_rest_payload_to_db_table.ipynb @@ -3,7 +3,10 @@ { "cell_type": "markdown", "id": "49ef1bd2", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ "## REST Payload to DB Table\n", "\n", diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payloads/01_processing_rest_payloads.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/01_processing_rest_payloads.ipynb new file mode 100644 index 0000000..d5a6190 --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payloads/01_processing_rest_payloads.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "940360a2-b36c-4d23-a7da-c050741700f6", + "metadata": {}, + "source": [ + "## Processing REST Payloads\n", + "\n", + "As part of this section or module, we will go through the details related to Processing REST Payloads using Python.\n", + "\n", + "* Overview of REST APIs\n", + "* Using curl command\n", + "* Overview of Postman\n", + "* Getting Started with requests\n", + "* Convert REST Payload to Python Objects\n", + "* Process REST Payload using Collection Operations\n", + "* Process REST Payload using Pandas\n", + "* Exercise - REST Payload to DB Table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09e1a1ff-09a1-43f0-8186-f4cd0a9eca1d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/02_overview_of_rest_apis.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/02_overview_of_rest_apis.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/02_overview_of_rest_apis.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payloads/02_overview_of_rest_apis.ipynb diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payloads/03_using_curl_command.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/03_using_curl_command.ipynb new file mode 100644 index 0000000..1a8304a --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payloads/03_using_curl_command.ipynb @@ -0,0 +1,69 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7313f4fc", + "metadata": {}, + "source": [ + "## Using curl command\n", + "\n", + "Let us understand how we can use curl command to place REST requests to get the response from the external applications.\n", + "* `curl` is available in almost all operating systems. It is a command which can be leveraged to talk to external applications via REST APIs.\n", + "* We can make all standard REST API calls using `curl`.\n", + "* At times we have to get authenticated or authorized to use REST API calls and for that we typically have to pass tokens as part of headers.\n", + "* We can use `curl` when authentication or authorization is required. We need to understand how to pass tokens or credentials while using `curl` command.\n", + "* We can pass any request type (such as GET, POST, etc) to `curl`.
The default is `GET`.\n", + "* Here is an example of simple `GET` using `curl`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6dd88b2", + "metadata": {}, + "outputs": [], + "source": [ + "!curl https://api.github.com" + ] + }, + { + "cell_type": "markdown", + "id": "8c4c44b0", + "metadata": {}, + "source": [ + "* We have to use `curl -X` to pass request type. Here is an example where we are explicitly passing `GET`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e795498", + "metadata": {}, + "outputs": [], + "source": [ + "!curl -X GET https://api.github.com" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/04_overview_of_postman.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/04_overview_of_postman.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/04_overview_of_postman.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payloads/04_overview_of_postman.ipynb diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payloads/05_getting_started_with_requests.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/05_getting_started_with_requests.ipynb new file mode 100644 index 0000000..ca63f32 --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payloads/05_getting_started_with_requests.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "35e6c185", + 
"metadata": {}, + "source": [ + "## Getting Started with requests\n", + "\n", + "Let us get started with requests module. You might have to install `requests` using `pip`.\n", + "* `requests` is part of many other 3rd party libraries. You can check whether it is already installed or not." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2704820a", + "metadata": {}, + "outputs": [], + "source": [ + "!pip list|grep requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1f3642c", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install requests" + ] + }, + { + "cell_type": "markdown", + "id": "e7b57748", + "metadata": {}, + "source": [ + "* You need to import before you start using it as part of your Python program." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83edb882", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "2762a87f", + "metadata": {}, + "source": [ + "* Here are some of the important functions from `requests` module we use on regular basis. The below functions are to support common REST API calls." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7ad6570", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39557b2e", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"https://api.github.com/users/dgadiraju/repos\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac708cf7", + "metadata": {}, + "outputs": [], + "source": [ + "type(url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2234e45", + "metadata": {}, + "outputs": [], + "source": [ + "payload = {}\n", + "headers = {}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d55e91ac", + "metadata": {}, + "outputs": [], + "source": [ + "requests.request?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc0c35a2", + "metadata": {}, + "outputs": [], + "source": [ + "response = requests.request(\"GET\", url, headers=headers, data=payload)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dbedfb3", + "metadata": {}, + "outputs": [], + "source": [ + "type(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a02db6c", + "metadata": {}, + "outputs": [], + "source": [ + "response.text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57d81e08", + "metadata": {}, + "outputs": [], + "source": [ + "response.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2715c86e", + "metadata": {}, + "outputs": [], + "source": [ + "response.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9506b112", + "metadata": {}, + "outputs": [], + "source": [ + "response.content.decode('utf-8')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33a0ba4c", + "metadata": {}, + "outputs": [], + "source": [ + "requests.get?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b32670b9", + "metadata": {}, + "outputs": [], + "source": [ + "requests.post?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "817941b5", + "metadata": {}, + "outputs": [], + "source": [ + "requests.put?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80517899", + "metadata": {}, + "outputs": [], + "source": [ + "requests.delete?" + ] + }, + { + "cell_type": "markdown", + "id": "7666e574", + "metadata": {}, + "source": [ + "* As part of the Data Engineering projects, we typically read the data from REST APIs. It means we will be using functions such as `requests.get` more often than others.\n", + "* Here is a quick example. We will deep dive as part of subsequent topics." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf5deaf6", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d0dd6cf", + "metadata": {}, + "outputs": [], + "source": [ + "res = requests.get('https://api.github.com')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15b676c8", + "metadata": {}, + "outputs": [], + "source": [ + "res.text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f027fc7", + "metadata": {}, + "outputs": [], + "source": [ + "res.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "977bf6ce", + "metadata": {}, + "outputs": [], + "source": [ + "res.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f860eb27", + "metadata": {}, + "outputs": [], + "source": [ + "type(res.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae780467", + "metadata": {}, + "outputs": [], + "source": [ + "gitapi = requests.get('https://api.github.com').text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ae5e52a", + "metadata": {}, + "outputs": [], + "source": [ + "gitapi # It is of type string. 
This string is a valid JSON" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/itversity-material/01-python-and-sql/34_processing_rest_payloads/06_convert_rest_payload_to_python_objects.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/06_convert_rest_payload_to_python_objects.ipynb similarity index 100% rename from itversity-material/01-python-and-sql/34_processing_rest_payloads/06_convert_rest_payload_to_python_objects.ipynb rename to itversity-material/01-python-and-sql/35_processing_rest_payloads/06_convert_rest_payload_to_python_objects.ipynb diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payloads/07_process_rest_payload_using_collection_operations.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/07_process_rest_payload_using_collection_operations.ipynb new file mode 100644 index 0000000..ee1ca9c --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payloads/07_process_rest_payload_using_collection_operations.ipynb @@ -0,0 +1,581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9183e0ae", + "metadata": {}, + "source": [ + "## Process REST Payload using Collection Operations\n", + "\n", + "Let us understand how to process REST Payload using Collection Operations.\n", + "* We can get details about all the public repositories using `GET /repositories` from **https://api.github.com**.\n", + "* As it is getting or reading data from external application the details are available via `GET`. 
We will have JSON Array as part of the Payload.\n", + "* We can convert this JSON Array to Python `list`. Each element in the list will be of type `dict`.\n", + "* Let us understand how the data in this list of dicts can be processed using Python core collection operations." + ] + }, + { + "cell_type": "markdown", + "id": "2f351c1a", + "metadata": {}, + "source": [ + "* Let us quickly review the output of the REST API using `curl`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9744fa0c", + "metadata": {}, + "outputs": [], + "source": [ + "!curl https://api.github.com/repositories" + ] + }, + { + "cell_type": "markdown", + "id": "0ff5a9c8", + "metadata": {}, + "source": [ + "* We can get the payload of public repositories using `requests.get`.\n", + "* We can convert to Python list using `json()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32c36cbf", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "5b7ba2a7", + "metadata": {}, + "source": [ + "* We can convert `payload` which is of string type and contains valid JSON to `dict` or `list` using `json` module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23c020af", + "metadata": {}, + "outputs": [], + "source": [ + "payload = requests.get('https://api.github.com/repositories', params={'since':369}).json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fec5efe", + "metadata": {}, + "outputs": [], + "source": [ + "payload" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b3fa093", + "metadata": {}, + "outputs": [], + "source": [ + "since = int(input('Enter the repo id from which you want to get repositories: '))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97253344", + "metadata": {}, + "outputs": [], + "source": [ + "repos = requests.get(f'https://api.github.com/repositories?since={since}').json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2e3ed98", + "metadata": {}, + "outputs": [], + "source": [ + "type(repos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bafc363b", + "metadata": {}, + "outputs": [], + "source": [ + "repos # A string with valid json array converted to list of dicts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4b161e7", + "metadata": {}, + "outputs": [], + "source": [ + "len(repos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02572965", + "metadata": {}, + "outputs": [], + "source": [ + "repos[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8c04d51", + "metadata": {}, + "outputs": [], + "source": [ + "type(repos[0])" + ] + }, + { + "cell_type": "markdown", + "id": "536507fa", + "metadata": {}, + "source": [ + "* We can process the data further using appropriate Python modules based upon the requirements."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1263bd57", + "metadata": {}, + "outputs": [], + "source": [ + "for repo in repos:\n", + " print(repo['id'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e298fdb", + "metadata": {}, + "outputs": [], + "source": [ + "for repo in repos:\n", + " print(repo['name'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f2cc465", + "metadata": {}, + "outputs": [], + "source": [ + "# Getting repo name and urls\n", + "for repo in repos:\n", + " print(f\"{repo['name']}:{repo['url']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90f22c6c", + "metadata": {}, + "outputs": [], + "source": [ + "repo_urls = [{'name': repo['name'], 'repo_url': repo['url']} for repo in repos]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97f45669", + "metadata": {}, + "outputs": [], + "source": [ + "repo_urls[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d45a649e", + "metadata": {}, + "outputs": [], + "source": [ + "repo_urls = list(map(lambda repo: {'name': repo['name'], 'repo_url': repo['url']}, repos))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17269061", + "metadata": {}, + "outputs": [], + "source": [ + "repo_urls[0]" + ] + }, + { + "cell_type": "markdown", + "id": "1fd6b0a1", + "metadata": {}, + "source": [ + "Here are some of the tasks you can work on using `repos` data. We will explore the solutions using functions such as `map`, `filter`, `itertools.groupby`, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f38274e", + "metadata": {}, + "outputs": [], + "source": [ + "repos = requests.get(f'https://api.github.com/repositories?since={since}').json()" + ] + }, + { + "cell_type": "markdown", + "id": "bf9172a3", + "metadata": {}, + "source": [ + "* Get number of repositories." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1d08f73", + "metadata": {}, + "outputs": [], + "source": [ + "len(repos)" + ] + }, + { + "cell_type": "markdown", + "id": "9b64b152", + "metadata": {}, + "source": [ + "* Get repository name, url and owner type of all repositories. Each element in the new list should be of type **tuple**." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b2fc06f", + "metadata": {}, + "outputs": [], + "source": [ + "repo = repos[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36e92114", + "metadata": {}, + "outputs": [], + "source": [ + "repo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "798e3330", + "metadata": {}, + "outputs": [], + "source": [ + "repo['name']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3cc464b", + "metadata": {}, + "outputs": [], + "source": [ + "repo['url']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb651bdc", + "metadata": {}, + "outputs": [], + "source": [ + "repo['owner']['type']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a14a1651", + "metadata": {}, + "outputs": [], + "source": [ + "list(map(lambda repo: (repo['name'], repo['url'], repo['owner']['type']), repos))" + ] + }, + { + "cell_type": "markdown", + "id": "0bd94857", + "metadata": {}, + "source": [ + "* Get all unique or distinct owner types of the repositories. The output should be of type **list**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e127b9dd", + "metadata": {}, + "outputs": [], + "source": [ + "list(map(lambda repo: repo['owner']['type'], repos))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ea7b0a2", + "metadata": {}, + "outputs": [], + "source": [ + "set(map(lambda repo: repo['owner']['type'], repos))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17cb1797", + "metadata": {}, + "outputs": [], + "source": [ + "list(set(map(lambda repo: repo['owner']['type'], repos)))" + ] + }, + { + "cell_type": "markdown", + "id": "3c4a5033", + "metadata": {}, + "source": [ + "* Get number of repositories where owner type is **User**." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3bc4ff1", + "metadata": {}, + "outputs": [], + "source": [ + "repo['owner']['type'] == 'User'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19f9ab90", + "metadata": {}, + "outputs": [], + "source": [ + "list(filter(lambda repo: repo['owner']['type'] == 'User', repos))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1dc5bd46", + "metadata": {}, + "outputs": [], + "source": [ + "len(list(filter(lambda repo: repo['owner']['type'] == 'User', repos)))" + ] + }, + { + "cell_type": "markdown", + "id": "851a7675", + "metadata": {}, + "source": [ + "* Get number of repositories where owner type is **Organization**." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01104e75", + "metadata": {}, + "outputs": [], + "source": [ + "len(list(filter(lambda repo: repo['owner']['type'] == 'Organization', repos)))" + ] + }, + { + "cell_type": "markdown", + "id": "e40bec78", + "metadata": {}, + "source": [ + "* Get number of repositories by each owner type." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4593bb6e", + "metadata": {}, + "outputs": [], + "source": [ + "import itertools as iter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e62efcc6", + "metadata": {}, + "outputs": [], + "source": [ + "list(map(lambda repo: repo['owner']['type'], repos))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19825aa5", + "metadata": {}, + "outputs": [], + "source": [ + "sorted(map(lambda repo: repo['owner']['type'], repos))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a93bf413", + "metadata": {}, + "outputs": [], + "source": [ + "repo_types = sorted(map(lambda repo: repo['owner']['type'], repos))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2326f009", + "metadata": {}, + "outputs": [], + "source": [ + "repo_types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f030057", + "metadata": {}, + "outputs": [], + "source": [ + "iter.groupby(repo_types)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e4a616a", + "metadata": {}, + "outputs": [], + "source": [ + "for item in iter.groupby(repo_types):\n", + " print((item[0], list(item[1])))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28c9ce7a", + "metadata": {}, + "outputs": [], + "source": [ + "list(map(lambda item: (item[0], len(list(item[1]))), iter.groupby(repo_types)))" + ] + }, + { + "cell_type": "markdown", + "id": "df4cc326", + "metadata": {}, + "source": [ + "* Sort the data by owner type and then by id. Ensure that data is sorted by id as numeric." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4572d62", + "metadata": {}, + "outputs": [], + "source": [ + "repo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f75e377a", + "metadata": {}, + "outputs": [], + "source": [ + "type(repo['id'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6ccaced", + "metadata": {}, + "outputs": [], + "source": [ + "sorted(repos, key=lambda repo: (repo['owner']['type'], repo['id']))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payloads/08_process_rest_payload_using_pandas.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/08_process_rest_payload_using_pandas.ipynb new file mode 100644 index 0000000..7534c04 --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payloads/08_process_rest_payload_using_pandas.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "55d17ade", + "metadata": {}, + "source": [ + "## Process REST Payload using pandas\n", + "\n", + "Let us understand how to process REST Payload using Pandas Dataframe APIs.\n", + "* We can get details about all the public repositories using `GET /repositories` from **https://api.github.com**.\n", + "* As it is getting or reading data from external application the details are available via `GET`. We will have JSON Array as part of the Payload.\n", + "* We can convert this JSON Array to Python `list`. 
Each element in the list will be of type `dict`.\n", + "* We can apply `pandas.json_normalize` to get a flattened Dataframe by passing this list of dicts.\n", + "* Let us understand how the data in this Pandas Dataframe can be processed using appropriate Pandas APIs as per our requirements." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99a58a77", + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f19fa2d6", + "metadata": {}, + "outputs": [], + "source": [ + "payload = requests.get('https://api.github.com/repositories').json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6f0ed4f", + "metadata": {}, + "outputs": [], + "source": [ + "type(payload)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e48bbd58", + "metadata": {}, + "outputs": [], + "source": [ + "payload # A list which contains dicts" + ] + }, + { + "cell_type": "markdown", + "id": "abcc7d08", + "metadata": {}, + "source": [ + "* Now we can convert the list returned to pandas dataframe using `json_normalize`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d313faa", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a605b934", + "metadata": {}, + "outputs": [], + "source": [ + "pd.DataFrame(payload)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c583cfb", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df = pd.json_normalize(payload)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a765b023", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c8d05f", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad0e9ae3", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be930e8", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c5ebdd9", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.count()" + ] + }, + { + "cell_type": "markdown", + "id": "123f6402", + "metadata": {}, + "source": [ + "Here are some of the tasks you can work on using `repos_df` data. We will explore the solutions using Pandas APIs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03ea8756", + "metadata": {}, + "outputs": [], + "source": [ + "since = 369" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "693264ba", + "metadata": {}, + "outputs": [], + "source": [ + "repos = requests.get(f'https://api.github.com/repositories?since={since}').json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c2b3762", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df = pd.json_normalize(repos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e83df71b", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df" + ] + }, + { + "cell_type": "markdown", + "id": "7f10b485", + "metadata": {}, + "source": [ + "* Get number of repositories." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e6ebe92", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f23e5e83", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.shape[0]" + ] + }, + { + "cell_type": "markdown", + "id": "062055d7", + "metadata": {}, + "source": [ + "* Get repository name, url and owner type of all repositories." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64e05f64", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a61ccb32", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df[['name', 'url', 'owner.type']]" + ] + }, + { + "cell_type": "markdown", + "id": "c3693976", + "metadata": {}, + "source": [ + "* Get all unique or distinct owner types of the repositories. The output should be of type **list**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0097df85", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df['owner.type']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c353387", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df['owner.type'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aa8a1a9", + "metadata": {}, + "outputs": [], + "source": [ + "list(repos_df['owner.type'].unique())" + ] + }, + { + "cell_type": "markdown", + "id": "840fa522", + "metadata": {}, + "source": [ + "* Get number of repositories where owner type is **User**." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1db28303", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df['owner.type'] == 'User'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93946fbd", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df[repos_df['owner.type'] == 'User']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4085db02", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df[repos_df['owner.type'] == 'User'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa992add", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df[repos_df['owner.type'] == 'User'].shape[0]" + ] + }, + { + "cell_type": "markdown", + "id": "83cc7bc8", + "metadata": {}, + "source": [ + "* Get number of repositories where owner type is **Organization**." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41cafdc9", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df[repos_df['owner.type'] == 'Organization'].shape[0]" + ] + }, + { + "cell_type": "markdown", + "id": "074e27a0", + "metadata": {}, + "source": [ + "* Get number of repositories by each owner type." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63ee17dd", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.groupby('owner.type')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "987a438c", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.groupby('owner.type')['owner.type'].count()" + ] + }, + { + "cell_type": "markdown", + "id": "7d8d97bc", + "metadata": {}, + "source": [ + "* Sort the data by owner type and then by id. Ensure that data is sorted by id as numeric." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0646785", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a80b7d94", + "metadata": {}, + "outputs": [], + "source": [ + "repos_df.sort_values(by=['owner.type', 'id']).head(10)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/itversity-material/01-python-and-sql/35_processing_rest_payloads/09_exercise_rest_payload_to_db_table.ipynb b/itversity-material/01-python-and-sql/35_processing_rest_payloads/09_exercise_rest_payload_to_db_table.ipynb new file mode 100644 index 0000000..e894e18 --- /dev/null +++ b/itversity-material/01-python-and-sql/35_processing_rest_payloads/09_exercise_rest_payload_to_db_table.ipynb @@ -0,0 +1,92 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "49ef1bd2", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## REST Payload to DB Table\n", + "\n", + "Let us perform an exercise to get the 
REST Payload into a database table.\n", + "* REST API URL: https://gbfs.citibikenyc.com/gbfs/en/station_information.json\n", + "* Database Name: **{username}_sms_db**\n", + "* First Table Name: **stations**\n", + "* Create a table for the following fields. Make sure to use appropriate data types.\n", + "* You can convert eightd_station_services to a delimited string before loading the data.\n", + "\n", + "\n", + " * id - Sequence generated primary key\n", + " * station_id - Uniqueness needs to be enforced.\n", + " * station_type\n", + " * name\n", + " * short_name\n", + " * capacity\n", + " * external_id\n", + " * has_kiosk\n", + " * legacy_id\n", + " * region_id\n", + " * electric_bike_surcharge_waiver\n", + " * eightd_station_services\n", + "* Get the data from the REST payload into the table **stations** created.\n", + "* Run queries for following scenarios.\n", + " * Get distinct station types.\n", + " * Get number of stations per region_id.\n", + " * Get top 10 stations by capacity.\n", + " * Get number of stations where there are no kiosks.\n", + "* Second Table Name: **station_rental_types**\n", + "* Create table with following fields\n", + " * station_id\n", + " * rental_type - the source field is of type list.
The target column in the table should be of type VARCHAR.\n", + " * station_rental_type_id - sequence generated primary key.\n", + " * Combination of station_id and rental_type is supposed to be unique.\n", + "* For all station ids where there is one or more rental_types, the data should be inserted into the table separately with rental_type.\n", + "* Sample input record `{'station_id': 1, 'rental_types': ['KEY', 'CREDIT CARD']}`\n", + "* Sample data in the table\n", + "\n", + "|station_id|rental_type|\n", + "|---|---|\n", + "|1|KEY|\n", + "|1|CREDIT CARD|\n", + "\n", + "* Run queries for following scenarios.\n", + " * Get number of records from **station_rental_types**\n", + " * Get number of stations where rental_type is **KEY**\n", + " * Get number of stations where rental_type is **CREDIT CARD**\n", + " * Get number of stations by rental_type.\n", + " * Get the stations where there is no rental type." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7eb31689-7cb1-4269-9346-ea00f43ade13", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}