Skip to content

Commit 08fcfcd

Browse files
committedAug 22, 2021
finished the project structure and set up basics
1 parent 968f916 commit 08fcfcd

30 files changed

+79247
-9418
lines changed
 

‎.ipynb_checkpoints/Beautiful_soup_workflow-checkpoint.ipynb

+3,308-1,838
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Pulling data from public APIs (without registration) - GET request"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"# loading the packages\n",
17+
"# requests provides us with the capabilities of sending an HTTP request to a server\n",
18+
"import requests"
19+
]
20+
},
21+
{
22+
"cell_type": "markdown",
23+
"metadata": {},
24+
"source": [
25+
"## Extracting data on currency exchange rates"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": 2,
31+
"metadata": {},
32+
"outputs": [],
33+
"source": [
34+
"# We will use an API containing currency exchange rates as published by the European Central Bank\n",
35+
"# Documentation at https://exchangeratesapi.io"
36+
]
37+
},
38+
{
39+
"cell_type": "markdown",
40+
"metadata": {},
41+
"source": [
42+
"### Sending a GET request"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": 3,
48+
"metadata": {},
49+
"outputs": [],
50+
"source": [
51+
"# Define the base URL\n",
52+
"# Base URL: the part of the URL common to all requests, not containing the parameters\n",
53+
"base_url = \"https://api.exchangeratesapi.io/latest\""
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": 4,
59+
"metadata": {},
60+
"outputs": [],
61+
"source": [
62+
"# We can make a GET request to this API endpoint with requests.get\n",
63+
"response = requests.get(base_url)\n",
64+
"\n",
65+
"# This method returns the response from the server\n",
66+
"# We store this response in a variable for future processing"
67+
]
68+
},
69+
{
70+
"cell_type": "markdown",
71+
"metadata": {},
72+
"source": [
73+
"### Investigating the response"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": 5,
79+
"metadata": {},
80+
"outputs": [
81+
{
82+
"data": {
83+
"text/plain": [
84+
"True"
85+
]
86+
},
87+
"execution_count": 5,
88+
"metadata": {},
89+
"output_type": "execute_result"
90+
}
91+
],
92+
"source": [
93+
"# Checking if the request went through ok\n",
94+
"response.ok"
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": 6,
100+
"metadata": {},
101+
"outputs": [
102+
{
103+
"data": {
104+
"text/plain": [
105+
"200"
106+
]
107+
},
108+
"execution_count": 6,
109+
"metadata": {},
110+
"output_type": "execute_result"
111+
}
112+
],
113+
"source": [
114+
"# Checking the status code of the response\n",
115+
"response.status_code"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": 7,
121+
"metadata": {},
122+
"outputs": [
123+
{
124+
"data": {
125+
"text/plain": [
126+
"'{\"rates\":{\"CAD\":1.5613,\"HKD\":8.9041,\"ISK\":145.0,\"PHP\":58.013,\"DKK\":7.4695,\"HUF\":336.25,\"CZK\":25.504,\"AUD\":1.733,\"RON\":4.8175,\"SEK\":10.7203,\"IDR\":16488.05,\"INR\":84.96,\"BRL\":5.4418,\"RUB\":85.1553,\"HRK\":7.55,\"JPY\":117.12,\"THB\":36.081,\"CHF\":1.0594,\"SGD\":1.5841,\"PLN\":4.3132,\"BGN\":1.9558,\"TRY\":7.0002,\"CNY\":7.96,\"NOK\":10.89,\"NZD\":1.8021,\"ZAR\":18.2898,\"USD\":1.1456,\"MXN\":24.3268,\"ILS\":4.0275,\"GBP\":0.87383,\"KRW\":1374.71,\"MYR\":4.8304},\"base\":\"EUR\",\"date\":\"2020-03-09\"}'"
127+
]
128+
},
129+
"execution_count": 7,
130+
"metadata": {},
131+
"output_type": "execute_result"
132+
}
133+
],
134+
"source": [
135+
"# Inspecting the content body of the response (as a regular 'string')\n",
136+
"response.text"
137+
]
138+
},
139+
{
140+
"cell_type": "code",
141+
"execution_count": 8,
142+
"metadata": {},
143+
"outputs": [
144+
{
145+
"data": {
146+
"text/plain": [
147+
"b'{\"rates\":{\"CAD\":1.5613,\"HKD\":8.9041,\"ISK\":145.0,\"PHP\":58.013,\"DKK\":7.4695,\"HUF\":336.25,\"CZK\":25.504,\"AUD\":1.733,\"RON\":4.8175,\"SEK\":10.7203,\"IDR\":16488.05,\"INR\":84.96,\"BRL\":5.4418,\"RUB\":85.1553,\"HRK\":7.55,\"JPY\":117.12,\"THB\":36.081,\"CHF\":1.0594,\"SGD\":1.5841,\"PLN\":4.3132,\"BGN\":1.9558,\"TRY\":7.0002,\"CNY\":7.96,\"NOK\":10.89,\"NZD\":1.8021,\"ZAR\":18.2898,\"USD\":1.1456,\"MXN\":24.3268,\"ILS\":4.0275,\"GBP\":0.87383,\"KRW\":1374.71,\"MYR\":4.8304},\"base\":\"EUR\",\"date\":\"2020-03-09\"}'"
148+
]
149+
},
150+
"execution_count": 8,
151+
"metadata": {},
152+
"output_type": "execute_result"
153+
}
154+
],
155+
"source": [
156+
"# Inspecting the content of the response (in 'bytes' format)\n",
157+
"response.content"
158+
]
159+
},
160+
{
161+
"cell_type": "code",
162+
"execution_count": 9,
163+
"metadata": {},
164+
"outputs": [],
165+
"source": [
166+
"# The data is presented in JSON format"
167+
]
168+
},
169+
{
170+
"cell_type": "markdown",
171+
"metadata": {},
172+
"source": [
173+
"### Handling the JSON"
174+
]
175+
},
176+
{
177+
"cell_type": "code",
178+
"execution_count": 10,
179+
"metadata": {},
180+
"outputs": [
181+
{
182+
"data": {
183+
"text/plain": [
184+
"{'rates': {'CAD': 1.5613,\n",
185+
" 'HKD': 8.9041,\n",
186+
" 'ISK': 145.0,\n",
187+
" 'PHP': 58.013,\n",
188+
" 'DKK': 7.4695,\n",
189+
" 'HUF': 336.25,\n",
190+
" 'CZK': 25.504,\n",
191+
" 'AUD': 1.733,\n",
192+
" 'RON': 4.8175,\n",
193+
" 'SEK': 10.7203,\n",
194+
" 'IDR': 16488.05,\n",
195+
" 'INR': 84.96,\n",
196+
" 'BRL': 5.4418,\n",
197+
" 'RUB': 85.1553,\n",
198+
" 'HRK': 7.55,\n",
199+
" 'JPY': 117.12,\n",
200+
" 'THB': 36.081,\n",
201+
" 'CHF': 1.0594,\n",
202+
" 'SGD': 1.5841,\n",
203+
" 'PLN': 4.3132,\n",
204+
" 'BGN': 1.9558,\n",
205+
" 'TRY': 7.0002,\n",
206+
" 'CNY': 7.96,\n",
207+
" 'NOK': 10.89,\n",
208+
" 'NZD': 1.8021,\n",
209+
" 'ZAR': 18.2898,\n",
210+
" 'USD': 1.1456,\n",
211+
" 'MXN': 24.3268,\n",
212+
" 'ILS': 4.0275,\n",
213+
" 'GBP': 0.87383,\n",
214+
" 'KRW': 1374.71,\n",
215+
" 'MYR': 4.8304},\n",
216+
" 'base': 'EUR',\n",
217+
" 'date': '2020-03-09'}"
218+
]
219+
},
220+
"execution_count": 10,
221+
"metadata": {},
222+
"output_type": "execute_result"
223+
}
224+
],
225+
"source": [
226+
"# Requests has a built-in method that parses the JSON response body directly into a Python object\n",
227+
"response.json()"
228+
]
229+
},
230+
{
231+
"cell_type": "code",
232+
"execution_count": 11,
233+
"metadata": {},
234+
"outputs": [
235+
{
236+
"data": {
237+
"text/plain": [
238+
"dict"
239+
]
240+
},
241+
"execution_count": 11,
242+
"metadata": {},
243+
"output_type": "execute_result"
244+
}
245+
],
246+
"source": [
247+
"# In Python, this JSON is stored as a dictionary\n",
248+
"type(response.json())"
249+
]
250+
},
251+
{
252+
"cell_type": "code",
253+
"execution_count": 12,
254+
"metadata": {},
255+
"outputs": [],
256+
"source": [
257+
"# A useful library for JSON manipulation and pretty print\n",
258+
"import json\n",
259+
"\n",
260+
"# It has two main methods:\n",
261+
"# .loads(), which creates a Python dictionary from a JSON format string (just as response.json() does)\n",
262+
"# .dumps(), which creates a JSON format string out of a Python dictionary "
263+
]
264+
},
265+
{
266+
"cell_type": "code",
267+
"execution_count": 13,
268+
"metadata": {},
269+
"outputs": [
270+
{
271+
"data": {
272+
"text/plain": [
273+
"'{\\n \"rates\": {\\n \"CAD\": 1.5613,\\n \"HKD\": 8.9041,\\n \"ISK\": 145.0,\\n \"PHP\": 58.013,\\n \"DKK\": 7.4695,\\n \"HUF\": 336.25,\\n \"CZK\": 25.504,\\n \"AUD\": 1.733,\\n \"RON\": 4.8175,\\n \"SEK\": 10.7203,\\n \"IDR\": 16488.05,\\n \"INR\": 84.96,\\n \"BRL\": 5.4418,\\n \"RUB\": 85.1553,\\n \"HRK\": 7.55,\\n \"JPY\": 117.12,\\n \"THB\": 36.081,\\n \"CHF\": 1.0594,\\n \"SGD\": 1.5841,\\n \"PLN\": 4.3132,\\n \"BGN\": 1.9558,\\n \"TRY\": 7.0002,\\n \"CNY\": 7.96,\\n \"NOK\": 10.89,\\n \"NZD\": 1.8021,\\n \"ZAR\": 18.2898,\\n \"USD\": 1.1456,\\n \"MXN\": 24.3268,\\n \"ILS\": 4.0275,\\n \"GBP\": 0.87383,\\n \"KRW\": 1374.71,\\n \"MYR\": 4.8304\\n },\\n \"base\": \"EUR\",\\n \"date\": \"2020-03-09\"\\n}'"
274+
]
275+
},
276+
"execution_count": 13,
277+
"metadata": {},
278+
"output_type": "execute_result"
279+
}
280+
],
281+
"source": [
282+
"# .dumps() has options to make the string 'prettier', more readable\n",
283+
"# We can choose the number of spaces to be used as indentation\n",
284+
"json.dumps(response.json(), indent=4)"
285+
]
286+
},
287+
{
288+
"cell_type": "code",
289+
"execution_count": 14,
290+
"metadata": {},
291+
"outputs": [
292+
{
293+
"name": "stdout",
294+
"output_type": "stream",
295+
"text": [
296+
"{\n",
297+
" \"rates\": {\n",
298+
" \"CAD\": 1.5613,\n",
299+
" \"HKD\": 8.9041,\n",
300+
" \"ISK\": 145.0,\n",
301+
" \"PHP\": 58.013,\n",
302+
" \"DKK\": 7.4695,\n",
303+
" \"HUF\": 336.25,\n",
304+
" \"CZK\": 25.504,\n",
305+
" \"AUD\": 1.733,\n",
306+
" \"RON\": 4.8175,\n",
307+
" \"SEK\": 10.7203,\n",
308+
" \"IDR\": 16488.05,\n",
309+
" \"INR\": 84.96,\n",
310+
" \"BRL\": 5.4418,\n",
311+
" \"RUB\": 85.1553,\n",
312+
" \"HRK\": 7.55,\n",
313+
" \"JPY\": 117.12,\n",
314+
" \"THB\": 36.081,\n",
315+
" \"CHF\": 1.0594,\n",
316+
" \"SGD\": 1.5841,\n",
317+
" \"PLN\": 4.3132,\n",
318+
" \"BGN\": 1.9558,\n",
319+
" \"TRY\": 7.0002,\n",
320+
" \"CNY\": 7.96,\n",
321+
" \"NOK\": 10.89,\n",
322+
" \"NZD\": 1.8021,\n",
323+
" \"ZAR\": 18.2898,\n",
324+
" \"USD\": 1.1456,\n",
325+
" \"MXN\": 24.3268,\n",
326+
" \"ILS\": 4.0275,\n",
327+
" \"GBP\": 0.87383,\n",
328+
" \"KRW\": 1374.71,\n",
329+
" \"MYR\": 4.8304\n",
330+
" },\n",
331+
" \"base\": \"EUR\",\n",
332+
" \"date\": \"2020-03-09\"\n",
333+
"}\n"
334+
]
335+
}
336+
],
337+
"source": [
338+
"# In order to visualize these changes, we need to print the string\n",
339+
"print(json.dumps(response.json(), indent=4))"
340+
]
341+
},
342+
{
343+
"cell_type": "code",
344+
"execution_count": 15,
345+
"metadata": {},
346+
"outputs": [
347+
{
348+
"data": {
349+
"text/plain": [
350+
"dict_keys(['rates', 'base', 'date'])"
351+
]
352+
},
353+
"execution_count": 15,
354+
"metadata": {},
355+
"output_type": "execute_result"
356+
}
357+
],
358+
"source": [
359+
"# It contains 3 keys; the value for the 'rates' key is another dictionary\n",
360+
"response.json().keys()"
361+
]
362+
}
363+
],
364+
"metadata": {
365+
"kernelspec": {
366+
"display_name": "Python 3",
367+
"language": "python",
368+
"name": "python3"
369+
},
370+
"language_info": {
371+
"codemirror_mode": {
372+
"name": "ipython",
373+
"version": 3
374+
},
375+
"file_extension": ".py",
376+
"mimetype": "text/x-python",
377+
"name": "python",
378+
"nbconvert_exporter": "python",
379+
"pygments_lexer": "ipython3",
380+
"version": "3.9.6"
381+
}
382+
},
383+
"nbformat": 4,
384+
"nbformat_minor": 2
385+
}

‎.ipynb_checkpoints/Requests-html-package-checkpoint.ipynb

+6,039
Large diffs are not rendered by default.

‎.ipynb_checkpoints/RottenTomatoes_scrap-checkpoint.ipynb

+6,615
Large diffs are not rendered by default.

‎.ipynb_checkpoints/Scraping_HTML_tables-checkpoint.ipynb

+2,844
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "d3368e1e",
6+
"metadata": {},
7+
"source": [
8+
"## Headers"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 1,
14+
"id": "f834e2aa",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import requests"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 2,
24+
"id": "58f1de08",
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"headers = {\n",
29+
" \"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"\n",
30+
"}"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 5,
36+
"id": "9139fc98",
37+
"metadata": {},
38+
"outputs": [
39+
{
40+
"data": {
41+
"text/plain": [
42+
"200"
43+
]
44+
},
45+
"execution_count": 5,
46+
"metadata": {},
47+
"output_type": "execute_result"
48+
}
49+
],
50+
"source": [
51+
"r = requests.get(\"https://www.youtube.com/\")\n",
52+
"r.status_code"
53+
]
54+
},
55+
{
56+
"cell_type": "code",
57+
"execution_count": 4,
58+
"id": "ce3da4f8",
59+
"metadata": {},
60+
"outputs": [
61+
{
62+
"data": {
63+
"text/plain": [
64+
"200"
65+
]
66+
},
67+
"execution_count": 4,
68+
"metadata": {},
69+
"output_type": "execute_result"
70+
}
71+
],
72+
"source": [
73+
"r = requests.get(\"https://www.youtube.com/\", headers = headers)\n",
74+
"r.status_code"
75+
]
76+
},
77+
{
78+
"cell_type": "markdown",
79+
"id": "7e8e49c2",
80+
"metadata": {},
81+
"source": [
82+
"## Cookies\n",
83+
"\n",
84+
"- authentication cookie (checking log-in info)"
85+
]
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": 12,
90+
"id": "1f3b499c",
91+
"metadata": {},
92+
"outputs": [
93+
{
94+
"name": "stdout",
95+
"output_type": "stream",
96+
"text": [
97+
"<Response [200]>\n"
98+
]
99+
}
100+
],
101+
"source": [
102+
"### https://docs.python-requests.org/en/master/user/advanced/#session-objects\n",
103+
"\n",
104+
"url_1 = 'https://httpbin.org/cookies/set/sessioncookie/123456789'\n",
105+
"url_2 = 'https://httpbin.org/cookies'\n",
106+
"\n",
107+
"s = requests.Session()\n",
108+
"\n",
109+
"# r_1 = s.post(url_1, data = payload)\n",
110+
"r_1 = s.get(url_1)\n",
111+
"print(r_1)\n",
112+
"r_2 = s.get(url_2)\n",
113+
"\n",
114+
"s.close()"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"id": "49e731fc",
121+
"metadata": {},
122+
"outputs": [],
123+
"source": []
124+
}
125+
],
126+
"metadata": {
127+
"kernelspec": {
128+
"display_name": "Python 3",
129+
"language": "python",
130+
"name": "python3"
131+
},
132+
"language_info": {
133+
"codemirror_mode": {
134+
"name": "ipython",
135+
"version": 3
136+
},
137+
"file_extension": ".py",
138+
"mimetype": "text/x-python",
139+
"name": "python",
140+
"nbconvert_exporter": "python",
141+
"pygments_lexer": "ipython3",
142+
"version": "3.9.6"
143+
}
144+
},
145+
"nbformat": 4,
146+
"nbformat_minor": 5
147+
}

‎.ipynb_checkpoints/edamam_api-checkpoint.ipynb

+982-45
Large diffs are not rendered by default.

‎Beautiful_soup_workflow.ipynb

+3,308-1,838
Large diffs are not rendered by default.

‎EdamamAPI/edamam_api.ipynb

+1,226
Large diffs are not rendered by default.

‎Requests-html-package.ipynb

+6,039
Large diffs are not rendered by default.

‎RottenTomatoesScrap/RottenTomatoes_page_2_html_parser.html

+16,570
Large diffs are not rendered by default.

‎RottenTomatoesScrap/RottenTomatoes_page_2_lxml_parser.html

+16,561
Large diffs are not rendered by default.

‎RottenTomatoesScrap/RottenTomatoes_scrap.ipynb

+6,615
Large diffs are not rendered by default.

‎RottenTomatoesScrap/movies_info.csv

+141
Large diffs are not rendered by default.

‎Scraping_HTML_tables_Pandas.ipynb

+2,844
Large diffs are not rendered by default.

‎Scraping_issues.ipynb

+147
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "d3368e1e",
6+
"metadata": {},
7+
"source": [
8+
"## Headers"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 1,
14+
"id": "f834e2aa",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import requests"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 2,
24+
"id": "58f1de08",
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"headers = {\n",
29+
" \"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\"\n",
30+
"}"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 5,
36+
"id": "9139fc98",
37+
"metadata": {},
38+
"outputs": [
39+
{
40+
"data": {
41+
"text/plain": [
42+
"200"
43+
]
44+
},
45+
"execution_count": 5,
46+
"metadata": {},
47+
"output_type": "execute_result"
48+
}
49+
],
50+
"source": [
51+
"r = requests.get(\"https://www.youtube.com/\")\n",
52+
"r.status_code"
53+
]
54+
},
55+
{
56+
"cell_type": "code",
57+
"execution_count": 4,
58+
"id": "ce3da4f8",
59+
"metadata": {},
60+
"outputs": [
61+
{
62+
"data": {
63+
"text/plain": [
64+
"200"
65+
]
66+
},
67+
"execution_count": 4,
68+
"metadata": {},
69+
"output_type": "execute_result"
70+
}
71+
],
72+
"source": [
73+
"r = requests.get(\"https://www.youtube.com/\", headers = headers)\n",
74+
"r.status_code"
75+
]
76+
},
77+
{
78+
"cell_type": "markdown",
79+
"id": "7e8e49c2",
80+
"metadata": {},
81+
"source": [
82+
"## Cookies\n",
83+
"\n",
84+
"- authentication cookie (checking log-in info)"
85+
]
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": 12,
90+
"id": "1f3b499c",
91+
"metadata": {},
92+
"outputs": [
93+
{
94+
"name": "stdout",
95+
"output_type": "stream",
96+
"text": [
97+
"<Response [200]>\n"
98+
]
99+
}
100+
],
101+
"source": [
102+
"### https://docs.python-requests.org/en/master/user/advanced/#session-objects\n",
103+
"\n",
104+
"url_1 = 'https://httpbin.org/cookies/set/sessioncookie/123456789'\n",
105+
"url_2 = 'https://httpbin.org/cookies'\n",
106+
"\n",
107+
"s = requests.Session()\n",
108+
"\n",
109+
"# r_1 = s.post(url_1, data = payload)\n",
110+
"r_1 = s.get(url_1)\n",
111+
"print(r_1)\n",
112+
"r_2 = s.get(url_2)\n",
113+
"\n",
114+
"s.close()"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"id": "49e731fc",
121+
"metadata": {},
122+
"outputs": [],
123+
"source": []
124+
}
125+
],
126+
"metadata": {
127+
"kernelspec": {
128+
"display_name": "Python 3",
129+
"language": "python",
130+
"name": "python3"
131+
},
132+
"language_info": {
133+
"codemirror_mode": {
134+
"name": "ipython",
135+
"version": 3
136+
},
137+
"file_extension": ".py",
138+
"mimetype": "text/x-python",
139+
"name": "python",
140+
"nbconvert_exporter": "python",
141+
"pygments_lexer": "ipython3",
142+
"version": "3.9.6"
143+
}
144+
},
145+
"nbformat": 4,
146+
"nbformat_minor": 5
147+
}

‎Wiki_response.html ‎data/Wiki_response.html

+1,573-1,362
Large diffs are not rendered by default.

‎data/capuccino_nutrients.csv

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
,label,quantity,unit
2+
ENERC_KCAL,Energy,95.98648385516834,kcal
3+
FAT,Fat,5.060122500480281,g
4+
FASAT,Saturated,2.9016992195063764,g
5+
FATRN,Trans,0.0,g
6+
FAMS,Monounsaturated,1.2561571293507656,g
7+
FAPU,Polyunsaturated,0.31822335002881685,g
8+
CHOCDF,Carbs,7.726159385324722,g
9+
FIBTG,Fiber,0.0,g
10+
SUGAR,Sugars,7.812307269977051,g
11+
PROCNT,Protein,4.894623346619349,g
12+
CHOLE,Cholesterol,15.469915386093172,mg
13+
NA,Sodium,69.04063616020063,mg
14+
CA,Calcium,175.17004386285285,mg
15+
MG,Magnesium,29.86991538609317,mg
16+
K,Potassium,224.90288309642986,mg
17+
FE,Iron,0.06980974615827952,mg
18+
ZN,Zinc,0.5813868692854474,mg
19+
P,Phosphorus,131.20728924318263,mg
20+
VITA_RAE,Vitamin A,71.1616107760286,µg
21+
VITC,Vitamin C,0.036,mg
22+
THIA,Thiamin (B1),0.07134161077602859,mg
23+
RIBF,Riboflavin (B2),0.2933015700249746,mg
24+
NIA,Niacin (B3),1.0749422469362293,mg
25+
VITB6A,Vitamin B6,0.056051695389935416,mg
26+
FOLDFE,Folate equivalent (total),7.914957693046586,µg
27+
FOLFD,Folate (food),7.914957693046586,µg
28+
FOLAC,Folic acid,0.0,µg
29+
VITB12,Vitamin B12,0.6961461923741927,µg
30+
VITD,Vitamin D,2.0110890001921122,µg
31+
TOCPHA,Vitamin E,0.1100894077026522,mg
32+
VITK1,Vitamin K,0.48209746158279515,µg
33+
WATER,Water,153.94036429763912,g

‎data/songs_info.csv

+51
Large diffs are not rendered by default.

‎edamam_api.ipynb

-289
This file was deleted.

‎environment.yml

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
name: web-scraping-env
2+
channels:
3+
- anaconda
4+
- defaults
5+
dependencies:
6+
- argon2-cffi=20.1.0=py39h2bbff1b_1
7+
- async_generator=1.10=pyhd3eb1b0_0
8+
- attrs=21.2.0=pyhd3eb1b0_0
9+
- backcall=0.2.0=pyhd3eb1b0_0
10+
- beautifulsoup4=4.9.3=pyha847dfd_0
11+
- blas=1.0=mkl
12+
- bleach=4.0.0=pyhd3eb1b0_0
13+
- bottleneck=1.3.2=py39h7cc1a96_1
14+
- brotlipy=0.7.0=py39h2bbff1b_1003
15+
- ca-certificates=2020.10.14=0
16+
- certifi=2021.5.30=py39haa95532_0
17+
- cffi=1.14.6=py39h2bbff1b_0
18+
- chardet=4.0.0=py39haa95532_1003
19+
- colorama=0.4.4=pyhd3eb1b0_0
20+
- cryptography=3.4.7=py39h71e12ea_0
21+
- decorator=5.0.9=pyhd3eb1b0_0
22+
- defusedxml=0.7.1=pyhd3eb1b0_0
23+
- entrypoints=0.3=py39haa95532_0
24+
- idna=2.10=pyhd3eb1b0_0
25+
- importlib-metadata=3.10.0=py39haa95532_0
26+
- importlib_metadata=3.10.0=hd3eb1b0_0
27+
- intel-openmp=2021.3.0=haa95532_3372
28+
- ipykernel=5.3.4=py39h7b7c402_0
29+
- ipython=7.26.0=py39hd4e2768_0
30+
- ipython_genutils=0.2.0=pyhd3eb1b0_1
31+
- jedi=0.18.0=py39haa95532_1
32+
- jinja2=3.0.1=pyhd3eb1b0_0
33+
- jsonschema=3.2.0=py_2
34+
- jupyter_client=6.1.12=pyhd3eb1b0_0
35+
- jupyter_core=4.7.1=py39haa95532_0
36+
- jupyterlab_pygments=0.1.2=py_0
37+
- libiconv=1.15=vc14h29686d3_5
38+
- libsodium=1.0.18=h62dcd97_0
39+
- libxml2=2.9.10=hb89e7f3_3
40+
- libxslt=1.1.34=he774522_0
41+
- lxml=4.6.3=py39h9b66d53_0
42+
- m2w64-gcc-libgfortran=5.3.0=6
43+
- m2w64-gcc-libs=5.3.0=7
44+
- m2w64-gcc-libs-core=5.3.0=7
45+
- m2w64-gmp=6.1.0=2
46+
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
47+
- markupsafe=2.0.1=py39h2bbff1b_0
48+
- matplotlib-inline=0.1.2=pyhd3eb1b0_2
49+
- mistune=0.8.4=py39h2bbff1b_1000
50+
- mkl=2021.3.0=haa95532_524
51+
- mkl-service=2.4.0=py39h2bbff1b_0
52+
- mkl_fft=1.3.0=py39h277e83a_2
53+
- mkl_random=1.2.2=py39hf11a4ad_0
54+
- msys2-conda-epoch=20160418=1
55+
- nbclient=0.5.3=pyhd3eb1b0_0
56+
- nbconvert=6.1.0=py39haa95532_0
57+
- nbformat=5.1.3=pyhd3eb1b0_0
58+
- nest-asyncio=1.5.1=pyhd3eb1b0_0
59+
- notebook=6.4.1=py39haa95532_0
60+
- numexpr=2.7.3=py39hb80d3ca_1
61+
- numpy=1.20.3=py39ha4e8547_0
62+
- numpy-base=1.20.3=py39hc2deb75_0
63+
- openssl=1.1.1k=h2bbff1b_0
64+
- packaging=21.0=pyhd3eb1b0_0
65+
- pandas=1.3.1=py39h6214cd6_0
66+
- pandocfilters=1.4.3=py39haa95532_1
67+
- parso=0.8.2=pyhd3eb1b0_0
68+
- pickleshare=0.7.5=pyhd3eb1b0_1003
69+
- pip=21.2.2=py39haa95532_0
70+
- prometheus_client=0.11.0=pyhd3eb1b0_0
71+
- prompt-toolkit=3.0.17=pyh06a4308_0
72+
- pycparser=2.20=py_2
73+
- pygments=2.9.0=pyhd3eb1b0_0
74+
- pyopenssl=20.0.1=pyhd3eb1b0_1
75+
- pyparsing=2.4.7=pyhd3eb1b0_0
76+
- pyrsistent=0.18.0=py39h2bbff1b_0
77+
- pysocks=1.7.1=py39haa95532_0
78+
- python=3.9.6=h6244533_0
79+
- python-dateutil=2.8.2=pyhd3eb1b0_0
80+
- pytz=2021.1=pyhd3eb1b0_0
81+
- pywin32=228=py39hbaba5e8_1
82+
- pywinpty=0.5.7=py39haa95532_0
83+
- pyzmq=20.0.0=py39hd77b12b_1
84+
- requests=2.25.1=pyhd3eb1b0_0
85+
- send2trash=1.5.0=pyhd3eb1b0_1
86+
- setuptools=52.0.0=py39haa95532_0
87+
- six=1.16.0=pyhd3eb1b0_0
88+
- soupsieve=2.2.1=pyhd3eb1b0_0
89+
- sqlite=3.36.0=h2bbff1b_0
90+
- terminado=0.9.4=py39haa95532_0
91+
- testpath=0.5.0=pyhd3eb1b0_0
92+
- tornado=6.1=py39h2bbff1b_0
93+
- traitlets=5.0.5=pyhd3eb1b0_0
94+
- tzdata=2021a=h52ac0ba_0
95+
- urllib3=1.26.6=pyhd3eb1b0_1
96+
- vc=14.2=h21ff451_1
97+
- vs2015_runtime=14.27.29016=h5e58377_2
98+
- wcwidth=0.2.5=py_0
99+
- webencodings=0.5.1=py39haa95532_1
100+
- wheel=0.36.2=pyhd3eb1b0_0
101+
- win_inet_pton=1.1.0=py39haa95532_0
102+
- wincertstore=0.2=py39h2bbff1b_0
103+
- winpty=0.4.3=4
104+
- zeromq=4.3.3=ha925a31_3
105+
- zipp=3.5.0=pyhd3eb1b0_0
106+
- zlib=1.2.11=vc14h1cdd9ab_1
107+
- pip:
108+
- appdirs==1.4.4
109+
- bs4==0.0.1
110+
- cssselect==1.1.0
111+
- fake-useragent==0.1.11
112+
- parse==1.19.0
113+
- pyee==8.2.2
114+
- pyppeteer==0.2.6
115+
- pyquery==1.4.3
116+
- requests-html==0.10.0
117+
- tqdm==4.62.1
118+
- w3lib==1.22.0
119+
- websockets==9.1
120+
prefix: C:\Users\dkarl\anaconda3\envs\web-scraping-env

‎itunes_api_search.ipynb

-3,582
This file was deleted.

‎songs_info.csv

-51
This file was deleted.

‎spec-file.txt

-320
This file was deleted.

‎test.py

-5
This file was deleted.

‎API_tests.ipynb ‎tests/API_tests.ipynb

+27-87
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 2,
5+
"execution_count": 1,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -11,7 +11,7 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": 3,
14+
"execution_count": 2,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
@@ -20,7 +20,7 @@
2020
},
2121
{
2222
"cell_type": "code",
23-
"execution_count": 4,
23+
"execution_count": 3,
2424
"metadata": {},
2525
"outputs": [],
2626
"source": [
@@ -29,7 +29,7 @@
2929
},
3030
{
3131
"cell_type": "code",
32-
"execution_count": 5,
32+
"execution_count": 4,
3333
"metadata": {},
3434
"outputs": [
3535
{
@@ -38,7 +38,7 @@
3838
"True"
3939
]
4040
},
41-
"execution_count": 5,
41+
"execution_count": 4,
4242
"metadata": {},
4343
"output_type": "execute_result"
4444
}
@@ -49,7 +49,7 @@
4949
},
5050
{
5151
"cell_type": "code",
52-
"execution_count": 6,
52+
"execution_count": 8,
5353
"metadata": {},
5454
"outputs": [
5555
{
@@ -58,7 +58,7 @@
5858
"200"
5959
]
6060
},
61-
"execution_count": 6,
61+
"execution_count": 8,
6262
"metadata": {},
6363
"output_type": "execute_result"
6464
}
@@ -69,16 +69,16 @@
6969
},
7070
{
7171
"cell_type": "code",
72-
"execution_count": 7,
72+
"execution_count": 9,
7373
"metadata": {},
7474
"outputs": [
7575
{
7676
"data": {
7777
"text/plain": [
78-
"b'{\"rates\":{\"CAD\":1.5563,\"HKD\":9.1885,\"ISK\":164.9,\"PHP\":57.388,\"DKK\":7.4407,\"HUF\":364.17,\"CZK\":27.222,\"AUD\":1.6578,\"RON\":4.874,\"SEK\":10.3618,\"IDR\":17410.24,\"INR\":87.3245,\"BRL\":6.6052,\"RUB\":90.6421,\"HRK\":7.5778,\"JPY\":124.17,\"THB\":37.056,\"CHF\":1.0715,\"SGD\":1.6089,\"PLN\":4.5823,\"BGN\":1.9558,\"TRY\":9.4418,\"CNY\":7.9157,\"NOK\":10.9178,\"NZD\":1.7703,\"ZAR\":19.1905,\"USD\":1.1856,\"MXN\":24.753,\"ILS\":4.0032,\"GBP\":0.90675,\"KRW\":1338.52,\"MYR\":4.9291},\"base\":\"EUR\",\"date\":\"2020-10-23\"}'"
78+
"b'{\\n \"success\": false,\\n \"error\": {\\n \"code\": 101,\\n \"type\": \"missing_access_key\",\\n \"info\": \"You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]\"\\n }\\n}\\n'"
7979
]
8080
},
81-
"execution_count": 7,
81+
"execution_count": 9,
8282
"metadata": {},
8383
"output_type": "execute_result"
8484
}
@@ -89,49 +89,19 @@
8989
},
9090
{
9191
"cell_type": "code",
92-
"execution_count": 8,
92+
"execution_count": 7,
9393
"metadata": {},
9494
"outputs": [
9595
{
9696
"data": {
9797
"text/plain": [
98-
"{'rates': {'CAD': 1.5563,\n",
99-
" 'HKD': 9.1885,\n",
100-
" 'ISK': 164.9,\n",
101-
" 'PHP': 57.388,\n",
102-
" 'DKK': 7.4407,\n",
103-
" 'HUF': 364.17,\n",
104-
" 'CZK': 27.222,\n",
105-
" 'AUD': 1.6578,\n",
106-
" 'RON': 4.874,\n",
107-
" 'SEK': 10.3618,\n",
108-
" 'IDR': 17410.24,\n",
109-
" 'INR': 87.3245,\n",
110-
" 'BRL': 6.6052,\n",
111-
" 'RUB': 90.6421,\n",
112-
" 'HRK': 7.5778,\n",
113-
" 'JPY': 124.17,\n",
114-
" 'THB': 37.056,\n",
115-
" 'CHF': 1.0715,\n",
116-
" 'SGD': 1.6089,\n",
117-
" 'PLN': 4.5823,\n",
118-
" 'BGN': 1.9558,\n",
119-
" 'TRY': 9.4418,\n",
120-
" 'CNY': 7.9157,\n",
121-
" 'NOK': 10.9178,\n",
122-
" 'NZD': 1.7703,\n",
123-
" 'ZAR': 19.1905,\n",
124-
" 'USD': 1.1856,\n",
125-
" 'MXN': 24.753,\n",
126-
" 'ILS': 4.0032,\n",
127-
" 'GBP': 0.90675,\n",
128-
" 'KRW': 1338.52,\n",
129-
" 'MYR': 4.9291},\n",
130-
" 'base': 'EUR',\n",
131-
" 'date': '2020-10-23'}"
98+
"{'success': False,\n",
99+
" 'error': {'code': 101,\n",
100+
" 'type': 'missing_access_key',\n",
101+
" 'info': 'You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]'}}"
132102
]
133103
},
134-
"execution_count": 8,
104+
"execution_count": 7,
135105
"metadata": {},
136106
"output_type": "execute_result"
137107
}
@@ -142,7 +112,7 @@
142112
},
143113
{
144114
"cell_type": "code",
145-
"execution_count": 9,
115+
"execution_count": 10,
146116
"metadata": {},
147117
"outputs": [
148118
{
@@ -151,7 +121,7 @@
151121
"dict"
152122
]
153123
},
154-
"execution_count": 9,
124+
"execution_count": 10,
155125
"metadata": {},
156126
"output_type": "execute_result"
157127
}
@@ -162,7 +132,7 @@
162132
},
163133
{
164134
"cell_type": "code",
165-
"execution_count": 10,
135+
"execution_count": 11,
166136
"metadata": {},
167137
"outputs": [],
168138
"source": [
@@ -179,42 +149,12 @@
179149
"output_type": "stream",
180150
"text": [
181151
"{\n",
182-
" \"rates\": {\n",
183-
" \"CAD\": 1.5563,\n",
184-
" \"HKD\": 9.1885,\n",
185-
" \"ISK\": 164.9,\n",
186-
" \"PHP\": 57.388,\n",
187-
" \"DKK\": 7.4407,\n",
188-
" \"HUF\": 364.17,\n",
189-
" \"CZK\": 27.222,\n",
190-
" \"AUD\": 1.6578,\n",
191-
" \"RON\": 4.874,\n",
192-
" \"SEK\": 10.3618,\n",
193-
" \"IDR\": 17410.24,\n",
194-
" \"INR\": 87.3245,\n",
195-
" \"BRL\": 6.6052,\n",
196-
" \"RUB\": 90.6421,\n",
197-
" \"HRK\": 7.5778,\n",
198-
" \"JPY\": 124.17,\n",
199-
" \"THB\": 37.056,\n",
200-
" \"CHF\": 1.0715,\n",
201-
" \"SGD\": 1.6089,\n",
202-
" \"PLN\": 4.5823,\n",
203-
" \"BGN\": 1.9558,\n",
204-
" \"TRY\": 9.4418,\n",
205-
" \"CNY\": 7.9157,\n",
206-
" \"NOK\": 10.9178,\n",
207-
" \"NZD\": 1.7703,\n",
208-
" \"ZAR\": 19.1905,\n",
209-
" \"USD\": 1.1856,\n",
210-
" \"MXN\": 24.753,\n",
211-
" \"ILS\": 4.0032,\n",
212-
" \"GBP\": 0.90675,\n",
213-
" \"KRW\": 1338.52,\n",
214-
" \"MYR\": 4.9291\n",
215-
" },\n",
216-
" \"base\": \"EUR\",\n",
217-
" \"date\": \"2020-10-23\"\n",
152+
" \"success\": false,\n",
153+
" \"error\": {\n",
154+
" \"code\": 101,\n",
155+
" \"type\": \"missing_access_key\",\n",
156+
" \"info\": \"You have not supplied an API Access Key. [Required format: access_key=YOUR_ACCESS_KEY]\"\n",
157+
" }\n",
218158
"}\n"
219159
]
220160
}
@@ -304,7 +244,7 @@
304244
},
305245
{
306246
"cell_type": "code",
307-
"execution_count": 19,
247+
"execution_count": 13,
308248
"metadata": {},
309249
"outputs": [],
310250
"source": [
@@ -9012,7 +8952,7 @@
90128952
"name": "python",
90138953
"nbconvert_exporter": "python",
90148954
"pygments_lexer": "ipython3",
9015-
"version": "3.8.3"
8955+
"version": "3.9.6"
90168956
}
90178957
},
90188958
"nbformat": 4,
File renamed without changes.

‎Exchange rates API GETting a JSON reply.ipynb ‎tests/Exchange rates API GETting a JSON reply.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@
377377
"name": "python",
378378
"nbconvert_exporter": "python",
379379
"pygments_lexer": "ipython3",
380-
"version": "3.7.3"
380+
"version": "3.9.6"
381381
}
382382
},
383383
"nbformat": 4,
File renamed without changes.

‎tests/itunes_api_search.ipynb

+3,671
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.