-
Notifications
You must be signed in to change notification settings - Fork 0
/
cidades-brasileiras.py
36 lines (27 loc) · 1.1 KB
/
cidades-brasileiras.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
"""Getting Brazilian cities and their respective state through web scraping.

This script builds a csv file and prints data on the screen.
I cannot guarantee the data is updated. Currently the script brings
a total of 5531 cities.
Source of data: https://www.globo.com/
"""
import csv
import string

import requests
from bs4 import BeautifulSoup

__author__ = "Alberto Kato"

# One index page per initial letter of the city name.
BASE_URL = "http://g1.globo.com/cidade/indice/{}.html"
OUTPUT_FILE = "cidades-brasileiras.csv"


def main():
    """Scrape city/state pairs from the g1 index pages and write them to CSV.

    Writes one ``cidade,estado`` row per city found and echoes each row to
    stdout with a running counter. Raises ``requests.RequestException`` if a
    page cannot be fetched.
    """
    count = 1
    # Context manager guarantees the file is closed (and buffers flushed)
    # even if a request fails mid-scrape; newline="" is the csv-module
    # requirement to avoid blank lines on Windows.
    with open(OUTPUT_FILE, "w", encoding="utf-8", newline="") as f:
        # csv.writer handles quoting, so names containing commas or quotes
        # can no longer corrupt the file; lineterminator="\n" keeps the
        # original output format.
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["cidade", "estado"])
        for letter in string.ascii_lowercase:
            # timeout prevents the script from hanging forever on a
            # stalled connection.
            page = requests.get(BASE_URL.format(letter), timeout=30).content
            soup = BeautifulSoup(page, "html.parser")
            # find_all returns [] when a letter has no cities, so no
            # emptiness guard is needed.
            for city in soup.find_all("h3", class_="glb-index-item-text"):
                # First child holds the city name, second the state
                # abbreviation — per the page markup this script targets.
                city_name = city.contents[0].text.strip()
                state = city.contents[1].text.strip()
                writer.writerow([city_name, state])
                print(count, city_name, state)
                count += 1


if __name__ == "__main__":
    main()