-
-
Notifications
You must be signed in to change notification settings - Fork 33
/
utils.py
48 lines (34 loc) · 1.23 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import io
import zipfile
from csv import Dialect
from unicodedata import normalize
from rows.fields import DateField
from tqdm import tqdm
class TSEDialect(Dialect):
    """CSV dialect to read files from Tribunal Superior Eleitoral.

    Fields are separated by semicolons and wrapped in double quotes; a
    literal double quote inside a field is written as two double quotes.
    """

    # Field and record separators.
    delimiter = ";"
    lineterminator = "\n"
    skipinitialspace = False

    # Quoting rules: embedded quotes are doubled, no escape character is used.
    quotechar = '"'
    doublequote = True
    escapechar = None
    quoting = 0  # numeric value of csv.QUOTE_MINIMAL
class PtBrDateField(DateField):
    """``DateField`` variant for dates written as day/month/year.

    Only the input pattern is overridden; everything else is inherited
    from ``rows.fields.DateField``.
    """

    # Day, month and four-digit year separated by slashes, e.g. 31/12/2018.
    INPUT_FORMAT = "%d/%m/%Y"
def unaccent(text):
    """Return *text* reduced to its plain-ASCII characters.

    The string is first decomposed with Unicode NFKD normalization, which
    splits accented letters into a base letter plus combining marks. Every
    code point that is not ASCII (the combining marks, and any character
    that has no ASCII decomposition) is then dropped.
    """
    decomposed = normalize("NFKD", text)
    # errors="ignore" silently discards whatever cannot be encoded as ASCII.
    ascii_only = decomposed.encode("ascii", errors="ignore")
    return ascii_only.decode("ascii")
def merge_zipfiles(filename1, filename2):
    """Append every member of the archive *filename2* to *filename1*.

    *filename1* is opened in append mode and modified in place; *filename2*
    is only read. Each member is read fully into memory and written to the
    first archive under the same name. Progress is reported through a
    ``tqdm`` progress bar.

    Both archives are managed by ``with`` so that they are closed even if
    reading or writing a member fails.
    """
    with zipfile.ZipFile(filename1, "a") as target, zipfile.ZipFile(
        filename2, "r"
    ) as source:
        for member in tqdm(source.namelist(), desc=" Merging zip files..."):
            # ZipFile.read opens, reads and closes the member in one call,
            # so no per-member file handle is left open.
            target.writestr(member, source.read(member))
class FixQuotes(io.TextIOWrapper):
    """Text wrapper that restores missing outer quotes on semicolon-separated lines.

    Some lines contain the quoted field separator ``";"`` but lack the
    opening quote of the first field and the closing quote of the last one.
    ``readline`` adds those two quotes back so the line can be parsed as
    fully quoted CSV.
    """

    def readline(self, *args, **kwargs):
        """Read one line and wrap it in double quotes when they are missing.

        Arguments are forwarded unchanged to ``io.TextIOWrapper.readline``.

        A line is rewritten only when, ignoring its line terminator, it
        contains ``";"`` and neither starts nor ends with a double quote.
        The original terminator (``\\r\\n``, ``\\n``, ``\\r`` or none, for a
        final line without trailing newline) is preserved. Any other line,
        including the empty string returned at end of file, is returned
        untouched.
        """
        data = super().readline(*args, **kwargs)

        # Separate the terminator from the content. It may be absent on the
        # last line of the file, so default to an empty terminator.
        newline = ""
        for terminator in ("\r\n", "\n", "\r"):
            if data.endswith(terminator):
                newline = terminator
                break
        content = data[: len(data) - len(newline)]

        # The quote checks are made on the content alone: with the
        # terminator attached, the line could never "end with a quote".
        needs_quotes = (
            '";"' in content
            and not content.startswith('"')
            and not content.endswith('"')
        )
        if needs_quotes:
            data = '"' + content + '"' + newline
        return data