-
Notifications
You must be signed in to change notification settings - Fork 25
/
helpers_transform_csv_into_files.py
64 lines (52 loc) · 1.76 KB
/
helpers_transform_csv_into_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import io
import os
import re
import sys

import six
# Echo the raw command-line arguments. This statement sits at module top
# level, so it runs whenever the file is executed or imported.
print(sys.argv)
class transformCSV:
    """Split a tab-separated export into one text file per data row.

    Every row after the header is written verbatim to
    ``<out_dir>/<email>.txt``, where ``<email>`` is the lower-cased field of
    that row containing an ``@``.
    """

    def __init__(self):
        print("Transform module initialized!")

    def read_input(self, csv_path=None, out_dir="source"):
        """Read the tab-separated file and write one file per data row.

        Args:
            csv_path: Path of the UTF-8 encoded, tab-separated input file.
                Defaults to ``sys.argv[1]`` when omitted.
            out_dir: Existing directory that receives the per-row files.
                Defaults to ``"source"``.

        Raises:
            IndexError: If ``csv_path`` is omitted and no command-line
                argument was given.
            IOError/OSError: If the input cannot be read or an output file
                cannot be created (e.g. ``out_dir`` does not exist).
        """
        if csv_path is None:
            csv_path = sys.argv[1]

        # Read everything up front so the input handle is closed right away.
        with io.open(csv_path, mode="r", encoding="utf-8") as infile:
            rawlines = infile.readlines()

        # The first line is the header; only the remaining rows are exported.
        for line in rawlines[1:]:
            print(line)

            # The e-mail is used as the name of the file. If several fields
            # contain an "@", the last one wins.
            email = ""
            for phrase in re.split(r'\t+', line):
                if "@" in phrase:
                    # Strip surrounding whitespace: when the e-mail is the
                    # last column it still carries the line's newline, which
                    # must not end up in the file name.
                    email = phrase.strip().lower()
            print(email)

            if not email:
                # Without an address there is no usable file name; skip the
                # row instead of piling such rows into a shared ".txt" file.
                continue

            # The file name comes from input data: neutralise path separators
            # so a field cannot direct the write outside ``out_dir``.
            filename = re.sub(r'[\\/]', "_", email) + ".txt"
            target = os.path.join(out_dir, filename)

            # ``line`` is already decoded text, so let the file object do the
            # UTF-8 encoding. Writing ``line.encode(...)`` to a text-mode file
            # raises TypeError on Python 3.
            with io.open(target, mode="w", encoding="utf-8") as outfile:
                outfile.write(line)

    def main(self):
        """Run the transformation using the command-line argument as input."""
        self.read_input()
# Script entry point: build the transformer and run it only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    transformCSV().main()