@@ -11,74 +11,66 @@ Schema = mongoose.Schema
11
11
path = require " path"
12
12
fs = require " fs"
13
13
csv = require " fast-csv"
14
+ async = require " async"
14
15
15
16
{MongoClient } = require " mongodb"
16
17
17
- # {
18
- # "_id" : ObjectId("526c87bd11ba6145572c3475"),
19
- # "Name" : "周晓晨",
20
- # "CardNo" : "",
21
- # "Descriot" : "",
22
- # "CtfTp" :
23
- # "ID",
24
- # "CtfId" : NumberLong("370682199312040223"),
25
- # "Gender" : "F",
26
- # "Birthday" : NumberLong(19931204),
27
- # "Address" : "",
28
- # "Zip" : "",
29
- # "Dirty" : "F",
30
- # "District1" : "",
31
- # "District2" : "CHN",
32
- # "District3" : 37,
33
- # "District4" : 370682,
34
- # "District5" : "",
35
- # "District6" : "",
36
- # "FirstNm" : "",
37
- # "LastNm" : "",
38
- # "Duty" : "",
39
- # "Mobile" : "",
40
- # "Tel" : "",
41
- # "Fax" : "",
42
- # "EMail" : "",
43
- # "Nation" : "",
44
- # "Taste" : "",
45
- # "Education" : "",
46
- # "Company" : "",
47
- # "CTel" : "",
48
- # "CAddress" : "",
49
- # "CZip" : "",
50
- # "Family" : 0,
51
- # "Version" : "2012-12-16 7:43:30",
52
- # "id" : NumberLong(19720186)
53
- # }
54
-
55
-
56
18
DC_CITYS = [" 北京" ," 南京" ," 上海" ," 汉口" ," 青岛" ," 大连" ," 沈阳" ," 哈尔" ," 西安" ," 天津" ," 重庆" , " 广州" ," 深圳" ," 香港" ," 台湾" ," 澳门" ]
57
19
20
+ COUNT_CSV = 0
21
+ COUNT_INSERTION = 0
22
+ CSV_FILES = []
23
+ CUR_CSV_FILE = null
24
+ DB_COLLECTION = null
58
25
59
26
console .log " [mongo-performance::init] %j" , process .argv
60
27
# pathToCSVFile = path.join __dirname, process.argv[2]
61
28
pathToCSVFile = process .argv [2 ]
62
29
unless fs .existsSync (pathToCSVFile)
63
- console .log " [mongo-performance::init] missing csv sample at:#{ pathToCSVFile} "
30
+ console .log " [mongo-performance::init] missing csv folder at:#{ pathToCSVFile} "
64
31
process .exit (1 )
65
32
33
+
34
+ try
35
+ files = fs .readdirSync pathToCSVFile
36
+ # console.log "[mongo-performance::file] #{files}"
37
+
38
+ for file in files
39
+ # console.log "[mongo-performance::file] #{path.extname(file)}"
40
+ if path .extname (file) is " .csv"
41
+ CSV_FILES .push (path .join (pathToCSVFile, file))
42
+
43
+
44
+ unless CSV_FILES .length > 0
45
+ console .log " [mongo-performance::init] missing csv files at:#{ pathToCSVFile} "
46
+ process .exit (1 )
47
+
48
+ console .log " [mongo-performance::init] CSV files to process:#{ CSV_FILES} "
49
+
66
50
MongoClient .connect " mongodb://127.0.0.1:27017/kf" , (err , db ) ->
67
51
throw err if err
68
- # collection = db.collection("test_insert")
69
- collection = db .collection (" members" )
52
+ # DB_COLLECTION = db.collection("test_insert")
53
+ DB_COLLECTION = db .collection (" members" )
70
54
71
55
console .log " [mongo-performance::init] db is ready"
56
+ async .eachSeries CSV_FILES, parseCSV, (err )->
57
+ if err?
58
+ console .log " [mongo-performance::each csv] error:#{ err} "
59
+ else
60
+ console .log " DONE [mongo-performance::each csv] ALL DONE! csv entry:#{ COUNT_CSV} , db etnry:#{ COUNT_INSERTION} "
72
61
73
- count = 0
74
- countInsert = 0
62
+ parseCSV = (filepath , next )->
75
63
76
64
# init a csv parsing job
77
- job = csv pathToCSVFile ,
65
+ job = csv filepath ,
78
66
headers : true
79
67
68
+ countRead = 0
69
+ countInsert = 0
70
+
80
71
job .on " data" , (data ) ->
81
- ++ count
72
+ ++ COUNT_CSV
73
+ ++ countRead
82
74
delete data[" id" ]
83
75
delete data[" Version" ]
84
76
delete data[" Taste" ]
@@ -112,13 +104,14 @@ MongoClient.connect "mongodb://127.0.0.1:27017/kf", (err, db) ->
112
104
# console.log "[mongo-performance::csv::ondata] province:#{province}, address:#{address}"
113
105
data[" province" ] = province if province?
114
106
115
- console .log " [mongo-performance::csv::ondata] %j \n count:%d" , data, count
116
- collection .insert data, (err , docs ) ->
107
+ # console.log "[mongo-performance::csv::ondata] %j \n count:%d", data, count
108
+ DB_COLLECTION .insert data, (err , docs ) ->
117
109
if err?
118
110
console .log " [mongo-performance::db::insert] #{ err} "
119
111
else
112
+ ++ COUNT_INSERTION
120
113
++ countInsert
121
- console .log " [mongo-performance::db::insert] succeed:%j \n insert count:%d " , docs, countInsert
114
+ console .log " [mongo-performance::db::insert] succeed. ALL csv: #{ COUNT_CSV } , insertion: #{ COUNT_INSERTION } , CUR: csv: #{ countRead } , insert: #{ countInsert} , from: #{ filepath } "
122
115
123
116
return
124
117
@@ -129,12 +122,14 @@ MongoClient.connect "mongodb://127.0.0.1:27017/kf", (err, db) ->
129
122
130
123
job .on " end" , ->
131
124
job .removeAllListeners ()
132
- console .log " [mongo-performance::csv::on end] processed %d records " , count
125
+ console .log " [mongo-performance::csv::on end] complete #{ filepath } "
133
126
# db.close()
134
127
# process.exit(0)
128
+ next ()
135
129
return
136
130
137
131
job .parse ()
132
+ return
138
133
139
134
140
135
0 commit comments