Skip to content

Commit

Permalink
Added country to import script.
Browse files Browse the repository at this point in the history
  • Loading branch information
endSly committed Feb 22, 2014
1 parent 943d0b5 commit f28322d
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 28 deletions.
75 changes: 48 additions & 27 deletions importer/import.js
Original file line number Diff line number Diff line change
@@ -1,46 +1,67 @@
var jsdom = require("jsdom")
, async = require("async")
, csv = require("csv");
, csv = require("csv")
, countries = require('country-data').countries;

// Number of links found on the page most recently processed by readPage.
// readPage resets it to 0 and increments it per link; loadList reads it to
// accumulate totals and to decide when paging should stop.
var count = 0;

function readPage(page, output, cb) {
// Text of the first link on the previously processed page. readPage compares
// the first link of each new page against it and writes nothing when they match.
var lastPage;

/**
 * Loads one search-result page and reports every link found under `ol li a`.
 *
 * Shared state: resets the module-level `count` to 0 and then increments it
 * once per reported link, and stores the text of the page's first link in the
 * module-level `lastPage`. When a page starts with the same link text as the
 * previous one, it is treated as a repeat: nothing is written and `count`
 * stays 0.
 *
 * @param {string} page - URL of the result page to load.
 * @param {function(string, string)} write - Called with (name, href) for each link.
 * @param {function(Error=)} cb - Called exactly once when the page is finished.
 *   Receives an Error when jsdom did not deliver a usable window.
 */
function readPage(page, write, cb) {
  jsdom.env(page, ["http://code.jquery.com/jquery.js"], function (err, window) {
    // Reset before anything else so a failed or repeated page reports zero links.
    count = 0;

    // Only an unusable window is fatal. If jsdom reports errors but still hands
    // over a working window, the page is processed as before.
    if (!window || typeof window.$ !== 'function') {
      var failure = err instanceof Error
        ? err
        : new Error("Could not load " + page + (err ? ": " + err : ""));
      return cb(failure);
    }

    var links = window.$('ol li a');
    var firstItem = links[0];
    if (firstItem) {
      var currentPage = firstItem.innerHTML;
      if (currentPage === lastPage) {
        return cb();
      }
      lastPage = currentPage;
    }

    links.each(function (i, el) {
      // Rows leave this function only through `write`, so the caller decides
      // the row layout and each link is emitted once.
      write(el.innerHTML, window.$(el).attr('href'));
      ++count;
    });
    cb();
  });
}

function loadList(dom) {
return function (cb) {
var start = 1;
var output = csv().to(dom + ".csv");
process.stdout.write("["+dom+"] ");
async.doUntil(function(cb) {
var page = "http://univ.cc/search.php?dom=" + dom + "&key=&start=" + start;
readPage(page, output, cb);

}, function() {
start += 50;
process.stdout.write('.');
return count == 0;

}, function () {
output.end();
process.stdout.write('\n');
cb();
});
};
// Single CSV sink for the whole run, targeting "world-universities.csv".
// loadList writes [country, name, url] rows to it; the final callback of the
// eachSeries loop ends it.
var output = csv().to("world-universities.csv");

/**
 * Walks the univ.cc search results for one domain page by page and appends
 * every entry to the shared CSV `output` as a [country, name, url] row.
 *
 * Paging advances the `start` query parameter by PAGE_SIZE after each page and
 * stops once a page yields fewer than PAGE_SIZE entries, as reported by the
 * module-level `count` that readPage maintains. Progress is printed to stdout:
 * the country tag, one dot per page, then the number of rows written.
 *
 * @param {string} dom - Value for the `dom` query parameter (e.g. "edu", "es").
 * @param {string} country - Country code stored in the first CSV column.
 * @param {function(Error=)} cb - Called once when paging ends. Receives the
 *   error that stopped paging, if any.
 */
function loadList(dom, country, cb) {
  // Step of the `start` parameter and the size below which a page counts as the
  // last one; univ.cc appears to serve 50 entries per page (TODO confirm).
  var PAGE_SIZE = 50;
  var total = 0;
  var start = 1;
  process.stdout.write("["+country+"] ");
  async.doUntil(function (next) {
    var page = "http://univ.cc/search.php?dom=" + dom + "&key=&start=" + start;
    readPage(page, function (name, url) {
      output.write([country, name, url]);
    }, next);

  }, function () {
    start += PAGE_SIZE;
    total += count;
    process.stdout.write('.');
    return count < PAGE_SIZE;

  }, function (err) {
    process.stdout.write(total + '\n');
    // Hand a paging failure to the caller instead of reporting success.
    cb(err);
  });
}

async.series([
loadList("edu"),
loadList("world")
]);
// Driver: import the listing of every country key exposed by country-data,
// one country at a time, into the shared CSV `output`.
var countriesCodes = Object.keys(countries);

async.eachSeries(countriesCodes, function (country, cb) {
  // Only two-character keys are imported; every other key is skipped.
  if (country.length !== 2) {
    return cb();
  }

  // The US listing is requested under the "edu" domain instead of "us".
  var dom = country === "US" ? "edu" : country;
  loadList(dom.toLowerCase(), country, cb);
}, function (err) {
  // Always end the CSV stream, whether the run finished or was aborted.
  output.end();
  if (err) {
    // Make an aborted import visible instead of exiting as if it had succeeded.
    console.error("Import aborted: " + (err.message || err));
    process.exitCode = 1;
  }
});



3 changes: 2 additions & 1 deletion importer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"dependencies" : {
"jsdom" : "0.10.1",
"async" : "*",
"csv" : "*"
"csv" : "*",
"country-data" : "*"
},
"license": "MIT",
"engines": {
Expand Down

0 comments on commit f28322d

Please sign in to comment.