Skip to content

Commit

Permalink
爬取笔趣阁小说:宠魅
Browse files Browse the repository at this point in the history
  • Loading branch information
Balabili committed Jul 17, 2017
1 parent be6ef81 commit 8b6e435
Show file tree
Hide file tree
Showing 3 changed files with 422 additions and 0 deletions.
66 changes: 66 additions & 0 deletions app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
const http = require('http'),
fs = require('fs'),
cheerio = require('cheerio'),
iconv = require('iconv-lite'),
async = require('async'),
url = 'http://www.cangqionglongqi.com/chongmei/';
let urls = [], text = [];
fs.statSync('novel') || fs.mkdirSync('novel');
let outStream = fs.createWriteStream(__dirname + '/novel/chongmei.txt', { flags: 'r+' });

process.on('uncaughtException', function (err) {
console.log('Caught exception: ' + err);
});

function getAllTitleUrls() {
http.get(url, function (res) {
let chunks = [];
res.on('data', function (chunk) {
chunks.push(chunk);
});
res.on('end', function () {
let html = iconv.decode(Buffer.concat(chunks), 'gb2312'),
$ = cheerio.load(html, { decodeEntities: false }),
allList = $("#list dd");
for (let i = 0; i < allList.length; i++) {
urls.push(url + allList[i].childNodes[0].attribs.href);
}
async.eachLimit(urls, 1, function (url, callback) {
getText(url, callback);
}, function () {
console.log('Finish.');
});
});
});
};
function getText(url, callback) {
// 并发连接数的计数器
var concurrencyCount = 0;
// delay 的值在 2000 以内,是个随机的整数
var delay = parseInt((Math.random() * 10000000) % 2000, 10);
concurrencyCount++;
console.log('现在的并发数是', concurrencyCount, ',正在抓取的是', url, ',耗时' + delay + '毫秒');
http.get(url, function (res) {
let chunks = [];
res.on('data', function (chunk) {
chunks.push(chunk);
});
res.on('end', function () {
let html = iconv.decode(Buffer.concat(chunks), 'gb2312'),
$ = cheerio.load(html, { decodeEntities: false }),
txt = $("#content").html().replace(/\<br\>/g, '\n');
text.push(txt);
outStream.write(txt);
setTimeout(function () {
concurrencyCount--;
callback(null, url + ' html content');
}, delay);
});
});
};

function main() {
getAllTitleUrls();
}

main();
Loading

0 comments on commit 8b6e435

Please sign in to comment.