Skip to content

Commit

Permalink
Merge pull request #7 from DoubleShift/master
Browse files (browse the repository at this point in the history)
Fix garbled text in rssspider output; fixes #6
  • Loading branch information
流星狂飙 committed Oct 26, 2015
2 parents f048f7b + 5b86863 commit 30180f8
Showing 1 changed file with 70 additions and 18 deletions.
88 changes: 70 additions & 18 deletions lib/rss.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ FeedParser = require('feedparser'),
_ = require('lodash'),
request = require('request'),
read = require('node-readability'),
iconv = require('iconv-lite'),
es = require('event-stream'),
postOptions = ['title', 'description', 'summary', 'date', 'link',
'guid', 'author', 'comments', 'origlink', 'image', 'source', 'categories', 'enclosures'],
siteInfoOption = ['title', 'description', 'date', 'link', 'xmlurl', 'author', 'favicon', 'copyright', 'generator', 'image'];
Expand All @@ -25,38 +27,71 @@ function fetchRss(url, options) {
options = options || postOptions;

return new Promise(function (resolve, reject) {
var posts;
var posts,encoding;
var req = request(url, {timeout: 10000, pool: false});
req.setMaxListeners(50);
req.setHeader('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36')
req.setHeader('accept', 'text/html,application/xhtml+xml');

var feedparser = new FeedParser();

req.on('error', reject);

req.on('response', function (res) {
var stream = this;
posts = [];
if (res.statusCode !== 200) {
return this.emit('error', new Error('Bad status code'));
var stream = this;
posts = [];

if (res.statusCode !== 200) {
return this.emit('error', new Error('Bad status code'));
}


}).pipe(es.through(function(data) {

//get charset from <?xml version="1.0" encoding="gb2312"?><rss version="2.0">
//then convert gb2312,gbk,big5 etc to utf-8

var result = data.toString('utf-8');

var meta = result.match(/<\?(.*?)\?>/g);
if(meta !== null){
meta = meta[0].toString().match(/encoding="(.*?)"\?>/g);
encoding = meta.toString().split('"')[1];
}
//charset = getParams(res.headers['content-type'] || '').charset;
stream.pipe(feedparser);
});

//iconv-lite , which can support windows
result = iconv.decode(data,encoding);
this.emit('data', result);
})).pipe(feedparser);



feedparser.on('error', reject);

feedparser.on('end', function (err) {
if (err) {
reject(err);
}
resolve(posts);
if (err) {
reject(err);
}

resolve(posts);

});

feedparser.on('readable', function () {
while (post = this.read()) {
var post = _.pick(post, options);
posts.push(post);//添加到数组
}

while (post = this.read()) {
var post = _.pick(post, options);

posts.push(post);
}


});
});
}



/**
* get website info
* @param url
Expand All @@ -80,8 +115,25 @@ function siteInfo(url, options) {
return this.emit('error', new Error('Bad status code'));
}
//charset = getParams(res.headers['content-type'] || '').charset;
stream.pipe(feedparser);
});
// stream.pipe(feedparser);
}).pipe(es.through(function(data) {

//get charset from <?xml version="1.0" encoding="gb2312"?><rss version="2.0">
//then convert gb2312,gbk,big5 etc to utf-8

var result = data.toString('utf-8');

var meta = result.match(/<\?(.*?)\?>/g);
if(meta !== null){
meta = meta[0].toString().match(/encoding="(.*?)"\?>/g);
encoding = meta.toString().split('"')[1];
}

//iconv-lite , which can support windows
result = iconv.decode(data,encoding);
this.emit('data', result);
})).pipe(feedparser);

feedparser.on('error', reject);
feedparser.on('end', function (err) {
if (err) {
Expand Down

0 comments on commit 30180f8

Please sign in to comment.