Skip to content

Commit

Permalink
20150315
Browse files Browse the repository at this point in the history
  • Loading branch information
hyq1860 committed Mar 15, 2015
1 parent 5e3abb7 commit 068557c
Show file tree
Hide file tree
Showing 12 changed files with 273 additions and 104 deletions.
3 changes: 2 additions & 1 deletion NightmareAllInOne/NightmareAllInOne.njsproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>505f1567-3092-4dba-bebd-8880128e289c</ProjectGuid>
<ProjectHome>.</ProjectHome>
<StartupFile>jdgatherproduct.js</StartupFile>
<StartupFile>jdlistinfo.js</StartupFile>
<StartWebBrowser>False</StartWebBrowser>
<SearchPath>
</SearchPath>
Expand Down Expand Up @@ -50,6 +50,7 @@
<Compile Include="jdd.js" />
<Compile Include="jdc.js" />
<Compile Include="app3.js" />
<Compile Include="jdlistinfo.js" />
<Compile Include="jdlogin.js" />
<Compile Include="jicheng.js" />
<Compile Include="log4js.js" />
Expand Down
77 changes: 60 additions & 17 deletions NightmareAllInOne/daemon.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ function Deamon(modulePath, args, options) {

//子进程发送的时间戳
this._timestamp = null;
//子进程发送的进程id
this._pid = 0;
//检查心跳失败次数
this._fail = 0;
this.init();
Expand All @@ -57,7 +59,7 @@ Deamon.prototype= {


(function run() {
debug("进程准备启动");
//debug("进程准备启动");
self._cp = child_process.fork(self._modulePath, self._args, self._option);
self._cpid = self._cp.pid;

Expand All @@ -77,26 +79,30 @@ Deamon.prototype= {
self._cp = null;
self._cpid = 0;
}
} else{
run();
} else {
setTimeout(run, 5000);
//run();
}
});

self._cp.on("close", function (code, signal) {
debug("进程close");
//debug("进程close");
});

self._cp.on("message", function (message) {
debug("message:"+message.Timestamp);
//debug("message:"+message.Timestamp);
//心跳
if (message.Timestamp) {
//self._heartbeat=
debug("on message:" + message.Timestamp);
console.log(message.Timestamp);
debug("时间:" + new Date(message.Timestamp));

//debug("时间:" + new Date(message.Timestamp));
//时间戳
self._timestamp = new Date(message.Timestamp);


self._pid = message.PhantomjdPid || 0;
//子进程id
debug("紫荆城id:" + self._pid);
debug(self._timestamp == null);
}
});
Expand All @@ -112,9 +118,21 @@ Deamon.prototype= {
debug("deamon stop");
this._kill = true;
this._cp.disconnect();

this._cp.kill('SIGQUIT');
child_process.exec('taskkill /IM phantomjs.exe /f /t', function (err, stdout, stderr) {
if (err) {
debug("杀死进程haha" + err);
} else {
debug("杀死进程成功");
}
});




//child_process.spawn("taskkill", ["/im", 'phantomjs.exe', '/f', '/t']);
//child_process.spawn("taskkill", ["/pid", this._pid, '/f', '/t']);
this._cp = null;
this._cpid = 0;
}
Expand All @@ -126,7 +144,31 @@ Deamon.prototype= {
debug("deamon force stop");
this._cp.kill('SIGKILL');
this._kill = true;
child_process.exec("kill -9" + this._cpid);
child_process.exec('taskkill /IM phantomjs.exe /f /t', function (err, stdout, stderr) {
if (err) {
debug("杀死进程失败" + err);
} else {
debug("杀死进程成功");
}
});

/*
child_process.exec("kill -9" + this._cpid,function(err, stdout, stderr) {
if (err) {
debug("杀死进程" + err);
} else {
debug("杀死进程成功");
}
});
*/
//



//child_process.exec("taskkill /IM phantomjs.exe /f /t");
//var result = child_process.spawn("taskkill", ["/im", 'phantomjs.exe', '/f', '/t']);
//console.log(result);
//child_process.spawn("taskkill", ["/pid", this._pid, '/f', '/t']);
this._cp = null;
this._cpid = 0;
}
Expand Down Expand Up @@ -162,18 +204,19 @@ Deamon.prototype= {
*/

debug("检查心跳:" + deamon._cpid);
var flag = deamon._timestamp != null && deamon._timestamp.dateDiff('s', new Date()) > 30;
//debug("检查心跳:" + deamon._cpid);
var flag = deamon._timestamp != null && deamon._timestamp.dateDiff('s', new Date()) > 20;
if (deamon._timestamp == null) {
deamon._fail++;
if (deamon._fail > 5) {
if (deamon._fail > 4) {
flag = true;
}
}
if (flag) {
console.log("子程序心跳异常");
debug("子程序心跳异常");

deamon.stopHeartbeat();
//deamon.stop();
deamon.forceStop();
setTimeout(function () {
console.log("deamon.init()");
Expand All @@ -182,15 +225,15 @@ Deamon.prototype= {

} else {
if (deamon._timestamp != null) {
debug("时间间隔:" + deamon._timestamp.dateDiff('s', new Date()));
//debug("时间间隔:" + deamon._timestamp.dateDiff('s', new Date()));
}
debug("心跳检查失败次数:"+ deamon._fail);
//debug("心跳检查失败次数:"+ deamon._fail);
//debug("检查心跳deamon._timestamp:" + deamon._timestamp);
}
}

//Run the heartbeat check every 10 seconds (the setInterval delay below is 10000 ms)
deamon._heartbeat = setInterval(checkDeamon, 4000);
deamon._heartbeat = setInterval(checkDeamon, 10000);
},
//停止心跳
stopHeartbeat:function() {
Expand Down
170 changes: 87 additions & 83 deletions NightmareAllInOne/jdgatherproduct.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//http://git.oschina.net/dreamidea/neocrawler
var dao = require('./mysqldao');
var logger = require('./log4js').logger('jdgather');

var statrDate = new Date();
var moment = require('moment');
var debug = require('debug')('spider');
var Nightmare = require('nightmare');
Expand All @@ -27,7 +27,7 @@ var underscore = require("underscore")._;

dao.getCategory().then(function (data) {
debug("data.length:"+ data.length);
underscore.each(data, function (item, index) {
underscore.each(data, function(item, index) {
debug("item:" + item);
//console.log(index + ":" + item);
//var total = parseInt(item.PageInfo);
Expand All @@ -38,95 +38,99 @@ dao.getCategory().then(function (data) {
for (var i = startPageIndex; i <= item.PageInfo; i++) {
myScrape
.goto(item.ItemUrl + "?page=" + i)
.wait(Math.random()*2000)
.evaluate(function (params, pageIndex) {
var flag = false;
var items = $('.gl-item');
if (items.length == 0) {
items = $('.list-h > li'); //http://list.jd.com/list.html?cat=737,1277,3979 普通的
}
if (items.length == 0) {
items = $('#plist > .item'); //http://list.jd.com/1713-3295-6954.html 书籍的
flag = true;
}
var data = [];
items.each(function (index, object) {
var imageUrl = "";
var productName = "";
var price = "";
var remark = "";
var sku = "";
var self = $(object);
//图片
var ahref = self.find('.p-img').find('a');
//名称
var name = self.find('.p-name').eq(0).find('a').find('em');
//价格
var priceDom = self.find('.p-price');
//sku
sku = priceDom.attr("data-sku");
var url = ahref.attr('href');
productName = name.text();

price = priceDom.find('strong').eq(0).text().replace('¥', '');
//imageUrl = self.find('.p-img').find('a').find('img')[0].outerHTML;
//NOTE: images are lazy-loaded, so src was not available here; read the data-lazy-img attribute instead
imageUrl = self.find('.p-img').find('a').find('img').attr("data-lazy-img");
if (flag) {
remark = self.find('.summary-grade').find('a').text().replace('(已有', '').replace('评价)', '');
} else {
remark = self.find('.extra').find('a').text().replace('(已有', '').replace('评价)', '');
.wait(Math.random() * 3000)
.evaluate(function(params, pageIndex) {
var flag = false;
var items = $('.gl-item');
if (items.length == 0) {
items = $('.list-h > li'); //http://list.jd.com/list.html?cat=737,1277,3979 普通的
}
if (remark == "") {
remark = self.find(".p-commit").find('a').text();
if (items.length == 0) {
items = $('#plist > .item'); //http://list.jd.com/1713-3295-6954.html 书籍的
flag = true;
}

data.push({ name: productName, price: price, remark: remark, sku: sku, img: imageUrl, url: url });
});

return { parent: params, pageIndex: pageIndex, data: data };
}, function (result) {
//console.log(result.parent);
//console.log(result.pageIndex + "数据");
if (result != null && result.data != null && result.data.length > 0) {
//logger.info("url:"+ result.data[0].url+"\n"+"数据length:"+ result.data.length);
}
var data = [];
items.each(function(index, object) {
var imageUrl = "";
var productName = "";
var price = "";
var remark = "";
var sku = "";
var self = $(object);
//图片
var ahref = self.find('.p-img').find('a');
//名称
var name = self.find('.p-name').eq(0).find('a').find('em');
//价格
var priceDom = self.find('.p-price');
//sku
sku = priceDom.attr("data-sku");
var url = ahref.attr('href');
productName = name.text();

//发送心跳

try {
process.send({ Timestamp: new Date() });
} catch (e) {
logger.error(e);
}

var products = [];

for (var i = 0; i < result.data.length; i++) {
var item = result.data[i];

//console.log(item.price);
try {
if (item.price.indexOf('¥') > 0) {
logger.error(item);
price = priceDom.find('strong').eq(0).text().replace('¥', '');
//imageUrl = self.find('.p-img').find('a').find('img')[0].outerHTML;
//NOTE: images are lazy-loaded, so src was not available here; read the data-lazy-img attribute instead
imageUrl = self.find('.p-img').find('a').find('img').attr("data-lazy-img");
if (flag) {
remark = self.find('.summary-grade').find('a').text().replace('(已有', '').replace('评价)', '');
} else {
remark = self.find('.extra').find('a').text().replace('(已有', '').replace('评价)', '');
}
if (isNaN(parseFloat(item.price,10))) {
item.price = 0;
if (remark == "") {
remark = self.find(".p-commit").find('a').text();
}

data.push({ name: productName, price: price, remark: remark, sku: sku, img: imageUrl, url: url });
});

return { parent: params, pageIndex: pageIndex,html: document.documentElement.innerHTML, data: data };
}, function(result) {
//发送心跳
try {
process.send({ Timestamp: new Date(), PhantomjdPid: myScrape.getPhantomjsPid() });
} catch (e) {
logger.error(e);
item.price = 0;
}

//console.log(item.pageIndex);
//db.replaceIntoProductNew(db.guid(), item.sku, "jd", item.name, item.price, item.img, result.parent.Id, '', item.remark);
//dao.addProduct({ LogicId: uuid.v1(), Sku: item.sku, Source: 1, Name: item.name, Price: parseFloat(item.price), ListImage: item.img, Category: result.parent.LogicId });
//debug(item.sku+"\n"+ item.name);
products.push([uuid.v1(), item.sku, 1, item.name, item.price, moment(new Date()).format("YYYY-MM-DD HH:mm:ss"), item.img, result.parent.LogicId]);
}
dao.addProducts(products);
dao.updateJDCategory(result.parent.Id, result.pageIndex);

try {
//console.log(result.parent);
//console.log(result.pageIndex + "数据");
//if (result != null && result.data != null && result.data.length > 0) {
//logger.info("url:"+ result.data[0].url+"\n"+"数据length:"+ result.data.length);
//}

dao.saveHtml({ Url: result.parent.ItemUrl+"?page="+ result.pageIndex, Source: 1, Type: 'list', Content: result.html, InDate: moment(new Date()).format("YYYY-MM-DD HH:mm:ss") });

var products = [];

for (var i = 0; i < result.data.length; i++) {
var item = result.data[i];

//console.log(item.price);
try {
if (item.price.indexOf('¥') > 0) {
logger.error(item);
}
if (isNaN(parseFloat(item.price, 10))) {
item.price = 0;
}
} catch (e) {
logger.error(e);
item.price = 0;
}

//console.log(item.pageIndex);
//db.replaceIntoProductNew(db.guid(), item.sku, "jd", item.name, item.price, item.img, result.parent.Id, '', item.remark);
//dao.addProduct({ LogicId: uuid.v1(), Sku: item.sku, Source: 1, Name: item.name, Price: parseFloat(item.price), ListImage: item.img, Category: result.parent.LogicId });
//debug(item.sku+"\n"+ item.name);
products.push([uuid.v1(), item.sku, 1, item.name, item.price, moment(new Date()).format("YYYY-MM-DD HH:mm:ss"), item.img, result.parent.LogicId]);
}
dao.addProducts(products);
dao.updateJDCategory(result.parent.Id, result.pageIndex);
} catch (e) {
logger.error("result:"+e);
//process.send({ Timestamp: statrDate, PhantomjdPid: myScrape.getPhantomjsPid() });
}
}, item, i);
}
});
Expand Down
Loading

0 comments on commit 068557c

Please sign in to comment.