Skip to content

Commit

Permalink
Merge pull request #8 from uupers/nodeapp
Browse files Browse the repository at this point in the history
failed to fetch info bug -> app
  • Loading branch information
emptymalei authored Feb 23, 2018
2 parents c5a405d + 3b53f03 commit 7635919
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 21 deletions.
47 changes: 27 additions & 20 deletions app/bilicrawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ const packageArray = packageId => rangeArray(packageId * 1000 + 1, (packageId +
const nowstr = () => moment().format('YYYY-MM-DD HH:mm:ss')
// mids: list of mids still waiting to be processed
const packageFetchInsertAsync = async (pid, mids) => {
const BAN_IP_SLEEP_MS = 1000 * 60 * 10; // 10min
const NORMAL_SLEEP_MS = 200; //ms
let sleepms = NORMAL_SLEEP_MS

const midSize = mids.length
let cardList = []
let loopCount = 0
Expand All @@ -43,29 +47,34 @@ const packageFetchInsertAsync = async (pid, mids) => {
// 循环两遍未结束,强行退出
if (loopCount > midSize * 2) break
let mid = mids.pop();
try {
fetchUserInfo(mid).then(rs => {
if (rs) {
const data = JSON.parse(rs).data;
data.card.mid = mid;
data.card.archive_count = data.archive_count;
data.card.ctime = nowstr()
cardList.push(data.card);
} else {
fetchUserInfo(mid).then(rs => {
if (rs) {
if (rs.indexOf('DOCTYPE html') >= 0) {
sleepms = BAN_IP_SLEEP_MS //IP进小黑屋了
mids.push(mid)
console.error(`${nowstr()} oops,你的IP进小黑屋了,爬虫程序会在半小时后继续`)
return
}
}).catch(err => {
const data = JSON.parse(rs).data;
data.card.mid = mid;
data.card.archive_count = data.archive_count;
data.card.ctime = nowstr()
cardList.push(data.card);
} else {
mids.push(mid)
console.error(`mid=${mid}`, err)
});

} catch (error) {
}
}).catch(err => {
mids.push(mid)
console.error(`mid=${mid}`, error)
console.error(`${nowstr()} mid=${mid}`, err)
});
// 这里多使用一个变量,防止在sleep过程中sleepms值发生改变
const trueSleepTime = sleepms
await sleep(trueSleepTime)
if (trueSleepTime === BAN_IP_SLEEP_MS){
break // 结束本次任务,尝试下个任务
}
await sleep(210) //ms
}
// await sleep(1000)
await sleep(5000)
if (cardList.length === midSize) {
await uploadPackageAsync(pid, cardList)
console.log(`${nowstr()} Send package ${pid}`);
Expand All @@ -85,9 +94,8 @@ const run = async () => {

const mids = packageArray(pid)
console.log(`${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`);

logit.innerHTML += `${nowstr()} Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length-1]}]`;
logit.innerHTML += "<br>";
logit.innerHTML += "<br>";

await packageFetchInsertAsync(pid, mids)
}
Expand All @@ -98,5 +106,4 @@ const run = async () => {
// start code
// run();


// Wire the page's #btn-run element so that clicking it invokes run.
document.querySelector('#btn-run').addEventListener('click', run)
2 changes: 1 addition & 1 deletion app/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7635919

Please sign in to comment.