其他分享
首页 > 其他分享 > 通过 phantomjs 抓取仁医在线的练习题

通过 phantomjs抓取仁医在线的练习题

作者:互联网

先模拟登录，再按照指定课程逐个抓取题目。目前还有点不完善，会有重复题目出现。

// The single PhantomJS page object used for every request in this script.
var page = require('webpage').create();

// Send a fixed user-agent string with every request made through the page.
page.settings.userAgent = 'chrome';

// Emit this script's console output as GBK.
phantom.outputEncoding = 'gbk';

// Relay anything the loaded page prints to its own console onto ours.
page.onConsoleMessage = function relayConsoleMessage(message) {
    console.log(message);
};

/**
 * Extracts the part of targetString that lies between two delimiters.
 *
 * - A falsy targetString, or both delimiters falsy: targetString is
 *   returned unchanged.
 * - Only endString given: everything before its first occurrence.
 * - Only beginString given: everything after its first occurrence.
 * - Both given: the text after the first beginString up to the next
 *   endString that follows it.
 *
 * Not-found results differ by mode: '' when only one delimiter was
 * supplied, null when both were supplied.
 *
 * @param {string} targetString - text to search in
 * @param {string} beginString - left delimiter (falsy = start of text)
 * @param {string} endString - right delimiter (falsy = end of text)
 * @returns {?string} the extracted text, '' or null as described above
 */
function getBetween(targetString, beginString, endString) {
    // Nothing to search, or nothing to search for: hand the input back.
    if (!targetString || (!beginString && !endString)) {
        return targetString;
    }

    var startPos = 0;
    if (beginString) {
        var beginAt = targetString.indexOf(beginString);
        if (beginAt < 0) {
            // null only in two-delimiter mode, '' otherwise.
            return endString ? null : '';
        }
        startPos = beginAt + beginString.length;
    }

    if (!endString) {
        return targetString.substring(startPos);
    }

    var endAt = targetString.indexOf(endString, startPos);
    if (endAt < 0) {
        return beginString ? null : '';
    }
    return targetString.substring(startPos, endAt);
}

var fs = require('fs');

// Flat list of pairs, one pair per line: the value sent as the q_id query
// parameter, followed by the number of questions to capture for that q_id.
var entryList = [
    2684, 230,
    2685, 145,
    2686, 235,
    2687, 237,
    2688, 224,
    2689, 117,
    2690, 120,
    2691, 79,
    2692, 80,
    2693, 40,
    2694, 70,
    2695, 80,
    2696, 40,
    2697, 38,
    2698, 90
];

// Value sent as the SelQuesetions query parameter on every practice request.
var sId = 2683;

/**
 * Returns the inner HTML of the currently loaded page's body.
 */
function readBodyHtml() {
    return page.evaluate(function() {
        return document.body.innerHTML;
    });
}

/**
 * Builds the text record for one practice question from the page HTML.
 *
 * @param {string} pageHtml - body HTML of a loaded practice page
 * @param {number} qIndex - 1-based number printed in front of the question
 * @returns {string} question title, one option per line and the answer
 *   letter, terminated by a blank line
 */
function formatQuestion(pageHtml, qIndex) {
    var answerId = getBetween(pageHtml, 'id="Hid_Answer" value="', '"');
    var questionTitle = getBetween(pageHtml, 'id="Hid_Choose" value="0">', '</div>');
    if (questionTitle) {
        questionTitle = questionTitle.trim();
    } else {
        // Dump the page so the unexpected markup can be inspected later.
        console.log("ERROR: " + pageHtml);
    }

    var answerCode = '';
    var optionList = [];
    // match() returns null when no id carries a numeric suffix; treat that
    // as "no options" instead of crashing on .length.
    var optionInfoList = pageHtml.match(/Rad_T_A_Id_\d+/g) || [];
    // Step by two: presumably every option id occurs twice in the markup
    // (once on the input, once on its label) - TODO confirm against the page.
    for (var i = 0; i < optionInfoList.length; i += 2) {
        var optionId = optionInfoList[i].replace('Rad_T_A_Id_', '');
        var optionTitle = getBetween(pageHtml, optionInfoList[i] + '">', '</label>');
        if (optionTitle) {
            optionList.push(optionTitle);
            if (answerId === optionId) {
                // The first character of the option text is used as the answer code.
                answerCode = optionTitle.charAt(0);
            }
        }
    }

    return '第' + qIndex + '题:' + questionTitle + '\n' + optionList.join('\n') + '\n' + '答案:' + answerCode + '\n\n';
}

/**
 * Posts to PracticeClear for the given entry and, five seconds later,
 * starts capturing that entry's questions from number one.
 *
 * @param {number} entryIndex - index of the (q_id, count) pair to start
 * @param {string} categoryName - name of the category that just finished,
 *   used only for the log line
 */
function startNextEntry(entryIndex, categoryName) {
    page.open('http://www.renyiwang.net/Mobile/PracticeClear.aspx?o_id=6&SelQuesetions=' + sId + '&q_id=' + entryList[entryIndex * 2], 'post', {}, function(status) {
        if (status !== 'success') {
            // Kept non-fatal: the capture goes on regardless of this status.
            console.log('PracticeClear status is ' + status + ', continuing anyway');
        }
        console.log('PracticeClear ' + categoryName);
        setTimeout(function() {
            captureQuestion(entryIndex, 0);
        }, 5000);
    });
}

/**
 * Loads one practice page, appends the question to the category's text
 * file and schedules the next capture. No de-duplication is done: if the
 * same question is served twice it is written twice.
 *
 * @param {number} entryIndex - index of the current (q_id, count) pair
 * @param {number} qIndex - how many questions of this entry were captured so far
 */
function captureQuestion(entryIndex, qIndex) {
    qIndex++;
    page.open('http://www.renyiwang.net/Mobile/Practice.aspx?o_id=6&SelQuesetions=' + sId + '&q_id=' + entryList[entryIndex * 2] + '&class=0', function(status) {
        if (status !== 'success') {
            console.log('Unable to access Practice Page, status is ' + status + '!');
            phantom.exit();
            return;
        }

        var pageHtml = readBodyHtml();
        if (!pageHtml || pageHtml.indexOf('Rad_T_A_Id') < 0) {
            console.log(pageHtml);
            console.log('fail to open pratice page!');
            phantom.exit();
            return;
        }

        var info = formatQuestion(pageHtml, qIndex);
        console.log(info);

        var categoryName = getBetween(pageHtml, '<span style="font-weight:bold;color:#808080;">', '</span>');
        if (!categoryName) {
            // Avoid writing to "null.txt": fall back to a name derived from the q_id.
            categoryName = 'unknown_' + entryList[entryIndex * 2];
        }
        // Uses the file-level fs module; 'a' appends to the file.
        fs.write('d:\\' + categoryName + '.txt', info, 'a');

        var maxCount = entryList[entryIndex * 2 + 1];
        if (qIndex >= maxCount) {
            var nextEntry = entryIndex + 1;
            if (nextEntry * 2 >= entryList.length) {
                console.log('finished!');
                phantom.exit();
            } else {
                startNextEntry(nextEntry, categoryName);
            }
        } else {
            setTimeout(function() {
                captureQuestion(entryIndex, qIndex);
            }, 1000);
        }
    });
}

// Entry point: submit the login form, wait five seconds, then start capturing
// questions from the first entry if the logged-in marker text is present.
page.open('http://www.renyiwang.net/Mobile/Login.aspx', function(status) {
    if (status !== 'success') {
        console.log('Unable to access Login Page, status is ' + status + '!');
        phantom.exit();
        return;
    }
    console.log('success to open Login Page, status is ' + status + '!');

    // Runs inside the page: fill in the credentials and press the login button.
    page.evaluate(function() {
        document.getElementById('TstNumber').value = "用户名";
        document.getElementById('TstPassword').value = "密码";
        document.getElementById('But_Login').click();
    });

    setTimeout(function() {
        var pageHtml = readBodyHtml();
        if (!pageHtml || pageHtml.indexOf("三基培训") < 0) {
            // Marker text missing: dump the page for diagnosis and stop.
            console.log(pageHtml);
            phantom.exit();
            return;
        }
        console.log('success to login');
        captureQuestion(0, 0);
    }, 5000);
});

 

标签:练习题,function,console,log,phantomjs,pageHtml,var,return,仁医
来源: https://www.cnblogs.com/lavezhang/p/16336504.html