node 爬虫

2021-05-19 21:58:40 作者：互联网

node 爬虫

1:安装 request cheerio

2：cheerio 与jquery 使用基本上一样

// 爬虫
// 1：先有结构  ---> 获取 html 结构 ---> request
// 2: 获取结构中内容  ---> cheerio ---> 与jq 一致
// 3：将内容进行数据重组  

var request = require('request');
var cheeio = require('cheerio');

// Fetch the Jianshu collection page and print one record per list entry.
// Callback arguments: err (request failure, if any), respose (the HTTP
// response object), body (the response payload, parsed below as HTML).
request('https://www.jianshu.com/c/baff8cd74f8f',function(err,respose,body){
   // Stop on a failed request: there is no usable HTML to parse.
   if(err){
       console.error(err);
       return;
   }
   // Stop on a non-2xx reply: an error page would parse "successfully"
   // and only produce a misleading empty result.
   if(!respose || respose.statusCode < 200 || respose.statusCode >= 300){
       console.error('Unexpected HTTP status: ' + (respose && respose.statusCode));
       return;
   }

   var arr = [];// restructured data: one object per list entry
   var $ = cheeio.load(body);
   $('.note-list li').each(function(){
      var item = $(this);// wrap the current <li> once and reuse it
      var title = item.find('.title').text();
      var abstract = item.find('.abstract').text();
      var imgSrc = item.find('.wrap-img img').attr('src');
      // Entries without a cover image have no src; normalise to ''.
      if(imgSrc===undefined){
          imgSrc = '';
      }
      arr.push({
          title:title,
          abstract:abstract,
          imgSrc:imgSrc
      });
   });

   console.log(arr);

});

// 重组数据结构：
// 1：相同结构放在数组中；
// 2：不同属性；用对象方式。

// 这里爬虫获取的是初始化的数据（同步数据），不能获取异步的数据

// 爬取数据后，再进一步处理：增删改查等操作

标签：node,title,abstract,request,爬虫,imgSrc,var
来源： https://blog.csdn.net/TZOF_/article/details/117047446