其他分享
首页 > 其他分享 > 通过Jsoup,爬取车辆品牌,车系,LOGO等

通过Jsoup,爬取车辆品牌,车系,LOGO等

作者:互联网

    /**
     * Crawls the autohome.com.cn brand index pages for the letters A-Z and prints, for every
     * {@code <dl>} brand entry: the logo URL, the main brand name, each sub-brand heading and the
     * list of series names found under that sub-brand.
     *
     * <p>For each letter, {@code FileUtil.mkdir} is called on {@code e://pinpai//<letter>}; the
     * logo download that would use that directory is left disabled, as is the persistence call.
     *
     * <p>Entries that do not have the expected structure (a {@code dt} with an {@code img} and at
     * least two {@code a} tags, plus a {@code dd}) are skipped instead of aborting the whole crawl.
     *
     * @throws IOException if a page cannot be fetched or parsed
     */
    @Test
    public void test4() throws IOException {
        for (char letter = 'A'; letter <= 'Z'; letter++) {
            String value = String.valueOf(letter);

            FileUtil.mkdir("e://pinpai//" + value);

            System.out.println("***********************" + value);
            String url = "https://www.autohome.com.cn/grade/carhtml/" + value + ".html";
            // Second argument is the fetch timeout in milliseconds.
            Document document = Jsoup.parse(new URL(url), 300000);

            for (Element brandEntry : document.getElementsByTag("dl")) {
                List<Element> headers = brandEntry.getElementsByTag("dt");
                List<Element> bodies = brandEntry.getElementsByTag("dd");
                if (headers.isEmpty() || bodies.isEmpty()) {
                    // Not a brand entry in the expected shape; skip rather than fail the crawl.
                    continue;
                }

                Element header = headers.get(0);
                List<Element> logos = header.getElementsByTag("img");
                List<Element> links = header.getElementsByTag("a");
                if (logos.isEmpty() || links.size() < 2) {
                    continue;
                }

                // NOTE(review): presumably the src attribute is protocol-relative ("//host/..."),
                // which is why the scheme is prepended here - confirm against the live page.
                String imgUrl = "https:" + logos.get(0).attr("src");
                // The second link in the header carries the brand name text.
                String mainBrand = links.get(1).text();

                // Logo image URL
                System.out.println(imgUrl);
                //System.out.println(HttpUtil.downloadFile(imgUrl, FileUtil.file("e://pinpai//" + value)));
                // Main brand
                System.out.println(mainBrand);

                // Sub-brands: every element with class "h3-tit" inside the first <dd>.
                // (No tagName("a") call here: in Jsoup that renames the matched elements
                // instead of filtering them.)
                for (Element subBrand : bodies.get(0).getElementsByClass("h3-tit")) {
                    System.out.println(subBrand.text());

                    // Series: the <h4> texts inside the element that follows the sub-brand heading.
                    // nextElementSibling() returns null when the heading is the last child.
                    Element seriesContainer = subBrand.nextElementSibling();
                    List<String> seriesBrand = seriesContainer == null
                            ? java.util.Collections.<String>emptyList()
                            : seriesContainer.getElementsByTag("h4").stream()
                                    .map(Element::text)
                                    .collect(Collectors.toList());
                    System.out.println(seriesBrand);

                    //vehicleBrandService.save(VehicleBrand.builder().flag(value).logoBrand(imgUrl).mainBrand(mainBrand).subBrands(subBrand.text()).seriesBrand(JSON.toJSONString(seriesBrand)).build());
                }
                System.out.println("-----------");
            }
        }
    }

 

标签:element1,getElementsByTag,get,System,爬取,Jsoup,println,LOGO,out
来源: https://www.cnblogs.com/luliang888/p/14034489.html