编程语言
首页 > 编程语言 > 抓取网页内容Java实现

抓取网页内容Java实现

作者:互联网

//FetchWeb.java
package cn.campsg.java.experiment.service;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Small helper that downloads the body of a web page over HTTP(S) as text.
 */
public class FetchWeb {
    public FetchWeb(){}

    /**
     * Issues a GET request to {@code url} and returns the response body decoded as UTF-8.
     *
     * <p>The body is read line by line and every line is terminated with a single
     * {@code '\n'}, whatever line terminator the server used.
     *
     * <p>I/O failures are handled on a best-effort basis: the stack trace is printed and
     * whatever was read so far (possibly nothing) is returned. A response whose status
     * code is not 200 yields an empty string.
     *
     * @param url absolute HTTP or HTTPS URL to fetch
     * @return the response body, or an empty string if the status code is not 200 or the
     *         request fails before any line is read; never {@code null}
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public String fetchByHttp(String url) throws MalformedURLException {
        // Parse outside the try block: MalformedURLException is an IOException, so parsing
        // inside it would swallow the very exception this method declares.
        URL target = new URL(url);

        StringBuilder body = new StringBuilder();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) target.openConnection();
            connection.setRequestMethod("GET");

            if (connection.getResponseCode() == HttpURLConnection.HTTP_OK) {
                // try-with-resources closes the reader and the stream even if readLine fails.
                try (InputStream in = connection.getInputStream();
                     BufferedReader reader =
                             new BufferedReader(new InputStreamReader(in, "utf-8"))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        body.append(line).append("\n");
                    }
                }
            }
        } catch (IOException e) {
            // Best effort: report the failure and fall through to return what we have.
            e.printStackTrace();
        } finally {
            // openConnection() may have thrown before the connection was assigned.
            if (connection != null) {
                connection.disconnect();
            }
        }
        return body.toString();
    }
}

//MainClass.java
package cn.campsg.java.experiment;

import cn.campsg.java.experiment.service.FetchWeb;

import java.net.MalformedURLException;

/**
 * Command-line entry point that requests the Baidu home page through {@link FetchWeb}
 * and prints the result to standard output.
 */
public class MainClass {
    /**
     * Calls {@code fetchByHttp} for the Baidu home page and prints a label followed by
     * whatever that call returns.
     *
     * @param args command-line arguments; not used
     * @throws MalformedURLException propagated from {@code FetchWeb.fetchByHttp}
     */
    public static void main(String[] args) throws MalformedURLException {
        final FetchWeb fetcher = new FetchWeb();
        final String target = "https://www.baidu.com/";

        // Fetch first, then print, so the label and the result go out in a single call.
        final String result = fetcher.fetchByHttp(target);
        System.out.println("获取百度首页内容:\n" + result);
    }
}

标签:FetchWeb,java,String,抓取,connection,import,网页内容,Java,public
来源: https://blog.csdn.net/m0_50181189/article/details/119086655