JAVA 获取cnblogs新闻标题
作者:互联网
package com.powerX;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
public class Main {
public static void main(String[] args) {
for (int i = 1; i < 101; i++) {
String html = doGet("https://news.cnblogs.com/n/page/" + i);
Elements elements = Jsoup.parse(html).getElementsByClass("news_block");
for (Element element : elements){
Element content = element.getElementsByClass("content").get(0);
Element entity = content.getElementsByClass("news_entry").get(0).getElementsByTag("a").get(0);
Element entity2 = content.getElementsByClass("entry_footer").get(0);
System.out.println(Jsoup.parse(entity2.html().split("发布于")[1]).body().getElementsByTag("span").get(0).html() + '\t' + entity.html());
}
}
}
public static String doGet(String httpurl) {
HttpURLConnection connection = null;
InputStream is = null;
BufferedReader br = null;
String result = null;// 返回结果字符串
try {
// 创建远程url连接对象
URL url = new URL(httpurl);
// 通过远程url连接对象打开一个连接,强转成httpURLConnection类
connection = (HttpURLConnection) url.openConnection();
// 设置连接方式:get
connection.setRequestMethod("GET");
// 设置连接主机服务器的超时时间:15000毫秒
connection.setConnectTimeout(15000);
// 设置读取远程返回的数据时间:60000毫秒
connection.setReadTimeout(60000);
// 发送请求
connection.connect();
// 通过connection连接,获取输入流
if (connection.getResponseCode() == 200) {
is = connection.getInputStream();
// 封装输入流is,并指定字符集
br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
// 存放数据
StringBuffer sbf = new StringBuffer();
String temp = null;
while ((temp = br.readLine()) != null) {
sbf.append(temp);
sbf.append("\r\n");
}
result = sbf.toString();
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
// 关闭资源
if (null != br) {
try {
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != is) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
connection.disconnect();// 关闭远程连接
}
return result;
}
public static String doPost(String httpUrl, String param) {
HttpURLConnection connection = null;
InputStream is = null;
OutputStream os = null;
BufferedReader br = null;
String result = null;
try {
URL url = new URL(httpUrl);
// 通过远程url连接对象打开连接
connection = (HttpURLConnection) url.openConnection();
// 设置连接请求方式
connection.setRequestMethod("POST");
// 设置连接主机服务器超时时间:15000毫秒
connection.setConnectTimeout(15000);
// 设置读取主机服务器返回数据超时时间:60000毫秒
connection.setReadTimeout(60000);
// 默认值为:false,当向远程服务器传送数据/写数据时,需要设置为true
connection.setDoOutput(true);
// 默认值为:true,当前向远程服务读取数据时,设置为true,该参数可有可无
connection.setDoInput(true);
// 设置传入参数的格式:请求参数应该是 name1=value1&name2=value2 的形式。
connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
// 设置鉴权信息:Authorization: Bearer da3efcbf-0845-4fe3-8aba-ee040be542c0
connection.setRequestProperty("Authorization", "Bearer da3efcbf-0845-4fe3-8aba-ee040be542c0");
// 通过连接对象获取一个输出流
os = connection.getOutputStream();
// 通过输出流对象将参数写出去/传输出去,它是通过字节数组写出的
os.write(param.getBytes());
// 通过连接对象获取一个输入流,向远程读取
if (connection.getResponseCode() == 200) {
is = connection.getInputStream();
// 对输入流对象进行包装:charset根据工作项目组的要求来设置
br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
StringBuffer sbf = new StringBuffer();
String temp = null;
// 循环遍历一行一行读取数据
while ((temp = br.readLine()) != null) {
sbf.append(temp);
sbf.append("\r\n");
}
result = sbf.toString();
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
// 关闭资源
if (null != br) {
try {
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != os) {
try {
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != is) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
// 断开与远程地址url的连接
connection.disconnect();
}
return result;
}
}
需要引入包 jsoup
标签:JAVA,cnblogs,新闻标题,printStackTrace,connection,br,catch,null,String 来源: https://www.cnblogs.com/sunbingqiang/p/15966177.html