笑傲Java面试:面霸修炼手册
作者:互联网
download:笑傲Java面试:面霸修炼手册
你是否正打算跳槽找工作? 希望拿到高薪? 获得心仪的Offer?那么,这门课将是你的不二之选!课程中,前阿里P8【求老仙】将结合多年面试官经验,深度剖析Java核心技能,多角度解读经典大厂面试真题,更有内功专项提升指导,改善面试思维,让你秒变offer收割机!
有Java开发经验(自学或工作中做过项目)
学过至少一门数据库(例如:MySQL)
环境参数
Java 11
IDE:IDEA
package
whu.extract.pubtime.core;
import
java.util.ArrayList;
import
java.util.Calendar;
import
java.util.Collections;
import
java.util.List;
import
java.util.regex.Matcher;
import
java.util.regex.Pattern;
import
whu.utils.TimeUtil;
/**
* Created On 2014年3月13日 下午2:49:05
* @description 获取网页的发布时间
*/
public
class
FetchPubTime {
/** 表示url中连续的8位日期,例如http://www.baidu.com/20140311/2356.html */
private
static
String url_reg_whole=
"([-|/|_]{1}20\\d{6})"
;
/** 表示 用-或者/隔开的日期,有年月日的,例如 http://www.baidu.com/2014-3-11/2356.html */
private
static
String url_reg_sep_ymd =
"([-|/|_]{1}20\\d{2}[-|/|_]{1}\\d{1,2}[-|/|_]{1}\\d{1,2})"
;
/** 表示 用-或者/隔开的日期,只有年和月份的,例如 http://www.baidu.com/2014-3/2356.html */
private
static
String url_reg_sep_ym =
"([-|/|_]{1}20\\d{2}[-|/|_]{1}\\d{1,2})"
;
private
static
Calendar current = Calendar.getInstance();
/** 格式正确的时间正则表达式*/
private
static
String rightTimeReg =
"^((\\d{2}(([02468][048])|([13579][26]))[\\-\\/\\s]?((((0?[13578])|(1[02]))[\\-\\/\\s]?((0?[1-9])|([1-2][0-9])|(3[01])))|(((0?[469])|(11))[\\-\\/\\s]?((0?[1-9])|([1-2][0-9])|(30)))|(0?2[\\-\\/\\s]?((0?[1-9])|([1-2][0-9])))))|(\\d{2}(([02468][1235679])|([13579][01345789]))[\\-\\/\\s]?((((0?[13578])|(1[02]))[\\-\\/\\s]?((0?[1-9])|([1-2][0-9])|(3[01])))|(((0?[469])|(11))[\\-\\/\\s]?((0?[1-9])|([1-2][0-9])|(30)))|(0?2[\\-\\/\\s]?((0?[1-9])|(1[0-9])|(2[0-8]))))))(\\s(((0?[0-9])|([1-2][0-3]))\\:([0-5]?[0-9])((\\s)|(\\:([0-5]?[0-9])))))?$"
;
/**
* @param url
* @param urlContent
* @return
*/
public
static
String getPubTimeVarious(String url,String urlContent) {
String pubTime = getPubTimeFromUrl(url);
//链接里面没有,匹配文本中的
if
(pubTime ==
null
)
{
if
(urlContent!=
null
&&!urlContent.trim().equals(
""
))
return
extractPageDate(urlContent);
}
return
pubTime;
}
/**从url里面抽取出发布时间,返回YYYY-MM-DD HH:mm:ss格式的字符串
* @param url
* @return
*/
public
static
String getPubTimeFromUrl(String url)
{
Pattern p_whole = Pattern.compile(url_reg_whole);
Matcher m_whole = p_whole.matcher(url);
if
(m_whole.find(
0
)&&m_whole.groupCount()>
0
)
{
String time = m_whole.group(
0
);
time = time.substring(
1
,time.length());
//每一步都不能够超出当前时间
if
(current.compareTo(TimeUtil.strToCalendar(time,
"yyyyMMdd"
))>=
0
)
{
return
time.substring(
0
,
4
)+
"-"
+time.substring(
4
,
6
)+
"-"
+
time.substring(
6
,
8
)+
" "
+
"00:00:00"
;
}
}
p_whole =
null
;
m_whole =
null
;
Pattern p_sep = Pattern.compile(url_reg_sep_ymd);
Matcher m_sep = p_sep.matcher(url);
if
(m_sep.find(
0
)&&m_sep.groupCount()>
0
)
{
String time = m_sep.group(
0
);
time = time.substring(
1
,time.length());
String[] seg = time.split(
"[-|/|_]{1}"
);
Calendar theTime = Calendar.getInstance();
theTime.set(Calendar.YEAR,Integer.parseInt(seg[
0
]));
theTime.set(Calendar.MONTH, Integer.parseInt(seg[
1
]));
theTime.set(Calendar.DAY_OF_MONTH, Integer.parseInt(seg[
2
]));
if
(current.compareTo(theTime)>=
0
)
{
return
seg[
0
]+
"-"
+seg[
1
]+
"-"
+seg[
2
]+
" "
+
"00:00:00"
;
}
}
p_sep =
null
;
m_sep =
null
;
Pattern p_sep_ym = Pattern.compile(url_reg_sep_ym);
Matcher m_sep_ym = p_sep_ym.matcher(url);
if
(m_sep_ym.find(
0
)&&m_sep_ym.groupCount()>
0
)
{
String time = m_sep_ym.group(
0
);
time = time.substring(
1
,time.length());
Calendar theTime = Calendar.getInstance();
String[] seg = time.split(
"[-|/|_]{1}"
);
theTime.set(Calendar.YEAR,Integer.parseInt(seg[
0
]));
theTime.set(Calendar.MONTH, Integer.parseInt(seg[
1
]));
theTime.set(Calendar.DAY_OF_MONTH,
1
);
if
(current.compareTo(theTime)>=
0
)
{
return
seg[
0
]+
"-"
+seg[
1
]+
"-"
+
"01"
+
" "
+
"00:00:00"
;
}
}
return
null
;
}
/** 从网页源码中取出发布时间
* java中正则表达式提取字符串中日期实现代码
* 2013年12月19日15:58:42
* 读取出2013-12-19 15:48:33或者2013-12-19或者2012/3/05形式的时间
* @param text 待提取的字符串
* @return 返回日期
* @author: oschina
* @Createtime: Jan 21, 2013
*/
public
static
String extractPageDate(String text) {
boolean
containsHMS =
false
;
String dateStr = text.replaceAll(
"r?n"
,
" "
);
try
{
List matches =
null
;
Pattern p_detail = Pattern.compile(
"(20\\d{2}[-/]\\d{1,2}[-/]\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2})|(20\\d{2}年\\d{1,2}月\\d{1,2}日)"
, Pattern.CASE_INSENSITIVE|Pattern.MULTILINE);
//如果是仅仅抽取年月日,则按照上面的,如果是抽取年月日-时分秒,则按照下面的
Pattern p = Pattern.compile(
"(20\\d{2}[-/]\\d{1,2}[-/]\\d{1,2})|(20\\d{2}年\\d{1,2}月\\d{1,2}日)"
, Pattern.CASE_INSENSITIVE|Pattern.MULTILINE);
//Matcher matcher = p.matcher(dateStr);
Matcher matcher_detail = p_detail.matcher(dateStr);
if
(!(matcher_detail.find(
0
) && matcher_detail.groupCount() >=
1
))
{
matcher_detail = p.matcher(dateStr);
containsHMS =
true
;
}
else
matcher_detail = p_detail.matcher(dateStr);
if
(matcher_detail.find() && matcher_detail.groupCount() >=
1
) {
matches =
new
ArrayList();
for
(
int
i =
1
; i <= matcher_detail.groupCount(); i++) {
String temp = matcher_detail.group(i);
matches.add(temp);
}
}
else
{
matches = Collections.EMPTY_LIST;
}
if
(matches.size() >
0
) {
for
(
int
i=
0
;i<matches.size();i++)
{
String pubTime = matches.get(i).toString().trim();
//取出第一个值
pubTime = pubTime.replace(
"/"
,
"-"
).replace(
"年"
,
"-"
).replace(
"月"
,
"-"
).replace(
"日"
,
"-"
);
if
(current.compareTo(TimeUtil.strToCalendar(pubTime,
"yyyy-MM-dd"
))>=
0
)
{
if
(containsHMS)
pubTime+=
" "
+
"00:00:00"
;
if
(pubTime.matches(rightTimeReg))
{
return
pubTime;
}
}
}
}
else
{
return
null
;
}
}
catch
(Exception e) {
return
null
;
}
return
null
;
}
}
标签:笑傲,Java,String,sep,url,matcher,Pattern,time,面霸 来源: https://blog.51cto.com/u_15283814/2949902