POI读取Word内容及格式,看这一篇就够了!
作者:互联网
目录
效果
所有源码获取方式
扫描下方二维码关注公众号获取即可
pom依赖
<!-- Apache POI core (OLE2 formats and shared infrastructure). -->
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<!-- Hutool utility collection. -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.5.7</version>
</dependency>
<!-- OOXML (.docx/.xlsx/.pptx) support. -->
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
</dependency>
<!-- Full OOXML schema classes. The version must match the POI line in use:
     1.4 is the release paired with POI 4.x (1.1 targets POI 3.7-3.13 and can
     cause class/method mismatches at runtime when mixed with POI 4.1.2). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.2</version>
</dependency>
加载文档
InputStream is = new FileInputStream("C:\\Users\\10386\\Desktop\\word-正确文档2.docx");
XWPFDocument doc = new XWPFDocument(is);
页眉页脚
// Print the text of the document's default header and default footer.
// NOTE(review): the policy, header and footer are dereferenced without null
// checks; a document lacking either part would presumably fail here — confirm.
XWPFHeaderFooterPolicy headerFooterPolicy = doc.getHeaderFooterPolicy();
// Default header text.
String header = headerFooterPolicy.getDefaultHeader().getText();
System.out.println("***页眉 ***"+header);
// Default footer text. The footer line must print `footer`; the previous
// version printed `header` a second time.
String footer = headerFooterPolicy.getDefaultFooter().getText();
System.out.println("***页脚 ***"+footer);
页面边距
// Read the four page margins from the section properties attached to the
// document body and narrow each one to an int.
// NOTE(review): values are presumably in twips (1/20 pt), the usual OOXML
// measurement unit — confirm before converting to cm/inches.
// NOTE(review): getSectPr() and getPgMar() are dereferenced without null
// checks; a document without explicit section/margin settings may fail here.
CTDocument1 ctdoc = doc.getDocument();
int top = ctdoc.getBody().getSectPr().getPgMar().getTop().intValue();
int bottom = ctdoc.getBody().getSectPr().getPgMar().getBottom().intValue();
int left = ctdoc.getBody().getSectPr().getPgMar().getLeft().intValue();
int right = ctdoc.getBody().getSectPr().getPgMar().getRight().intValue();
获取标题
List<XWPFParagraph> paras = doc.getParagraphs(); // all paragraphs of the document body
// Treat the first paragraph as the title and describe it with getParagraph
// (a helper defined outside this snippet). paras.get(0) throws if the
// document has no paragraphs.
List<Map<String, String>> list = getParagraph(paras.get(0));
System.out.println("标题信息==="+list);
获取表格
// Dump a table: for every row print its 1-based index, and for every cell print
// whether it carries a gridSpan (horizontal merge), the span count, and the
// description of the cell's first paragraph. Finally print the row count and
// the widest row's cell count. `element` is supplied by the surrounding code.
XWPFTable table = (XWPFTable) element;
List<XWPFTableRow> xwpfTableRows = table.getRows();
int row_count = xwpfTableRows.size();
// Typed list (was a raw ArrayList) so Collections.max is checked at compile time.
ArrayList<Integer> cell_count = new ArrayList<>();
int row_index = 1;
for (XWPFTableRow xwpfTableRow : xwpfTableRows) {
    List<XWPFTableCell> xwpfTableCells = xwpfTableRow.getTableCells();
    cell_count.add(xwpfTableCells.size());
    System.out.println("第"+row_index+"行");
    int cell_index = 1;
    for (XWPFTableCell xwpfTableCell : xwpfTableCells) {
        // Is the cell merged, and across how many grid columns?
        // Cell properties are optional, so guard against a missing tcPr
        // instead of dereferencing it unconditionally.
        CTDecimalNumber cellspan = xwpfTableCell.getCTTc().getTcPr() != null
                ? xwpfTableCell.getCTTc().getTcPr().getGridSpan()
                : null;
        boolean gridspan = cellspan != null;
        String gridspan_num = gridspan ? cellspan.getVal().toString() : "0";
        List<XWPFParagraph> xwpfParagraphs = xwpfTableCell.getParagraphs();
        // Only the first paragraph of the cell is reported.
        XWPFParagraph paragraph = xwpfParagraphs.get(0);
        System.out.println("第" +cell_index+"个单元格,合并标志:"+gridspan+",合并个数:"+gridspan_num
                +"文字:"+getParagraph(paragraph));
        cell_index++;
    }
    row_index++;
}
// Collections.max throws on an empty collection; report 0 columns for a table without rows.
System.out.println("表格为:row_count==="+row_count+"行"+(cell_count.isEmpty() ? 0 : Collections.max(cell_count))+"列");
获取图片
// Print the first picture embedded in `run` (supplied by the surrounding code)
// as a Base64 string; runs without pictures print nothing.
// NOTE(review): Base64.encode is presumably Hutool's codec, not java.util.Base64 — confirm.
List<XWPFPicture> pictures = run.getEmbeddedPictures();
if (!pictures.isEmpty()) {
    XWPFPictureData pictureData = pictures.get(0).getPictureData();
    byte[] rawBytes = pictureData.getData();
    System.out.println(Base64.encode(rawBytes));
}
缩进方式计算
// Determine the indentation style first, then convert the raw values.
// Results: ind_type is "hang", "first" or "none"; ind is the hanging/first-line
// amount; ind_left / ind_right are the left and right indents.
// The getters are compared against -1 to detect "not set". A raw value that is
// a multiple of 567 is treated as centimetres, anything else as characters
// (divisors 240 and 210 are the article's per-character factors).
// All divisions use floating-point divisors; the earlier integer divisions
// silently truncated fractional indents (e.g. 1.5 characters became 1).
// NOTE(review): an unset left/right indent (presumably -1) now yields a tiny
// negative value for ind_left, exactly as ind_right already did — confirm
// whether callers want it clamped to 0.
double ind = -1, ind_left = -1, ind_right = -1, ind_hang = -1;
String ind_type = "";
if (para.getIndentationHanging() != -1) { // hanging indent
    ind_type = "hang";
    if (para.getIndentationHanging() % 567 == 0) { // hanging amount given in centimetres
        ind = para.getIndentationHanging() / 567.0;
        ind_left = (para.getIndentationLeft() - 567.0 * ind) / 210.0;
    } else { // hanging amount given in characters
        ind = para.getIndentationHanging() / 240.0;
        ind_left = (para.getIndentationLeft() - para.getIndentationHanging()) / 210.0;
    }
    ind_right = para.getIndentationRight() / 210.0;
} else { // first-line indent, or no indent at all
    ind_type = "first";
    if (para.getFirstLineIndent() == -1) {
        ind_type = "none";
        ind = 0;
    } else {
        ind = para.getFirstLineIndent() % 567.0 == 0 ? para.getFirstLineIndent() / 567.0 : para.getFirstLineIndent() / 240.0;
    }
    ind_left = para.getIndentationLeft() / 210.0;
    ind_right = para.getIndentationRight() / 210.0;
}
段落格式
// Collect the paragraph-level formatting of the document's second paragraph
// into the `titile` map (keys are the property names, values their string form).
List<XWPFParagraph> paras = doc.getParagraphs(); // all paragraphs of the document body
XWPFParagraph para = paras.get(1);
List<XWPFRun> runsLists = para.getRuns();
List<Map<String, String>> list = new ArrayList<>();
Map<String, String> titile = new HashMap<>();

// Whole text of the paragraph and its alignment.
titile.put("Text", para.getText());
titile.put("Alignment", para.getAlignment().toString());

// Line spacing, then spacing before / after the paragraph (in lines).
titile.put("SpacingBetween", String.valueOf(para.getSpacingBetween()));
titile.put("SpacingBeforeLines", String.valueOf(para.getSpacingBeforeLines()));
titile.put("SpacingAfterLines", String.valueOf(para.getSpacingAfterLines()));

// Auto-numbering format; a missing value is stored as the text "null".
titile.put("NumLevelText", String.valueOf(para.getNumLevelText()));
文字属性
// For every run of `para`: print the first embedded picture (if any) as Base64,
// then record the run's character formatting in a map and append it to `list`.
// `para` and `list` come from the surrounding code.
// Fixes: the statement terminator after getRuns() and the closing brace of the
// loop were missing, so the snippet did not compile.
List<XWPFRun> runsLists = para.getRuns();
for (XWPFRun run : runsLists) {
    List<XWPFPicture> pictures = run.getEmbeddedPictures();
    if (!pictures.isEmpty()) {
        // Only the first picture of the run is printed.
        XWPFPictureData pictureData = pictures.get(0).getPictureData();
        System.out.println(Base64.encode(pictureData.getData()));
    }

    Map<String, String> titile_map = new HashMap<>();
    titile_map.put("content", run.getText(0));
    titile_map.put("Bold", Boolean.toString(run.isBold()));                           // bold
    titile_map.put("Color", run.getColor());                                          // font colour
    titile_map.put("FontFamily", run.getFontFamily(XWPFRun.FontCharRange.hAnsi));     // font (hAnsi range)
    titile_map.put("FontName", run.getFontName());                                    // font name
    titile_map.put("FontSize", String.valueOf(run.getFontSize()));                    // font size
    titile_map.put("Underline", run.getUnderline().name());                           // underline pattern
    titile_map.put("UnderlineColor", run.getUnderlineColor());                        // underline colour
    titile_map.put("Italic", Boolean.toString(run.isItalic()));                       // italic
    list.add(titile_map);
}
标签:Word,String,para,就够,put,POI,ind,run,titile 来源: https://blog.csdn.net/Learning_LB/article/details/113463423