其他分享
首页 > 其他分享 > POI读取Word内容及格式,看这一篇就够了!

POI读取Word内容及格式,看这一篇就够了!

作者:互联网

目录

 

效果

所有源码获取方式

pom依赖

加载文档

页眉页脚

页面边距

获取标题

获取表格

获取图片

缩进方式计算

段落格式

文字属性

 


效果

图片

所有源码获取方式

扫描下方二维码关注公众号获取即可

pom依赖

<!-- Apache POI core. https://mvnrepository.com/artifact/org.apache.poi/poi -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>4.1.2</version>
    </dependency>
    <!-- Hutool utility library; the snippets use its Base64 helper to print image bytes. -->
    <dependency>
      <groupId>cn.hutool</groupId>
      <artifactId>hutool-all</artifactId>
      <version>5.5.7</version>
    </dependency>

    <!-- OOXML (.docx/.xlsx) support. https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>4.1.2</version>
    </dependency>
    <!-- Reduced schema jar. https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml-schemas</artifactId>
      <version>4.1.2</version>
    </dependency>
    <!-- Full schema jar. Its version must match the POI line in use: 1.4 pairs with POI 4.x
         (1.1 targets much older POI releases and mixes incompatible generated classes). -->
    <!-- NOTE(review): the full jar is meant to replace poi-ooxml-schemas; consider keeping only one. -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>ooxml-schemas</artifactId>
      <version>1.4</version>
    </dependency>
    <!-- Legacy binary formats (.doc etc.). https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-scratchpad</artifactId>
      <version>4.1.2</version>
    </dependency>

加载文档

 InputStream is = new FileInputStream("C:\\Users\\10386\\Desktop\\word-正确文档2.docx");
  XWPFDocument doc = new XWPFDocument(is);

页眉页脚

   // Read the default header and footer text of the document.
   XWPFHeaderFooterPolicy headerFooterPolicy = doc.getHeaderFooterPolicy();
    // Header: fall back to an empty string when the document has no policy or no default
    // header, instead of failing with a NullPointerException.
    String header = (headerFooterPolicy != null && headerFooterPolicy.getDefaultHeader() != null)
        ? headerFooterPolicy.getDefaultHeader().getText()
        : "";
    System.out.println("***页眉 ***"+header);
    // Footer: same fallback as the header.
    String footer = (headerFooterPolicy != null && headerFooterPolicy.getDefaultFooter() != null)
        ? headerFooterPolicy.getDefaultFooter().getText()
        : "";
    // Print the footer itself (the original printed the header variable here by mistake).
    System.out.println("***页脚 ***"+footer);

页面边距

// Read the four page margins from the body-level section properties (sectPr -> pgMar).
// The values are returned as stored in the document, converted to int.
// NOTE(review): OOXML stores these in twentieths of a point (twips) — confirm before converting.
// NOTE(review): getSectPr()/getPgMar() are dereferenced without a null check; a document
// lacking those elements would fail here — verify against the expected inputs.
CTDocument1 ctdoc =  doc.getDocument();
    int top = ctdoc.getBody().getSectPr().getPgMar().getTop().intValue();
    int bottom = ctdoc.getBody().getSectPr().getPgMar().getBottom().intValue();
    int left = ctdoc.getBody().getSectPr().getPgMar().getLeft().intValue();
    int right = ctdoc.getBody().getSectPr().getPgMar().getRight().intValue();

获取标题

  // All body paragraphs of the document.
  List<XWPFParagraph> paras = doc.getParagraphs();
    // The first paragraph is taken as the title; getParagraph (defined elsewhere) turns it
    // into a list of attribute maps.
    XWPFParagraph firstPara = paras.get(0);
    List<Map<String, String>> list = getParagraph(firstPara);
    System.out.println("标题信息===" + list);

获取表格

        // Dump one table: for every cell print its position, whether it carries a gridSpan
        // (horizontal merge) and how many grid columns it spans, plus the attributes of its
        // first paragraph. Finally print the table size as rows x (largest cell count in a row).
        XWPFTable table = (XWPFTable) element;
        List<XWPFTableRow> xwpfTableRows = table.getRows();
        int row_count = xwpfTableRows.size();
        // Cell count of each row, typed (the original used a raw ArrayList).
        List<Integer> cell_count = new ArrayList<>();
        int row_index = 1;
        for (XWPFTableRow xwpfTableRow : xwpfTableRows) {
          List<XWPFTableCell> xwpfTableCells = xwpfTableRow.getTableCells();
          cell_count.add(xwpfTableCells.size());
          System.out.println("第"+row_index+"行");
          int cell_index = 1;
          for (XWPFTableCell xwpfTableCell : xwpfTableCells) {
            // Cell properties (tcPr) are optional; only ask for gridSpan when they exist,
            // otherwise the chained call fails with a NullPointerException.
            CTDecimalNumber cellspan = xwpfTableCell.getCTTc().isSetTcPr()
                ? xwpfTableCell.getCTTc().getTcPr().getGridSpan()
                : null;
            boolean gridspan = cellspan != null;
            String gridspan_num = gridspan ? cellspan.getVal().toString() : "0";
            // Only the first paragraph of the cell is reported.
            XWPFParagraph paragraph = xwpfTableCell.getParagraphs().get(0);
            System.out.println("第" +cell_index+"个单元格,合并标志:"+gridspan+",合并个数:"+gridspan_num
            +"文字:"+getParagraph(paragraph));
            cell_index++;
          }
          row_index++;
        }
        // Collections.max throws on an empty collection; report 0 columns for a table without rows.
        int max_cells = cell_count.isEmpty() ? 0 : Collections.max(cell_count);
        System.out.println("表格为:row_count==="+row_count+"行"+max_cells+"列");

获取图片

// Print the first picture embedded in the current run as a Base64 string; runs without
// pictures are skipped.
List<XWPFPicture> pictures = run.getEmbeddedPictures();
            if (!pictures.isEmpty()) {
                byte[] imageBytes = pictures.get(0).getPictureData().getData();
                System.out.println(Base64.encode(imageBytes));
            }

缩进方式计算

// Determine the indentation style first, then convert the raw values.
// Results: ind_type is "hang", "first" or "none"; ind is the hanging / first-line amount;
// ind_left and ind_right are the side indents.
// Conversion used by this snippet: a value that is an exact multiple of 567 is treated as
// centimetres (value / 567), anything else as a character-based unit (value / 240); side
// indents are divided by 210.
// NOTE(review): 210 is presumably one character width at the document's base font size — confirm.
        double ind = -1, ind_left = -1, ind_right = -1, ind_hang = -1;
        String ind_type = "";
        // The paragraph getters return -1 when the attribute is not set; read each once.
        int hanging_twips = para.getIndentationHanging();
        int first_line_twips = para.getFirstLineIndent();
        // Map "unset" side indents to 0 so the -1 sentinel is not converted into a bogus
        // fractional indent. (A genuine stored value of -1 cannot be told apart from unset.)
        int left_twips = para.getIndentationLeft() == -1 ? 0 : para.getIndentationLeft();
        int right_twips = para.getIndentationRight() == -1 ? 0 : para.getIndentationRight();

        // All divisions are floating-point. The original divided int by int in three places
        // (/ 240 and / 210), silently truncating the fractional part.
        if (hanging_twips != -1) {
            // Hanging indent.
            ind_type = "hang";
            ind = hanging_twips % 567 == 0 ? hanging_twips / 567.0 : hanging_twips / 240.0;
            // The left indent is reported net of the hanging amount.
            ind_left = (left_twips - hanging_twips) / 210.0;
        } else {
            // First-line indent, or no indent at all.
            if (first_line_twips == -1) {
                ind_type = "none";
                ind = 0;
            } else {
                ind_type = "first";
                ind = first_line_twips % 567 == 0 ? first_line_twips / 567.0 : first_line_twips / 240.0;
            }
            ind_left = left_twips / 210.0;
        }
        ind_right = right_twips / 210.0;

段落格式

// Gather paragraph-level formatting of the second body paragraph into a string map.
List<XWPFParagraph> paras = doc.getParagraphs();
XWPFParagraph para = paras.get(1);
List<XWPFRun> runsLists = para.getRuns();
List<Map<String, String>> list = new ArrayList<>();
Map<String, String> titile = new HashMap<>();
// Full text of the paragraph.
titile.put("Text", para.getText());
titile.put("Alignment", para.getAlignment().toString());
// Line spacing.
titile.put("SpacingBetween", String.valueOf(para.getSpacingBetween()));
// Spacing before / after the paragraph, in lines.
titile.put("SpacingBeforeLines", String.valueOf(para.getSpacingBeforeLines()));
titile.put("SpacingAfterLines", String.valueOf(para.getSpacingAfterLines()));
// Auto-numbering format text; an absent value is stored as the string "null".
titile.put("NumLevelText", String.valueOf(para.getNumLevelText()));

文字属性

 // Collect character-level formatting for every run of the paragraph. One map per run is
 // appended to `list` (declared by the surrounding code).
 // Fixes over the original snippet: the first statement lacked its terminating semicolon
 // and the for loop was never closed, so the code did not compile.
 List<XWPFRun> runsLists = para.getRuns();
        for (XWPFRun run : runsLists) {
            // If the run embeds pictures, print the first one as Base64.
            List<XWPFPicture> pictures = run.getEmbeddedPictures();
            if (pictures.size() > 0) {
                XWPFPicture picture = pictures.get(0);
                XWPFPictureData pictureData = picture.getPictureData();
                System.out.println(Base64.encode(pictureData.getData()));
            }

            Map<String, String> titile_map = new HashMap<>();
            // Text at position 0 of the run.
            titile_map.put("content", run.getText(0));
            // Bold flag.
            titile_map.put("Bold", Boolean.toString(run.isBold()));
            // Font colour.
            titile_map.put("Color", run.getColor());
            // Font family for the hAnsi character range.
            titile_map.put("FontFamily", run.getFontFamily(XWPFRun.FontCharRange.hAnsi));
            // Font name.
            titile_map.put("FontName", run.getFontName());
            // Font size.
            titile_map.put("FontSize", run.getFontSize() + "");
            // Underline pattern and its colour.
            titile_map.put("Underline", run.getUnderline().name());
            titile_map.put("UnderlineColor", run.getUnderlineColor());
            // Italic flag.
            titile_map.put("Italic", Boolean.toString(run.isItalic()));
            list.add(titile_map);
        }

 

标签:Word,String,para,就够,put,POI,ind,run,titile
来源: https://blog.csdn.net/Learning_LB/article/details/113463423