数据库
首页 > 数据库 > java 使用poi读取word文档存入数据库

java 使用poi读取word文档存入数据库

作者:互联网

使用的poi jar包需要自己下载

读取的word文档中含有多个图片和表格,所以处理分为两个部分:一部分读取各个表格(及段落)中的内容,另一部分将文档中的所有图片提取出来保存到磁盘:

   /** * 遍历段落内容
     * docxReadPath 文档地址
     * uploadPic 图片上传地址
     * picFile 图片保存后地址
     * @param document
     * @return XWPFDocument
     * @throws IOException 
     */
    public static String readPar(XWPFDocument document,String docxReadPath,String uploadPic,String picFile){
         String fail="sucess";
     Iterator<XWPFParagraph> itPara = document.getParagraphsIterator();
try {
  //读取word中所有内容
        while (itPara.hasNext()) {
      



        XWPFParagraph paragraph = (XWPFParagraph) itPara.next();

          //run表示相同区域属性相同的字符,结果以‘,’分隔;
          List<XWPFRun> runs =paragraph.getRuns();// paragraph.getRuns();
          String fileName="";
          for (int i = 0; i < runs.size(); i++){

            String oneparaString = runs.get(i).getText(runs.get(i).getTextPosition());

             System.out.println(oneparaString);

          }

       }
              List<XWPFPictureData> picList = document.getAllPictures();for (XWPFPictureData pic : picList) {
              byte[] bytev = pic.getData();
              String imgName=pic.getFileName();                           
        System.out.println("=====图片生成中========"+imgName); if(!"image1.jpeg".equals(imgName)){ FileOutputStream fos = new FileOutputStream(uploadPic+"/"+imgName); fos.write(bytev); } } } catch (Exception e) { e.printStackTrace(); System.out.println("=====错误信息===="+e.getMessage()); fail="false"; } return fail; }
 /**
     * Walks every table of the document and prints each cell's text together
     * with its 1-based table, row and column number.
     *
     * @param document the opened Word document whose tables are printed
     */
    public static void readTableContent(XWPFDocument document) {
        int tableNo = 0;
        for (Iterator<XWPFTable> tables = document.getTablesIterator(); tables.hasNext(); ) {
            XWPFTable current = tables.next();
            tableNo++;
            // Rows are fetched by index, columns by walking the row's cell list.
            final int rowTotal = current.getNumberOfRows();
            for (int rowIdx = 0; rowIdx < rowTotal; rowIdx++) {
                int colNo = 0;
                for (XWPFTableCell cell : current.getRow(rowIdx).getTableCells()) {
                    colNo++;
                    String cellText = cell.getText();
                    System.out.println("第"+ tableNo +"个表格,第"+ (rowIdx + 1) +"行,第"+ colNo +"列:" + cellText);
                }
            }
        }
    }
 /**
     * Opens a {@code .docx} file as an {@link XWPFDocument}.
     *
     * <p>The extension check is case-insensitive and looks at the text after
     * the last dot of {@code srcPath}.
     *
     * @param srcPath path of the Word document; may be {@code null}
     * @return the loaded document, or {@code null} when {@code srcPath} is
     *         {@code null}, does not end in {@code docx}, or cannot be read
     */
    public static XWPFDocument read_file(String srcPath)
    {
        if (srcPath == null) {
            return null;
        }
        // split drops trailing empty strings, so a path such as "." gives an
        // empty array; hence the length check.
        String[] sp = srcPath.split("\\.");
        if (sp.length == 0 || !sp[sp.length - 1].equalsIgnoreCase("docx")) {
            return null;
        }
        // The document is loaded from the stream inside the constructor, so the
        // stream can be closed as soon as construction finishes.
        try (FileInputStream fis = new FileInputStream(srcPath)) {
            return new XWPFDocument(fis);
        } catch (IOException e) {
            System.out.println("读取文件出错!");
            e.printStackTrace();
            return null;
        }
    }


/**
 * Demo entry point: loads a sample document, prints its paragraph text,
 * exports its pictures and prints its table cells.
 *
 * @param args unused
 * @throws IOException if closing the document fails
 */
public static void main(String[] args) throws IOException{
        // Example locations; adjust to the local environment. The picture
        // directory must already exist because readPar does not create it.
        String docx = "F:\\bb.docx";
        String uploadPic = "F:\\pic";
        // A null resource is permitted in try-with-resources (close is skipped).
        try (XWPFDocument document = read_file(docx)) {
            if (document == null) {
                System.err.println("Cannot open document: " + docx);
                return;
            }
            // readPar takes four arguments; the source path and the saved
            // picture location are passed for completeness.
            readPar(document, docx, uploadPic, uploadPic);
            readTableContent(document);
        }
    }

 

 

 



 

 

 

标签:java,String,int,XWPFDocument,return,poi,srcPath,word,document
来源: https://www.cnblogs.com/xl1314666/p/11898273.html