如何使用apache poi基于行数将excel文件拆分为多个文件?

rma*_*mac 5 java excel apache-poi

我是apache poi的新手,我想根据行数将excel文件拆分成多个文件.

例如data.xlsx有15k行,新文件应该像data_1.xlsx有5k行,data_2.xlsx应该是5-10k,data_3.xlsx应该是10-15k.

Sea*_*mus 7

我有你.

package com.industries.seanimus;  

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;  

public class ReportSplitter {

    private final String fileName;
    private final int maxRows;

    public ReportSplitter(String fileName, final int maxRows) {

        ZipSecureFile.setMinInflateRatio(0);

        this.fileName = fileName;
        this.maxRows = maxRows;

        try {
            /* Read in the original Excel file. */
            OPCPackage pkg = OPCPackage.open(new File(fileName));
            XSSFWorkbook workbook = new XSSFWorkbook(pkg);
            XSSFSheet sheet = workbook.getSheetAt(0);

            /* Only split if there are more rows than the desired amount. */
            if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                writeWorkBooks(wbs);
            }
            pkg.close();
        }
        catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {

        List<SXSSFWorkbook> workbooks = new ArrayList<SXSSFWorkbook>();

        SXSSFWorkbook wb = new SXSSFWorkbook();
        SXSSFSheet sh = wb.createSheet();

        SXSSFRow newRow;
        SXSSFCell newCell;

        int rowCount = 0;
        int colCount = 0;

        XSSFSheet sheet = workbook.getSheetAt(0);

        for (Row row : sheet) {
            newRow = sh.createRow(rowCount++);

            /* Time to create a new workbook? */
            if (rowCount == maxRows) {
                workbooks.add(wb);
                wb = new SXSSFWorkbook();
                sh = wb.createSheet();
                rowCount = 0;
            }

            for (Cell cell : row) {
                newCell = newRow.createCell(colCount++);
                newCell = setValue(newCell, cell);

                CellStyle newStyle = wb.createCellStyle();
                newStyle.cloneStyleFrom(cell.getCellStyle());
                newCell.setCellStyle(newStyle);
            }
            colCount = 0;
        }

        /* Only add the last workbook if it has content */
        if (wb.getSheetAt(0).getPhysicalNumberOfRows() > 0) {
            workbooks.add(wb);
        }
        return workbooks;
    }

    /*
     * Grabbing cell contents can be tricky. We first need to determine what
     * type of cell it is.
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING: 
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                newCell.setCellValue(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /* Write all the workbooks to disk. */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        FileOutputStream out;
        try {
            for (int i = 0; i < wbs.size(); i++) {
                String newFileName = fileName.substring(0, fileName.length() - 5);
                out = new FileOutputStream(new File(newFileName + "_" + (i + 1) + ".xlsx"));
                wbs.get(i).write(out);
                out.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args){
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }

}
Run Code Online (Sandbox Code Playgroud)

几点说明:

  • 为了编写工作簿,我使用 SXSSFWorkbook.它比HSSF或XSSF快得多,因为它在写入之前不会将所有内容保存在内存中(这会导致可怕的gc混乱).

  • 繁忙的开发者指南是你的朋友学习的Apache POI)

请享用!


编辑:我已经更新了代码以复制单元格样式.有两点需要注意:

  • 复制样式会使事情大幅减少.
  • POI创建的模板文件可能会变得太大而无法解压缩,从而导致Zip炸弹检测到错误.您可以通过ZipSecureFile.setMinInflateRatio(0)更改最小通胀率来解决此问题.