首页 > 数据库> > Oracle下Mybatis Batch批量操作 vs JBCD 原生batch-prepared，及各写法效率测试。

Oracle下Mybatis Batch批量操作 vs JBCD 原生batch-prepared，及各写法效率测试。

2021-11-16 15:04:45 作者：互联网

背景

建表SQL

测试写法

非事务,循环单次单条插入

有事务,循环单次单条插入(Simple模式 )

背景

近期项目要转数据库，需要转移几个亿的数据。用的Mybatis框架，结果开发人员发现 foreach batch写法，在大量数据数据插入的时候效率很低，而且还出现了 ORA-04036 PGA memory 。

那在大量数据操作的情况，用哪种方式效率最高呢？我这里单独用了半天的时间研究了一下。

强调：效率跟表大小、字段长度有关系，这里的测试不具有普遍性，仅供参考！

建表SQL

DROP TABLE STUDENT ;
CREATE TABLE student
(
    id    number(20) PRIMARY KEY,
    name  varchar(20)                not NULL,
    name2 char(20) DEFAULT 'default' NOT NULL,
    age   number(5)
) tablespace TBS_CUR_DAT;

COMMENT ON COLUMN student.name IS '姓名';
COMMENT ON COLUMN student.name2 IS '姓名2';
COMMENT ON COLUMN student.age IS '年龄';

INSERT INTO TEST.STUDENT (ID, NAME, NAME2, AGE) VALUES(1, '1', '1', 1);

测试写法

非事务,循环单次单条插入

@GetMapping(value = "/effective/student/insert/for")
public String insertFori(int num) {
    long start = System.currentTimeMillis();
    long maxId = studentService.getMaxId() + 1;
    for (int i = 0; i < num; i++) {
        long pId = maxId + i;
        Student student = getStudent(pId + "");
        studentService.insert(student);
    }
    long end = System.currentTimeMillis();
    System.out.println("循环插入-无事务 执行时间:" + (end - start));
    return "ok";
}

<insert id="insert" parameterType="com.batch.entity.Student">
    INSERT INTO STUDENT(ID,
                        NAME,
                        AGE)
    VALUES (#{id,jdbcType=NUMERIC},
            #{name,jdbcType=VARCHAR},
            #{age,jdbcType=DECIMAL})
</insert>

有事务,循环单次单条插入(Simple模式 )

@Transactional
@GetMapping(value = "/effective/student/insert/fortrans")
public String insertListTrans(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    long tempMaxId = (maxId == null) ? 0 : studentService.getMaxId() + 1;
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent(pId + "");
        studentService.insert(student);
    }
    long end = System.currentTimeMillis();
    logger.info("循环插入-有事务 执行时间:" + (end - start));
    return "ok";
}

批量操作-BEGIN END

@GetMapping(value = "/effective/student/insert/beginend")
public String insertListBeginEnd(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    long tempMaxId = (maxId == null) ? 0 : studentService.getMaxId() + 1;
    List<Student> studentList = new ArrayList<>();
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent("" + pId);
        studentList.add(student);
    }
    logger.info("Mybatis SQL return :" + studentService.insertListBeginEnd(studentList));
    long end = System.currentTimeMillis();
    logger.info("单次多条insert批量插入(BEGIN END) 执行时间:" + (end - start));
    return "ok";
}

<insert id="insertListBeginEnd">
    <foreach collection="studentList" item="item" index="index" open="begin" close=";end;" separator=";">
        insert into STUDENT(
        ID,
        NAME,
        AGE
        )values(
        #{item.id},
        #{item.name},
        #{item.age}
        )
    </foreach>
</insert>

批量操作-foreach batch

@Transactional // 对速度基本无影响
@GetMapping(value = "/effective/student/insert/batch")
public String insertList2(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    long tempMaxId = (maxId == null) ? 0 : studentService.getMaxId() + 1;
    List<Student> studentList = new ArrayList<>();
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent(pId + "");
        studentList.add(student);
    }
    logger.info("Mybatis SQL return :" + studentService.insertListBatch(studentList));
    long end = System.currentTimeMillis();
    logger.info("批量插入 执行时间:" + (end - start));
    return "ok";
}

<insert id="insertListBatch">
    insert into STUDENT(
    ID,
    NAME,
    AGE
    )
    <foreach collection="studentList" item="item" index="index" separator="union all">
        (
        select
        #{item.id},
        #{item.name,jdbcType=VARCHAR},
        #{item.age,jdbcType=DECIMAL}
        from dual
        )
    </foreach>
</insert>

JDBC原生batch-prepared

....

@Override
public void run() {
    Connection connection = null;
    PreparedStatement preparedStatement = null;
    try {
        connection = oracleConnect.getConnection();
        connection.setAutoCommit(false);
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Error");
    }

    // 开始时间
    Long begin = System.currentTimeMillis();

    // insert sql 前缀
    String sqlTemplate = "INSERT INTO TEST.STUDENT(ID, NAME, AGE) VALUES (?,?,?)";
    PreparedStatement insertStmt = null;
    try {
        insertStmt = connection.prepareStatement(sqlTemplate);
    } catch (SQLException e) {
        e.printStackTrace();
        throw new RuntimeException("创建statement失败.");
    }

    try {
        // 设置事务为非自动提交
        connection.setAutoCommit(false);

        long tempMaxId = (maxId == null) ? 0 : maxId + 1;
        for (int i = 0; i < this.num; i++) {
            long pId = tempMaxId + i;
            Student student = getStudent(pId + "");
            insertStmt.setBigDecimal(1, student.getId());
            insertStmt.setString(2, student.getName());
            insertStmt.setBigDecimal(3, student.getAge());
            insertStmt.addBatch();
        }
        /**
         * executeBatch 返回的是 int[] 数组,和批处理中的执行的SQL一一对应,值代表影响的行数。
         * 元素>=0,影响的行数。
         * 元素=-2,执行成功,但无法获取影响的行数。
         * 元素=-3,执行失败
         */
        int[] result = insertStmt.executeBatch();
        int updateCount = insertStmt.getUpdateCount();
        System.out.println("result= " + JSON.toJSONString(result));
        System.out.println("updateCount= " + updateCount);

        // 提交事务, 先关闭自动提交conn.setAutoCommit(false);
        connection.commit();
    } catch (SQLException e) {
        DbUtil.rollback(connection);
        e.printStackTrace();
        throw new RuntimeException("SQL执行错误");
    } finally {
        // 关闭 Statement
        DbUtil.close(insertStmt);
        // 归还连接 connect
        oracleConnect.returnConnection(connection);
    }
    // 结束时间
    Long end = System.currentTimeMillis();
    System.out.println("线程" + Thread.currentThread().getName() + "数据插入耗时: " + (end - begin) + " ms");
}
.....

测试结果

每个都进行三次测试，单位是毫秒(ms)

items	1000 条	2500 条	5000 条
非事务,循环单次单条插入	4493,3174,4109	11391,11088,13093	-
有事务,循环单次单条插入(Simple模式 )	1956,1344,1224	3437,3393,3460	5967,5700,5844
批量操作-BEGIN END	819,103,102	3344,219,237	18312,652,462
批量操作-foreach batch	453,45,39	4063,396,81	35399,174,195

数据量很大的情况下，上面的方式都不适合了，耗时很长。用下面两个batch方式。

items	1万条	5万条	10万条
JDBC原生batch-prepared	281,1297,299	1132,1065,735	713,758,886
有事务,循环单次单条插入(Batch模式)	227,163,168	1007,892,829	2004,1699,1739

结论

foreach-batch 和 BEGIN END 受 Oracle 数据库缓存影响,增删改第一次执行后,效率明显提升。取决于数据库, 重启应用依然有效。
simple 模式:
<2000条,用 foreach-batch(注意:foreach-batch update写法,不能超过1000条)
>2000条,用事务下循环单条操作
原因分析:SQL拼接字段太多, 硬解析会耗时耗资源.此时,事务下循环单条操作,会复用模板的软解析,反而效率更高。大量的硬解析可能会导致 Oracle:ORA-04036 PGA memory 错误.
batch 模式
不考虑返回行数
<5万条数据: 事务下循环单条操作效率 ≈ JDBC原生batch-prepared
>5万条数据: 用 JDBC原生batch-prepared
如果想要返回行数, 只能用 JDBC原生batch-prepared。因为mybatis的batch模式是返回行数的。

再次强调：效率跟表大小、字段长度有关系，这里的测试不具有普遍性，仅供参考！

转摘还请注明出处，感谢！

标签：Batch,JBCD,int,System,long,batch,student,studentService
来源： https://blog.csdn.net/q258523454/article/details/121351546