Spring Batch: batch processing framework + MySQL job repository + web monitoring notes

Keywords: Java JDBC MySQL Spring Database

1. Concept

Spring Batch is a lightweight batch processing framework suitable for enterprise applications. It is worth noting that Spring Batch is not a scheduling framework: it does not provide scheduling functions itself and is meant to be triggered by an external scheduler (for example Quartz).

2. Batch process

Batch processing can be divided into the following steps:

  1. Read data
  2. Process data according to business
  3. Write (archive) the processed data

3. What does Spring Batch offer us?

  1. Unified read-write interface
  2. Rich task handling methods
  3. Flexible transaction management and concurrent processing
  4. Logging, monitoring, task restart and skip

4. Basic components

Name | Purpose
JobRepository | Container that registers and stores job metadata (jobs, their executions, and step state)
JobLauncher | Starts a Job
Job | The actual job to be executed; consists of one or more steps
Step | One stage of a batch job; generally made up of an ItemReader, an ItemProcessor and an ItemWriter
ItemReader | Reads items from a given data source
ItemProcessor | Transforms or validates an item before it is written to the data source
ItemWriter | Writes the items contained in a chunk to the data source
Chunk | A block of items: items are read and processed one at a time, and written together once the configured count is reached
Tasklet | The unit of work a step executes inside a transaction; its configuration covers repeated execution, synchronous/asynchronous rules, etc.

5. job, step, tasklet and chunk relationships

One job contains at least one step; one step has 0 or 1 tasklet; one tasklet has 0 or 1 chunk.

6. Practice: batch-importing spreadsheet data (exported as a CSV file) into the database

6.1: Define the job repository (metadata store)

  <!-- Alternative (disabled): in-memory job repository. -->
    <!--
    <bean id="jobRepository"
          class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean"/>
    -->

    <!-- Database-backed job repository: uses dataRepDruidDataSource and tables prefixed with BATCH_. -->
    <batch:job-repository
            id="jobRepository"
            data-source="dataRepDruidDataSource"
            transaction-manager="transactionManager"
            table-prefix="BATCH_"
            isolation-level-for-create="SERIALIZABLE"
            max-varchar-length="1000"/>

6.2: Define the job launcher

    <!--
      Job launcher: starts jobs and records their executions through the jobRepository bean.
      No taskExecutor is configured on the launcher here.
      NOTE(review): without one, SimpleJobLauncher presumably runs the job on the calling thread - confirm
      this is acceptable when the job is triggered from a web request.
    -->
    <bean class="org.springframework.batch.core.launch.support.SimpleJobLauncher" id="jobLauncher">
        <property name="jobRepository">
            <ref bean="jobRepository"/>
        </property>
    </bean>

6.3: define JOB

    <!--
      Job "userBatchJobName": one chunk-oriented step (userStep) wired to
      userReader, userProcessor and userWriter.
    -->
    <batch:job restartable="true" id="userBatchJobName">
        <batch:step id="userStep">
            <!--
              The step runs on taskExecutor with throttle-limit="5".
              NOTE(review): start-limit="1" allows this step to be started only once, which looks
              at odds with restartable="true" on the job - confirm which behaviour is intended.
            -->
            <batch:tasklet task-executor="taskExecutor"
                           throttle-limit="5"
                           start-limit="1"
                           allow-start-if-complete="false">
                <!-- Commit interval of 5 items; the exceptions listed below are retryable, up to retry-limit. -->
                <batch:chunk commit-interval="5"
                             retry-limit="10"
                             reader="userReader"
                             processor="userProcessor"
                             writer="userWriter">
                    <batch:retryable-exception-classes>
                        <batch:include class="org.springframework.dao.DuplicateKeyException"/>
                        <batch:include class="java.sql.BatchUpdateException"/>
                        <batch:include class="java.sql.SQLException"/>
                    </batch:retryable-exception-classes>
                </batch:chunk>
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <!-- Thread pool referenced by userStep through task-executor="taskExecutor". -->
    <bean class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor" id="taskExecutor">
        <!-- Pool sizing: core size and upper bound -->
        <property name="corePoolSize"><value>100</value></property>
        <property name="maxPoolSize"><value>300</value></property>
        <!-- Capacity of the queue that buffers submitted tasks -->
        <property name="queueCapacity"><value>100</value></property>
        <!--
          Keep-alive for idle threads, expressed in seconds (30000 s is roughly 8.3 hours).
          NOTE(review): confirm this was not meant as milliseconds.
        -->
        <property name="keepAliveSeconds"><value>30000</value></property>
    </bean>

6.4: define ItemReader

     <!--
       Reader for userStep. The step runs its chunks on taskExecutor (throttle-limit 5), so read()
       is invoked from several threads at once. FlatFileItemReader is not thread-safe, so it is
       wrapped in SynchronizedItemStreamReader, which serialises the read() calls and forwards
       open/update/close to the delegate. The public bean id "userReader" is unchanged.
       Requires Spring Batch 3.0 or later.
       NOTE(review): with concurrent reads the saved restart position is unreliable - consider
       setting saveState to false on the delegate if restarts are not needed.
     -->
     <bean id="userReader" class="org.springframework.batch.item.support.SynchronizedItemStreamReader">
        <property name="delegate">
            <bean class="org.springframework.batch.item.file.FlatFileItemReader">
                <!-- Converts each line of the file into an object -->
                <property name="lineMapper" ref="lineMapper"/>
                <!-- Input file, resolved from the classpath -->
                <property name="resource" value="classpath:message/batch-data-source.csv"/>
            </bean>
        </property>
    </bean>
 <!-- Line mapper: tokenizes one input line, then maps the resulting fields to an object. -->
    <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper" id="lineMapper">
        <!-- Binds the tokenized fields to an object -->
        <property name="fieldSetMapper">
            <bean class="com.hcw.core.batch.UserFieldSetMapper"/>
        </property>
        <!-- Splits each line on "," and names the resulting columns id and name -->
        <property name="lineTokenizer">
            <bean class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
                <property name="names" value="id,name"/>
                <property name="delimiter" value=","/>
            </bean>
        </property>
    </bean>

6.5: define ItemWriter

     <!--
       Step-scoped writer used by userStep. It is given the statement id
       com.hcw.core.batch.dao.UserToMapper.batchInsert and the sqlSessionFactoryTo bean;
       presumably it runs that statement as a batch insert - verify in MyBatchItemWriter.
     -->
     <bean scope="step" class="com.hcw.core.batch.MyBatchItemWriter" id="userWriter">
        <property name="statementId">
            <value>com.hcw.core.batch.dao.UserToMapper.batchInsert</value>
        </property>
        <property name="sqlSessionFactory">
            <ref bean="sqlSessionFactoryTo"/>
        </property>
    </bean>

6.6: define ItemProcessor

    <!-- Item processor referenced by the chunk of userStep (processor="userProcessor"). -->
    <bean class="com.hcw.core.batch.UserItemProcessor" id="userProcessor"/>

6.7: define data source of jobRepository

   <!--
     Druid connection pool used by the job repository (data-source="dataRepDruidDataSource").
     Every setting is read from a jdbc.mysql.rep.connection.* placeholder.
     The properties are kept in their original order on purpose.
   -->
   <bean id="dataRepDruidDataSource"
         class="com.alibaba.druid.pool.DruidDataSource"
         destroy-method="close"
         init-method="init">
        <!-- Connection and credentials -->
        <property name="url" value="${jdbc.mysql.rep.connection.url}"/>
        <property name="username" value="${jdbc.mysql.rep.connection.username}"/>
        <property name="password" value="${jdbc.mysql.rep.connection.password}"/>
        <property name="filters" value="${jdbc.mysql.rep.connection.filters}"/>
        <!-- Pool sizing and wait time -->
        <property name="maxActive" value="${jdbc.mysql.rep.connection.maxActive}"/>
        <property name="initialSize" value="${jdbc.mysql.rep.connection.initialSize}"/>
        <property name="maxWait" value="${jdbc.mysql.rep.connection.maxWait}"/>
        <property name="minIdle" value="${jdbc.mysql.rep.connection.minIdle}"/>
        <!-- Eviction of idle connections -->
        <property name="timeBetweenEvictionRunsMillis" value="${jdbc.mysql.rep.connection.timeBetweenEvictionRunsMillis}"/>
        <property name="minEvictableIdleTimeMillis" value="${jdbc.mysql.rep.connection.minEvictableIdleTimeMillis}"/>
        <!-- Connection validation -->
        <property name="validationQuery" value="${jdbc.mysql.rep.connection.validationQuery}"/>
        <property name="testWhileIdle" value="${jdbc.mysql.rep.connection.testWhileIdle}"/>
        <property name="testOnBorrow" value="${jdbc.mysql.rep.connection.testOnBorrow}"/>
        <property name="testOnReturn" value="${jdbc.mysql.rep.connection.testOnReturn}"/>
        <!-- Prepared-statement pooling -->
        <property name="poolPreparedStatements" value="${jdbc.mysql.rep.connection.poolPreparedStatements}"/>
        <property name="maxPoolPreparedStatementPerConnectionSize"
                  value="${jdbc.mysql.rep.connection.maxPoolPreparedStatementPerConnectionSize}"/>
    </bean>

6.8: start JOB

Start Tomcat, then open the page that starts the job.

Posted by KindredHyperion on Tue, 04 Feb 2020 05:05:24 -0800