0


easyexcel大数据多线程读取后导出

最近在研究大数据的导入导出,由此想到了在管理系统中,excel导入导出都是常见操作,故我的操作由此开始。
首先是选择适合的工具类,alibaba开源的easyexcel比较好,处理大数据效率较高,故选用此工具,这是easyexcel官网
1.导入maven依赖

<dependency><groupId>com.alibaba</groupId><artifactId>easyexcel</artifactId><version>3.2.0</version></dependency>

2.配置好相应的实体类

packagecom.example.admin.domain;importcom.alibaba.excel.annotation.ExcelIgnore;importcom.alibaba.excel.annotation.ExcelProperty;importcom.alibaba.excel.annotation.format.DateTimeFormat;importcom.alibaba.excel.annotation.write.style.ColumnWidth;importcom.baomidou.mybatisplus.annotation.IdType;importcom.baomidou.mybatisplus.annotation.TableId;importcom.baomidou.mybatisplus.annotation.TableLogic;importcom.baomidou.mybatisplus.annotation.TableName;importcom.example.admin.execl.TimestampStringConverter;importlombok.Data;importjava.io.Serializable;importjava.math.BigDecimal;importjava.math.BigInteger;importjava.sql.Timestamp;@Data@TableName("example")//@TableName中的值对应着表名publicclassExampleimplementsSerializable{privatestaticfinallong serialVersionUID =1L;/**
     * 主键
     */@ExcelIgnore@TableId(type =IdType.AUTO)privateLong id;/**
     * 姓名
     */@ExcelProperty(value ="姓名", index =0)@ColumnWidth(16)privateString name;/**
     * 状态
     */@ExcelIgnore@TableLogicprivateString status;/**
     * 创建时间
     */@ExcelProperty(value ="创建时间", index =1, converter =TimestampStringConverter.class)@ColumnWidth(28)privateTimestamp createTime;/**
     * 更新时间
     */@ExcelProperty(value ="更新时间", index =2, converter =TimestampStringConverter.class)@ColumnWidth(28)privateTimestamp updateTime;/**
     * 金额
     */@ExcelProperty(value ="金额", index =3)@ColumnWidth(16)privateBigDecimal money;/**
     * 数量
     */@ExcelProperty(value ="数量", index =4)@ColumnWidth(16)privateBigInteger num;}

Timestamp需要类型转化,具体参考大神博客和大神博客

packagecom.example.admin.execl;importcom.alibaba.excel.converters.Converter;importcom.alibaba.excel.enums.CellDataTypeEnum;importcom.alibaba.excel.metadata.GlobalConfiguration;importcom.alibaba.excel.metadata.data.WriteCellData;importcom.alibaba.excel.metadata.property.ExcelContentProperty;importcom.alibaba.excel.util.DateUtils;importjava.sql.Timestamp;/**
 * @author yss
 * @date 2023/3/3
 */publicclassTimestampStringConverterimplementsConverter<Timestamp>{@OverridepublicClass<?>supportJavaTypeKey(){returnTimestamp.class;}@OverridepublicCellDataTypeEnumsupportExcelTypeKey(){returnCellDataTypeEnum.STRING;}@OverridepublicWriteCellData<?>convertToExcelData(Timestamp value,ExcelContentProperty contentProperty,GlobalConfiguration globalConfiguration){WriteCellData cellData =newWriteCellData();String cellValue;if(contentProperty ==null|| contentProperty.getDateTimeFormatProperty()==null){
            cellValue =DateUtils.format(value.toLocalDateTime(),null, globalConfiguration.getLocale());}else{
            cellValue =DateUtils.format(value.toLocalDateTime(), contentProperty.getDateTimeFormatProperty().getFormat(),
                    globalConfiguration.getLocale());}
        cellData.setType(CellDataTypeEnum.STRING);
        cellData.setStringValue(cellValue);
        cellData.setData(cellValue);return cellData;}}

3.多线程读取数据库数据,存放到一个map集合中,然后遍历集合写入到excel;注意 easyexcel不支持多线程写入。

@Autowired@Qualifier("excelThreadPool")privateThreadPoolTaskExecutor threadPoolTaskExecutor;@TestpublicvoidcontextTest(){// 每页多少条数据int pageSize =50000;// 导出文件路径String fileName ="d:\\test.xlsx";// 查询条件QueryWrapper<Example> queryWrapper =newQueryWrapper<>();Long start =System.currentTimeMillis();Long count = exampleMapper.selectCount(queryWrapper);Long sheetNum = count % pageSize ==0? count / pageSize:count / pageSize +1;// 多线程去读// 1.初始化map容量 防止扩容带来的效率损耗Map<Integer,Page<Example>> pageMap =newConcurrentHashMap<>(Math.toIntExact(sheetNum));CountDownLatch countDownLatch =newCountDownLatch(Math.toIntExact(sheetNum));// 注意 easyexcel 暂时不支持多线程并发写入!!! 详情请看github上issuesfor(int i =0;i< sheetNum;i++){int finali = i;
            threadPoolTaskExecutor.submit(()->{Page<Example> page =newPage<>();
                page.setCurrent(finali +1);
                page.setSize(pageSize);// 获取数据存放到map中Page<Example> selectPage = exampleMapper.selectPage(page,queryWrapper);
                pageMap.put(finali,selectPage);// 消耗掉一个
                countDownLatch.countDown();});}try{
            countDownLatch.await();}catch(InterruptedException e){
            e.printStackTrace();}// 写入try(ExcelWriter excelWriter =EasyExcel.write(fileName,Example.class).build()){
            pageMap.forEach((k,v)->{
                log.info("正在写入{}条数据",pageSize);WriteSheet writeSheet =EasyExcel.writerSheet(k,"第"+(k+1)+"批数据").build();
                excelWriter.write(v.getRecords(), writeSheet);// 写完当前数据立刻删除  不删除会产生内存泄漏即无法回收Map中巨大的空间 导致oom
                pageMap.remove(k);});
            excelWriter.finish();}Long end =System.currentTimeMillis();
        log.info("耗时"+(end-start)+"ms");}

线程池操作类

packagecom.example.admin.utils;importorg.springframework.context.annotation.Bean;importorg.springframework.context.annotation.Configuration;importorg.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;importjava.util.concurrent.ThreadPoolExecutor;/**
 * @author yss
 * @date 2023/3/3
 */@ConfigurationpublicclassConcurrentThreadGlobalConfig{@Bean("excelThreadPool")publicThreadPoolTaskExecutordefaultThreadPool(){ThreadPoolTaskExecutor executor =newThreadPoolTaskExecutor();//核心线程数目
        executor.setCorePoolSize(Runtime.getRuntime().availableProcessors()+1);//指定最大线程数
        executor.setMaxPoolSize(Runtime.getRuntime().availableProcessors()+1);//队列中最大的数目
        executor.setQueueCapacity(650);//线程名称前缀
        executor.setThreadNamePrefix("DefaultThreadPool_");//rejection-policy:当pool已经达到max size的时候,如何处理新任务//CALLER_RUNS:不在新线程中执行任务,而是由调用者所在的线程来执行//对拒绝task的处理策略
        executor.setRejectedExecutionHandler(newThreadPoolExecutor.CallerRunsPolicy());//线程空闲后的最大存活时间
        executor.setKeepAliveSeconds(60);//加载
        executor.initialize();return executor;}}

导出1000w条数据,用时5分钟左右,这个效率还是可以的
在这里插入图片描述
在这里插入图片描述
这是导出的操作,明天继续研究大数据导入操作!!!

标签: java excel 开发语言

本文转载自: https://blog.csdn.net/weixin_46591962/article/details/129332287
版权归原作者 阿胜yss 所有, 如有侵权,请联系我们删除。

“easyexcel大数据多线程读取后导出”的评论:

还没有评论