最近在研究大数据的导入导出,由此想到了在管理系统中,excel导入导出都是常见操作,故我的操作由此开始。
首先是选择适合的工具类,alibaba开源的easyexcel比较好,处理大数据效率较高,故选用此工具,这是easyexcel官网
1.导入maven依赖
<dependency><groupId>com.alibaba</groupId><artifactId>easyexcel</artifactId><version>3.2.0</version></dependency>
2.配置好相应的实体类
packagecom.example.admin.domain;importcom.alibaba.excel.annotation.ExcelIgnore;importcom.alibaba.excel.annotation.ExcelProperty;importcom.alibaba.excel.annotation.format.DateTimeFormat;importcom.alibaba.excel.annotation.write.style.ColumnWidth;importcom.baomidou.mybatisplus.annotation.IdType;importcom.baomidou.mybatisplus.annotation.TableId;importcom.baomidou.mybatisplus.annotation.TableLogic;importcom.baomidou.mybatisplus.annotation.TableName;importcom.example.admin.execl.TimestampStringConverter;importlombok.Data;importjava.io.Serializable;importjava.math.BigDecimal;importjava.math.BigInteger;importjava.sql.Timestamp;@Data@TableName("example")//@TableName中的值对应着表名publicclassExampleimplementsSerializable{privatestaticfinallong serialVersionUID =1L;/**
* 主键
*/@ExcelIgnore@TableId(type =IdType.AUTO)privateLong id;/**
* 姓名
*/@ExcelProperty(value ="姓名", index =0)@ColumnWidth(16)privateString name;/**
* 状态
*/@ExcelIgnore@TableLogicprivateString status;/**
* 创建时间
*/@ExcelProperty(value ="创建时间", index =1, converter =TimestampStringConverter.class)@ColumnWidth(28)privateTimestamp createTime;/**
* 更新时间
*/@ExcelProperty(value ="更新时间", index =2, converter =TimestampStringConverter.class)@ColumnWidth(28)privateTimestamp updateTime;/**
* 金额
*/@ExcelProperty(value ="金额", index =3)@ColumnWidth(16)privateBigDecimal money;/**
* 数量
*/@ExcelProperty(value ="数量", index =4)@ColumnWidth(16)privateBigInteger num;}
Timestamp需要类型转化,具体参考大神博客和大神博客
packagecom.example.admin.execl;importcom.alibaba.excel.converters.Converter;importcom.alibaba.excel.enums.CellDataTypeEnum;importcom.alibaba.excel.metadata.GlobalConfiguration;importcom.alibaba.excel.metadata.data.WriteCellData;importcom.alibaba.excel.metadata.property.ExcelContentProperty;importcom.alibaba.excel.util.DateUtils;importjava.sql.Timestamp;/**
* @author yss
* @date 2023/3/3
*/publicclassTimestampStringConverterimplementsConverter<Timestamp>{@OverridepublicClass<?>supportJavaTypeKey(){returnTimestamp.class;}@OverridepublicCellDataTypeEnumsupportExcelTypeKey(){returnCellDataTypeEnum.STRING;}@OverridepublicWriteCellData<?>convertToExcelData(Timestamp value,ExcelContentProperty contentProperty,GlobalConfiguration globalConfiguration){WriteCellData cellData =newWriteCellData();String cellValue;if(contentProperty ==null|| contentProperty.getDateTimeFormatProperty()==null){
cellValue =DateUtils.format(value.toLocalDateTime(),null, globalConfiguration.getLocale());}else{
cellValue =DateUtils.format(value.toLocalDateTime(), contentProperty.getDateTimeFormatProperty().getFormat(),
globalConfiguration.getLocale());}
cellData.setType(CellDataTypeEnum.STRING);
cellData.setStringValue(cellValue);
cellData.setData(cellValue);return cellData;}}
3.多线程读取数据库数据,存放到一个map集合中,然后遍历集合写入到excel;注意 easyexcel不支持多线程写入。
@Autowired@Qualifier("excelThreadPool")privateThreadPoolTaskExecutor threadPoolTaskExecutor;@TestpublicvoidcontextTest(){// 每页多少条数据int pageSize =50000;// 导出文件路径String fileName ="d:\\test.xlsx";// 查询条件QueryWrapper<Example> queryWrapper =newQueryWrapper<>();Long start =System.currentTimeMillis();Long count = exampleMapper.selectCount(queryWrapper);Long sheetNum = count % pageSize ==0? count / pageSize:count / pageSize +1;// 多线程去读// 1.初始化map容量 防止扩容带来的效率损耗Map<Integer,Page<Example>> pageMap =newConcurrentHashMap<>(Math.toIntExact(sheetNum));CountDownLatch countDownLatch =newCountDownLatch(Math.toIntExact(sheetNum));// 注意 easyexcel 暂时不支持多线程并发写入!!! 详情请看github上issuesfor(int i =0;i< sheetNum;i++){int finali = i;
threadPoolTaskExecutor.submit(()->{Page<Example> page =newPage<>();
page.setCurrent(finali +1);
page.setSize(pageSize);// 获取数据存放到map中Page<Example> selectPage = exampleMapper.selectPage(page,queryWrapper);
pageMap.put(finali,selectPage);// 消耗掉一个
countDownLatch.countDown();});}try{
countDownLatch.await();}catch(InterruptedException e){
e.printStackTrace();}// 写入try(ExcelWriter excelWriter =EasyExcel.write(fileName,Example.class).build()){
pageMap.forEach((k,v)->{
log.info("正在写入{}条数据",pageSize);WriteSheet writeSheet =EasyExcel.writerSheet(k,"第"+(k+1)+"批数据").build();
excelWriter.write(v.getRecords(), writeSheet);// 写完当前数据立刻删除 不删除会产生内存泄漏即无法回收Map中巨大的空间 导致oom
pageMap.remove(k);});
excelWriter.finish();}Long end =System.currentTimeMillis();
log.info("耗时"+(end-start)+"ms");}
线程池操作类
packagecom.example.admin.utils;importorg.springframework.context.annotation.Bean;importorg.springframework.context.annotation.Configuration;importorg.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;importjava.util.concurrent.ThreadPoolExecutor;/**
* @author yss
* @date 2023/3/3
*/@ConfigurationpublicclassConcurrentThreadGlobalConfig{@Bean("excelThreadPool")publicThreadPoolTaskExecutordefaultThreadPool(){ThreadPoolTaskExecutor executor =newThreadPoolTaskExecutor();//核心线程数目
executor.setCorePoolSize(Runtime.getRuntime().availableProcessors()+1);//指定最大线程数
executor.setMaxPoolSize(Runtime.getRuntime().availableProcessors()+1);//队列中最大的数目
executor.setQueueCapacity(650);//线程名称前缀
executor.setThreadNamePrefix("DefaultThreadPool_");//rejection-policy:当pool已经达到max size的时候,如何处理新任务//CALLER_RUNS:不在新线程中执行任务,而是由调用者所在的线程来执行//对拒绝task的处理策略
executor.setRejectedExecutionHandler(newThreadPoolExecutor.CallerRunsPolicy());//线程空闲后的最大存活时间
executor.setKeepAliveSeconds(60);//加载
executor.initialize();return executor;}}
导出1000w条数据,用时5分钟左右,这个效率还是可以的
这是导出的操作,明天继续研究大数据导入操作!!!
版权归原作者 阿胜yss 所有, 如有侵权,请联系我们删除。