当数据量比较大时,例如数据量达到百万级,传统的一次读取到内存中在写入excel文件的方法便不再适用了,可能会导致内存溢出;而且一次性将数据写入一张sheet工作表也不太好。
但我们可以选择数据分片的方式批量写入多个工作表。
测试数据100w条,写入到Excel表中,分成5个sheet表,每个sheet表20w条。
1.引入依赖
pom.xml
<dependencies>
<!-- web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- mybatis -->
<dependency>
<groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- alibaba easyexcel -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.5</version>
<exclusions>
<exclusion>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.24</version>
</dependency>
<!-- mybatis-plus -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-boot-starter</artifactId>
<version>3.5.3</version>
</dependency>
<!-- jdbc -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-jdbc</artifactId>
</dependency>
<!--Mysql依赖包-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<!-- hutool -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.16</version>
</dependency>
</dependencies>
2. Controller/Service/Mapper
实体类User
import com.alibaba.excel.annotation.ExcelProperty;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.io.Serializable;
import java.time.LocalDateTime;
@NoArgsConstructor
@AllArgsConstructor
@Data
@TableName("user")
public class User implements Serializable {
@TableId("id")
@ExcelProperty("id")
private Long id;
@ExcelProperty("username")
private String username;
@ExcelProperty("password")
private String password;
@ExcelProperty("email")
private String email;
@ExcelProperty("phoneNumber")
private String phoneNumber;
@ExcelProperty("createdAt")
private LocalDateTime createdAt;
@ExcelProperty(value = "updatedAt")
private LocalDateTime updatedAt;
}
UserCtrl
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.UUID;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.alibaba.excel.write.style.column.LongestMatchColumnWidthStyleStrategy;
import com.example.excel.config.LocalDateTimeConverter;
import com.example.excel.domain.User;
import com.example.excel.mapper.UserMapper;
import com.example.excel.util.ExcelConstants;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executor;
/**
* @author: hong.jian
* 09-28 11:37
*/
@RestController
@RequiredArgsConstructor
@Slf4j
@RequestMapping("/export")
public class UserCtrl {
private final UserMapper userMapper;
private final Executor exportThreadPoolExecutor;
}
UserService 和 UserServiceImpl
public interface UserService extends IService<User> {
}
@Service
@RequiredArgsConstructor
public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements UserService {
private final UserMapper userMapper;
}
UserMapper
@Mapper
public interface UserMapper extends BaseMapper<User> {
List<User> selfSelectList(@Param("currentPage") int currentPage, @Param("pageSize") int pageSize);
}
UserMapper.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.example.excel.mapper.UserMapper">
<!-- 使用覆盖索引处理深度分页 -->
<select id="seleSelectList" resultType="com.example.excel.domain.User">
SELECT
u.id,
u.username,
u.PASSWORD,
u.email,
u.phone_number,
u.created_at,
u.updated_at
FROM
USER u,
( SELECT t.id FROM USER t ORDER BY t.id LIMIT #{currentPage}, #{pageSize} ) AS d
WHERE
u.id = d.id;
</select>
</mapper>
LocalDateTime时间字段转换器
import com.alibaba.excel.converters.Converter;
import com.alibaba.excel.metadata.GlobalConfiguration;
import com.alibaba.excel.metadata.data.WriteCellData;
import com.alibaba.excel.metadata.data.ReadCellData;
import com.alibaba.excel.metadata.property.ExcelContentProperty;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
/**
* 自定义LocalDateTime时间字段类型转换器
*/
public class LocalDateTimeConverter implements Converter<LocalDateTime> {
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
@Override
public Class<LocalDateTime> supportJavaTypeKey() {
return LocalDateTime.class;
}
@Override
public WriteCellData<?> convertToExcelData(LocalDateTime value, ExcelContentProperty contentProperty, GlobalConfiguration globalConfiguration) {
if (value != null) {
return new WriteCellData<>(value.format(FORMATTER));
}
return new WriteCellData<>("");
}
@Override
public LocalDateTime convertToJavaData(ReadCellData<?> cellData, ExcelContentProperty contentProperty, GlobalConfiguration globalConfiguration) {
if (cellData.getStringValue() != null) {
return LocalDateTime.parse(cellData.getStringValue(), FORMATTER);
}
return null;
}
}
3. 编写Controller方法
@RequestMapping("/test")
public void export(HttpServletResponse response) throws IOException {
log.info("*********统计查询列表导出开始!**************");
long start = System.currentTimeMillis();
// 文件名
String fileName = String.valueOf(UUID.randomUUID());
OutputStream outputStream = null;
try {
// 总记录数:实际中需要根据查询条件进行统计即可:一共多少条
long totalCount = userMapper.selectCount(null);
// 每一个Sheet存放20w条数据
Integer sheetDataRows = ExcelConstants.PER_SHEET_ROW_COUNT;
// 每次写入的数据量10w,每页查询10W
Integer writeDataRows = ExcelConstants.PER_WRITE_ROW_COUNT;
// 计算需要的Sheet数量
long sheetNum = totalCount % sheetDataRows == 0 ? (totalCount / sheetDataRows) : (totalCount / sheetDataRows + 1);
// 计算一般情况下每一个Sheet需要写入的次数(一般情况不包含最后一个sheet,因为最后一个sheet不确定会写入多少条数据)
Integer oneSheetWriteCount = sheetDataRows / writeDataRows;
// 计算最后一个sheet需要写入的次数
long lastSheetWriteCount = totalCount % sheetDataRows == 0 ? oneSheetWriteCount : (totalCount % sheetDataRows % writeDataRows == 0 ?
((totalCount % sheetDataRows) / writeDataRows) : ((totalCount % sheetDataRows) / writeDataRows + 1));
outputStream = response.getOutputStream();
// 必须放到循环外,否则会刷新流
ExcelWriter excelWriter = EasyExcel.write(outputStream).build();
// 开始分批查询分次写入
for (int i = 0; i < sheetNum; i++) {
log.info("*********统计查询列表第" + (i + 1) + "页导出开始!**************");
// 循环写入次数: j的自增条件是当不是最后一个Sheet的时候写入次数为正常的每个Sheet写入的次数,如果是最后一个就需要使用计算的次数lastSheetWriteCount
for (int j = 0; j < (i != sheetNum - 1 ? oneSheetWriteCount : lastSheetWriteCount); j++) {
// 分页查询一次10w
// 设置偏移量
int currentPage = (j + oneSheetWriteCount * i) * writeDataRows;
// 设置每页大小
int pageSize = writeDataRows;
// 使用覆盖索引避免深度分页
List<User> userList = userMapper.selfSelectList(currentPage, pageSize);
if (CollUtil.isEmpty(userList)) { // 记录集合判空
continue;
}
// 写入到excel:
// 这里可以通过设置includeColumnFiledNames、excludeColumnFiledNames导出什么字段,可以动态配置,前端传过来那些列,就导出那些列
WriteSheet writeSheet = EasyExcel.writerSheet(i, "Sheet" + (i + 1))
.head(User.class)
.registerWriteHandler(new LongestMatchColumnWidthStyleStrategy()) // 自动调整列宽
.registerConverter(new LocalDateTimeConverter()) // 时间字段转换器
// .includeColumnFiledNames() // 只导出指定字段集合
// .excludeColumnFiledNames() // 排除指定字段集合
.build();
excelWriter.write(userList, writeSheet);
}
log.info("*********统计查询列表第" + (i + 1) + "页导出结束!**************");
}
// 下载EXCEL,返回给前端stream流
response.setContentType("application/octet-stream");
response.setCharacterEncoding("utf-8");
response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + fileName + ".xlsx");
excelWriter.finish();
outputStream.flush();
log.info("*********统计查询列表导出结束!**************");
log.info("耗时:" + (System.currentTimeMillis() - start));
} catch (Exception e) {
e.printStackTrace();
} finally {
if (outputStream != null) {
outputStream.close();
}
}
}
4. 测试
100w条数据导出约27s,还是有点慢。
9-28 17:49:27.847 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表导出开始!**************
2024-09-28 17:49:28.744 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第1页导出开始!**************
2024-09-28 17:49:32.857 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第1页导出结束!**************
2024-09-28 17:49:32.857 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第2页导出开始!**************
2024-09-28 17:49:36.955 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第2页导出结束!**************
2024-09-28 17:49:36.955 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第3页导出开始!**************
2024-09-28 17:49:41.007 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第3页导出结束!**************
2024-09-28 17:49:41.007 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第4页导出开始!**************
2024-09-28 17:49:45.109 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第4页导出结束!**************
2024-09-28 17:49:45.109 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第5页导出开始!**************
2024-09-28 17:49:49.272 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表第5页导出结束!**************
2024-09-28 17:49:55.630 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表导出结束!**************
2024-09-28 17:49:55.631 INFO 120052 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : 耗时:27784
5. 思考
引入线程池
@Bean
public Executor exportThreadPoolExecutor(){
return new ThreadPoolExecutor(
5,
10,
10,
TimeUnit.SECONDS,
new ArrayBlockingQueue<>(100),
new ThreadPoolExecutor.CallerRunsPolicy()
);
}
并在UserController中注入
使用线程池优化代码
@RequestMapping("/test2")
public void export2(HttpServletResponse response) throws IOException {
log.info("*********统计查询列表导出开始!**************");
long start = System.currentTimeMillis();
String fileName = String.valueOf(UUID.randomUUID());
OutputStream outputStream = null;
try {
long totalCount = userMapper.selectCount(null);
Integer sheetDataRows = ExcelConstants.PER_SHEET_ROW_COUNT;
Integer writeDataRows = ExcelConstants.PER_WRITE_ROW_COUNT;
long sheetNum = (totalCount + sheetDataRows - 1) / sheetDataRows;
Integer oneSheetWriteCount = sheetDataRows / writeDataRows;
long lastSheetWriteCount = totalCount % sheetDataRows == 0 ? oneSheetWriteCount :
(totalCount % sheetDataRows + writeDataRows - 1) / writeDataRows;
outputStream = response.getOutputStream();
ExcelWriter excelWriter = EasyExcel.write(outputStream).build();
// 信号枪,配合多线程实现并发编程
CountDownLatch latch = new CountDownLatch((int) sheetNum);
for (int i = 0; i < sheetNum; i++) {
int finalI = i;
CompletableFuture.runAsync(() -> {
log.info("*********统计查询列表第" + (finalI + 1) + "页导出开始!**************");
WriteSheet writeSheet = EasyExcel.writerSheet(finalI, "Sheet" + (finalI + 1))
.head(User.class)
.registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
.registerConverter(new LocalDateTimeConverter())
.build();
try {
for (int j = 0; j < (finalI != sheetNum - 1 ? oneSheetWriteCount : lastSheetWriteCount); j++) {
int currentPage = (j + oneSheetWriteCount * finalI) * writeDataRows;
List<User> userList = userMapper.selfSelectList(currentPage, writeDataRows);
if (CollUtil.isEmpty(userList)) {
continue;
}
// 加锁实现互斥写入,确保线程安全
synchronized (excelWriter) {
excelWriter.write(userList, writeSheet);
}
}
} finally {
latch.countDown();
log.info("*********统计查询列表第" + (finalI + 1) + "页导出结束!**************");
}
}, exportThreadPoolExecutor); // 尝试引入多线程
}
latch.await();
response.setContentType("application/octet-stream");
response.setCharacterEncoding("utf-8");
response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + fileName + ".xlsx");
excelWriter.finish();
outputStream.flush();
} catch (Exception e) {
e.printStackTrace();
} finally {
if (outputStream != null) {
outputStream.close();
}
}
log.info("*********统计查询列表导出结束!**************");
log.info("耗时:" + (System.currentTimeMillis() - start));
}
再次测试,耗时约20s
2024-09-28 18:05:54.239 INFO 53952 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表导出开始!**************
2024-09-28 18:05:55.132 INFO 53952 --- [pool-1-thread-2] com.example.excel.controller.UserCtrl : *********统计查询列表第2页导出开始!**************
2024-09-28 18:05:55.132 INFO 53952 --- [pool-1-thread-3] com.example.excel.controller.UserCtrl : *********统计查询列表第3页导出开始!**************
2024-09-28 18:05:55.132 INFO 53952 --- [pool-1-thread-4] com.example.excel.controller.UserCtrl : *********统计查询列表第4页导出开始!**************
2024-09-28 18:05:55.132 INFO 53952 --- [pool-1-thread-5] com.example.excel.controller.UserCtrl : *********统计查询列表第5页导出开始!**************
2024-09-28 18:05:55.132 INFO 53952 --- [pool-1-thread-1] com.example.excel.controller.UserCtrl : *********统计查询列表第1页导出开始!**************
2024-09-28 18:06:04.341 INFO 53952 --- [pool-1-thread-3] com.example.excel.controller.UserCtrl : *********统计查询列表第3页导出结束!**************
2024-09-28 18:06:05.483 INFO 53952 --- [pool-1-thread-4] com.example.excel.controller.UserCtrl : *********统计查询列表第4页导出结束!**************
2024-09-28 18:06:06.591 INFO 53952 --- [pool-1-thread-5] com.example.excel.controller.UserCtrl : *********统计查询列表第5页导出结束!**************
2024-09-28 18:06:07.795 INFO 53952 --- [pool-1-thread-1] com.example.excel.controller.UserCtrl : *********统计查询列表第1页导出结束!**************
2024-09-28 18:06:08.871 INFO 53952 --- [pool-1-thread-2] com.example.excel.controller.UserCtrl : *********统计查询列表第2页导出结束!**************
2024-09-28 18:06:14.907 INFO 53952 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : *********统计查询列表导出结束!**************
2024-09-28 18:06:14.907 INFO 53952 --- [nio-8080-exec-1] com.example.excel.controller.UserCtrl : 耗时:20667
可以发现,性能有提升,但提升不是很明显。我个人理解是为了避免多个线程同时写入excel产生的线程安全问题,采用了加锁的方式,确保线程间使用excelwriter写入excel文件的操作是互斥的,这导致性能收到了影响。