Java 批量下载 doc/excel/pdf

发布于:2025-03-25 ⋅ 阅读:(27) ⋅ 点赞:(0)

指定文档URL集合

下载到指定文件夹

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

/**
 * Command-line utility that downloads a fixed list of Office/PDF documents over
 * HTTP(S) into a local directory.
 *
 * <p>Only files whose URL path ends in {@code doc}, {@code docx}, {@code xls},
 * {@code xlsx} or {@code pdf} are downloaded. The server's {@code Content-Type}
 * is compared with the MIME type expected for the extension; a mismatch only
 * prints a warning and never aborts the download.
 */
public class OfficeFileDownloader {

	/**
	 * URLs of the Office/PDF documents to download.
	 */
	private static final List<String> URL_LIST = Arrays.asList(
		"https://test.oss-cn-zhangjiakou.aliyuncs.com/template/20250113/1877528575184224257_2.pdf",
		"https://test.oss-cn-zhangjiakou.aliyuncs.com/template/20250316/1901150106568462338_2.pdf"
	);

	/**
	 * Directory used when no target directory is passed on the command line.
	 */
	private static final String DEFAULT_SAVE_DIR = "E:\\downloads/";

	/**
	 * Value returned by {@link #getExpectedMimeType(String)} for extensions that
	 * have no known MIME type.
	 */
	private static final String UNKNOWN_MIME_TYPE = "unknown";

	/**
	 * Downloads every URL in {@link #URL_LIST}, reporting success or failure per file.
	 *
	 * @param args optional; {@code args[0]} overrides the default target directory
	 */
	public static void main(String[] args) {
		String saveDir = (args != null && args.length > 0 && !args[0].isEmpty())
			? args[0]
			: DEFAULT_SAVE_DIR;

		// isDirectory() (rather than exists()) also rejects a target that exists as a regular file.
		File dir = new File(saveDir);
		if (!dir.isDirectory() && !dir.mkdirs()) {
			System.err.println("目录创建失败: " + saveDir);
			return;
		}

		for (String url : URL_LIST) {
			try {
				String fileName = extractFileName(url);
				if (!isValidFileType(fileName)) {
					System.err.println("× 不支持的文件类型: " + fileName);
					continue;
				}
				// File(parent, child) inserts the separator, so saveDir needs no trailing slash.
				downloadFile(url, new File(dir, fileName).getPath());
				System.out.println("√ 下载成功: " + fileName);
			} catch (Exception e) {
				// One failed URL must not stop the batch; fall back to toString() when there is no message.
				String reason = e.getMessage() != null ? e.getMessage() : e.toString();
				System.err.println("× 下载失败 [" + url + "]: " + reason);
			}
		}
	}

	/**
	 * Downloads a single file over HTTP(S).
	 *
	 * <p>The body is first written to {@code savePath + ".part"} and moved onto
	 * {@code savePath} only after the whole stream has been read, so a failed
	 * transfer neither leaves a truncated file at {@code savePath} nor overwrites
	 * an existing one.
	 *
	 * @param fileUrl  absolute http/https URL of the file
	 * @param savePath local path the file is stored at (replaced if it exists)
	 * @throws IOException if the protocol is not http/https, the response status
	 *                     is not 200, or reading/writing fails
	 */
	private static void downloadFile(String fileUrl, String savePath) throws IOException {
		URL url = new URL(fileUrl);
		String protocol = url.getProtocol();
		if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
			// Other protocols do not yield an HttpURLConnection; fail with a clear message instead of a cast error.
			throw new IOException("不支持的协议: " + protocol);
		}

		Path target = Paths.get(savePath);
		Path partial = Paths.get(savePath + ".part");

		HttpURLConnection connection = (HttpURLConnection) url.openConnection();
		try {
			connection.setRequestMethod("GET");
			connection.setRequestProperty("User-Agent", "Mozilla/5.0");
			connection.setConnectTimeout(10000); // 10 s connect timeout
			connection.setReadTimeout(30000);    // 30 s read timeout

			int statusCode = connection.getResponseCode();
			if (statusCode != HttpURLConnection.HTTP_OK) {
				throw new IOException("HTTP " + statusCode + " - " + connection.getResponseMessage());
			}

			// Advisory only: prints a warning on mismatch, never throws.
			validateContentType(connection.getContentType(), savePath);

			try (InputStream in = connection.getInputStream()) {
				Files.copy(in, partial, StandardCopyOption.REPLACE_EXISTING);
			}
			Files.move(partial, target, StandardCopyOption.REPLACE_EXISTING);
		} finally {
			connection.disconnect();
			try {
				// No-op after a successful move; removes the leftover after a failure.
				Files.deleteIfExists(partial);
			} catch (IOException ignored) {
				// Best-effort cleanup; must not mask the exception that caused the failure.
			}
		}
	}

	/**
	 * Prints a warning when the response's MIME type does not match the type
	 * expected for the extension of {@code savePath}. Never throws for a
	 * malformed header value.
	 *
	 * @param contentType raw {@code Content-Type} header value, may be {@code null}
	 * @param savePath    local path whose extension determines the expected type
	 */
	private static void validateContentType(String contentType, String savePath) {
		if (contentType == null) {
			return;
		}

		String expected = getExpectedMimeType(getFileExtension(savePath));
		if (UNKNOWN_MIME_TYPE.equals(expected)) {
			return;
		}

		// Strip parameters such as "; charset=..."; indexOf avoids the empty array split() yields for ";".
		int paramStart = contentType.indexOf(';');
		String mimeType = (paramStart == -1 ? contentType : contentType.substring(0, paramStart))
			.trim()
			.toLowerCase(Locale.ROOT);

		if (!mimeType.equals(expected)) {
			System.out.println("⚠ 类型警告: " + savePath +
				"\n   预期类型: " + expected +
				"\n   实际类型: " + contentType);
		}
	}

	/**
	 * Returns the MIME type expected for a file extension.
	 *
	 * @param ext file extension without the dot, any case
	 * @return the MIME type, or {@code "unknown"} for an unsupported extension
	 */
	private static String getExpectedMimeType(String ext) {
		// Locale.ROOT keeps case folding independent of the JVM's default locale.
		return switch (ext.toLowerCase(Locale.ROOT)) {
			case "doc" -> "application/msword";
			case "docx" -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
			case "xls" -> "application/vnd.ms-excel";
			case "xlsx" -> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
			case "pdf" -> "application/pdf";
			default -> UNKNOWN_MIME_TYPE;
		};
	}

	/**
	 * Tells whether the file name has one of the supported extensions
	 * (doc, docx, xls, xlsx, pdf; case-insensitive).
	 *
	 * @param fileName file name or path
	 * @return {@code true} if the extension is supported
	 */
	private static boolean isValidFileType(String fileName) {
		return switch (getFileExtension(fileName).toLowerCase(Locale.ROOT)) {
			case "doc", "docx", "xls", "xlsx", "pdf" -> true;
			default -> false;
		};
	}

	/**
	 * Extracts the last path segment of a URL, ignoring the query string and fragment.
	 *
	 * @param fileUrl URL to take the file name from
	 * @return the text after the last {@code '/'} of the URL path
	 * @throws IllegalArgumentException if the URL is {@code null}, contains no
	 *                                  {@code '/'}, or its path ends with {@code '/'}
	 */
	private static String extractFileName(String fileUrl) {
		if (fileUrl == null) {
			throw new IllegalArgumentException("无效的URL格式: " + fileUrl);
		}

		// Cut at the first '?' or '#', whichever comes first.
		int end = fileUrl.length();
		int queryStart = fileUrl.indexOf('?');
		if (queryStart != -1) {
			end = queryStart;
		}
		int fragmentStart = fileUrl.indexOf('#');
		if (fragmentStart != -1 && fragmentStart < end) {
			end = fragmentStart;
		}
		String cleanUrl = fileUrl.substring(0, end);

		int lastSlash = cleanUrl.lastIndexOf('/');
		if (lastSlash == -1 || lastSlash == cleanUrl.length() - 1) {
			throw new IllegalArgumentException("无效的URL格式: " + fileUrl);
		}

		return cleanUrl.substring(lastSlash + 1);
	}

	/**
	 * Returns the extension of a file name or path, without the dot.
	 *
	 * @param fileName file name or path using {@code '/'} or {@code '\'} separators
	 * @return the text after the last dot of the final path segment, or an empty
	 *         string when that segment has no dot or ends with one
	 */
	private static String getFileExtension(String fileName) {
		// Only the final path segment may contribute an extension; dots in directory names are ignored.
		int nameStart = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\')) + 1;
		int dotIndex = fileName.lastIndexOf('.');
		if (dotIndex < nameStart || dotIndex == fileName.length() - 1) {
			return "";
		}
		return fileName.substring(dotIndex + 1);
	}
}