package com.music.service;

import com.music.dto.ProgressMessage;
import org.jaudiotagger.audio.AudioFile;
import org.jaudiotagger.audio.AudioFileIO;
import org.jaudiotagger.tag.FieldKey;
import org.jaudiotagger.tag.Tag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.messaging.simp.SimpMessagingTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;

/**
 * Music library de-duplication service.
 *
 * <p>Two strategies are implemented and can be enabled independently:
 * <ul>
 *   <li>MD5: groups files whose binary content is identical.</li>
 *   <li>Metadata: groups files sharing artist, title, album and a bucketed track length.</li>
 * </ul>
 * Within each group one file is kept (see {@link #scoreFile(Path)}) and the others are moved or
 * copied into a trash directory. Progress is published through {@link #sendProgress}.
 */
@Service
public class DedupService {

    private static final Logger log = LoggerFactory.getLogger(DedupService.class);

    /** File extensions (lower case, without the dot) that are treated as audio files. */
    private static final Set<String> AUDIO_EXTENSIONS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            "mp3", "flac", "wav", "m4a", "aac", "ogg", "wma", "ape", "aiff", "aif", "wv", "tta", "opus"
    )));

    /** Non-FLAC extensions that receive the "lossless" format score. */
    private static final Set<String> LOSSLESS_EXTENSIONS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            "wav", "ape", "aiff", "aif", "wv", "tta"
    )));

    /** Whole-word markers for alternative versions that get a small score penalty. */
    private static final Pattern VARIANT_PATTERN = Pattern.compile("\\b(live|remix|karaoke)\\b");

    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Bucket width (seconds) used when normalizing track length for metadata matching.
     * NOTE(review): bucketing is by integer division, so two lengths that differ by less than
     * this value can still land in adjacent buckets (e.g. 4s and 5s) and will not be grouped.
     */
    private static final int DURATION_TOLERANCE_SECONDS = 5;

    private final SimpMessagingTemplate messagingTemplate;
    private final ProgressStore progressStore;

    public DedupService(SimpMessagingTemplate messagingTemplate, ProgressStore progressStore) {
        this.messagingTemplate = messagingTemplate;
        this.progressStore = progressStore;
    }

    /**
     * Runs a de-duplication task asynchronously.
     *
     * <p>All failures are reported through a completed progress message; nothing is thrown to the caller.
     *
     * @param taskId      identifier used for progress messages
     * @param libraryDir  root directory of the music library to scan
     * @param trashDir    directory that receives the duplicates (created if missing)
     * @param useMd5      enable grouping by identical binary content
     * @param useMetadata enable grouping by tag metadata
     * @param mode        {@code "move"} or {@code "copy"} (case-insensitive)
     */
    @Async
    public void dedup(String taskId, String libraryDir, String trashDir,
                      boolean useMd5, boolean useMetadata, String mode) {
        try {
            // Path parsing is inside the try so that an invalid path is reported as a task failure.
            Path libraryPath = Paths.get(libraryDir);
            Path trashPath = Paths.get(trashDir);

            // Validate everything before touching the file system.
            if (!Files.isDirectory(libraryPath)) {
                sendProgress(taskId, 0, 0, 0, 0, "音乐库目录不存在或不是目录", true);
                return;
            }
            boolean moveMode = "move".equalsIgnoreCase(mode);
            if (!moveMode && !"copy".equalsIgnoreCase(mode)) {
                sendProgress(taskId, 0, 0, 0, 0, "执行模式错误,必须是 copy 或 move", true);
                return;
            }
            if (!useMd5 && !useMetadata) {
                sendProgress(taskId, 0, 0, 0, 0, "至少需要启用一种去重策略(MD5 或元数据匹配)", true);
                return;
            }
            Files.createDirectories(trashPath);

            List<Path> audioFiles = collectAudioFiles(libraryPath, trashPath);
            int total = audioFiles.size();
            if (total == 0) {
                sendProgress(taskId, 0, 0, 0, 0, "未在音乐库中找到音频文件", true);
                return;
            }

            TaskContext ctx = new TaskContext(taskId, total, trashPath, moveMode);
            sendProgress(taskId, total, 0, 0, 0, "开始扫描音乐库...", false);

            // Insertion-ordered maps keep the processing order stable (scan order).
            Map<String, List<Path>> md5Groups = new LinkedHashMap<>();
            Map<MetadataKey, List<Path>> metadataGroups = new LinkedHashMap<>();

            // Phase 1: scan every file and build the groups for the enabled strategies.
            for (Path file : audioFiles) {
                try {
                    if (useMd5) {
                        String md5 = calculateMd5(file);
                        md5Groups.computeIfAbsent(md5, k -> new ArrayList<>()).add(file);
                    }
                    if (useMetadata) {
                        readMetadataKey(file).ifPresent(key ->
                                metadataGroups.computeIfAbsent(key, k -> new ArrayList<>()).add(file));
                    }
                    int currentScanned = ctx.scanned.incrementAndGet();
                    if (currentScanned % 50 == 0) {
                        sendProgress(taskId, total, currentScanned, ctx.duplicateGroups.get(), ctx.moved.get(),
                                String.format("扫描中(%d/%d)", currentScanned, total), false);
                    }
                } catch (Exception e) {
                    // Best effort: one unreadable file must not abort the whole task.
                    ctx.failed.incrementAndGet();
                    log.warn("扫描文件失败: {}", file, e);
                }
            }

            // Phase 2: exact binary duplicates.
            if (useMd5) {
                processGroups(md5Groups.values(), ctx);
            }
            // Phase 3: metadata duplicates. Files already handled in phase 2 are excluded inside
            // processGroups so they are neither processed twice nor chosen as the file to keep.
            if (useMetadata) {
                processGroups(metadataGroups.values(), ctx);
            }

            log.info("去重任务完成: taskId={}, 扫描={}, 重复组={}, 移动/复制={}, 失败={}",
                    taskId, ctx.scanned.get(), ctx.duplicateGroups.get(), ctx.moved.get(), ctx.failed.get());
            sendProgress(taskId, total, ctx.scanned.get(), ctx.duplicateGroups.get(), ctx.moved.get(),
                    String.format("任务完成!扫描文件: %d, 重复组: %d, 移动/复制文件: %d",
                            ctx.scanned.get(), ctx.duplicateGroups.get(), ctx.moved.get()),
                    true);
        } catch (Exception e) {
            log.error("去重任务执行失败", e);
            sendProgress(taskId, 0, 0, 0, 0, "任务执行失败: " + e.getMessage(), true);
        }
    }

    /** Mutable per-task state shared by the processing phases of a single {@code dedup} call. */
    private static final class TaskContext {
        final String taskId;
        final int total;
        final Path trashPath;
        final boolean moveMode;
        final AtomicInteger scanned = new AtomicInteger(0);
        final AtomicInteger duplicateGroups = new AtomicInteger(0);
        final AtomicInteger moved = new AtomicInteger(0);
        final AtomicInteger failed = new AtomicInteger(0);
        /** Files that were already moved/copied to trash by an earlier group. */
        final Set<Path> handled = new HashSet<>();

        TaskContext(String taskId, int total, Path trashPath, boolean moveMode) {
            this.taskId = taskId;
            this.total = total;
            this.trashPath = trashPath;
            this.moveMode = moveMode;
        }
    }

    /**
     * Walks the library and returns every audio file found.
     *
     * <p>Entries that cannot be accessed are logged and skipped. If the trash directory lies
     * inside the library, its subtree is skipped so previously trashed files are not rescanned.
     */
    private List<Path> collectAudioFiles(Path libraryPath, Path trashPath) throws IOException {
        final Path libraryRoot = libraryPath.toAbsolutePath().normalize();
        final Path trashRoot = trashPath.toAbsolutePath().normalize();
        final boolean trashInsideLibrary = !trashRoot.equals(libraryRoot) && trashRoot.startsWith(libraryRoot);
        final List<Path> audioFiles = new ArrayList<>();

        Files.walkFileTree(libraryPath, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
                if (trashInsideLibrary && dir.toAbsolutePath().normalize().equals(trashRoot)) {
                    return FileVisitResult.SKIP_SUBTREE;
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                if (isAudioFile(file)) {
                    audioFiles.add(file);
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException exc) {
                // The default implementation rethrows, which would abort the entire walk.
                log.warn("无法访问文件,已跳过: {}", file, exc);
                return FileVisitResult.CONTINUE;
            }
        });
        return audioFiles;
    }

    /**
     * Processes each candidate group: keeps the best-scoring file and sends the rest to trash.
     * Files already handled by an earlier group are removed from the group first.
     */
    private void processGroups(Collection<List<Path>> groups, TaskContext ctx) {
        for (List<Path> group : groups) {
            List<Path> candidates = new ArrayList<>(group);
            candidates.removeAll(ctx.handled);
            if (candidates.size() <= 1) {
                continue;
            }
            ctx.duplicateGroups.incrementAndGet();

            Path keep = chooseBestFileByScore(candidates);
            List<Path> duplicates = new ArrayList<>(candidates);
            duplicates.remove(keep);
            handleDuplicates(duplicates, keep, ctx);
        }
    }

    /** Returns the lower-case extension of {@code lowerName} without the dot, or "" if there is none. */
    private static String extensionOf(String lowerName) {
        int idx = lowerName.lastIndexOf('.');
        if (idx <= 0 || idx == lowerName.length() - 1) {
            return "";
        }
        return lowerName.substring(idx + 1);
    }

    /** Returns whether the file name carries one of the known audio extensions. */
    private boolean isAudioFile(Path file) {
        Path fileName = file.getFileName();
        if (fileName == null) {
            return false;
        }
        // Locale.ROOT keeps the comparison independent of the JVM default locale.
        return AUDIO_EXTENSIONS.contains(extensionOf(fileName.toString().toLowerCase(Locale.ROOT)));
    }

    /** Computes the MD5 digest of the file content as a lower-case hex string. */
    private String calculateMd5(Path file) throws IOException, NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = Files.newInputStream(file);
             DigestInputStream dis = new DigestInputStream(is, md)) {
            byte[] buffer = new byte[8192];
            // Reading through the DigestInputStream feeds every byte into the digest.
            while (dis.read(buffer) != -1) {
                // no-op
            }
        }
        byte[] digest = md.digest();
        StringBuilder sb = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            sb.append(HEX_DIGITS[(b >> 4) & 0x0F]).append(HEX_DIGITS[b & 0x0F]);
        }
        return sb.toString();
    }

    /**
     * Grouping key for metadata matching: artist + title + album + bucketed track length.
     */
    private static final class MetadataKey {
        private final String artist;
        private final String title;
        private final String album;
        private final int normalizedDuration;

        private MetadataKey(String artist, String title, String album, int normalizedDuration) {
            this.artist = artist;
            this.title = title;
            this.album = album;
            this.normalizedDuration = normalizedDuration;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof MetadataKey)) {
                return false;
            }
            MetadataKey that = (MetadataKey) o;
            return normalizedDuration == that.normalizedDuration
                    && Objects.equals(artist, that.artist)
                    && Objects.equals(title, that.title)
                    && Objects.equals(album, that.album);
        }

        @Override
        public int hashCode() {
            return Objects.hash(artist, title, album, normalizedDuration);
        }
    }

    /**
     * Reads the metadata key used for matching.
     *
     * @return the key, or empty when the file has no tag, lacks artist/title, or cannot be parsed
     */
    private Optional<MetadataKey> readMetadataKey(Path file) {
        try {
            AudioFile audioFile = AudioFileIO.read(file.toFile());
            Tag tag = audioFile.getTag();
            if (tag == null) {
                return Optional.empty();
            }

            String artist = normalize(tag.getFirst(FieldKey.ARTIST));
            String title = normalize(tag.getFirst(FieldKey.TITLE));
            String album = normalize(tag.getFirst(FieldKey.ALBUM));
            int lengthSec = audioFile.getAudioHeader().getTrackLength();

            if (artist.isEmpty() || title.isEmpty()) {
                // Without the core tags the file takes no part in metadata grouping.
                return Optional.empty();
            }

            int normalizedDuration = lengthSec / DURATION_TOLERANCE_SECONDS;
            return Optional.of(new MetadataKey(artist, title, album, normalizedDuration));
        } catch (Exception e) {
            // Corrupt tags or unsupported formats: skip metadata matching for this file.
            log.debug("读取元数据失败: {}", file, e);
            return Optional.empty();
        }
    }

    /** Trims and lower-cases a tag value; {@code null} becomes the empty string. */
    private String normalize(String s) {
        if (s == null) {
            return "";
        }
        return s.trim().toLowerCase(Locale.ROOT);
    }

    /**
     * Picks the file to keep from a group: the one with the highest {@link #scoreFile} result.
     * On ties the earliest candidate wins. Each candidate is scored exactly once.
     *
     * @param candidates non-empty list of files in one duplicate group
     */
    private Path chooseBestFileByScore(List<Path> candidates) {
        Path best = candidates.get(0);
        if (candidates.size() == 1) {
            return best;
        }
        double bestScore = scoreFile(best);
        for (int i = 1; i < candidates.size(); i++) {
            Path candidate = candidates.get(i);
            double score = scoreFile(candidate);
            if (score > bestScore) {
                best = candidate;
                bestScore = score;
            }
        }
        return best;
    }

    /**
     * Scores a file for retention.
     *
     * <ul>
     *   <li>Format: flac = 100, other lossless = 80, everything else = 50.</li>
     *   <li>Size: below 128 KB subtracts 30; otherwise adds sizeKB / 100, capped at 40.</li>
     *   <li>Name contains "sample", "preview" or "demo": subtracts 20.</li>
     *   <li>Name contains the word "live", "remix" or "karaoke": subtracts 5.</li>
     * </ul>
     */
    private double scoreFile(Path file) {
        double score;
        String name = file.getFileName().toString().toLowerCase(Locale.ROOT);
        String ext = extensionOf(name);

        if ("flac".equals(ext)) {
            score = 100;
        } else if (LOSSLESS_EXTENSIONS.contains(ext)) {
            score = 80;
        } else {
            score = 50; // lossy or unknown format
        }

        try {
            double sizeKB = Files.size(file) / 1024.0;
            if (sizeKB < 128) {
                score -= 30; // very small file: probably a sample or a broken file
            } else {
                score += Math.min(sizeKB / 100.0, 40.0);
            }
        } catch (IOException ignored) {
            // Size is only a heuristic; if it cannot be read the file is scored without it.
        }

        if (name.contains("sample") || name.contains("preview") || name.contains("demo")) {
            score -= 20;
        }
        if (VARIANT_PATTERN.matcher(name).find()) {
            // Alternative versions are slightly less preferred.
            score -= 5;
        }

        // TODO: read the bitrate from the audio header and include it in the score.
        return score;
    }

    /**
     * Moves or copies the duplicates into the trash directory and publishes progress.
     * Successfully processed files are recorded in {@code ctx.handled}; failures are logged
     * and counted in {@code ctx.failed}.
     *
     * @return number of files successfully moved/copied by this call
     */
    private int handleDuplicates(List<Path> duplicates, Path keep, TaskContext ctx) {
        int movedCount = 0;
        for (Path dup : duplicates) {
            try {
                Path target = resolveTargetFile(ctx.trashPath, dup.getFileName().toString());
                // No REPLACE_EXISTING: the target name was chosen to be free, and a file
                // already in trash must never be overwritten.
                if (ctx.moveMode) {
                    Files.move(dup, target);
                } else {
                    Files.copy(dup, target);
                }
                ctx.handled.add(dup);
                movedCount++;
                int movedTotal = ctx.moved.incrementAndGet();
                sendProgress(ctx.taskId, ctx.total, ctx.scanned.get(), ctx.duplicateGroups.get(), movedTotal,
                        String.format("重复文件: %s (保留: %s)", dup.getFileName(), keep.getFileName()),
                        false);
            } catch (Exception e) {
                ctx.failed.incrementAndGet();
                log.warn("处理重复文件失败: {}", dup, e);
            }
        }
        return movedCount;
    }

    /**
     * Returns a path in {@code targetDir} that does not exist yet, appending " (n)" before the
     * extension when {@code fileName} is already taken.
     */
    private Path resolveTargetFile(Path targetDir, String fileName) {
        Path target = targetDir.resolve(fileName);
        if (!Files.exists(target)) {
            return target;
        }

        int lastDot = fileName.lastIndexOf('.');
        String base = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
        String ext = lastDot > 0 ? fileName.substring(lastDot) : "";

        for (int n = 1; Files.exists(target); n++) {
            target = targetDir.resolve(base + " (" + n + ")" + ext);
        }
        return target;
    }

    /**
     * Stores and publishes a progress message for the task.
     *
     * <p>Field meaning for this task type ({@code type == "dedup"}):
     * <ul>
     *   <li>total: number of audio files found</li>
     *   <li>processed: number of files scanned so far</li>
     *   <li>success: number of duplicate groups</li>
     *   <li>failed: number of duplicate files moved/copied (not an error count)</li>
     * </ul>
     */
    private void sendProgress(String taskId, int total, int processed, int success, int failed,
                              String message, boolean completed) {
        ProgressMessage pm = new ProgressMessage();
        pm.setTaskId(taskId);
        pm.setType("dedup");
        pm.setTotal(total);
        pm.setProcessed(processed);
        pm.setSuccess(success);
        pm.setFailed(failed);
        pm.setCurrentFile(null);
        pm.setMessage(message);
        pm.setCompleted(completed);

        progressStore.put(pm);
        messagingTemplate.convertAndSend("/topic/progress/" + taskId, pm);
    }
}