提交代码

2026-01-29 18:26:02 +08:00
parent 981b4ecf42
commit 7531b6c466
47 changed files with 7257 additions and 16 deletions
--- a/backend/src/main/java/com/music/service/DedupService.java
+++ b/backend/src/main/java/com/music/service/DedupService.java
@@ -0,0 +1,454 @@
+package com.music.service;
+
+import com.music.dto.ProgressMessage;
+import org.jaudiotagger.audio.AudioFile;
+import org.jaudiotagger.audio.AudioFileIO;
+import org.jaudiotagger.tag.FieldKey;
+import org.jaudiotagger.tag.Tag;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.messaging.simp.SimpMessagingTemplate;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Service;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.*;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Music library de-duplication service.
+ *
+ * <p>Two grouping strategies are available and can be combined:
+ * <ul>
+ *   <li>MD5: groups files whose bytes are identical (exact copies).</li>
+ *   <li>Metadata: groups files sharing artist, title, album and a duration bucket.</li>
+ * </ul>
+ * For every group with more than one file, the highest-scoring file is kept and the
+ * others are moved or copied into the trash directory. A file that one strategy has
+ * already designated as a duplicate is never processed again by the other strategy.
+ */
+@Service
+public class DedupService {
+
+    private static final Logger log = LoggerFactory.getLogger(DedupService.class);
+
+    /** File extensions (lower case, without the dot) that are treated as audio files. */
+    private static final Set<String> AUDIO_EXTENSIONS = new HashSet<>(Arrays.asList(
+            "mp3", "flac", "wav", "m4a", "aac", "ogg", "wma", "ape", "aiff", "aif", "wv", "tta", "opus"
+    ));
+
+    /** Lossless formats other than FLAC; scored between FLAC and the lossy formats. */
+    private static final Set<String> OTHER_LOSSLESS_EXTENSIONS = new HashSet<>(Arrays.asList(
+            "wav", "ape", "aiff", "aif", "wv", "tta"
+    ));
+
+    /** Width, in seconds, of the duration bucket used by metadata matching. */
+    private static final int DURATION_TOLERANCE_SECONDS = 5;
+
+    /** During the scan phase a progress message is published once per this many files. */
+    private static final int SCAN_PROGRESS_INTERVAL = 50;
+
+    private static final String MODE_COPY = "copy";
+    private static final String MODE_MOVE = "move";
+
+    private final SimpMessagingTemplate messagingTemplate;
+    private final ProgressStore progressStore;
+
+    public DedupService(SimpMessagingTemplate messagingTemplate, ProgressStore progressStore) {
+        this.messagingTemplate = messagingTemplate;
+        this.progressStore = progressStore;
+    }
+
+    /**
+     * Mutable state shared by the phases of one de-duplication run.
+     * Only ever touched by the thread executing that run.
+     */
+    private static final class RunState {
+        private final String taskId;
+        private final Path trashPath;
+        /** {@code true} for move mode, {@code false} for copy mode. */
+        private final boolean move;
+        private final AtomicInteger scanned = new AtomicInteger();
+        private final AtomicInteger duplicateGroups = new AtomicInteger();
+        private final AtomicInteger moved = new AtomicInteger();
+        private final AtomicInteger failed = new AtomicInteger();
+        /** Files already designated as duplicates; they are excluded from later groups. */
+        private final Set<Path> handled = new HashSet<>();
+        /** Number of audio files found; set once the library walk has finished. */
+        private int total;
+
+        private RunState(String taskId, Path trashPath, boolean move) {
+            this.taskId = taskId;
+            this.trashPath = trashPath;
+            this.move = move;
+        }
+    }
+
+    /**
+     * Runs a de-duplication task asynchronously and reports progress via {@link #sendProgress}.
+     *
+     * <p>Every outcome, including validation errors and unexpected failures, ends with a
+     * progress message whose {@code completed} flag is {@code true}.
+     *
+     * @param taskId      identifier used to address progress messages
+     * @param libraryDir  root directory of the music library to scan
+     * @param trashDir    directory receiving the duplicates; created when missing
+     * @param useMd5      enable grouping by MD5 of the file content
+     * @param useMetadata enable grouping by tag metadata
+     * @param mode        {@code "copy"} or {@code "move"} (case-insensitive)
+     */
+    @Async
+    public void dedup(String taskId,
+                      String libraryDir,
+                      String trashDir,
+                      boolean useMd5,
+                      boolean useMetadata,
+                      String mode) {
+        try {
+            // Path parsing happens inside the try block so that a null or malformed path
+            // still results in a "completed" message instead of a silently dead task.
+            Path libraryPath = Paths.get(libraryDir);
+            Path trashPath = Paths.get(trashDir);
+
+            Optional<String> problem = validateRequest(libraryPath, trashPath, useMd5, useMetadata, mode);
+            if (problem.isPresent()) {
+                sendProgress(taskId, 0, 0, 0, 0, problem.get(), true);
+                return;
+            }
+
+            // Create the trash directory only after the request is known to be valid.
+            if (!Files.exists(trashPath)) {
+                Files.createDirectories(trashPath);
+            }
+
+            RunState run = new RunState(taskId, trashPath, MODE_MOVE.equalsIgnoreCase(mode));
+
+            List<Path> audioFiles = collectAudioFiles(libraryPath,
+                    trashPath.toAbsolutePath().normalize(), run);
+            run.total = audioFiles.size();
+            if (run.total == 0) {
+                sendProgress(taskId, 0, 0, 0, 0,
+                        "未在音乐库中找到音频文件", true);
+                return;
+            }
+
+            publish(run, "开始扫描音乐库...", false);
+
+            // Insertion-ordered maps keep the processing order equal to the scan order.
+            Map<String, List<Path>> md5Groups = new LinkedHashMap<>();
+            Map<MetadataKey, List<Path>> metadataGroups = new LinkedHashMap<>();
+
+            // Phase 1: scan every file and build the groups for the enabled strategies.
+            for (Path file : audioFiles) {
+                try {
+                    if (useMd5) {
+                        String md5 = calculateMd5(file);
+                        md5Groups.computeIfAbsent(md5, k -> new ArrayList<>()).add(file);
+                    }
+
+                    if (useMetadata) {
+                        readMetadataKey(file).ifPresent(key -> metadataGroups
+                                .computeIfAbsent(key, k -> new ArrayList<>())
+                                .add(file));
+                    }
+
+                    int currentScanned = run.scanned.incrementAndGet();
+                    if (currentScanned % SCAN_PROGRESS_INTERVAL == 0) {
+                        publish(run,
+                                String.format("扫描中（%d/%d）", currentScanned, run.total),
+                                false);
+                    }
+                } catch (IOException | NoSuchAlgorithmException | RuntimeException e) {
+                    run.failed.incrementAndGet();
+                    log.warn("扫描文件失败: {}", file, e);
+                }
+            }
+
+            // Phase 2: exact binary duplicates.
+            if (useMd5) {
+                for (List<Path> group : md5Groups.values()) {
+                    processGroup(group, run);
+                }
+            }
+
+            // Phase 3: metadata matches, skipping files phase 2 already dealt with.
+            if (useMetadata) {
+                for (List<Path> group : metadataGroups.values()) {
+                    processGroup(group, run);
+                }
+            }
+
+            log.info("去重任务完成: taskId={}, 扫描={}, 重复组={}, 移动/复制={}, 失败={}",
+                    taskId, run.scanned.get(), run.duplicateGroups.get(),
+                    run.moved.get(), run.failed.get());
+
+            publish(run,
+                    String.format("任务完成！扫描文件: %d, 重复组: %d, 移动/复制文件: %d",
+                            run.scanned.get(), run.duplicateGroups.get(), run.moved.get()),
+                    true);
+
+        } catch (Exception e) {
+            // Task boundary: report whatever went wrong to the client.
+            log.error("去重任务执行失败", e);
+            sendProgress(taskId, 0, 0, 0, 0,
+                    "任务执行失败: " + e.getMessage(), true);
+        }
+    }
+
+    /**
+     * Checks the request parameters without touching the file system in a mutating way.
+     *
+     * @return the user-facing error message, or empty when the request is valid
+     */
+    private Optional<String> validateRequest(Path libraryPath,
+                                             Path trashPath,
+                                             boolean useMd5,
+                                             boolean useMetadata,
+                                             String mode) {
+        // isDirectory is false for a missing path, so one check covers both cases.
+        if (!Files.isDirectory(libraryPath)) {
+            return Optional.of("音乐库目录不存在或不是目录");
+        }
+        if (!MODE_COPY.equalsIgnoreCase(mode) && !MODE_MOVE.equalsIgnoreCase(mode)) {
+            return Optional.of("执行模式错误，必须是 copy 或 move");
+        }
+        if (!useMd5 && !useMetadata) {
+            return Optional.of("至少需要启用一种去重策略（MD5 或元数据匹配）");
+        }
+        if (libraryPath.toAbsolutePath().normalize().equals(trashPath.toAbsolutePath().normalize())) {
+            return Optional.of("回收站目录不能与音乐库目录相同");
+        }
+        if (Files.exists(trashPath) && !Files.isDirectory(trashPath)) {
+            return Optional.of("回收站路径已存在但不是目录");
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * Walks the library and returns every audio file, in visit order.
+     *
+     * <p>The trash directory is skipped when it lies inside the library, so files that
+     * were already discarded are not treated as library content. Entries that cannot be
+     * accessed are logged, counted as failures and skipped instead of aborting the walk.
+     *
+     * @param trashAbsolute absolute, normalized path of the trash directory
+     */
+    private List<Path> collectAudioFiles(Path libraryPath, Path trashAbsolute, RunState run)
+            throws IOException {
+        List<Path> audioFiles = new ArrayList<>();
+        Files.walkFileTree(libraryPath, new SimpleFileVisitor<Path>() {
+            @Override
+            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
+                if (dir.toAbsolutePath().normalize().equals(trashAbsolute)) {
+                    return FileVisitResult.SKIP_SUBTREE;
+                }
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
+                if (isAudioFile(file)) {
+                    audioFiles.add(file);
+                }
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFileFailed(Path file, IOException exc) {
+                run.failed.incrementAndGet();
+                log.warn("无法访问文件或目录: {}", file, exc);
+                return FileVisitResult.CONTINUE;
+            }
+        });
+        return audioFiles;
+    }
+
+    /**
+     * Handles one candidate group: drops files that were already designated as duplicates,
+     * and if more than one file is left, keeps the best one and discards the rest.
+     */
+    private void processGroup(List<Path> group, RunState run) {
+        List<Path> remaining = new ArrayList<>(group);
+        remaining.removeAll(run.handled);
+        if (remaining.size() <= 1) {
+            return;
+        }
+
+        run.duplicateGroups.incrementAndGet();
+
+        Path keep = chooseBestFileByScore(remaining);
+        remaining.remove(keep);
+
+        // Recorded before the file operations so that even a failed attempt is not retried
+        // (and not selected as the file to keep) by a later phase.
+        run.handled.addAll(remaining);
+        handleDuplicates(remaining, keep, run);
+    }
+
+    /** Returns {@code true} when the file name carries one of the known audio extensions. */
+    private boolean isAudioFile(Path file) {
+        Path fileName = file.getFileName();
+        if (fileName == null) {
+            return false;
+        }
+        // Locale.ROOT keeps the comparison independent of the JVM's default locale.
+        String ext = extensionOf(fileName.toString().toLowerCase(Locale.ROOT));
+        return AUDIO_EXTENSIONS.contains(ext);
+    }
+
+    /**
+     * Returns the text after the last dot of {@code name}, or an empty string when the
+     * name has no dot, starts with its only dot, or ends with a dot.
+     */
+    private static String extensionOf(String name) {
+        int idx = name.lastIndexOf('.');
+        if (idx <= 0 || idx == name.length() - 1) {
+            return "";
+        }
+        return name.substring(idx + 1);
+    }
+
+    /**
+     * Computes the MD5 digest of the file content as a 32-character lower-case hex string.
+     * MD5 serves purely as a content fingerprint here, not as a security measure.
+     */
+    private String calculateMd5(Path file) throws IOException, NoSuchAlgorithmException {
+        MessageDigest md = MessageDigest.getInstance("MD5");
+        try (InputStream is = Files.newInputStream(file);
+             DigestInputStream dis = new DigestInputStream(is, md)) {
+            byte[] buffer = new byte[8192];
+            while (dis.read(buffer) != -1) {
+                // Reading through the DigestInputStream is what feeds the digest.
+            }
+        }
+        byte[] digest = md.digest();
+        StringBuilder sb = new StringBuilder(digest.length * 2);
+        for (byte b : digest) {
+            sb.append(Character.forDigit((b >> 4) & 0xF, 16));
+            sb.append(Character.forDigit(b & 0xF, 16));
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Grouping key for metadata matching: artist + title + album + duration bucket.
+     *
+     * <p>The bucket is the track length divided by {@link #DURATION_TOLERANCE_SECONDS}
+     * using integer division, so two tracks whose lengths differ by less than the
+     * tolerance can still fall into adjacent buckets and therefore not match.
+     */
+    private static final class MetadataKey {
+        private final String artist;
+        private final String title;
+        private final String album;
+        private final int normalizedDuration;
+
+        private MetadataKey(String artist, String title, String album, int normalizedDuration) {
+            this.artist = artist;
+            this.title = title;
+            this.album = album;
+            this.normalizedDuration = normalizedDuration;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) {
+                return true;
+            }
+            if (!(o instanceof MetadataKey)) {
+                return false;
+            }
+            MetadataKey that = (MetadataKey) o;
+            return normalizedDuration == that.normalizedDuration &&
+                    Objects.equals(artist, that.artist) &&
+                    Objects.equals(title, that.title) &&
+                    Objects.equals(album, that.album);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(artist, title, album, normalizedDuration);
+        }
+    }
+
+    /**
+     * Reads the metadata key used for matching from an audio file.
+     *
+     * @return the key, or empty when the file has no tag, lacks artist or title,
+     *         or cannot be read at all
+     */
+    private Optional<MetadataKey> readMetadataKey(Path file) {
+        try {
+            AudioFile audioFile = AudioFileIO.read(file.toFile());
+            Tag tag = audioFile.getTag();
+            if (tag == null) {
+                return Optional.empty();
+            }
+
+            String artist = normalize(tag.getFirst(FieldKey.ARTIST));
+            String title = normalize(tag.getFirst(FieldKey.TITLE));
+            String album = normalize(tag.getFirst(FieldKey.ALBUM));
+            int lengthSec = audioFile.getAudioHeader().getTrackLength();
+
+            if (artist.isEmpty() || title.isEmpty()) {
+                // Without the core tags the file does not take part in metadata grouping.
+                return Optional.empty();
+            }
+
+            // Bucket the duration so that small length differences share a key.
+            int normalizedDuration = lengthSec / DURATION_TOLERANCE_SECONDS;
+
+            return Optional.of(new MetadataKey(artist, title, album, normalizedDuration));
+        } catch (Exception e) {
+            // Damaged tags or unsupported formats: the file is left out of metadata grouping.
+            log.debug("读取元数据失败: {}", file, e);
+            return Optional.empty();
+        }
+    }
+
+    /** Trims and lower-cases a tag value (locale-independent); {@code null} becomes "". */
+    private String normalize(String s) {
+        if (s == null) {
+            return "";
+        }
+        return s.trim().toLowerCase(Locale.ROOT);
+    }
+
+    /**
+     * Picks the file to keep from a group of duplicates: the one with the highest
+     * {@link #scoreFile} result. On a tie the earliest candidate wins. Each candidate
+     * is scored exactly once.
+     *
+     * @param candidates non-empty list of files
+     * @throws IllegalArgumentException when {@code candidates} is empty
+     */
+    private Path chooseBestFileByScore(List<Path> candidates) {
+        if (candidates.isEmpty()) {
+            throw new IllegalArgumentException("candidates must not be empty");
+        }
+
+        Path best = candidates.get(0);
+        double bestScore = scoreFile(best);
+        for (Path candidate : candidates.subList(1, candidates.size())) {
+            double score = scoreFile(candidate);
+            if (score > bestScore) {
+                best = candidate;
+                bestScore = score;
+            }
+        }
+        return best;
+    }
+
+    /**
+     * Scores a file for the "which copy do we keep" decision; higher is better.
+     *
+     * <ul>
+     *   <li>Format: FLAC 100, other lossless 80, everything else 50.</li>
+     *   <li>Size: below 128 KB costs 30 points; otherwise size in KB / 100, capped at 40.</li>
+     *   <li>Name noise: "sample", "preview" or "demo" costs 20 points; a whole-word
+     *       "live", "remix" or "karaoke" costs 5.</li>
+     * </ul>
+     * Bitrate is not taken into account yet.
+     */
+    private double scoreFile(Path file) {
+        double score = 0.0;
+
+        String name = file.getFileName().toString().toLowerCase(Locale.ROOT);
+        String ext = extensionOf(name);
+
+        // Format weight.
+        if ("flac".equals(ext)) {
+            score += 100;
+        } else if (OTHER_LOSSLESS_EXTENSIONS.contains(ext)) {
+            score += 80;
+        } else {
+            score += 50; // lossy (or unknown) format
+        }
+
+        // Size weight: larger usually means better quality, but the bonus is capped.
+        try {
+            double sizeKB = Files.size(file) / 1024.0;
+            if (sizeKB < 128) {
+                score -= 30; // tiny file: probably a sample or a broken file
+            } else {
+                score += Math.min(sizeKB / 100.0, 40.0);
+            }
+        } catch (IOException e) {
+            // Best effort: a file whose size cannot be read simply gets no size component.
+            log.debug("获取文件大小失败: {}", file, e);
+        }
+
+        // File name noise penalties.
+        if (name.contains("sample") || name.contains("preview") || name.contains("demo")) {
+            score -= 20;
+        }
+        if (name.matches(".*\\b(live|remix|karaoke)\\b.*")) {
+            // These versions are usually not the preferred one; small penalty only.
+            score -= 5;
+        }
+
+        // TODO: read the bitrate from the audio header and weight it in as well.
+
+        return score;
+    }
+
+    /**
+     * Moves or copies the given duplicates into the trash directory.
+     *
+     * <p>Each success increments the run's cumulative moved counter and publishes a
+     * progress message carrying that cumulative value; each failure is logged and counted.
+     * Existing files in the trash are never overwritten.
+     */
+    private void handleDuplicates(List<Path> duplicates, Path keep, RunState run) {
+        for (Path dup : duplicates) {
+            try {
+                Path target = resolveTargetFile(run.trashPath, dup.getFileName().toString());
+                // No REPLACE_EXISTING: should the target appear in the meantime, failing is
+                // preferable to destroying a file that is already in the trash.
+                if (run.move) {
+                    Files.move(dup, target);
+                } else {
+                    Files.copy(dup, target);
+                }
+            } catch (IOException | RuntimeException e) {
+                run.failed.incrementAndGet();
+                log.warn("处理重复文件失败: {}", dup, e);
+                continue;
+            }
+
+            run.moved.incrementAndGet();
+            publish(run,
+                    String.format("重复文件: %s (保留: %s)",
+                            dup.getFileName(), keep.getFileName()),
+                    false);
+        }
+    }
+
+    /**
+     * Returns a path inside {@code targetDir} that does not exist yet: the plain file name
+     * when free, otherwise "name (1).ext", "name (2).ext", and so on.
+     */
+    private Path resolveTargetFile(Path targetDir, String fileName) {
+        Path target = targetDir.resolve(fileName);
+        if (!Files.exists(target)) {
+            return target;
+        }
+
+        int lastDot = fileName.lastIndexOf('.');
+        String base = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
+        String ext = lastDot > 0 ? fileName.substring(lastDot) : "";
+        for (int n = 1; ; n++) {
+            target = targetDir.resolve(base + " (" + n + ")" + ext);
+            if (!Files.exists(target)) {
+                return target;
+            }
+        }
+    }
+
+    /** Publishes a progress message filled from the current counters of the run. */
+    private void publish(RunState run, String message, boolean completed) {
+        sendProgress(run.taskId, run.total, run.scanned.get(),
+                run.duplicateGroups.get(), run.moved.get(), message, completed);
+    }
+
+    /**
+     * Stores and broadcasts a progress message on {@code /topic/progress/{taskId}}.
+     *
+     * <p>Meaning of the fields for this task type (the front end can map them based on
+     * {@code type === "dedup"}):
+     * <ul>
+     *   <li>{@code total}: number of audio files found</li>
+     *   <li>{@code processed}: number of files scanned so far</li>
+     *   <li>{@code success}: number of duplicate groups</li>
+     *   <li>{@code failed}: number of duplicates moved/copied (not a failure count)</li>
+     * </ul>
+     *
+     * <p>Delivery is best effort: a failure to store or send is logged and never
+     * propagates, so a notification problem cannot interrupt the file processing.
+     */
+    private void sendProgress(String taskId,
+                             int total,
+                             int processed,
+                             int success,
+                             int failed,
+                             String message,
+                             boolean completed) {
+        ProgressMessage pm = new ProgressMessage();
+        pm.setTaskId(taskId);
+        pm.setType("dedup");
+        pm.setTotal(total);
+        pm.setProcessed(processed);
+        pm.setSuccess(success);
+        pm.setFailed(failed);
+        pm.setCurrentFile(null);
+        pm.setMessage(message);
+        pm.setCompleted(completed);
+
+        try {
+            progressStore.put(pm);
+            messagingTemplate.convertAndSend("/topic/progress/" + taskId, pm);
+        } catch (RuntimeException e) {
+            log.warn("发送进度消息失败: taskId={}", taskId, e);
+        }
+    }
+}
+