// MyTool/backend/src/main/java/com/music/service/DedupService.java

package com.music.service;

import com.music.dto.ProgressMessage;
import org.jaudiotagger.audio.AudioFile;
import org.jaudiotagger.audio.AudioFileIO;
import org.jaudiotagger.tag.FieldKey;
import org.jaudiotagger.tag.Tag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.messaging.simp.SimpMessagingTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;

/**
 * Music library de-duplication service.
 *
 * <p>Two strategies can be enabled independently for a task:
 * <ul>
 *   <li><b>MD5</b> - groups files whose content is byte-for-byte identical.</li>
 *   <li><b>Metadata</b> - groups files that share artist, title, album and a
 *       bucketed track length, as read through jaudiotagger.</li>
 * </ul>
 *
 * <p>Within every group the highest scoring file (see {@link #scoreFile(Path)})
 * is kept in place and the remaining files are moved or copied into the trash
 * directory. Progress is stored in the {@code ProgressStore} and published on
 * {@code /topic/progress/<taskId>}.
 */
@Service
public class DedupService {

    private static final Logger log = LoggerFactory.getLogger(DedupService.class);

    /** Lower-case file extensions (without the dot) that are treated as audio files. */
    private static final Set<String> AUDIO_EXTENSIONS = new HashSet<>(Arrays.asList(
            "mp3", "flac", "wav", "m4a", "aac", "ogg", "wma", "ape", "aiff", "aif", "wv", "tta", "opus"
    ));

    /** Extensions scored as "other lossless": below FLAC, above every remaining format. */
    private static final Set<String> OTHER_LOSSLESS_EXTENSIONS = new HashSet<>(Arrays.asList(
            "wav", "ape", "aiff", "aif", "wv", "tta"
    ));

    /** Whole-word markers of alternative versions that receive a small score penalty. */
    private static final Pattern VERSION_NOISE = Pattern.compile("\\b(live|remix|karaoke)\\b");

    /** Width, in seconds, of the track-length bucket used by metadata matching. */
    private static final int DURATION_TOLERANCE_SECONDS = 5;

    /** A progress message is published every this many successfully scanned files. */
    private static final int PROGRESS_INTERVAL = 50;

    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    private final SimpMessagingTemplate messagingTemplate;
    private final ProgressStore progressStore;

    public DedupService(SimpMessagingTemplate messagingTemplate, ProgressStore progressStore) {
        this.messagingTemplate = messagingTemplate;
        this.progressStore = progressStore;
    }

    /**
     * Runs a de-duplication task asynchronously.
     *
     * <p>Every outcome, including validation errors and unexpected exceptions,
     * ends with a progress message whose {@code completed} flag is {@code true}.
     *
     * @param taskId      identifier used for the progress store and the progress topic
     * @param libraryDir  root directory of the music library to scan
     * @param trashDir    directory that receives the duplicates; created when missing
     * @param useMd5      enable grouping by identical file content
     * @param useMetadata enable grouping by tag metadata
     * @param mode        {@code "move"} or {@code "copy"} (case-insensitive)
     */
    @Async
    public void dedup(String taskId,
                      String libraryDir,
                      String trashDir,
                      boolean useMd5,
                      boolean useMetadata,
                      String mode) {
        try {
            // Parsed inside the try block so that a null or malformed path is
            // reported to the client instead of silently killing the async task.
            Path libraryPath = Paths.get(libraryDir);
            Path trashPath = Paths.get(trashDir);

            if (!Files.isDirectory(libraryPath)) {
                reportFailure(taskId, "音乐库目录不存在或不是目录");
                return;
            }

            boolean moveMode = "move".equalsIgnoreCase(mode);
            if (!moveMode && !"copy".equalsIgnoreCase(mode)) {
                reportFailure(taskId, "执行模式错误，必须是 copy 或 move");
                return;
            }

            if (!useMd5 && !useMetadata) {
                reportFailure(taskId, "至少需要启用一种去重策略（MD5 或元数据匹配）");
                return;
            }

            // Only touch the file system once all arguments have been validated.
            if (!Files.exists(trashPath)) {
                Files.createDirectories(trashPath);
            }

            List<Path> audioFiles = collectAudioFiles(libraryPath, trashPath);
            int total = audioFiles.size();
            if (total == 0) {
                reportFailure(taskId, "未在音乐库中找到音频文件");
                return;
            }

            TaskState state = new TaskState(taskId, total, trashPath, moveMode);
            publish(state, "开始扫描音乐库...", false);

            // Insertion-ordered maps make the processing order follow the scan order.
            Map<String, List<Path>> md5Groups = new LinkedHashMap<>();
            Map<MetadataKey, List<Path>> metadataGroups = new LinkedHashMap<>();

            // Phase 1: scan every file and build the groups for the enabled strategies.
            for (Path file : audioFiles) {
                try {
                    if (useMd5) {
                        md5Groups.computeIfAbsent(calculateMd5(file), k -> new ArrayList<>()).add(file);
                    }
                    if (useMetadata) {
                        readMetadataKey(file).ifPresent(key -> metadataGroups
                                .computeIfAbsent(key, k -> new ArrayList<>())
                                .add(file));
                    }

                    state.scanned++;
                    if (state.scanned % PROGRESS_INTERVAL == 0) {
                        publish(state, String.format("扫描中（%d/%d）", state.scanned, total), false);
                    }
                } catch (Exception e) {
                    state.failed++;
                    log.warn("扫描文件失败: {}", file, e);
                }
            }

            // Phase 2: byte-identical duplicates.
            if (useMd5) {
                processGroups(md5Groups.values(), state);
            }
            // Phase 3: metadata duplicates. Files already relocated by phase 2 are
            // excluded inside processGroups.
            if (useMetadata) {
                processGroups(metadataGroups.values(), state);
            }

            publish(state,
                    String.format("任务完成！扫描文件: %d, 重复组: %d, 移动/复制文件: %d",
                            state.scanned, state.duplicateGroups, state.moved),
                    true);

        } catch (Exception e) {
            log.error("去重任务执行失败", e);
            reportFailure(taskId, "任务执行失败: " + e.getMessage());
        }
    }

    /** Mutable bookkeeping for one running task; only used by the thread executing it. */
    private static final class TaskState {
        final String taskId;
        final int total;
        final Path trashPath;
        final boolean move;
        /** Duplicates that were already moved/copied; never considered again. */
        final Set<Path> relocated = new HashSet<>();
        int scanned;
        int duplicateGroups;
        int moved;
        int failed;

        TaskState(String taskId, int total, Path trashPath, boolean move) {
            this.taskId = taskId;
            this.total = total;
            this.trashPath = trashPath;
            this.move = move;
        }
    }

    /**
     * Walks the library and returns every audio file found.
     *
     * <p>When the trash directory lies inside the library its subtree is skipped,
     * so files relocated by this or an earlier run are not treated as library
     * content. Entries that cannot be accessed are logged and skipped rather than
     * aborting the walk.
     */
    private List<Path> collectAudioFiles(Path libraryPath, Path trashPath) throws IOException {
        final Path libraryRoot = libraryPath.toAbsolutePath().normalize();
        final Path trashRoot = trashPath.toAbsolutePath().normalize();
        // If both directories are the same there is nothing that could be skipped.
        final boolean skipTrash = !trashRoot.equals(libraryRoot);
        final List<Path> audioFiles = new ArrayList<>();

        Files.walkFileTree(libraryPath, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
                if (skipTrash && dir.toAbsolutePath().normalize().equals(trashRoot)) {
                    return FileVisitResult.SKIP_SUBTREE;
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                if (isAudioFile(file)) {
                    audioFiles.add(file);
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException exc) {
                log.warn("无法访问文件，已跳过: {}", file, exc);
                return FileVisitResult.CONTINUE;
            }
        });
        return audioFiles;
    }

    /**
     * Resolves each group: keeps the best scoring file and relocates the rest.
     * Files relocated earlier in the task are removed from the group first, so a
     * file is never relocated twice and never chosen as the file to keep.
     */
    private void processGroups(Collection<List<Path>> groups, TaskState state) {
        for (List<Path> group : groups) {
            List<Path> candidates = new ArrayList<>(group);
            candidates.removeAll(state.relocated);
            if (candidates.size() <= 1) {
                continue;
            }

            state.duplicateGroups++;

            Path keep = chooseBestFileByScore(candidates);
            candidates.remove(keep);
            handleDuplicates(candidates, keep, state);
        }
    }

    /**
     * Tells whether the file name carries one of the known audio extensions.
     * Names without an extension, ending in a dot, or consisting only of an
     * extension (such as {@code .mp3}) are rejected.
     */
    private static boolean isAudioFile(Path file) {
        // Locale.ROOT: under e.g. the Turkish locale "AIFF" would not lower-case to "aiff".
        String name = file.getFileName().toString().toLowerCase(Locale.ROOT);
        return AUDIO_EXTENSIONS.contains(extensionOf(name));
    }

    /**
     * Returns the text after the last dot, or an empty string when the dot is
     * missing, is the first character, or is the last character.
     */
    private static String extensionOf(String fileName) {
        int dot = fileName.lastIndexOf('.');
        if (dot <= 0 || dot == fileName.length() - 1) {
            return "";
        }
        return fileName.substring(dot + 1);
    }

    /**
     * Computes the MD5 digest of the file content as 32 lower-case hex characters.
     * MD5 is used here only to detect identical content, not for security.
     */
    private static String calculateMd5(Path file) throws IOException, NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = Files.newInputStream(file);
             DigestInputStream dis = new DigestInputStream(is, md)) {
            byte[] buffer = new byte[8192];
            // Reading through the DigestInputStream feeds every byte into md.
            while (dis.read(buffer) != -1) {
                // nothing to do; the digest is updated as a side effect of read()
            }
        }
        byte[] digest = md.digest();
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            hex.append(HEX_DIGITS[(b >> 4) & 0x0F]).append(HEX_DIGITS[b & 0x0F]);
        }
        return hex.toString();
    }

    /**
     * Grouping key for metadata matching: artist + title + album + bucketed duration.
     *
     * <p>The duration is the track length divided by
     * {@link #DURATION_TOLERANCE_SECONDS} using integer division, so lengths that
     * straddle a bucket boundary (for example 4 and 5) end up in different groups
     * even though they differ by less than the tolerance.
     */
    private static final class MetadataKey {
        private final String artist;
        private final String title;
        private final String album;
        private final int normalizedDuration;

        private MetadataKey(String artist, String title, String album, int normalizedDuration) {
            this.artist = artist;
            this.title = title;
            this.album = album;
            this.normalizedDuration = normalizedDuration;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof MetadataKey)) {
                return false;
            }
            MetadataKey that = (MetadataKey) o;
            return normalizedDuration == that.normalizedDuration
                    && Objects.equals(artist, that.artist)
                    && Objects.equals(title, that.title)
                    && Objects.equals(album, that.album);
        }

        @Override
        public int hashCode() {
            return Objects.hash(artist, title, album, normalizedDuration);
        }
    }

    /**
     * Reads the metadata grouping key of an audio file.
     *
     * @return the key, or empty when the file has no tag, the artist or title is
     *         blank, or reading the file fails for any reason
     */
    private Optional<MetadataKey> readMetadataKey(Path file) {
        try {
            AudioFile audioFile = AudioFileIO.read(file.toFile());
            Tag tag = audioFile.getTag();
            if (tag == null) {
                return Optional.empty();
            }

            String artist = normalize(tag.getFirst(FieldKey.ARTIST));
            String title = normalize(tag.getFirst(FieldKey.TITLE));
            if (artist.isEmpty() || title.isEmpty()) {
                // Without the core tags a match would be meaningless.
                return Optional.empty();
            }
            String album = normalize(tag.getFirst(FieldKey.ALBUM));

            int lengthSec = audioFile.getAudioHeader().getTrackLength();
            int normalizedDuration = lengthSec / DURATION_TOLERANCE_SECONDS;

            return Optional.of(new MetadataKey(artist, title, album, normalizedDuration));
        } catch (Exception e) {
            // Corrupt tags or unsupported formats simply opt the file out of metadata matching.
            log.debug("读取元数据失败: {}", file, e);
            return Optional.empty();
        }
    }

    /** Trims and lower-cases a tag value (locale independent); {@code null} becomes "". */
    private static String normalize(String s) {
        if (s == null) {
            return "";
        }
        return s.trim().toLowerCase(Locale.ROOT);
    }

    /**
     * Picks the file to keep from a non-empty group of duplicates.
     *
     * <p>Each candidate is scored exactly once with {@link #scoreFile(Path)}. The
     * highest score wins; on a tie the candidate that appears first is kept.
     *
     * @param candidates non-empty list of files considered duplicates of each other
     */
    private Path chooseBestFileByScore(List<Path> candidates) {
        Path best = candidates.get(0);
        if (candidates.size() == 1) {
            return best;
        }

        double bestScore = scoreFile(best);
        for (Path candidate : candidates.subList(1, candidates.size())) {
            double score = scoreFile(candidate);
            if (score > bestScore) {
                best = candidate;
                bestScore = score;
            }
        }
        return best;
    }

    /**
     * Scores a file; higher is better.
     *
     * <ul>
     *   <li>Format: FLAC 100, other lossless 80, everything else 50.</li>
     *   <li>Size: below 128 KB costs 30 points; otherwise size in KB / 100 is
     *       added, capped at 40 points. A size that cannot be read is ignored.</li>
     *   <li>Name contains "sample", "preview" or "demo": minus 20.</li>
     *   <li>Name contains the whole word "live", "remix" or "karaoke": minus 5.</li>
     * </ul>
     */
    private double scoreFile(Path file) {
        String name = file.getFileName().toString().toLowerCase(Locale.ROOT);
        String ext = extensionOf(name);

        double score;
        if ("flac".equals(ext)) {
            score = 100;
        } else if (OTHER_LOSSLESS_EXTENSIONS.contains(ext)) {
            score = 80;
        } else {
            score = 50;
        }

        try {
            double sizeKB = Files.size(file) / 1024.0;
            if (sizeKB < 128) {
                // Tiny files are most likely samples or truncated downloads.
                score -= 30;
            } else {
                score += Math.min(sizeKB / 100.0, 40.0);
            }
        } catch (IOException e) {
            // Best effort: the size component is simply left out of the score.
            log.debug("获取文件大小失败: {}", file, e);
        }

        if (name.contains("sample") || name.contains("preview") || name.contains("demo")) {
            score -= 20;
        }
        if (VERSION_NOISE.matcher(name).find()) {
            score -= 5;
        }

        // TODO: read the bit rate from the audio header and weight it in as well.

        return score;
    }

    /**
     * Moves or copies the given duplicates into the trash directory.
     *
     * <p>Each success is recorded in {@code state.relocated}, increases the
     * task-wide {@code state.moved} counter and publishes a progress message that
     * carries the cumulative counters. Each failure is logged and counted in
     * {@code state.failed}; the remaining duplicates are still processed.
     *
     * @param duplicates files to relocate
     * @param keep       the file that stays in the library (used for the message only)
     * @param state      bookkeeping of the running task
     */
    private void handleDuplicates(List<Path> duplicates, Path keep, TaskState state) {
        for (Path dup : duplicates) {
            try {
                Path target = resolveTargetFile(state.trashPath, dup.getFileName().toString());
                // No REPLACE_EXISTING: the target name was just chosen because it is
                // unused, and overwriting a file in the trash would destroy data.
                if (state.move) {
                    Files.move(dup, target);
                } else {
                    Files.copy(dup, target);
                }
                state.relocated.add(dup);
                state.moved++;
                publish(state,
                        String.format("重复文件: %s (保留: %s)", dup.getFileName(), keep.getFileName()),
                        false);
            } catch (Exception e) {
                state.failed++;
                log.warn("处理重复文件失败: {}", dup, e);
            }
        }
    }

    /**
     * Chooses a path inside {@code targetDir} that does not exist yet. When
     * {@code fileName} is taken, " (1)", " (2)", ... is inserted before the
     * extension until a free name is found.
     */
    private Path resolveTargetFile(Path targetDir, String fileName) {
        Path target = targetDir.resolve(fileName);
        if (!Files.exists(target)) {
            return target;
        }

        int lastDot = fileName.lastIndexOf('.');
        String base = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
        String ext = lastDot > 0 ? fileName.substring(lastDot) : "";
        for (int n = 1; ; n++) {
            target = targetDir.resolve(base + " (" + n + ")" + ext);
            if (!Files.exists(target)) {
                return target;
            }
        }
    }

    /** Publishes the current cumulative counters of the task. */
    private void publish(TaskState state, String message, boolean completed) {
        sendProgress(state.taskId, state.total, state.scanned,
                state.duplicateGroups, state.moved, message, completed);
    }

    /** Publishes a terminal message with all counters set to zero. */
    private void reportFailure(String taskId, String message) {
        sendProgress(taskId, 0, 0, 0, 0, message, true);
    }

    /**
     * Stores a progress message and broadcasts it on {@code /topic/progress/<taskId>}.
     *
     * <p>For messages of type {@code "dedup"} the generic fields are used as follows:
     * <ul>
     *   <li>{@code total} - number of audio files found</li>
     *   <li>{@code processed} - number of files scanned so far</li>
     *   <li>{@code success} - number of duplicate groups</li>
     *   <li>{@code failed} - number of duplicates moved/copied (not a failure count)</li>
     * </ul>
     * The front end can map these based on {@code type === "dedup"}.
     */
    private void sendProgress(String taskId,
                              int total,
                              int processed,
                              int success,
                              int failed,
                              String message,
                              boolean completed) {
        ProgressMessage pm = new ProgressMessage();
        pm.setTaskId(taskId);
        pm.setType("dedup");
        pm.setTotal(total);
        pm.setProcessed(processed);
        pm.setSuccess(success);
        pm.setFailed(failed);
        pm.setCurrentFile(null);
        pm.setMessage(message);
        pm.setCompleted(completed);

        progressStore.put(pm);
        messagingTemplate.convertAndSend("/topic/progress/" + taskId, pm);
    }
}