提交代码

This commit is contained in:
liu
2026-01-29 18:26:02 +08:00
parent 981b4ecf42
commit 7531b6c466
47 changed files with 7257 additions and 16 deletions

View File

@@ -0,0 +1,454 @@
package com.music.service;
import com.music.dto.ProgressMessage;
import org.jaudiotagger.audio.AudioFile;
import org.jaudiotagger.audio.AudioFileIO;
import org.jaudiotagger.tag.FieldKey;
import org.jaudiotagger.tag.Tag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.messaging.simp.SimpMessagingTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
/**
 * Music library de-duplication service.
 *
 * <p>Two strategies are supported and can be combined:
 * <ul>
 *   <li><b>MD5</b> - groups files whose content is byte-for-byte identical.</li>
 *   <li><b>Metadata</b> - groups files that share artist, title, album and a
 *       duration bucket, as read from their tags.</li>
 * </ul>
 * From every group of duplicates one file is kept (see {@link #scoreFile(Path)})
 * and the others are moved or copied into a trash directory. Progress is stored in
 * the {@code ProgressStore} and published on {@code /topic/progress/{taskId}}.
 */
@Service
public class DedupService {

    private static final Logger log = LoggerFactory.getLogger(DedupService.class);

    /** Lower-case file extensions (without the dot) that are treated as audio files. */
    private static final Set<String> AUDIO_EXTENSIONS = new HashSet<>(Arrays.asList(
            "mp3", "flac", "wav", "m4a", "aac", "ogg", "wma", "ape", "aiff", "aif", "wv", "tta", "opus"
    ));

    /** Lossless formats other than FLAC; scored between FLAC and everything else. */
    private static final Set<String> OTHER_LOSSLESS_EXTENSIONS = new HashSet<>(Arrays.asList(
            "wav", "ape", "aiff", "aif", "wv", "tta"
    ));

    /** Whole-name pattern for versions that are slightly less preferred as the kept copy. */
    private static final Pattern VERSION_NOISE = Pattern.compile(".*\\b(live|remix|karaoke)\\b.*");

    /** Width, in seconds, of the duration bucket used by metadata matching. */
    private static final int DURATION_TOLERANCE_SECONDS = 5;

    /** A scan progress message is sent every this many successfully scanned files. */
    private static final int SCAN_PROGRESS_INTERVAL = 50;

    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    private final SimpMessagingTemplate messagingTemplate;
    private final ProgressStore progressStore;

    public DedupService(SimpMessagingTemplate messagingTemplate, ProgressStore progressStore) {
        this.messagingTemplate = messagingTemplate;
        this.progressStore = progressStore;
    }

    /**
     * Runs a de-duplication task asynchronously.
     *
     * <p>Every outcome, including validation errors and unexpected exceptions, ends with a
     * progress message whose {@code completed} flag is {@code true}.
     *
     * @param taskId      identifier used for the progress store entry and the topic name
     * @param libraryDir  root directory of the music library to scan
     * @param trashDir    directory that receives the duplicates; created if it does not exist
     * @param useMd5      whether to group files by MD5 of their content
     * @param useMetadata whether to group files by tag metadata
     * @param mode        {@code "copy"} or {@code "move"} (case-insensitive)
     */
    @Async
    public void dedup(String taskId,
                      String libraryDir,
                      String trashDir,
                      boolean useMd5,
                      boolean useMetadata,
                      String mode) {
        try {
            // Parsed inside the try block so a null or malformed path is reported to the
            // client as a failed task instead of escaping the async method silently.
            Path libraryPath = Paths.get(libraryDir);
            Path trashPath = Paths.get(trashDir);

            if (!Files.isDirectory(libraryPath)) {
                sendProgress(taskId, 0, 0, 0, 0,
                        "音乐库目录不存在或不是目录", true);
                return;
            }
            boolean moveMode = "move".equalsIgnoreCase(mode);
            if (!moveMode && !"copy".equalsIgnoreCase(mode)) {
                sendProgress(taskId, 0, 0, 0, 0,
                        "执行模式错误,必须是 copy 或 move", true);
                return;
            }
            if (!useMd5 && !useMetadata) {
                sendProgress(taskId, 0, 0, 0, 0,
                        "至少需要启用一种去重策略MD5 或元数据匹配)", true);
                return;
            }
            Path trashRoot = trashPath.toAbsolutePath().normalize();
            if (trashRoot.equals(libraryPath.toAbsolutePath().normalize())) {
                sendProgress(taskId, 0, 0, 0, 0,
                        "回收站目录不能与音乐库目录相同", true);
                return;
            }
            // Created only after validation so an invalid request has no side effects.
            if (!Files.exists(trashPath)) {
                Files.createDirectories(trashPath);
            }

            List<Path> audioFiles = collectAudioFiles(libraryPath, trashRoot);
            int total = audioFiles.size();
            if (total == 0) {
                sendProgress(taskId, 0, 0, 0, 0,
                        "未在音乐库中找到音频文件", true);
                return;
            }

            TaskState state = new TaskState(taskId, total);
            sendProgress(taskId, total, 0, 0, 0,
                    "开始扫描音乐库...", false);

            // Insertion-ordered so groups are processed in scan order on every run.
            Map<String, List<Path>> md5Groups = new LinkedHashMap<>();
            Map<MetadataKey, List<Path>> metadataGroups = new LinkedHashMap<>();

            // Phase 1: scan every file and build the groups for the enabled strategies.
            scanFiles(audioFiles, useMd5, useMetadata, md5Groups, metadataGroups, state);

            // Files already sent to the trash; later phases must not treat them as
            // candidates again (they may no longer exist in the library).
            Set<Path> handled = new HashSet<>();

            // Phase 2: exact binary duplicates.
            if (useMd5) {
                processGroups(md5Groups.values(), trashPath, moveMode, state, handled);
            }
            // Phase 3: metadata matches.
            if (useMetadata) {
                processGroups(metadataGroups.values(), trashPath, moveMode, state, handled);
            }

            log.info("去重任务完成: taskId={}, 扫描={}, 重复组={}, 移动/复制={}, 失败={}",
                    taskId, state.scanned, state.duplicateGroups, state.moved, state.failed);
            sendProgress(taskId, total, state.scanned,
                    state.duplicateGroups, state.moved,
                    String.format("任务完成!扫描文件: %d, 重复组: %d, 移动/复制文件: %d",
                            state.scanned, state.duplicateGroups, state.moved),
                    true);
        } catch (Exception e) {
            log.error("去重任务执行失败", e);
            sendProgress(taskId, 0, 0, 0, 0,
                    "任务执行失败: " + e.getMessage(), true);
        }
    }

    /** Mutable counters of one running task; only touched by the thread running the task. */
    private static final class TaskState {
        private final String taskId;
        private final int total;
        private int scanned;
        private int duplicateGroups;
        private int moved;
        private int failed;

        private TaskState(String taskId, int total) {
            this.taskId = taskId;
            this.total = total;
        }
    }

    /**
     * Walks the library and returns every audio file found.
     *
     * <p>The trash directory is skipped when it lies inside the library, so files that were
     * trashed earlier are not scanned as library content. Entries that cannot be accessed
     * are logged and skipped instead of aborting the walk.
     *
     * @param libraryPath root of the walk
     * @param trashRoot   absolute, normalized path of the trash directory
     */
    private List<Path> collectAudioFiles(Path libraryPath, Path trashRoot) throws IOException {
        List<Path> audioFiles = new ArrayList<>();
        Files.walkFileTree(libraryPath, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
                if (dir.toAbsolutePath().normalize().equals(trashRoot)) {
                    return FileVisitResult.SKIP_SUBTREE;
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                if (isAudioFile(file)) {
                    audioFiles.add(file);
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException exc) {
                log.warn("无法访问,已跳过: {}", file, exc);
                return FileVisitResult.CONTINUE;
            }
        });
        return audioFiles;
    }

    /**
     * Hashes and/or reads the tags of every file and adds it to the matching groups.
     * A file whose scan throws is counted as failed and not counted as scanned.
     */
    private void scanFiles(List<Path> audioFiles,
                           boolean useMd5,
                           boolean useMetadata,
                           Map<String, List<Path>> md5Groups,
                           Map<MetadataKey, List<Path>> metadataGroups,
                           TaskState state) {
        for (Path file : audioFiles) {
            try {
                if (useMd5) {
                    String md5 = calculateMd5(file);
                    md5Groups.computeIfAbsent(md5, k -> new ArrayList<>()).add(file);
                }
                if (useMetadata) {
                    readMetadataKey(file).ifPresent(key -> metadataGroups
                            .computeIfAbsent(key, k -> new ArrayList<>())
                            .add(file));
                }
                state.scanned++;
                if (state.scanned % SCAN_PROGRESS_INTERVAL == 0) {
                    sendProgress(state.taskId, state.total, state.scanned,
                            state.duplicateGroups, state.moved,
                            String.format("扫描中(%d/%d", state.scanned, state.total),
                            false);
                }
            } catch (Exception e) {
                state.failed++;
                log.warn("扫描文件失败: {}", file, e);
            }
        }
    }

    /**
     * Resolves every group that still contains more than one unhandled file: the best file
     * is kept and the rest are sent to the trash.
     *
     * @param handled files already moved/copied by this task; they are ignored here and
     *                newly handled files are added to it
     */
    private void processGroups(Collection<List<Path>> groups,
                               Path trashPath,
                               boolean moveMode,
                               TaskState state,
                               Set<Path> handled) {
        for (List<Path> group : groups) {
            List<Path> candidates = new ArrayList<>(group);
            candidates.removeAll(handled);
            if (candidates.size() <= 1) {
                continue;
            }
            state.duplicateGroups++;
            Path keep = chooseBestFileByScore(candidates);
            candidates.remove(keep);
            handleDuplicates(candidates, keep, trashPath, moveMode, state, handled);
        }
    }

    /** Returns whether the file name carries one of the known audio extensions. */
    private boolean isAudioFile(Path file) {
        String name = file.getFileName().toString().toLowerCase(Locale.ROOT);
        return AUDIO_EXTENSIONS.contains(extensionOf(name));
    }

    /**
     * Returns the text after the last dot of {@code name}, or an empty string when the name
     * has no dot, starts with its only dot (e.g. {@code .mp3}) or ends with a dot.
     */
    private static String extensionOf(String name) {
        int idx = name.lastIndexOf('.');
        if (idx <= 0 || idx == name.length() - 1) {
            return "";
        }
        return name.substring(idx + 1);
    }

    /**
     * Computes the MD5 digest of the file content.
     *
     * @return the digest as 32 lower-case hexadecimal characters
     */
    private String calculateMd5(Path file) throws IOException, NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = Files.newInputStream(file);
             DigestInputStream dis = new DigestInputStream(is, md)) {
            byte[] buffer = new byte[8192];
            // Reading through the DigestInputStream feeds every byte into md.
            while (dis.read(buffer) != -1) {
                // no-op
            }
        }
        return toHex(md.digest());
    }

    /** Encodes bytes as lower-case hexadecimal, two characters per byte. */
    private static String toHex(byte[] bytes) {
        char[] out = new char[bytes.length * 2];
        for (int i = 0; i < bytes.length; i++) {
            int v = bytes[i] & 0xFF;
            out[2 * i] = HEX_DIGITS[v >>> 4];
            out[2 * i + 1] = HEX_DIGITS[v & 0x0F];
        }
        return new String(out);
    }

    /**
     * Grouping key for metadata matching: artist, title, album and the track length
     * divided by {@link #DURATION_TOLERANCE_SECONDS}.
     *
     * <p>NOTE(review): because the duration is bucketed by integer division, two tracks
     * whose lengths differ by less than the tolerance but fall on different sides of a
     * bucket boundary (for example 4 s and 5 s) produce different keys.
     */
    private static final class MetadataKey {
        private final String artist;
        private final String title;
        private final String album;
        private final int normalizedDuration;

        private MetadataKey(String artist, String title, String album, int normalizedDuration) {
            this.artist = artist;
            this.title = title;
            this.album = album;
            this.normalizedDuration = normalizedDuration;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof MetadataKey)) {
                return false;
            }
            MetadataKey that = (MetadataKey) o;
            return normalizedDuration == that.normalizedDuration
                    && Objects.equals(artist, that.artist)
                    && Objects.equals(title, that.title)
                    && Objects.equals(album, that.album);
        }

        @Override
        public int hashCode() {
            return Objects.hash(artist, title, album, normalizedDuration);
        }
    }

    /**
     * Reads the metadata key of an audio file.
     *
     * @return the key, or empty when the file has no tag, its artist or title is blank,
     *         or reading the file fails for any reason
     */
    private Optional<MetadataKey> readMetadataKey(Path file) {
        try {
            AudioFile audioFile = AudioFileIO.read(file.toFile());
            Tag tag = audioFile.getTag();
            if (tag == null) {
                return Optional.empty();
            }
            String artist = normalize(tag.getFirst(FieldKey.ARTIST));
            String title = normalize(tag.getFirst(FieldKey.TITLE));
            String album = normalize(tag.getFirst(FieldKey.ALBUM));
            int lengthSec = audioFile.getAudioHeader().getTrackLength();
            if (artist.isEmpty() || title.isEmpty()) {
                // Without the core tags the file takes no part in metadata grouping.
                return Optional.empty();
            }
            int normalizedDuration = lengthSec / DURATION_TOLERANCE_SECONDS;
            return Optional.of(new MetadataKey(artist, title, album, normalizedDuration));
        } catch (Exception e) {
            // Corrupt tags or unsupported formats only exclude the file from metadata matching.
            log.debug("读取元数据失败: {}", file, e);
            return Optional.empty();
        }
    }

    /**
     * Trims and lower-cases a tag value; {@code null} becomes an empty string.
     * Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM's
     * default locale.
     */
    private String normalize(String s) {
        if (s == null) {
            return "";
        }
        return s.trim().toLowerCase(Locale.ROOT);
    }

    /**
     * Picks the file with the highest {@link #scoreFile(Path) score}; on a tie the file
     * that comes first in {@code candidates} wins. Each file is scored exactly once.
     *
     * @param candidates non-empty list of files considered duplicates of each other
     * @return the file to keep
     * @throws IllegalArgumentException if {@code candidates} is empty
     */
    private Path chooseBestFileByScore(List<Path> candidates) {
        if (candidates.isEmpty()) {
            throw new IllegalArgumentException("candidates must not be empty");
        }
        Path best = candidates.get(0);
        if (candidates.size() == 1) {
            return best;
        }
        double bestScore = scoreFile(best);
        for (int i = 1; i < candidates.size(); i++) {
            Path candidate = candidates.get(i);
            double score = scoreFile(candidate);
            if (score > bestScore) {
                best = candidate;
                bestScore = score;
            }
        }
        return best;
    }

    /**
     * Scores a file as a candidate to keep; higher is better.
     *
     * <ul>
     *   <li>Format: FLAC +100, other lossless formats +80, anything else +50.</li>
     *   <li>Size: below 128 KB -30; otherwise +size/100 KB, capped at +40.
     *       A size that cannot be read contributes nothing.</li>
     *   <li>Name contains "sample", "preview" or "demo": -20.</li>
     *   <li>Name contains the word "live", "remix" or "karaoke": -5.</li>
     * </ul>
     */
    private double scoreFile(Path file) {
        String name = file.getFileName().toString().toLowerCase(Locale.ROOT);
        String ext = extensionOf(name);

        double score;
        if ("flac".equals(ext)) {
            score = 100;
        } else if (OTHER_LOSSLESS_EXTENSIONS.contains(ext)) {
            score = 80;
        } else {
            score = 50;
        }

        try {
            double sizeKB = Files.size(file) / 1024.0;
            if (sizeKB < 128) {
                // Very small files are suspected samples or truncated copies.
                score -= 30;
            } else {
                score += Math.min(sizeKB / 100.0, 40.0);
            }
        } catch (IOException ignored) {
            // Size is only a hint; the file is scored on the remaining criteria.
        }

        if (name.contains("sample") || name.contains("preview") || name.contains("demo")) {
            score -= 20;
        }
        if (VERSION_NOISE.matcher(name).matches()) {
            score -= 5;
        }
        // TODO: the bitrate from the audio header could be used as a further weight.
        return score;
    }

    /**
     * Moves or copies each duplicate into the trash directory and reports progress with the
     * task-wide running count of moved/copied files.
     *
     * <p>A file that cannot be handled is logged and counted as failed, and the remaining
     * files are still processed. Existing files in the trash are never replaced.
     *
     * @param duplicates files to send to the trash
     * @param keep       the file kept for this group; only used in the progress message
     * @param handled    receives every file that was moved/copied successfully
     */
    private void handleDuplicates(List<Path> duplicates,
                                  Path keep,
                                  Path trashPath,
                                  boolean moveMode,
                                  TaskState state,
                                  Set<Path> handled) {
        for (Path dup : duplicates) {
            try {
                Path target = resolveTargetFile(trashPath, dup.getFileName().toString());
                // No REPLACE_EXISTING: if the name gets taken after it was resolved, the
                // operation fails and is counted instead of overwriting a trashed file.
                if (moveMode) {
                    Files.move(dup, target);
                } else {
                    Files.copy(dup, target);
                }
                handled.add(dup);
                state.moved++;
                sendProgress(state.taskId, state.total, state.scanned,
                        state.duplicateGroups, state.moved,
                        String.format("重复文件: %s (保留: %s)",
                                dup.getFileName(), keep.getFileName()),
                        false);
            } catch (Exception e) {
                state.failed++;
                log.warn("处理重复文件失败: {}", dup, e);
            }
        }
    }

    /**
     * Returns a path inside {@code targetDir} that does not exist yet: {@code fileName}
     * itself if it is free, otherwise {@code "base (n).ext"} for the first free {@code n >= 1}.
     */
    private Path resolveTargetFile(Path targetDir, String fileName) {
        Path target = targetDir.resolve(fileName);
        if (!Files.exists(target)) {
            return target;
        }
        int lastDot = fileName.lastIndexOf('.');
        String base = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
        String ext = lastDot > 0 ? fileName.substring(lastDot) : "";
        for (int n = 1; ; n++) {
            Path candidate = targetDir.resolve(base + " (" + n + ")" + ext);
            if (!Files.exists(candidate)) {
                return candidate;
            }
        }
    }

    /**
     * Stores a progress message and publishes it on {@code /topic/progress/{taskId}}.
     *
     * <p>For messages of type {@code "dedup"} the generic fields carry these values:
     * <ul>
     *   <li>{@code total} - number of audio files found</li>
     *   <li>{@code processed} - number of files scanned</li>
     *   <li>{@code success} - number of duplicate groups</li>
     *   <li>{@code failed} - number of duplicate files moved/copied (not a failure count)</li>
     * </ul>
     */
    private void sendProgress(String taskId,
                              int total,
                              int processed,
                              int success,
                              int failed,
                              String message,
                              boolean completed) {
        ProgressMessage pm = new ProgressMessage();
        pm.setTaskId(taskId);
        pm.setType("dedup");
        pm.setTotal(total);
        pm.setProcessed(processed);
        pm.setSuccess(success);
        pm.setFailed(failed);
        pm.setCurrentFile(null);
        pm.setMessage(message);
        pm.setCompleted(completed);
        progressStore.put(pm);
        messagingTemplate.convertAndSend("/topic/progress/" + taskId, pm);
    }
}