455 lines
16 KiB
Java
455 lines
16 KiB
Java
package com.music.service;
|
||
|
||
import com.music.dto.ProgressMessage;
|
||
import org.jaudiotagger.audio.AudioFile;
|
||
import org.jaudiotagger.audio.AudioFileIO;
|
||
import org.jaudiotagger.tag.FieldKey;
|
||
import org.jaudiotagger.tag.Tag;
|
||
import org.slf4j.Logger;
|
||
import org.slf4j.LoggerFactory;
|
||
import org.springframework.messaging.simp.SimpMessagingTemplate;
|
||
import org.springframework.scheduling.annotation.Async;
|
||
import org.springframework.stereotype.Service;
|
||
|
||
import java.io.IOException;
|
||
import java.io.InputStream;
|
||
import java.nio.file.*;
|
||
import java.nio.file.attribute.BasicFileAttributes;
|
||
import java.security.DigestInputStream;
|
||
import java.security.MessageDigest;
|
||
import java.security.NoSuchAlgorithmException;
|
||
import java.util.*;
|
||
import java.util.concurrent.atomic.AtomicInteger;
|
||
|
||
/**
|
||
* 音乐去重服务(首版:仅实现 MD5 去重)
|
||
*
|
||
* 说明:
|
||
* - 目前实现的是基于 MD5 的二进制级别去重,用于识别完全相同的文件拷贝。
|
||
* - 元数据匹配与智能评分策略后续迭代中补充。
|
||
*/
|
||
@Service
|
||
public class DedupService {
|
||
|
||
private static final Logger log = LoggerFactory.getLogger(DedupService.class);
|
||
|
||
private static final Set<String> AUDIO_EXTENSIONS = new HashSet<>(Arrays.asList(
|
||
"mp3", "flac", "wav", "m4a", "aac", "ogg", "wma", "ape", "aiff", "aif", "wv", "tta", "opus"
|
||
));
|
||
|
||
/** 元数据匹配允许的时长误差(秒) */
|
||
private static final int DURATION_TOLERANCE_SECONDS = 5;
|
||
|
||
private final SimpMessagingTemplate messagingTemplate;
|
||
private final ProgressStore progressStore;
|
||
|
||
/**
 * Creates the service with its collaborators.
 *
 * @param messagingTemplate template used to publish progress messages; must not be null
 * @param progressStore     store that receives each progress message; must not be null
 * @throws NullPointerException if either argument is null
 */
public DedupService(SimpMessagingTemplate messagingTemplate, ProgressStore progressStore) {
    // Fail at construction time instead of on the first progress message of an async task.
    this.messagingTemplate = Objects.requireNonNull(messagingTemplate, "messagingTemplate");
    this.progressStore = Objects.requireNonNull(progressStore, "progressStore");
}
|
||
|
||
/**
|
||
* 异步执行去重任务
|
||
*/
|
||
@Async
|
||
public void dedup(String taskId,
|
||
String libraryDir,
|
||
String trashDir,
|
||
boolean useMd5,
|
||
boolean useMetadata,
|
||
String mode) {
|
||
|
||
Path libraryPath = Paths.get(libraryDir);
|
||
Path trashPath = Paths.get(trashDir);
|
||
|
||
try {
|
||
// 基本校验
|
||
if (!Files.exists(libraryPath) || !Files.isDirectory(libraryPath)) {
|
||
sendProgress(taskId, 0, 0, 0, 0,
|
||
"音乐库目录不存在或不是目录", true);
|
||
return;
|
||
}
|
||
|
||
if (!Files.exists(trashPath)) {
|
||
Files.createDirectories(trashPath);
|
||
}
|
||
|
||
if (!"copy".equalsIgnoreCase(mode) && !"move".equalsIgnoreCase(mode)) {
|
||
sendProgress(taskId, 0, 0, 0, 0,
|
||
"执行模式错误,必须是 copy 或 move", true);
|
||
return;
|
||
}
|
||
|
||
if (!useMd5 && !useMetadata) {
|
||
sendProgress(taskId, 0, 0, 0, 0,
|
||
"至少需要启用一种去重策略(MD5 或元数据匹配)", true);
|
||
return;
|
||
}
|
||
|
||
// 收集所有音频文件
|
||
List<Path> audioFiles = new ArrayList<>();
|
||
Files.walkFileTree(libraryPath, new SimpleFileVisitor<Path>() {
|
||
@Override
|
||
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
|
||
if (isAudioFile(file)) {
|
||
audioFiles.add(file);
|
||
}
|
||
return FileVisitResult.CONTINUE;
|
||
}
|
||
});
|
||
|
||
int total = audioFiles.size();
|
||
if (total == 0) {
|
||
sendProgress(taskId, 0, 0, 0, 0,
|
||
"未在音乐库中找到音频文件", true);
|
||
return;
|
||
}
|
||
|
||
AtomicInteger scanned = new AtomicInteger(0);
|
||
AtomicInteger duplicateGroups = new AtomicInteger(0);
|
||
AtomicInteger moved = new AtomicInteger(0);
|
||
AtomicInteger failed = new AtomicInteger(0);
|
||
|
||
sendProgress(taskId, total, 0, 0, 0,
|
||
"开始扫描音乐库...", false);
|
||
|
||
Map<String, List<Path>> md5Groups = new HashMap<>();
|
||
Map<MetadataKey, List<Path>> metadataGroups = new HashMap<>();
|
||
|
||
// 第一阶段:扫描并根据配置构建分组
|
||
for (Path file : audioFiles) {
|
||
try {
|
||
if (useMd5) {
|
||
String md5 = calculateMd5(file);
|
||
md5Groups.computeIfAbsent(md5, k -> new ArrayList<>()).add(file);
|
||
}
|
||
|
||
if (useMetadata) {
|
||
Optional<MetadataKey> keyOpt = readMetadataKey(file);
|
||
keyOpt.ifPresent(key -> metadataGroups
|
||
.computeIfAbsent(key, k -> new ArrayList<>())
|
||
.add(file));
|
||
}
|
||
|
||
int currentScanned = scanned.incrementAndGet();
|
||
if (currentScanned % 50 == 0) {
|
||
sendProgress(taskId, total, currentScanned,
|
||
duplicateGroups.get(), moved.get(),
|
||
String.format("扫描中(%d/%d)", currentScanned, total),
|
||
false);
|
||
}
|
||
} catch (Exception e) {
|
||
failed.incrementAndGet();
|
||
log.warn("扫描文件失败: {}", file, e);
|
||
}
|
||
}
|
||
|
||
// 第二阶段:处理 MD5 去重结果(完全二进制重复)
|
||
if (useMd5) {
|
||
for (Map.Entry<String, List<Path>> entry : md5Groups.entrySet()) {
|
||
List<Path> group = entry.getValue();
|
||
if (group.size() <= 1) {
|
||
continue;
|
||
}
|
||
|
||
duplicateGroups.incrementAndGet();
|
||
|
||
Path keep = chooseBestFileByScore(group);
|
||
List<Path> duplicates = new ArrayList<>(group);
|
||
duplicates.remove(keep);
|
||
|
||
moved.addAndGet(handleDuplicates(duplicates, keep, trashPath, mode, taskId, total,
|
||
scanned, duplicateGroups, failed));
|
||
}
|
||
}
|
||
|
||
if (useMetadata) {
|
||
// 第三阶段:处理元数据匹配去重结果
|
||
for (Map.Entry<MetadataKey, List<Path>> entry : metadataGroups.entrySet()) {
|
||
List<Path> group = entry.getValue();
|
||
if (group.size() <= 1) {
|
||
continue;
|
||
}
|
||
|
||
duplicateGroups.incrementAndGet();
|
||
|
||
Path keep = chooseBestFileByScore(group);
|
||
List<Path> duplicates = new ArrayList<>(group);
|
||
duplicates.remove(keep);
|
||
|
||
moved.addAndGet(handleDuplicates(duplicates, keep, trashPath, mode, taskId, total,
|
||
scanned, duplicateGroups, failed));
|
||
}
|
||
}
|
||
|
||
sendProgress(taskId, total, scanned.get(),
|
||
duplicateGroups.get(), moved.get(),
|
||
String.format("任务完成!扫描文件: %d, 重复组: %d, 移动/复制文件: %d",
|
||
scanned.get(), duplicateGroups.get(), moved.get()),
|
||
true);
|
||
|
||
} catch (Exception e) {
|
||
log.error("去重任务执行失败", e);
|
||
sendProgress(taskId, 0, 0, 0, 0,
|
||
"任务执行失败: " + e.getMessage(), true);
|
||
}
|
||
}
|
||
|
||
private boolean isAudioFile(Path file) {
|
||
String name = file.getFileName().toString().toLowerCase();
|
||
int idx = name.lastIndexOf('.');
|
||
if (idx <= 0 || idx == name.length() - 1) {
|
||
return false;
|
||
}
|
||
String ext = name.substring(idx + 1);
|
||
return AUDIO_EXTENSIONS.contains(ext);
|
||
}
|
||
|
||
private String calculateMd5(Path file) throws IOException, NoSuchAlgorithmException {
|
||
MessageDigest md = MessageDigest.getInstance("MD5");
|
||
try (InputStream is = Files.newInputStream(file);
|
||
DigestInputStream dis = new DigestInputStream(is, md)) {
|
||
byte[] buffer = new byte[8192];
|
||
// 读取整个文件,结果自动更新到 md 中
|
||
while (dis.read(buffer) != -1) {
|
||
// no-op
|
||
}
|
||
}
|
||
byte[] digest = md.digest();
|
||
StringBuilder sb = new StringBuilder(digest.length * 2);
|
||
for (byte b : digest) {
|
||
sb.append(String.format("%02x", b));
|
||
}
|
||
return sb.toString();
|
||
}
|
||
|
||
/**
|
||
* 元数据分组键:艺术家 + 标题 + 专辑 + 时长(按 5 秒误差归一)
|
||
*/
|
||
private static class MetadataKey {
|
||
private final String artist;
|
||
private final String title;
|
||
private final String album;
|
||
private final int normalizedDuration;
|
||
|
||
private MetadataKey(String artist, String title, String album, int normalizedDuration) {
|
||
this.artist = artist;
|
||
this.title = title;
|
||
this.album = album;
|
||
this.normalizedDuration = normalizedDuration;
|
||
}
|
||
|
||
@Override
|
||
public boolean equals(Object o) {
|
||
if (this == o) return true;
|
||
if (!(o instanceof MetadataKey)) return false;
|
||
MetadataKey that = (MetadataKey) o;
|
||
return normalizedDuration == that.normalizedDuration &&
|
||
Objects.equals(artist, that.artist) &&
|
||
Objects.equals(title, that.title) &&
|
||
Objects.equals(album, that.album);
|
||
}
|
||
|
||
@Override
|
||
public int hashCode() {
|
||
return Objects.hash(artist, title, album, normalizedDuration);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 从音频文件读取用于匹配的元数据键
|
||
*/
|
||
private Optional<MetadataKey> readMetadataKey(Path file) {
|
||
try {
|
||
AudioFile audioFile = AudioFileIO.read(file.toFile());
|
||
Tag tag = audioFile.getTag();
|
||
if (tag == null) {
|
||
return Optional.empty();
|
||
}
|
||
|
||
String artist = normalize(tag.getFirst(FieldKey.ARTIST));
|
||
String title = normalize(tag.getFirst(FieldKey.TITLE));
|
||
String album = normalize(tag.getFirst(FieldKey.ALBUM));
|
||
int lengthSec = audioFile.getAudioHeader().getTrackLength();
|
||
|
||
if (artist.isEmpty() || title.isEmpty()) {
|
||
// 核心标签缺失则跳过元数据分组
|
||
return Optional.empty();
|
||
}
|
||
|
||
// 将时长按 5 秒误差容忍度归一化
|
||
int normalizedDuration = lengthSec / DURATION_TOLERANCE_SECONDS;
|
||
|
||
return Optional.of(new MetadataKey(artist, title, album, normalizedDuration));
|
||
} catch (Exception e) {
|
||
// 标签损坏或不支持的格式时,忽略元数据去重
|
||
log.debug("读取元数据失败: {}", file, e);
|
||
return Optional.empty();
|
||
}
|
||
}
|
||
|
||
private String normalize(String s) {
|
||
if (s == null) {
|
||
return "";
|
||
}
|
||
return s.trim().toLowerCase();
|
||
}
|
||
|
||
/**
|
||
* 对一组候选文件进行综合评分,选择最佳保留文件
|
||
*
|
||
* 评分策略:
|
||
* - 格式优先:FLAC > 其他无损 > 有损
|
||
* - 码率优先:高码率得分更高(如果可获取)
|
||
* - 文件大小:极小文件减分
|
||
* - 文件名噪声惩罚:含样本/preview 等噪声词减分
|
||
*/
|
||
private Path chooseBestFileByScore(List<Path> candidates) {
|
||
if (candidates.size() == 1) {
|
||
return candidates.get(0);
|
||
}
|
||
|
||
return candidates.stream()
|
||
.max(Comparator.comparingDouble(this::scoreFile))
|
||
.orElse(candidates.get(0));
|
||
}
|
||
|
||
private double scoreFile(Path file) {
|
||
double score = 0.0;
|
||
|
||
String name = file.getFileName().toString().toLowerCase();
|
||
String ext = "";
|
||
int idx = name.lastIndexOf('.');
|
||
if (idx > 0 && idx < name.length() - 1) {
|
||
ext = name.substring(idx + 1);
|
||
}
|
||
|
||
// 格式权重
|
||
if ("flac".equals(ext)) {
|
||
score += 100;
|
||
} else if (Arrays.asList("wav", "ape", "aiff", "aif", "wv", "tta").contains(ext)) {
|
||
score += 80;
|
||
} else {
|
||
score += 50; // 有损格式
|
||
}
|
||
|
||
// 文件大小(KB)加权:更大的通常音质更好,但极大文件不再线性加分
|
||
try {
|
||
long size = Files.size(file);
|
||
double sizeKB = size / 1024.0;
|
||
if (sizeKB < 128) {
|
||
score -= 30; // 极小文件,疑似样本/损坏
|
||
} else {
|
||
score += Math.min(sizeKB / 100.0, 40.0);
|
||
}
|
||
} catch (IOException e) {
|
||
// 忽略大小获取失败
|
||
}
|
||
|
||
// 文件名噪声惩罚
|
||
if (name.contains("sample") || name.contains("preview") || name.contains("demo")) {
|
||
score -= 20;
|
||
}
|
||
if (name.matches(".*\\b(live|remix|karaoke)\\b.*")) {
|
||
// 某些版本可能不是首选,略微扣分(具体偏好可根据需要调整)
|
||
score -= 5;
|
||
}
|
||
|
||
// TODO:如有需要,可从音频头中读取比特率,进一步加权
|
||
|
||
return score;
|
||
}
|
||
|
||
/**
|
||
* 将重复文件移动/复制到回收站,并更新统计与进度
|
||
*
|
||
* @return 实际成功移动/复制的文件数量
|
||
*/
|
||
private int handleDuplicates(List<Path> duplicates,
|
||
Path keep,
|
||
Path trashPath,
|
||
String mode,
|
||
String taskId,
|
||
int total,
|
||
AtomicInteger scanned,
|
||
AtomicInteger duplicateGroups,
|
||
AtomicInteger failed) {
|
||
int movedCount = 0;
|
||
for (Path dup : duplicates) {
|
||
try {
|
||
Path target = resolveTargetFile(trashPath, dup.getFileName().toString());
|
||
if ("move".equalsIgnoreCase(mode)) {
|
||
Files.move(dup, target, StandardCopyOption.REPLACE_EXISTING);
|
||
} else {
|
||
Files.copy(dup, target, StandardCopyOption.REPLACE_EXISTING);
|
||
}
|
||
movedCount++;
|
||
sendProgress(taskId, total, scanned.get(),
|
||
duplicateGroups.get(), movedCount,
|
||
String.format("重复文件: %s (保留: %s)",
|
||
dup.getFileName(), keep.getFileName()),
|
||
false);
|
||
} catch (Exception e) {
|
||
failed.incrementAndGet();
|
||
log.warn("处理重复文件失败: {}", dup, e);
|
||
}
|
||
}
|
||
return movedCount;
|
||
}
|
||
|
||
/**
|
||
* 解析回收站中的目标文件名,处理重名冲突
|
||
*/
|
||
private Path resolveTargetFile(Path targetDir, String fileName) throws IOException {
|
||
Path target = targetDir.resolve(fileName);
|
||
if (!Files.exists(target)) {
|
||
return target;
|
||
}
|
||
|
||
int lastDot = fileName.lastIndexOf('.');
|
||
String base = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
|
||
String ext = lastDot > 0 ? fileName.substring(lastDot) : "";
|
||
int n = 1;
|
||
while (Files.exists(target)) {
|
||
String next = base + " (" + n + ")" + ext;
|
||
target = targetDir.resolve(next);
|
||
n++;
|
||
}
|
||
return target;
|
||
}
|
||
|
||
/**
|
||
* 发送进度消息
|
||
*
|
||
* 字段语义(供前端展示用):
|
||
* - total:扫描到的音频文件总数
|
||
* - processed:已扫描文件数
|
||
* - success:重复组数量
|
||
* - failed:移动/复制的重复文件数量
|
||
*
|
||
* 由于进度字段在不同任务中的含义略有差异,前端可根据 type === "dedup" 做专门映射。
|
||
*/
|
||
private void sendProgress(String taskId,
|
||
int total,
|
||
int processed,
|
||
int success,
|
||
int failed,
|
||
String message,
|
||
boolean completed) {
|
||
ProgressMessage pm = new ProgressMessage();
|
||
pm.setTaskId(taskId);
|
||
pm.setType("dedup");
|
||
pm.setTotal(total);
|
||
pm.setProcessed(processed);
|
||
pm.setSuccess(success);
|
||
pm.setFailed(failed);
|
||
pm.setCurrentFile(null);
|
||
pm.setMessage(message);
|
||
pm.setCompleted(completed);
|
||
|
||
progressStore.put(pm);
|
||
messagingTemplate.convertAndSend("/topic/progress/" + taskId, pm);
|
||
}
|
||
}
|
||
|