Files
MusicWorkshop/backend/tests/test_library_postprocess.py
T
2026-04-30 14:34:28 +08:00

465 lines
17 KiB
Python

import shutil
import tempfile
import unittest
from pathlib import Path
from backend.app.library_postprocess import DedupeRunner, OrganizeRunner
from backend.app.task_constants import create_empty_task_stats
from backend.app.task_store import TaskStore
from backend.app.task_stream import TaskStreamManager
class DedupeRunnerTests(unittest.TestCase):
    """Exercise ``DedupeRunner`` against a throwaway input/output/trash tree.

    Every test gets its own temporary root holding ``input``, ``output`` and
    ``trash`` directories plus a ``music_workshop.db`` task store. The
    runner's ``_safe_probe_audio`` and ``_safe_read_library_tags`` hooks are
    replaced with dictionary lookups so a test can describe a library file by
    filling ``library_audio_props`` / ``library_tags`` instead of writing
    real audio data.
    """

    def setUp(self):
        """Create the temporary workspace, the task store and the stubbed runner."""
        # Resolve once up front so that keys built with str(path) agree with
        # the resolved paths the assertions compare against, even when the
        # temp directory lives behind a symlink.
        self.root = Path(tempfile.mkdtemp()).resolve(strict=False)
        # mkdtemp() leaves deletion to the caller. Registering the cleanup
        # immediately means the tree is removed even if the rest of setUp
        # fails; ignore_errors keeps a still-open database file from turning
        # teardown into a test error.
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)

        self.input_dir = self.root / 'input'
        self.output_dir = self.root / 'output'
        self.trash_dir = self.root / 'trash'
        for directory in (self.input_dir, self.output_dir, self.trash_dir):
            directory.mkdir()

        # Backing stores for the stubbed hooks; define them before the
        # lambdas that read them are installed.
        self.library_audio_props = {}
        self.library_tags = {}

        self.task_store = TaskStore(self.root / 'music_workshop.db')
        self.runner = DedupeRunner(self.task_store, _NoopPreprocessor(), TaskStreamManager())
        # Unknown paths fall back to an empty mapping.
        self.runner._safe_probe_audio = lambda file_path: self.library_audio_props.get(file_path, {})
        self.runner._safe_read_library_tags = lambda file_path: self.library_tags.get(file_path, {})

    def test_trashes_lower_quality_batch_duplicate(self):
        """Two batch items with one recording id: the 16-bit copy is trashed."""
        task = self._create_task()
        first_path = self._write_source('Artist/Album/01.flac')
        second_path = self._write_source('Artist/Album/01-copy.flac')
        self._insert_matched_item(
            task['task_id'],
            first_path,
            recording_id='recording-1',
            confidence=88.0,
            audio_props={'codec': 'FLAC', 'bit_depth': 16, 'sample_rate': 44100, 'bitrate': 900000, 'channels': 2, 'duration_seconds': 201}
        )
        self._insert_matched_item(
            task['task_id'],
            second_path,
            recording_id='recording-1',
            confidence=95.0,
            audio_props={'codec': 'FLAC', 'bit_depth': 24, 'sample_rate': 96000, 'bitrate': 1500000, 'channels': 2, 'duration_seconds': 201}
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        # One query gives a single consistent snapshot of both rows.
        items = self._items(task['task_id'])
        first_item, second_item = items[0], items[1]
        self.assertEqual(first_item['dedupe_status'], 'duplicate_trashed')
        self.assertFalse(first_item['is_active'])
        self.assertEqual(first_item['duplicate_of_item_id'], second_item['id'])
        self.assertTrue(Path(first_item['trash_file_path']).exists())
        self.assertEqual(second_item['dedupe_status'], 'unique')
        self.assertEqual(stats['dedupe']['batch_duplicates'], 1)
        self.assertEqual(stats['dedupe']['kept_items'], 1)
        log_types = {
            log['event_type']
            for log in self.task_store.list_task_logs(task['task_id'], 1, 50)['logs']
        }
        self.assertIn('dedupe.lookup_started', log_types)
        self.assertIn('dedupe.item_duplicate', log_types)
        self.assertIn('dedupe.item_unique', log_types)

    def test_keeps_existing_library_file_by_default(self):
        """Without the replace option the incoming duplicate is trashed."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'FLAC',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 700000,
            'channels': 2,
            'duration_seconds': 201
        }
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1',
            'musicbrainzalbumid': 'release-1',
            'date': '2024-01-01'
        }
        self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'duplicate_trashed')
        self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
        self.assertTrue(Path(item['trash_file_path']).exists())
        self.assertEqual(stats['dedupe']['library_duplicates'], 1)
        self.assertEqual(stats['dedupe']['replaced_library_items'], 0)

    def test_replaces_lower_quality_library_file_when_enabled(self):
        """With the replace option a 24-bit FLAC displaces the library MP3."""
        task = self._create_task(replace=True)
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'MP3',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 128000,
            'channels': 2,
            'duration_seconds': 201
        }
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1',
            'musicbrainzalbumid': 'release-1',
            'date': '2024-01-01'
        }
        self._insert_matched_item(
            task['task_id'],
            source_path,
            recording_id='recording-1',
            confidence=96.0,
            audio_props={'codec': 'FLAC', 'bit_depth': 24, 'sample_rate': 96000, 'bitrate': 1600000, 'channels': 2, 'duration_seconds': 201}
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config(replace=True))

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'duplicate_replaced')
        self.assertTrue(Path(item['current_file_path']).exists())
        self.assertFalse(library_path.exists())
        self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
        self.assertEqual(stats['dedupe']['replaced_library_items'], 1)
        self.assertEqual(stats['dedupe']['kept_items'], 1)

    def test_version_mismatch_does_not_dedupe_on_text_key(self):
        """'Song (Live)' without ids must not match a library file titled 'Song'."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Singles/2024 - Song/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'FLAC',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 700000,
            'channels': 2,
            'duration_seconds': 201
        }
        # No MusicBrainz ids here, so only textual metadata is available
        # for comparison with the incoming item.
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'albumartist': 'Artist',
            'date': '2024-01-01'
        }
        self._insert_matched_item(
            task['task_id'],
            source_path,
            recording_id=None,
            release_id=None,
            title='Song (Live)',
            duration_seconds=201
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'unique')
        self.assertEqual(stats['dedupe']['library_duplicates'], 0)

    def test_marks_item_failed_when_duplicate_source_file_is_missing(self):
        """A duplicate whose source file vanished is reported as failed."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'FLAC',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 700000,
            'channels': 2,
            'duration_seconds': 201
        }
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1'
        }
        self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
        # Remove the file after the row is stored, so the runner has a
        # duplicate to act on but no file behind it.
        Path(source_path).unlink()
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'failed')
        self.assertEqual(item['dedupe_reason'], 'source_missing')
        self.assertEqual(stats['dedupe']['failed_items'], 1)

    def _items(self, task_id: str) -> list:
        """Return the first page (up to 10 rows) of the task's items."""
        return self.task_store.list_task_items(task_id, None, 1, 10)['items']

    def _create_task(self, replace: bool = False) -> dict:
        """Create a task in the store using this fixture's configuration."""
        return self.task_store.create_task_if_idle(self._config(replace=replace))

    def _config(self, replace: bool = False) -> dict:
        """Build the task configuration pointing at the temporary directories.

        Args:
            replace: Value stored under
                ``advancedStrategy.replaceLowQualityDuplicates``.
        """
        return {
            'input': str(self.input_dir),
            'output': str(self.output_dir),
            'trash': str(self.trash_dir),
            'advancedStrategy': {
                'replaceLowQualityDuplicates': replace
            }
        }

    def _write_source(self, relative_path: str) -> str:
        """Write a placeholder file under ``input`` and return its resolved path."""
        path = self.input_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio')
        return str(path.resolve(strict=False))

    def _write_library(self, relative_path: str) -> Path:
        """Write a placeholder file under ``output`` and return its path."""
        path = self.output_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'library-audio')
        return path

    def _insert_matched_item(
        self,
        task_id: str,
        source_path: str,
        *,
        recording_id: str | None = 'recording-1',
        release_id: str | None = 'release-1',
        title: str = 'Song',
        duration_seconds: int = 201,
        confidence: float = 92.0,
        audio_props: dict | None = None
    ) -> dict:
        """Insert a task item that is already preprocessed and matched.

        Args:
            task_id: Task the item belongs to.
            source_path: Path of an existing file below ``input``; its size
                is read from disk.
            recording_id: Recording id stored in the matched metadata;
                ``None`` also leaves ``source_ids`` empty.
            release_id: Release id stored in the matched metadata.
            title: Matched title.
            duration_seconds: Duration stored in the matched metadata and in
                the default audio properties.
            confidence: Match confidence.
            audio_props: Audio properties to store; when omitted (or empty)
                a 16-bit/44.1 kHz FLAC description is used.

        Returns:
            Whatever ``TaskStore.insert_task_item`` returns.
        """
        path = Path(source_path)
        return self.task_store.insert_task_item(
            task_id,
            original_path=source_path,
            current_file_path=source_path,
            relative_path=path.relative_to(self.input_dir).as_posix(),
            filename=path.name,
            extension=path.suffix.lower(),
            size_bytes=path.stat().st_size,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            match_source='musicbrainz',
            match_confidence=confidence,
            match_is_authoritative=1,
            audio_props_json=audio_props or {
                'codec': 'FLAC',
                'bit_depth': 16,
                'sample_rate': 44100,
                'bitrate': 700000,
                'channels': 2,
                'duration_seconds': duration_seconds
            },
            matched_metadata_json={
                'title': title,
                'artist': 'Artist',
                'artists': ['Artist'],
                'album': 'Album',
                'album_artist': 'Artist',
                'track_number': 1,
                'disc_number': 1,
                'release_date': '2024-01-01',
                'year': 2024,
                'duration_seconds': duration_seconds,
                'recording_id': recording_id,
                'release_id': release_id,
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': recording_id} if recording_id else {}
            }
        )
class OrganizeRunnerTests(unittest.TestCase):
    """Exercise ``OrganizeRunner`` against a throwaway input/output/trash tree.

    Every test gets its own temporary root holding ``input``, ``output`` and
    ``trash`` directories plus a ``music_workshop.db`` task store. Items are
    inserted as already matched and deduplicated, so only the organize step
    is under test.
    """

    def setUp(self):
        """Create the temporary workspace, the task store and the runner."""
        # Resolve once up front so every derived path is already in resolved
        # form, even when the temp directory lives behind a symlink.
        self.root = Path(tempfile.mkdtemp()).resolve(strict=False)
        # mkdtemp() leaves deletion to the caller. Registering the cleanup
        # immediately means the tree is removed even if the rest of setUp
        # fails; ignore_errors keeps a still-open database file from turning
        # teardown into a test error.
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)

        self.input_dir = self.root / 'input'
        self.output_dir = self.root / 'output'
        self.trash_dir = self.root / 'trash'
        for directory in (self.input_dir, self.output_dir, self.trash_dir):
            directory.mkdir()

        self.task_store = TaskStore(self.root / 'music_workshop.db')
        self.runner = OrganizeRunner(self.task_store, TaskStreamManager())

    def test_builds_single_disc_album_path(self):
        """A disc-1 album track lands at ``<bucket>/<artist>/<album>/NN - title``."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['organize_status'], 'organized')
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song.flac')
        self.assertTrue(Path(item['library_file_path']).exists())
        log_types = {
            log['event_type']
            for log in self.task_store.list_task_logs(task['task_id'], 1, 50)['logs']
        }
        self.assertIn('organize.path_planned', log_types)
        self.assertIn('organize.item_organized', log_types)

    def test_places_multi_disc_release_under_disc_folder(self):
        """A track on disc 2 gets an extra ``Disc 2`` path component."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac', disc_number=2, track_number=7)
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/Disc 2/07 - Song.flac')

    def test_places_missing_album_track_under_singles(self):
        """A track without an album goes under ``Singles/<year> - <title>``."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/source.flac', album=None, title='Loose Song', year=2023)
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Singles/2023 - Loose Song/01 - Loose Song.flac')

    def test_places_non_ascii_album_artist_under_hash_bucket(self):
        """A non-ASCII album artist is filed under the ``#`` top-level bucket."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/source.flac', album_artist='周杰伦')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertTrue(item['library_relative_path'].startswith('#/周杰伦/Album/'))

    def test_resolves_target_collisions_with_suffix(self):
        """An occupied target name is disambiguated with a `` (2)`` suffix."""
        task = self._create_task()
        # Occupy the path the runner would otherwise choose.
        target = self.output_dir / 'A' / 'Artist' / 'Album' / '01 - Song.flac'
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(b'existing')
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song (2).flac')
        self.assertEqual(stats['organize']['collision_resolved'], 1)

    def test_moves_failed_item_to_organize_trash(self):
        """When the move into the library fails the item ends up in trash."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()
        output_root = self.output_dir.resolve(strict=False)
        original_move = self.runner._move_file

        def failing_move(source: Path, destination: Path):
            # Block only moves whose destination is below the output root;
            # every other move (such as the one into trash) still goes
            # through the real implementation.
            if output_root in destination.resolve(strict=False).parents:
                raise OSError('blocked')
            return original_move(source, destination)

        self.runner._move_file = failing_move

        self.runner.run(task['task_id'], stats, self._config())

        item = self._items(task['task_id'])[0]
        self.assertEqual(item['organize_status'], 'trashed')
        self.assertTrue(Path(item['trash_file_path']).exists())
        self.assertEqual(stats['organize']['failed_items'], 1)
        self.assertEqual(stats['organize']['trashed_items'], 1)

    def _items(self, task_id: str) -> list:
        """Return the first page (up to 10 rows) of the task's items."""
        return self.task_store.list_task_items(task_id, None, 1, 10)['items']

    def _create_task(self) -> dict:
        """Create a task in the store using this fixture's configuration."""
        return self.task_store.create_task_if_idle(self._config())

    def _config(self) -> dict:
        """Build the task configuration pointing at the temporary directories."""
        return {
            'input': str(self.input_dir),
            'output': str(self.output_dir),
            'trash': str(self.trash_dir)
        }

    def _insert_organize_item(
        self,
        task_id: str,
        relative_path: str,
        *,
        title: str = 'Song',
        album: str | None = 'Album',
        album_artist: str = 'Artist',
        track_number: int = 1,
        disc_number: int = 1,
        year: int = 2024
    ) -> dict:
        """Write a placeholder source file and insert it ready for organizing.

        The row is stored as matched, ``dedupe_status='unique'`` and
        ``organize_status='pending'``.

        Args:
            task_id: Task the item belongs to.
            relative_path: Location of the source file below ``input``.
            title: Matched title.
            album: Matched album, or ``None`` for a track without an album.
            album_artist: Used for the artist, artists and album-artist
                fields alike.
            track_number: Matched track number.
            disc_number: Matched disc number.
            year: Matched year; also used to build ``release_date``.

        Returns:
            Whatever ``TaskStore.insert_task_item`` returns.
        """
        path = self.input_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio')
        resolved = str(path.resolve(strict=False))
        return self.task_store.insert_task_item(
            task_id,
            original_path=resolved,
            current_file_path=resolved,
            relative_path=relative_path,
            filename=path.name,
            extension=path.suffix.lower(),
            size_bytes=path.stat().st_size,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            dedupe_status='unique',
            organize_status='pending',
            matched_metadata_json={
                'title': title,
                'artist': album_artist,
                'artists': [album_artist],
                'album': album,
                'album_artist': album_artist,
                'track_number': track_number,
                'disc_number': disc_number,
                'release_date': f'{year}-01-01',
                'year': year,
                'duration_seconds': 201,
                'recording_id': 'recording-1',
                'release_id': 'release-1',
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': 'recording-1'}
            }
        )
class _NoopPreprocessor:
    """Stand-in preprocessor whose audio probe never reports anything."""

    def probe_audio(self, _file_path: str) -> dict:
        """Ignore the given path and return a new, empty mapping."""
        return dict()
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()