"""Tests for the library post-processing runners (dedupe and organize).

Each test builds a throwaway workspace (input / output / trash directories
plus a task database file) under a fresh temporary directory, inserts
already-matched task items into the store, runs the runner under test and
then inspects the stored items, the stats dict and the task logs.
"""

import shutil
import tempfile
import unittest
from pathlib import Path

from backend.app.library_postprocess import DedupeRunner, OrganizeRunner
from backend.app.task_constants import create_empty_task_stats
from backend.app.task_store import TaskStore
from backend.app.task_stream import TaskStreamManager


class _WorkspaceTestCase(unittest.TestCase):
    """Shared fixture: temporary workspace directories and a ``TaskStore``.

    Defines no tests of its own; the concrete test classes below extend it.
    """

    def setUp(self):
        """Create the workspace and schedule its removal."""
        self.root = Path(tempfile.mkdtemp())
        # mkdtemp() never deletes the directory itself. Register the cleanup
        # right away so it also runs when a later setUp step raises.
        # ignore_errors keeps teardown best-effort (e.g. if a file inside the
        # workspace is still held open on platforms that forbid deleting it).
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)

        self.input_dir = self.root / 'input'
        self.output_dir = self.root / 'output'
        self.trash_dir = self.root / 'trash'
        self.input_dir.mkdir()
        self.output_dir.mkdir()
        self.trash_dir.mkdir()
        self.task_store = TaskStore(self.root / 'music_workshop.db')

    def _list_items(self, task_id: str) -> list:
        """Return the ``'items'`` list the store reports for ``task_id``.

        NOTE(review): the positional arguments ``None, 1, 10`` look like
        (filter, page, page size) -- confirm against ``TaskStore``.
        """
        return self.task_store.list_task_items(task_id, None, 1, 10)['items']

    def _log_event_types(self, task_id: str) -> set:
        """Return the set of ``event_type`` values logged for ``task_id``."""
        logs = self.task_store.list_task_logs(task_id, 1, 50)['logs']
        return {log['event_type'] for log in logs}


class DedupeRunnerTests(_WorkspaceTestCase):
    """Behavioural tests for ``DedupeRunner.run``."""

    def setUp(self):
        """Build a runner whose library probes are served from in-memory dicts."""
        super().setUp()
        # Per-test fakes: tests fill these, keyed by the path the runner asks for.
        self.library_audio_props = {}
        self.library_tags = {}
        self.runner = DedupeRunner(self.task_store, _NoopPreprocessor(), TaskStreamManager())
        # Replace the runner's file-inspection hooks so no real audio parsing
        # is attempted on the dummy files written by the tests.
        self.runner._safe_probe_audio = lambda file_path: self.library_audio_props.get(file_path, {})
        self.runner._safe_read_library_tags = lambda file_path: self.library_tags.get(file_path, {})

    def test_trashes_lower_quality_batch_duplicate(self):
        """Of two batch items with the same recording, the weaker one is trashed."""
        task = self._create_task()
        first_path = self._write_source('Artist/Album/01.flac')
        second_path = self._write_source('Artist/Album/01-copy.flac')
        self._insert_matched_item(
            task['task_id'],
            first_path,
            recording_id='recording-1',
            confidence=88.0,
            audio_props={
                'codec': 'FLAC',
                'bit_depth': 16,
                'sample_rate': 44100,
                'bitrate': 900000,
                'channels': 2,
                'duration_seconds': 201
            }
        )
        self._insert_matched_item(
            task['task_id'],
            second_path,
            recording_id='recording-1',
            confidence=95.0,
            audio_props={
                'codec': 'FLAC',
                'bit_depth': 24,
                'sample_rate': 96000,
                'bitrate': 1500000,
                'channels': 2,
                'duration_seconds': 201
            }
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        # One query is enough; both items come from the same listing.
        items = self._list_items(task['task_id'])
        first_item = items[0]
        second_item = items[1]
        self.assertEqual(first_item['dedupe_status'], 'duplicate_trashed')
        self.assertFalse(first_item['is_active'])
        self.assertEqual(first_item['duplicate_of_item_id'], second_item['id'])
        self.assertTrue(Path(first_item['trash_file_path']).exists())
        self.assertEqual(second_item['dedupe_status'], 'unique')
        self.assertEqual(stats['dedupe']['batch_duplicates'], 1)
        self.assertEqual(stats['dedupe']['kept_items'], 1)

        log_types = self._log_event_types(task['task_id'])
        self.assertIn('dedupe.lookup_started', log_types)
        self.assertIn('dedupe.item_duplicate', log_types)
        self.assertIn('dedupe.item_unique', log_types)

    def test_keeps_existing_library_file_by_default(self):
        """With replacement disabled, the incoming duplicate is trashed."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'FLAC',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 700000,
            'channels': 2,
            'duration_seconds': 201
        }
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1',
            'musicbrainzalbumid': 'release-1',
            'date': '2024-01-01'
        }
        self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'duplicate_trashed')
        self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
        self.assertTrue(Path(item['trash_file_path']).exists())
        self.assertEqual(stats['dedupe']['library_duplicates'], 1)
        self.assertEqual(stats['dedupe']['replaced_library_items'], 0)

    def test_replaces_lower_quality_library_file_when_enabled(self):
        """With replacement enabled, a better incoming file displaces the library copy."""
        task = self._create_task(replace=True)
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'MP3',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 128000,
            'channels': 2,
            'duration_seconds': 201
        }
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1',
            'musicbrainzalbumid': 'release-1',
            'date': '2024-01-01'
        }
        self._insert_matched_item(
            task['task_id'],
            source_path,
            recording_id='recording-1',
            confidence=96.0,
            audio_props={
                'codec': 'FLAC',
                'bit_depth': 24,
                'sample_rate': 96000,
                'bitrate': 1600000,
                'channels': 2,
                'duration_seconds': 201
            }
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config(replace=True))

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'duplicate_replaced')
        self.assertTrue(Path(item['current_file_path']).exists())
        self.assertFalse(library_path.exists())
        self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
        self.assertEqual(stats['dedupe']['replaced_library_items'], 1)
        self.assertEqual(stats['dedupe']['kept_items'], 1)

    def test_version_mismatch_does_not_dedupe_on_text_key(self):
        """An item titled 'Song (Live)' without IDs is not matched to library 'Song'."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Singles/2024 - Song/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'FLAC',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 700000,
            'channels': 2,
            'duration_seconds': 201
        }
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'albumartist': 'Artist',
            'date': '2024-01-01'
        }
        # No recording/release IDs on the incoming item, so only textual
        # metadata is left for the runner to compare.
        self._insert_matched_item(
            task['task_id'],
            source_path,
            recording_id=None,
            release_id=None,
            title='Song (Live)',
            duration_seconds=201
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'unique')
        self.assertEqual(stats['dedupe']['library_duplicates'], 0)

    def test_marks_item_failed_when_duplicate_source_file_is_missing(self):
        """A duplicate whose source file vanished is recorded as failed."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self.library_audio_props[str(library_path)] = {
            'codec': 'FLAC',
            'bit_depth': 16,
            'sample_rate': 44100,
            'bitrate': 700000,
            'channels': 2,
            'duration_seconds': 201
        }
        self.library_tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1'
        }
        self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
        # Remove the file after the item has been registered in the store.
        Path(source_path).unlink()
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'failed')
        self.assertEqual(item['dedupe_reason'], 'source_missing')
        self.assertEqual(stats['dedupe']['failed_items'], 1)

    def _create_task(self, replace: bool = False) -> dict:
        """Create a task in the store using this test's configuration."""
        return self.task_store.create_task_if_idle(self._config(replace=replace))

    def _config(self, replace: bool = False) -> dict:
        """Return the task configuration pointing at the workspace directories."""
        return {
            'input': str(self.input_dir),
            'output': str(self.output_dir),
            'trash': str(self.trash_dir),
            'advancedStrategy': {
                'replaceLowQualityDuplicates': replace
            }
        }

    def _write_source(self, relative_path: str) -> str:
        """Write a dummy file under the input directory; return its resolved path."""
        path = self.input_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio')
        return str(path.resolve(strict=False))

    def _write_library(self, relative_path: str) -> Path:
        """Write a dummy file under the output directory; return its (unresolved) path."""
        path = self.output_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'library-audio')
        return path

    def _insert_matched_item(
        self,
        task_id: str,
        source_path: str,
        *,
        recording_id: str | None = 'recording-1',
        release_id: str | None = 'release-1',
        title: str = 'Song',
        duration_seconds: int = 201,
        confidence: float = 92.0,
        audio_props: dict | None = None
    ) -> dict:
        """Insert a task item marked as matched and return the store's result.

        ``source_path`` must exist and lie under the input directory (its size
        and its path relative to the input directory are recorded). When
        ``audio_props`` is omitted or empty, a default 16-bit/44.1 kHz FLAC
        description using ``duration_seconds`` is stored instead.
        """
        path = Path(source_path)
        return self.task_store.insert_task_item(
            task_id,
            original_path=source_path,
            current_file_path=source_path,
            relative_path=path.relative_to(self.input_dir).as_posix(),
            filename=path.name,
            extension=path.suffix.lower(),
            size_bytes=path.stat().st_size,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            match_source='musicbrainz',
            match_confidence=confidence,
            match_is_authoritative=1,
            audio_props_json=audio_props or {
                'codec': 'FLAC',
                'bit_depth': 16,
                'sample_rate': 44100,
                'bitrate': 700000,
                'channels': 2,
                'duration_seconds': duration_seconds
            },
            matched_metadata_json={
                'title': title,
                'artist': 'Artist',
                'artists': ['Artist'],
                'album': 'Album',
                'album_artist': 'Artist',
                'track_number': 1,
                'disc_number': 1,
                'release_date': '2024-01-01',
                'year': 2024,
                'duration_seconds': duration_seconds,
                'recording_id': recording_id,
                'release_id': release_id,
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': recording_id} if recording_id else {}
            }
        )


class OrganizeRunnerTests(_WorkspaceTestCase):
    """Behavioural tests for ``OrganizeRunner.run``."""

    def setUp(self):
        """Build the runner on top of the shared workspace."""
        super().setUp()
        self.runner = OrganizeRunner(self.task_store, TaskStreamManager())

    def test_builds_single_disc_album_path(self):
        """A disc-1 album track lands at ``<bucket>/<artist>/<album>/<nn> - <title>``."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['organize_status'], 'organized')
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song.flac')
        self.assertTrue(Path(item['library_file_path']).exists())

        log_types = self._log_event_types(task['task_id'])
        self.assertIn('organize.path_planned', log_types)
        self.assertIn('organize.item_organized', log_types)

    def test_places_multi_disc_release_under_disc_folder(self):
        """A track with disc number 2 is placed under a ``Disc 2`` folder."""
        task = self._create_task()
        self._insert_organize_item(
            task['task_id'],
            'Artist/Album/source.flac',
            disc_number=2,
            track_number=7
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/Disc 2/07 - Song.flac')

    def test_places_missing_album_track_under_singles(self):
        """A track without an album goes under ``Singles/<year> - <title>``."""
        task = self._create_task()
        self._insert_organize_item(
            task['task_id'],
            'Artist/source.flac',
            album=None,
            title='Loose Song',
            year=2023
        )
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(
            item['library_relative_path'],
            'A/Artist/Singles/2023 - Loose Song/01 - Loose Song.flac'
        )

    def test_places_non_ascii_album_artist_under_hash_bucket(self):
        """A non-ASCII album artist is filed under the ``#`` top-level bucket."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/source.flac', album_artist='周杰伦')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertTrue(item['library_relative_path'].startswith('#/周杰伦/Album/'))

    def test_resolves_target_collisions_with_suffix(self):
        """An occupied target path gets a `` (2)`` suffix and is counted in stats."""
        task = self._create_task()
        # Pre-populate the exact path the runner would otherwise choose.
        target = self.output_dir / 'A' / 'Artist' / 'Album' / '01 - Song.flac'
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(b'existing')
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song (2).flac')
        self.assertEqual(stats['organize']['collision_resolved'], 1)

    def test_moves_failed_item_to_organize_trash(self):
        """When the move into the library fails, the item ends up in trash."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()
        output_root = self.output_dir.resolve(strict=False)
        original_move = self.runner._move_file

        def failing_move(source: Path, destination: Path):
            # Fail only for destinations inside the output directory; any
            # other move (e.g. into trash) is delegated to the real method.
            if output_root in destination.resolve(strict=False).parents:
                raise OSError('blocked')
            return original_move(source, destination)

        self.runner._move_file = failing_move

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['organize_status'], 'trashed')
        self.assertTrue(Path(item['trash_file_path']).exists())
        self.assertEqual(stats['organize']['failed_items'], 1)
        self.assertEqual(stats['organize']['trashed_items'], 1)

    def _create_task(self) -> dict:
        """Create a task in the store using this test's configuration."""
        return self.task_store.create_task_if_idle(self._config())

    def _config(self) -> dict:
        """Return the task configuration pointing at the workspace directories."""
        return {
            'input': str(self.input_dir),
            'output': str(self.output_dir),
            'trash': str(self.trash_dir)
        }

    def _insert_organize_item(
        self,
        task_id: str,
        relative_path: str,
        *,
        title: str = 'Song',
        album: str | None = 'Album',
        album_artist: str = 'Artist',
        track_number: int = 1,
        disc_number: int = 1,
        year: int = 2024
    ) -> dict:
        """Write a dummy source file and insert it as a matched, deduped item.

        The file is created at ``relative_path`` under the input directory and
        the item is stored with ``dedupe_status='unique'`` and
        ``organize_status='pending'``. Returns the store's insert result.
        """
        path = self.input_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio')
        return self.task_store.insert_task_item(
            task_id,
            original_path=str(path.resolve(strict=False)),
            current_file_path=str(path.resolve(strict=False)),
            relative_path=relative_path,
            filename=path.name,
            extension=path.suffix.lower(),
            size_bytes=path.stat().st_size,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            dedupe_status='unique',
            organize_status='pending',
            matched_metadata_json={
                'title': title,
                'artist': album_artist,
                'artists': [album_artist],
                'album': album,
                'album_artist': album_artist,
                'track_number': track_number,
                'disc_number': disc_number,
                'release_date': f'{year}-01-01',
                'year': year,
                'duration_seconds': 201,
                'recording_id': 'recording-1',
                'release_id': 'release-1',
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': 'recording-1'}
            }
        )


class _NoopPreprocessor:
    """Stand-in preprocessor whose audio probe reports nothing."""

    def probe_audio(self, _file_path: str) -> dict:
        """Return an empty properties dict regardless of the file."""
        return {}


if __name__ == '__main__':
    unittest.main()