465 lines
17 KiB
Python
465 lines
17 KiB
Python
import shutil
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
from backend.app.library_postprocess import DedupeRunner, OrganizeRunner
|
|
from backend.app.task_constants import create_empty_task_stats
|
|
from backend.app.task_store import TaskStore
|
|
from backend.app.task_stream import TaskStreamManager
|
|
|
|
|
|
class DedupeRunnerTests(unittest.TestCase):
    """Run DedupeRunner against a throwaway input/output/trash directory tree.

    The runner's ``_safe_probe_audio`` and ``_safe_read_library_tags`` hooks are
    replaced with dictionary lookups so each test can describe library files
    without real audio data.
    """

    def setUp(self):
        # Resolve the root once: _write_source() returns resolved paths and
        # _insert_matched_item() relativizes them against input_dir, which
        # raises ValueError if the temp dir sits behind a symlink.
        self.root = Path(tempfile.mkdtemp()).resolve()
        # mkdtemp() leaves cleanup to the caller; register it immediately so the
        # tree is removed even when a later setUp step fails.
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)

        self.input_dir = self.root / 'input'
        self.output_dir = self.root / 'output'
        self.trash_dir = self.root / 'trash'
        for directory in (self.input_dir, self.output_dir, self.trash_dir):
            directory.mkdir()

        # Backing stores for the stubbed probes, keyed by str(library_path).
        # Created before the stubs so the lambdas never see a missing attribute.
        self.library_audio_props = {}
        self.library_tags = {}

        self.task_store = TaskStore(self.root / 'music_workshop.db')
        self.runner = DedupeRunner(self.task_store, _NoopPreprocessor(), TaskStreamManager())
        self.runner._safe_probe_audio = lambda file_path: self.library_audio_props.get(file_path, {})
        self.runner._safe_read_library_tags = lambda file_path: self.library_tags.get(file_path, {})

    def test_trashes_lower_quality_batch_duplicate(self):
        """Of two batch items sharing a recording id, the 16-bit one is trashed."""
        task = self._create_task()
        first_path = self._write_source('Artist/Album/01.flac')
        second_path = self._write_source('Artist/Album/01-copy.flac')

        self._insert_matched_item(
            task['task_id'],
            first_path,
            recording_id='recording-1',
            confidence=88.0,
            audio_props=self._audio_props(bitrate=900000),
        )
        self._insert_matched_item(
            task['task_id'],
            second_path,
            recording_id='recording-1',
            confidence=95.0,
            audio_props=self._audio_props(bit_depth=24, sample_rate=96000, bitrate=1500000),
        )

        stats = create_empty_task_stats()
        self.runner.run(task['task_id'], stats, self._config())

        # One query is enough: both rows come from the same page.
        items = self._list_items(task['task_id'])
        first_item, second_item = items[0], items[1]

        self.assertEqual(first_item['dedupe_status'], 'duplicate_trashed')
        self.assertFalse(first_item['is_active'])
        self.assertEqual(first_item['duplicate_of_item_id'], second_item['id'])
        self.assertTrue(Path(first_item['trash_file_path']).exists())
        self.assertEqual(second_item['dedupe_status'], 'unique')
        self.assertEqual(stats['dedupe']['batch_duplicates'], 1)
        self.assertEqual(stats['dedupe']['kept_items'], 1)

        log_types = {
            log['event_type']
            for log in self.task_store.list_task_logs(task['task_id'], 1, 50)['logs']
        }
        self.assertIn('dedupe.lookup_started', log_types)
        self.assertIn('dedupe.item_duplicate', log_types)
        self.assertIn('dedupe.item_unique', log_types)

    def test_keeps_existing_library_file_by_default(self):
        """Without the replace option the incoming duplicate is trashed."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self._register_library_file(
            library_path,
            audio_props=self._audio_props(),
            tags={
                'title': 'Song',
                'artist': 'Artist',
                'album': 'Album',
                'albumartist': 'Artist',
                'tracknumber': '1',
                'discnumber': '1',
                'musicbrainzrecordingid': 'recording-1',
                'musicbrainzalbumid': 'release-1',
                'date': '2024-01-01',
            },
        )

        self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
        stats = create_empty_task_stats()
        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'duplicate_trashed')
        self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
        self.assertTrue(Path(item['trash_file_path']).exists())
        self.assertEqual(stats['dedupe']['library_duplicates'], 1)
        self.assertEqual(stats['dedupe']['replaced_library_items'], 0)

    def test_replaces_lower_quality_library_file_when_enabled(self):
        """With the replace option a 24-bit FLAC supersedes the library MP3."""
        task = self._create_task(replace=True)
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self._register_library_file(
            library_path,
            audio_props=self._audio_props(codec='MP3', bitrate=128000),
            tags={
                'title': 'Song',
                'artist': 'Artist',
                'album': 'Album',
                'albumartist': 'Artist',
                'tracknumber': '1',
                'discnumber': '1',
                'musicbrainzrecordingid': 'recording-1',
                'musicbrainzalbumid': 'release-1',
                'date': '2024-01-01',
            },
        )

        self._insert_matched_item(
            task['task_id'],
            source_path,
            recording_id='recording-1',
            confidence=96.0,
            audio_props=self._audio_props(bit_depth=24, sample_rate=96000, bitrate=1600000),
        )
        stats = create_empty_task_stats()
        self.runner.run(task['task_id'], stats, self._config(replace=True))

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'duplicate_replaced')
        self.assertTrue(Path(item['current_file_path']).exists())
        self.assertFalse(library_path.exists())
        self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
        self.assertEqual(stats['dedupe']['replaced_library_items'], 1)
        self.assertEqual(stats['dedupe']['kept_items'], 1)

    def test_version_mismatch_does_not_dedupe_on_text_key(self):
        """'Song (Live)' without MusicBrainz ids stays unique next to 'Song'."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Singles/2024 - Song/01 - Song.flac')
        self._register_library_file(
            library_path,
            audio_props=self._audio_props(),
            tags={
                'title': 'Song',
                'artist': 'Artist',
                'albumartist': 'Artist',
                'date': '2024-01-01',
            },
        )

        self._insert_matched_item(
            task['task_id'],
            source_path,
            recording_id=None,
            release_id=None,
            title='Song (Live)',
            duration_seconds=201,
        )
        stats = create_empty_task_stats()
        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'unique')
        self.assertEqual(stats['dedupe']['library_duplicates'], 0)

    def test_marks_item_failed_when_duplicate_source_file_is_missing(self):
        """A duplicate whose source file vanished is reported as source_missing."""
        task = self._create_task()
        source_path = self._write_source('Artist/Album/01.flac')
        library_path = self._write_library('A/Artist/Album/01 - Song.flac')
        self._register_library_file(
            library_path,
            audio_props=self._audio_props(),
            tags={
                'title': 'Song',
                'artist': 'Artist',
                'album': 'Album',
                'albumartist': 'Artist',
                'tracknumber': '1',
                'discnumber': '1',
                'musicbrainzrecordingid': 'recording-1',
            },
        )

        self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
        # Remove the file after the row exists so the runner finds a dangling path.
        Path(source_path).unlink()
        stats = create_empty_task_stats()
        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['dedupe_status'], 'failed')
        self.assertEqual(item['dedupe_reason'], 'source_missing')
        self.assertEqual(stats['dedupe']['failed_items'], 1)

    def _create_task(self, replace: bool = False) -> dict:
        """Create a task in the store using the same config the runner receives."""
        return self.task_store.create_task_if_idle(self._config(replace=replace))

    def _config(self, replace: bool = False) -> dict:
        """Build the task config; ``replace`` sets replaceLowQualityDuplicates."""
        return {
            'input': str(self.input_dir),
            'output': str(self.output_dir),
            'trash': str(self.trash_dir),
            'advancedStrategy': {
                'replaceLowQualityDuplicates': replace,
            },
        }

    def _list_items(self, task_id: str) -> list:
        """Return the first page (up to 10 rows) of the task's items."""
        return self.task_store.list_task_items(task_id, None, 1, 10)['items']

    @staticmethod
    def _audio_props(
        *,
        codec: str = 'FLAC',
        bit_depth: int = 16,
        sample_rate: int = 44100,
        bitrate: int = 700000,
        duration_seconds: int = 201,
    ) -> dict:
        """Build a stereo audio-properties dict; defaults are the baseline fixture."""
        return {
            'codec': codec,
            'bit_depth': bit_depth,
            'sample_rate': sample_rate,
            'bitrate': bitrate,
            'channels': 2,
            'duration_seconds': duration_seconds,
        }

    def _register_library_file(self, library_path: Path, *, audio_props: dict, tags: dict) -> None:
        """Make the stubbed probes return the given props and tags for the file."""
        key = str(library_path)
        self.library_audio_props[key] = audio_props
        self.library_tags[key] = tags

    def _write_source(self, relative_path: str) -> str:
        """Create a dummy file under input_dir and return its resolved path."""
        path = self.input_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio')
        return str(path.resolve(strict=False))

    def _write_library(self, relative_path: str) -> Path:
        """Create a dummy file under output_dir and return its path."""
        path = self.output_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'library-audio')
        return path

    def _insert_matched_item(
        self,
        task_id: str,
        source_path: str,
        *,
        recording_id: str | None = 'recording-1',
        release_id: str | None = 'release-1',
        title: str = 'Song',
        duration_seconds: int = 201,
        confidence: float = 92.0,
        audio_props: dict | None = None
    ) -> dict:
        """Insert a task item that already carries a MusicBrainz match.

        ``source_path`` must point to an existing file under ``input_dir``; its
        size and relative path are read from disk. When ``audio_props`` is
        omitted (or empty) the baseline 16-bit FLAC properties are stored.
        Returns whatever ``TaskStore.insert_task_item`` returns.
        """
        path = Path(source_path)
        return self.task_store.insert_task_item(
            task_id,
            original_path=source_path,
            current_file_path=source_path,
            relative_path=path.relative_to(self.input_dir).as_posix(),
            filename=path.name,
            extension=path.suffix.lower(),
            size_bytes=path.stat().st_size,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            match_source='musicbrainz',
            match_confidence=confidence,
            match_is_authoritative=1,
            audio_props_json=audio_props or self._audio_props(duration_seconds=duration_seconds),
            matched_metadata_json={
                'title': title,
                'artist': 'Artist',
                'artists': ['Artist'],
                'album': 'Album',
                'album_artist': 'Artist',
                'track_number': 1,
                'disc_number': 1,
                'release_date': '2024-01-01',
                'year': 2024,
                'duration_seconds': duration_seconds,
                'recording_id': recording_id,
                'release_id': release_id,
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': recording_id} if recording_id else {},
            },
        )
|
|
|
|
|
|
class OrganizeRunnerTests(unittest.TestCase):
    """Run OrganizeRunner against a throwaway input/output/trash directory tree."""

    def setUp(self):
        # Resolved so the root agrees with the resolve()-based paths that the
        # tests store and compare against.
        self.root = Path(tempfile.mkdtemp()).resolve()
        # mkdtemp() leaves cleanup to the caller; register it immediately so the
        # tree is removed even when a later setUp step fails.
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)

        self.input_dir = self.root / 'input'
        self.output_dir = self.root / 'output'
        self.trash_dir = self.root / 'trash'
        for directory in (self.input_dir, self.output_dir, self.trash_dir):
            directory.mkdir()

        self.task_store = TaskStore(self.root / 'music_workshop.db')
        self.runner = OrganizeRunner(self.task_store, TaskStreamManager())

    def test_builds_single_disc_album_path(self):
        """A disc-1 album track lands at <bucket>/<artist>/<album>/<nn> - <title>."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['organize_status'], 'organized')
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song.flac')
        self.assertTrue(Path(item['library_file_path']).exists())

        log_types = {
            log['event_type']
            for log in self.task_store.list_task_logs(task['task_id'], 1, 50)['logs']
        }
        self.assertIn('organize.path_planned', log_types)
        self.assertIn('organize.item_organized', log_types)

    def test_places_multi_disc_release_under_disc_folder(self):
        """A disc-2 track is nested under a 'Disc 2' folder."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac', disc_number=2, track_number=7)
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/Disc 2/07 - Song.flac')

    def test_places_missing_album_track_under_singles(self):
        """A track without an album goes to Singles/<year> - <title>."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/source.flac', album=None, title='Loose Song', year=2023)
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Singles/2023 - Loose Song/01 - Loose Song.flac')

    def test_places_non_ascii_album_artist_under_hash_bucket(self):
        """A non-ASCII album artist is filed under the '#' bucket."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/source.flac', album_artist='周杰伦')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertTrue(item['library_relative_path'].startswith('#/周杰伦/Album/'))

    def test_resolves_target_collisions_with_suffix(self):
        """An occupied target path gets a ' (2)' suffix and is counted."""
        task = self._create_task()
        # Pre-create the exact path the runner would otherwise pick.
        target = self.output_dir / 'A' / 'Artist' / 'Album' / '01 - Song.flac'
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(b'existing')
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song (2).flac')
        self.assertEqual(stats['organize']['collision_resolved'], 1)

    def test_moves_failed_item_to_organize_trash(self):
        """When moving into the library fails, the item ends up in the trash."""
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()
        output_root = self.output_dir.resolve(strict=False)
        original_move = self.runner._move_file

        def failing_move(source: Path, destination: Path):
            # Reject only moves into the output tree; moves elsewhere (such as
            # into the trash directory) still use the real implementation.
            if output_root in destination.resolve(strict=False).parents:
                raise OSError('blocked')
            return original_move(source, destination)

        self.runner._move_file = failing_move

        self.runner.run(task['task_id'], stats, self._config())

        item = self._list_items(task['task_id'])[0]
        self.assertEqual(item['organize_status'], 'trashed')
        self.assertTrue(Path(item['trash_file_path']).exists())
        self.assertEqual(stats['organize']['failed_items'], 1)
        self.assertEqual(stats['organize']['trashed_items'], 1)

    def _create_task(self) -> dict:
        """Create a task in the store using the same config the runner receives."""
        return self.task_store.create_task_if_idle(self._config())

    def _config(self) -> dict:
        """Build the task config pointing at the per-test directories."""
        return {
            'input': str(self.input_dir),
            'output': str(self.output_dir),
            'trash': str(self.trash_dir),
        }

    def _list_items(self, task_id: str) -> list:
        """Return the first page (up to 10 rows) of the task's items."""
        return self.task_store.list_task_items(task_id, None, 1, 10)['items']

    def _insert_organize_item(
        self,
        task_id: str,
        relative_path: str,
        *,
        title: str = 'Song',
        album: str | None = 'Album',
        album_artist: str = 'Artist',
        track_number: int = 1,
        disc_number: int = 1,
        year: int = 2024
    ) -> dict:
        """Write a dummy source file and insert a matched, unique, pending item.

        The file is created under ``input_dir`` at ``relative_path``. The keyword
        arguments populate the matched metadata stored with the item. Returns
        whatever ``TaskStore.insert_task_item`` returns.
        """
        path = self.input_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio')
        resolved_path = str(path.resolve(strict=False))
        return self.task_store.insert_task_item(
            task_id,
            original_path=resolved_path,
            current_file_path=resolved_path,
            relative_path=relative_path,
            filename=path.name,
            extension=path.suffix.lower(),
            size_bytes=path.stat().st_size,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            dedupe_status='unique',
            organize_status='pending',
            matched_metadata_json={
                'title': title,
                'artist': album_artist,
                'artists': [album_artist],
                'album': album,
                'album_artist': album_artist,
                'track_number': track_number,
                'disc_number': disc_number,
                'release_date': f'{year}-01-01',
                'year': year,
                'duration_seconds': 201,
                'recording_id': 'recording-1',
                'release_id': 'release-1',
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': 'recording-1'},
            },
        )
|
|
|
|
|
|
class _NoopPreprocessor:
    """Preprocessor stand-in whose audio probe reports no properties."""

    def probe_audio(self, _file_path: str) -> dict:
        """Ignore the path and return a new, empty properties dict."""
        no_properties: dict = dict()
        return no_properties
|
|
|
|
|
|
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|