import math import os import struct import tempfile import unittest import wave from pathlib import Path from unittest.mock import patch from backend.app.matcher import MatchProviderError from backend.app.preprocessor import PreprocessDependencyError, Preprocessor from backend.app.scanner import Scanner from backend.app.task_runner import TaskRunner from backend.app.task_store import TaskStore from backend.app.task_stream import TaskStreamManager class TaskRunnerPreprocessTests(unittest.TestCase): def test_task_runner_completes_full_pipeline(self): root = Path(tempfile.mkdtemp()) input_dir = root / 'input' output_dir = root / 'output' trash_dir = root / 'trash' input_dir.mkdir() output_dir.mkdir() trash_dir.mkdir() source_file = input_dir / 'Artist' / 'Album' / '01.wav' source_file.parent.mkdir(parents=True) _write_wave_file(source_file, duration_seconds=8) stable_timestamp = source_file.stat().st_mtime - 120 os.utime(source_file, (stable_timestamp, stable_timestamp)) db_path = root / 'music_workshop.db' task_store = TaskStore(db_path) task_runner = TaskRunner( task_store, Scanner(), Preprocessor(), TaskStreamManager(), matcher=StaticMatcher() ) config_snapshot = { 'input': str(input_dir), 'output': str(output_dir), 'trash': str(trash_dir) } task = task_store.create_task_if_idle(config_snapshot) task_runner.start_task(task['task_id'], config_snapshot) persisted_task = task_store.get_task(task['task_id']) persisted_items = task_store.list_task_items(task['task_id'], None, 1, 20)['items'] self.assertEqual(persisted_task['status'], 'completed') self.assertEqual(persisted_task['current_stage'], 'complete') self.assertEqual(persisted_task['stage_states']['preprocess'], 'completed') self.assertEqual(persisted_task['stage_states']['match'], 'completed') self.assertEqual(persisted_task['stage_states']['dedupe'], 'completed') self.assertEqual(persisted_task['stage_states']['organize'], 'completed') self.assertEqual(persisted_task['stats']['scan']['queued'], 1) self.assertEqual(persisted_task['stats']['preprocess']['input_items'], 1) self.assertEqual(persisted_task['stats']['preprocess']['converted_items'], 1) self.assertEqual(persisted_task['stats']['preprocess']['output_items'], 1) self.assertEqual(persisted_task['stats']['match']['matched_authoritative'], 1) self.assertEqual(persisted_task['stats']['dedupe']['kept_items'], 1) self.assertEqual(persisted_task['stats']['organize']['moved_items'], 1) self.assertEqual(len(persisted_items), 1) item = persisted_items[0] self.assertEqual(item['preprocess_status'], 'completed') self.assertEqual(item['match_status'], 'matched') self.assertEqual(item['dedupe_status'], 'unique') self.assertEqual(item['organize_status'], 'organized') self.assertTrue(item['current_file_path'].endswith('.flac')) self.assertTrue(Path(item['current_file_path']).exists()) self.assertTrue(str(output_dir) in item['current_file_path']) self.assertEqual(item['audio_props_json']['codec'], 'FLAC') self.assertTrue(item['acoustic_fingerprint']) self.assertEqual(item['matched_metadata_json']['release_id'], 'release-1') self.assertEqual(item['library_relative_path'], 'M/Matched Artist/Matched Album/01 - Matched Song.flac') def test_task_runner_fails_fast_when_preprocess_dependencies_are_missing(self): root = Path(tempfile.mkdtemp()) input_dir = root / 'input' output_dir = root / 'output' trash_dir = root / 'trash' input_dir.mkdir() output_dir.mkdir() trash_dir.mkdir() source_file = input_dir / 'single.flac' source_file.write_bytes(b'not-real-audio') stable_timestamp = source_file.stat().st_mtime - 120 os.utime(source_file, (stable_timestamp, stable_timestamp)) db_path = root / 'music_workshop.db' task_store = TaskStore(db_path) preprocessor = Preprocessor() task_runner = TaskRunner( task_store, Scanner(), preprocessor, TaskStreamManager(), matcher=StaticMatcher() ) config_snapshot = { 'input': str(input_dir), 'output': str(output_dir), 'trash': str(trash_dir) } task = task_store.create_task_if_idle(config_snapshot) with patch.object( preprocessor, 'check_dependencies', side_effect=PreprocessDependencyError('missing preprocess dependencies') ): task_runner.start_task(task['task_id'], config_snapshot) persisted_task = task_store.get_task(task['task_id']) self.assertEqual(persisted_task['status'], 'failed') self.assertEqual(persisted_task['current_stage'], 'preprocess') self.assertEqual(persisted_task['stage_states']['preprocess'], 'failed') self.assertEqual( persisted_task['error_message'], 'missing preprocess dependencies' ) def test_task_runner_marks_match_item_failed_when_provider_errors(self): root = Path(tempfile.mkdtemp()) input_dir = root / 'input' output_dir = root / 'output' trash_dir = root / 'trash' input_dir.mkdir() output_dir.mkdir() trash_dir.mkdir() source_file = input_dir / 'Artist' / 'Album' / '01.wav' source_file.parent.mkdir(parents=True) _write_wave_file(source_file, duration_seconds=8) stable_timestamp = source_file.stat().st_mtime - 120 os.utime(source_file, (stable_timestamp, stable_timestamp)) db_path = root / 'music_workshop.db' task_store = TaskStore(db_path) task_runner = TaskRunner( task_store, Scanner(), Preprocessor(), TaskStreamManager(), matcher=ErrorMatcher() ) config_snapshot = { 'input': str(input_dir), 'output': str(output_dir), 'trash': str(trash_dir) } task = task_store.create_task_if_idle(config_snapshot) task_runner.start_task(task['task_id'], config_snapshot) persisted_task = task_store.get_task(task['task_id']) persisted_items = task_store.list_task_items(task['task_id'], None, 1, 20)['items'] self.assertEqual(persisted_task['status'], 'completed') self.assertEqual(persisted_task['stats']['match']['failed_items'], 1) self.assertEqual(persisted_items[0]['match_status'], 'failed') self.assertEqual(persisted_items[0]['match_reason'], 'provider_error') self.assertFalse(source_file.exists()) self.assertFalse(persisted_items[0]['is_active']) self.assertTrue(Path(persisted_items[0]['current_file_path']).exists()) self.assertTrue(Path(persisted_items[0]['trash_file_path']).exists()) self.assertIn('/trash/match_failed/', persisted_items[0]['current_file_path']) def test_task_runner_quarantines_historical_exception_before_scan(self): root = Path(tempfile.mkdtemp()) input_dir = root / 'input' output_dir = root / 'output' trash_dir = root / 'trash' input_dir.mkdir() output_dir.mkdir() trash_dir.mkdir() source_file = input_dir / 'failed.flac' source_file.write_bytes(b'audio') stable_timestamp = source_file.stat().st_mtime - 120 os.utime(source_file, (stable_timestamp, stable_timestamp)) db_path = root / 'music_workshop.db' task_store = TaskStore(db_path) config_snapshot = { 'input': str(input_dir), 'output': str(output_dir), 'trash': str(trash_dir) } previous_task = task_store.create_task_if_idle(config_snapshot) previous_item = task_store.insert_task_item( previous_task['task_id'], original_path=str(source_file), current_file_path=str(source_file), relative_path='failed.flac', filename='failed.flac', extension='.flac', size_bytes=source_file.stat().st_size, modified_at=None, local_cover=None, local_lyric=None, scan_status='queued', scan_reason=None, scan_message=None, preprocess_status='completed', match_status='failed', match_reason='provider_error', match_message='provider exploded' ) task_store.update_task(previous_task['task_id'], status='completed', current_stage='complete') task = task_store.create_task_if_idle(config_snapshot) task_runner = TaskRunner( task_store, Scanner(), Preprocessor(), TaskStreamManager(), matcher=StaticMatcher() ) task_runner.start_task(task['task_id'], config_snapshot) persisted_task = task_store.get_task(task['task_id']) quarantined_item = task_store.get_exception_source_item(previous_item['id']) self.assertEqual(persisted_task['status'], 'completed') self.assertEqual(persisted_task['stats']['scan']['total_found'], 0) self.assertEqual(persisted_task['stats']['scan']['queued'], 0) self.assertFalse(source_file.exists()) self.assertFalse(quarantined_item['is_active']) self.assertTrue(Path(quarantined_item['trash_file_path']).exists()) self.assertIn('/trash/match_failed/', quarantined_item['trash_file_path']) def test_task_runner_skips_exception_with_existing_trash_path(self): root = Path(tempfile.mkdtemp()) input_dir = root / 'input' output_dir = root / 'output' trash_dir = root / 'trash' input_dir.mkdir() output_dir.mkdir() trash_dir.mkdir() source_file = input_dir / 'duplicate.flac' source_file.write_bytes(b'audio') existing_trash = trash_dir / 'duplicates' / 'old-task' / '1_duplicate.flac' existing_trash.parent.mkdir(parents=True) existing_trash.write_bytes(b'audio') db_path = root / 'music_workshop.db' task_store = TaskStore(db_path) config_snapshot = { 'input': str(input_dir), 'output': str(output_dir), 'trash': str(trash_dir) } task = task_store.create_task_if_idle(config_snapshot) item = task_store.insert_task_item( task['task_id'], original_path=str(source_file), current_file_path=str(existing_trash), relative_path='duplicate.flac', filename='duplicate.flac', extension='.flac', size_bytes=source_file.stat().st_size, modified_at=None, local_cover=None, local_lyric=None, scan_status='queued', scan_reason=None, scan_message=None, preprocess_status='completed', match_status='matched', dedupe_status='duplicate_trashed', trash_file_path=str(existing_trash) ) task_runner = TaskRunner( task_store, Scanner(), Preprocessor(), TaskStreamManager(), matcher=StaticMatcher() ) task_runner._quarantine_exception_items(task['task_id'], config_snapshot, scope='current') persisted_item = task_store.get_exception_source_item(item['id']) self.assertTrue(source_file.exists()) self.assertTrue(existing_trash.exists()) self.assertEqual(persisted_item['trash_file_path'], str(existing_trash)) def test_task_runner_quarantines_converted_exception_and_original_source(self): root = Path(tempfile.mkdtemp()) input_dir = root / 'input' output_dir = root / 'output' trash_dir = root / 'trash' workspace_dir = root / 'workspace' input_dir.mkdir() output_dir.mkdir() trash_dir.mkdir() workspace_dir.mkdir() source_file = input_dir / 'source.wav' converted_file = workspace_dir / 'source.flac' source_file.write_bytes(b'wav') converted_file.write_bytes(b'flac') db_path = root / 'music_workshop.db' task_store = TaskStore(db_path) config_snapshot = { 'input': str(input_dir), 'output': str(output_dir), 'trash': str(trash_dir) } task = task_store.create_task_if_idle(config_snapshot) item = task_store.insert_task_item( task['task_id'], original_path=str(source_file), current_file_path=str(converted_file), relative_path='source.wav', filename='source.flac', extension='.flac', size_bytes=converted_file.stat().st_size, modified_at=None, local_cover=None, local_lyric=None, scan_status='queued', scan_reason=None, scan_message=None, preprocess_status='completed', match_status='low_score', match_reason='score_below_threshold', match_message='候选分数不足' ) task_runner = TaskRunner( task_store, Scanner(), Preprocessor(), TaskStreamManager(), matcher=StaticMatcher() ) task_runner._quarantine_exception_items(task['task_id'], config_snapshot, scope='current') quarantined_item = task_store.get_exception_source_item(item['id']) moved_paths = sorted((trash_dir / 'low_score' / task['task_id']).glob('*')) self.assertFalse(source_file.exists()) self.assertFalse(converted_file.exists()) self.assertEqual(len(moved_paths), 2) self.assertFalse(quarantined_item['is_active']) self.assertTrue(Path(quarantined_item['current_file_path']).exists()) self.assertTrue(Path(quarantined_item['trash_file_path']).exists()) self.assertEqual( Path(quarantined_item['current_file_path']).name, f'{item["id"]}_source.flac' ) def test_task_runner_logs_provider_warnings_and_continues_matching(self): root = Path(tempfile.mkdtemp()) input_dir = root / 'input' output_dir = root / 'output' trash_dir = root / 'trash' input_dir.mkdir() output_dir.mkdir() trash_dir.mkdir() source_file = input_dir / 'Artist' / 'Album' / '01.wav' source_file.parent.mkdir(parents=True) _write_wave_file(source_file, duration_seconds=8) stable_timestamp = source_file.stat().st_mtime - 120 os.utime(source_file, (stable_timestamp, stable_timestamp)) db_path = root / 'music_workshop.db' task_store = TaskStore(db_path) task_runner = TaskRunner( task_store, Scanner(), Preprocessor(), TaskStreamManager(), matcher=WarningMatcher() ) config_snapshot = { 'input': str(input_dir), 'output': str(output_dir), 'trash': str(trash_dir) } task = task_store.create_task_if_idle(config_snapshot) task_runner.start_task(task['task_id'], config_snapshot) persisted_task = task_store.get_task(task['task_id']) persisted_items = task_store.list_task_items(task['task_id'], None, 1, 20)['items'] persisted_logs = task_store.list_task_logs(task['task_id'], 1, 50)['logs'] self.assertEqual(persisted_task['status'], 'completed') self.assertEqual(persisted_task['stats']['match']['matched_fallback'], 1) self.assertEqual(persisted_task['stats']['match']['provider_warnings'], 2) self.assertEqual(persisted_task['stats']['match']['failed_items'], 0) self.assertEqual(persisted_items[0]['match_status'], 'matched_fallback') self.assertEqual( [log['event_type'] for log in persisted_logs if log['event_type'] == 'match.provider_skipped'], ['match.provider_skipped', 'match.provider_skipped'] ) class StaticMatcher: def match_item(self, item, album_group, config): return { 'status': 'matched', 'reason': 'authoritative_auto_match', 'message': '静态测试匹配成功', 'source': 'musicbrainz', 'confidence': 93.5, 'is_authoritative': True, 'matched_metadata_json': { 'title': 'Matched Song', 'artist': 'Matched Artist', 'artists': ['Matched Artist'], 'album': 'Matched Album', 'album_artist': 'Matched Artist', 'track_number': 1, 'disc_number': 1, 'release_date': '2024-01-01', 'year': 2024, 'duration_seconds': 8.0, 'recording_id': 'recording-1', 'release_id': 'release-1', 'release_group_id': 'release-group-1', 'source_ids': {'musicbrainz_recording_id': 'recording-1'} }, 'match_candidates_json': [ { 'provider': 'musicbrainz', 'score': 93.5, 'score_breakdown': {'title': 20}, 'is_authoritative': True, 'recording_id': 'recording-1', 'release_id': 'release-1', 'release_group_id': 'release-group-1', 'source_ids': {'musicbrainz_recording_id': 'recording-1'} } ], 'match_enrichment_json': { 'cover': {'selected_source': None, 'candidates': []}, 'lyrics': {'selected_source': None, 'candidates': []}, 'genres': {'selected_source': None, 'candidates': []}, 'tags': {'selected_source': None, 'candidates': []} }, 'provider_warnings': [] } class ErrorMatcher: def match_item(self, item, album_group, config): raise MatchProviderError('musicbrainz', 'provider exploded') class WarningMatcher: def match_item(self, item, album_group, config): return { 'status': 'matched_fallback', 'reason': 'fallback_auto_match', 'message': 'Fallback 候选自动匹配成功,得分 88.0', 'source': 'qq', 'confidence': 88.0, 'is_authoritative': False, 'matched_metadata_json': { 'title': 'Matched Song', 'artist': 'Matched Artist', 'artists': ['Matched Artist'], 'album': 'Matched Album', 'album_artist': 'Matched Artist', 'track_number': 1, 'disc_number': 1, 'release_date': '2024-01-01', 'year': 2024, 'duration_seconds': 8.0, 'recording_id': None, 'release_id': None, 'release_group_id': None, 'source_ids': {'qq_song_mid': 'song-1'} }, 'match_candidates_json': [ { 'provider': 'qq', 'score': 88.0, 'score_breakdown': {'title': 20}, 'is_authoritative': False, 'recording_id': None, 'release_id': None, 'release_group_id': None, 'source_ids': {'qq_song_mid': 'song-1'} } ], 'match_enrichment_json': { 'cover': {'selected_source': None, 'candidates': []}, 'lyrics': {'selected_source': None, 'candidates': []}, 'genres': {'selected_source': None, 'candidates': []}, 'tags': {'selected_source': None, 'candidates': []} }, 'provider_warnings': [ { 'provider': 'acoustid', 'message': 'acoustid 请求失败 (HTTP 400) {"error":{"code":4,"message":"invalid API key"}}' }, { 'provider': 'spotify', 'message': 'spotify 请求失败 (HTTP 403) Active premium subscription required for the owner of the app.' } ] } def _write_wave_file(path: Path, *, duration_seconds: int): sample_rate = 44100 frequency = 440.0 amplitude = 16000 total_frames = sample_rate * duration_seconds with wave.open(str(path), 'wb') as handle: handle.setnchannels(1) handle.setsampwidth(2) handle.setframerate(sample_rate) frames = bytearray() for index in range(total_frames): sample = int(amplitude * math.sin((2.0 * math.pi * frequency * index) / sample_rate)) frames.extend(struct.pack('