Add MusicWorkshop application

This commit is contained in:
liumangmang
2026-04-30 14:34:28 +08:00
parent 4cb403c956
commit 796f19990f
62 changed files with 21614 additions and 2168 deletions
+97
View File
@@ -0,0 +1,97 @@
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
os.environ['MUSIC_WORKSHOP_DB_PATH'] = str(
Path(tempfile.gettempdir()) / f'music_workshop_test_{next(tempfile._get_candidate_names())}.db'
)
import backend.app.main as main_module
import backend.app.metadata_status as metadata_status_module
from backend.app.schemas import ConfigPayload
from backend.app.storage import ConfigStore
class ConfigApiTests(unittest.TestCase):
    """Exercise the config endpoints of ``main_module`` against a throwaway ``ConfigStore``."""

    def setUp(self):
        # Start from a missing database file so each test sees a freshly
        # created store instead of state written by an earlier test.
        self.db_path = Path(os.environ['MUSIC_WORKSHOP_DB_PATH'])
        self.db_path.unlink(missing_ok=True)
        self.store = ConfigStore(self.db_path)
        # Swap the module-level store; tearDown puts the original back.
        self.previous_store = main_module.store
        main_module.store = self.store

    def tearDown(self):
        # Restore the global first so it is reinstated even if cleanup fails.
        main_module.store = self.previous_store
        try:
            # Remove the temporary database so test runs do not pile up
            # files in the system temp directory.
            self.db_path.unlink(missing_ok=True)
        except OSError:
            # Best-effort only (e.g. the file may still be held open on some
            # platforms); the next setUp attempts the removal again.
            pass

    def test_get_config_returns_defaults(self):
        """``get_config`` on an empty store exposes the expected default values."""
        data = main_module.get_config()
        self.assertIn('advancedStrategy', data)
        self.assertNotIn('metadataStatus', data)
        self.assertEqual(data['schedule']['cron'], '0 2 * * *')
        self.assertEqual(data['metadata']['acoustidUrl'], 'https://api.acoustid.org/v2')

    def test_put_config_persists_changes(self):
        """``update_config`` stores the payload and returns the (stubbed) probe result."""
        payload = self.store.get_config()
        payload['input'] = '/tmp/input'
        payload['advancedStrategy']['replaceLowQualityDuplicates'] = True
        expected_statuses = build_metadata_statuses()
        # The probe is stubbed so the test never performs network I/O.
        with patch.object(main_module, 'probe_metadata_services', return_value=expected_statuses):
            saved_payload = main_module.update_config(ConfigPayload.model_validate(payload))
        read_payload = self.store.get_config()
        self.assertEqual(saved_payload['config']['input'], '/tmp/input')
        self.assertEqual(read_payload['input'], '/tmp/input')
        self.assertTrue(read_payload['advancedStrategy']['replaceLowQualityDuplicates'])
        self.assertEqual(saved_payload['metadataStatus'], expected_statuses)

    def test_get_metadata_status_returns_probe_results(self):
        """The status endpoint wraps the probe result and probes the stored metadata config."""
        expected_statuses = build_metadata_statuses()
        with patch.object(main_module, 'probe_metadata_services', return_value=expected_statuses) as probe:
            response = main_module.get_config_metadata_status()
        self.assertEqual(response, {'metadataStatus': expected_statuses})
        probe.assert_called_once_with(self.store.get_config()['metadata'])

    def test_probe_metadata_services_skips_missing_credentials(self):
        """With the default metadata config only three providers are probed.

        ``probe_url`` is stubbed to always report ``online``; the remaining
        providers are expected to come back with status ``none``.
        """
        payload = self.store.get_config()
        with patch.object(
            metadata_status_module,
            'probe_url',
            return_value={'status': 'online', 'latencyMs': 100, 'message': '可达 (HTTP 200)'}
        ) as probe_url:
            statuses = metadata_status_module.probe_metadata_services(payload['metadata'])
        self.assertEqual(statuses['acoustid']['status'], 'none')
        self.assertEqual(statuses['musicbrainz']['status'], 'online')
        self.assertEqual(statuses['netease']['status'], 'online')
        self.assertEqual(statuses['qq']['status'], 'online')
        self.assertEqual(statuses['spotify']['status'], 'none')
        self.assertEqual(statuses['discogs']['status'], 'none')
        self.assertEqual(statuses['lastfm']['status'], 'none')
        self.assertEqual(statuses['genius']['status'], 'none')
        self.assertEqual(probe_url.call_count, 3)
def build_metadata_statuses():
    """Return a canned provider -> status mapping used as a stubbed probe result.

    Each call produces brand-new dictionaries, so callers may mutate the
    result without affecting later calls.
    """
    def entry(status, latency_ms, message):
        # Keys are emitted in the same order for every provider.
        return {'status': status, 'latencyMs': latency_ms, 'message': message}

    def skipped():
        return entry('none', None, '缺失凭据,跳过测试')

    statuses = {}
    statuses['acoustid'] = skipped()
    statuses['musicbrainz'] = entry('online', 123, '可达 (HTTP 200)')
    statuses['netease'] = entry('online', 42, '可达 (HTTP 200)')
    statuses['qq'] = entry('warning', 680, '高延迟 (HTTP 200)')
    for provider in ('spotify', 'discogs', 'lastfm', 'genius'):
        statuses[provider] = skipped()
    return statuses
if __name__ == '__main__':
unittest.main()
+216
View File
@@ -0,0 +1,216 @@
import os
import tempfile
import unittest
from pathlib import Path
try:
from fastapi.testclient import TestClient
except ModuleNotFoundError:
TestClient = None
os.environ['MUSIC_WORKSHOP_DB_PATH'] = str(
Path(tempfile.gettempdir()) / f'music_workshop_exception_api_{next(tempfile._get_candidate_names())}.db'
)
try:
from backend.app.exception_service import ExceptionItemNotFoundError
from backend.app.schemas import (
ExceptionDetailPayload,
ExceptionListResponse,
ExceptionSummaryPayload
)
import backend.app.main as main_module
except ModuleNotFoundError as error:
main_module = None
ExceptionItemNotFoundError = None
ExceptionDetailPayload = None
ExceptionListResponse = None
ExceptionSummaryPayload = None
FASTAPI_IMPORT_ERROR = error
else:
FASTAPI_IMPORT_ERROR = None
@unittest.skipIf(main_module is None, f'api deps unavailable: {FASTAPI_IMPORT_ERROR}')
class ExceptionApiTests(unittest.TestCase):
    """Drive the exception endpoints of ``main_module`` against a recording fake service."""

    def setUp(self):
        # Replace the module-level service with a fake that records its calls.
        self.previous_service = main_module.exception_service
        self.fake_service = _FakeExceptionService()
        main_module.exception_service = self.fake_service
        # TestClient is None when fastapi's test client could not be imported.
        self.client = TestClient(main_module.app) if TestClient else None

    def tearDown(self):
        main_module.exception_service = self.previous_service

    def test_get_exception_summary_serializes_payload(self):
        """The summary response validates against ``ExceptionSummaryPayload``."""
        response = main_module.get_exception_summary()
        payload = ExceptionSummaryPayload.model_validate(response)
        self.assertEqual(payload.total, 6)
        self.assertEqual(payload.counts_by_type['duplicates'], 2)
        self.assertEqual(self.fake_service.summary_calls, 1)

    def test_get_exception_items_passes_filters_and_pagination(self):
        """Filters and paging arguments reach the service unchanged."""
        response = main_module.get_exception_items(
            type='duplicates',
            resolution_status='resolved',
            page=2,
            page_size=25
        )
        payload = ExceptionListResponse.model_validate(response)
        self.assertEqual(payload.page, 2)
        self.assertEqual(payload.page_size, 25)
        self.assertEqual(payload.total, 1)
        self.assertEqual(payload.items[0].exception_id, 101)
        self.assertEqual(payload.items[0].exception_type, 'duplicates')
        self.assertEqual(
            self.fake_service.list_calls,
            [{'type': 'duplicates', 'resolution_status': 'resolved', 'page': 2, 'page_size': 25}]
        )

    def test_get_exception_item_serializes_detail_payload(self):
        """The detail response validates against ``ExceptionDetailPayload``."""
        response = main_module.get_exception_item(101)
        payload = ExceptionDetailPayload.model_validate(response)
        self.assertEqual(payload.exception_id, 101)
        self.assertEqual(payload.filename, 'duplicate.flac')
        self.assertEqual(payload.dedupe_decision_json['comparison_scope'], 'library')
        self.assertEqual(self.fake_service.detail_calls, [101])

    def test_get_exception_item_not_found_raises_service_error(self):
        """An unknown id raises the service error, which the handler maps to 404."""
        with self.assertRaises(ExceptionItemNotFoundError):
            main_module.get_exception_item(999)
        response = main_module.exception_item_not_found_error_handler(
            None,
            ExceptionItemNotFoundError(999)
        )
        self.assertEqual(response.status_code, 404)

    def test_streams_exception_audio_with_range_support(self):
        """The audio endpoint serves the whole file and honours a byte range."""
        if self.client is None:
            self.skipTest('fastapi test client unavailable')
        # A private temporary directory gives a unique location through the
        # public tempfile API and is removed when the block exits, pass or fail.
        with tempfile.TemporaryDirectory() as temp_dir:
            audio_path = Path(temp_dir) / 'exception-audio.mp3'
            audio_path.write_bytes(b'0123456789abcdef')
            self.fake_service.audio_path = audio_path
            full_response = self.client.get('/api/exceptions/items/101/audio')
            self.assertEqual(full_response.status_code, 200)
            self.assertEqual(full_response.content, b'0123456789abcdef')
            self.assertEqual(full_response.headers['accept-ranges'], 'bytes')
            range_response = self.client.get(
                '/api/exceptions/items/101/audio',
                headers={'Range': 'bytes=4-7'}
            )
            self.assertEqual(range_response.status_code, 206)
            self.assertEqual(range_response.content, b'4567')
            self.assertEqual(range_response.headers['content-range'], 'bytes 4-7/16')
class _FakeExceptionService:
    """Recording stand-in for the exception service.

    Only exception id 101 exists; every call is logged on the instance so
    tests can assert which arguments were forwarded.
    """

    def __init__(self):
        self.audio_path: Path | None = None
        self.detail_calls: list[int] = []
        self.list_calls: list[dict] = []
        self.summary_calls = 0

    def get_summary(self) -> dict:
        """Count the call and return a fixed summary of six exceptions."""
        self.summary_calls += 1
        per_type = {
            'missing_tags': 1,
            'duplicates': 2,
            'match_failed': 1,
            'low_score': 1,
            'convert_failed': 0,
            'organize_failed': 1
        }
        return {'total': 6, 'counts_by_type': per_type, 'scanned_at': '2024-01-03T12:00:00Z'}

    def get_items(
        self,
        exception_type: str = 'all',
        page: int = 1,
        page_size: int = 50,
        resolution_status: str = 'open'
    ) -> dict:
        """Log the filter/paging arguments and return a one-item page echoing them."""
        recorded_call = {
            'type': exception_type,
            'resolution_status': resolution_status,
            'page': page,
            'page_size': page_size
        }
        self.list_calls.append(recorded_call)
        only_item = self._detail_payload()
        return {'items': [only_item], 'page': page, 'page_size': page_size, 'total': 1}

    def get_item(self, exception_id: int) -> dict:
        """Log the lookup; return the canned detail for 101, raise not-found otherwise."""
        self.detail_calls.append(exception_id)
        if exception_id == 101:
            return self._detail_payload()
        raise ExceptionItemNotFoundError(exception_id)

    def resolve_audio_path(self, exception_id: int) -> Path:
        """Log the lookup; return ``audio_path`` when it is set and the id is 101."""
        self.detail_calls.append(exception_id)
        is_playable = exception_id == 101 and self.audio_path is not None
        if not is_playable:
            raise FileNotFoundError(f'No playable audio found for exception item: {exception_id}')
        return self.audio_path

    def _detail_payload(self) -> dict:
        """Build a fresh copy of the canned detail record for exception 101."""
        duplicate_notice = '输出库中已存在重复文件,保留库内文件'
        trashed_copy = '/tmp/trash/duplicate.flac'
        return {
            'exception_id': 101,
            'task_id': 'task-123',
            'task_started_at': '2024-01-01T08:00:00Z',
            'exception_type': 'duplicates',
            'exception_stage': 'dedupe',
            'exception_reason_code': 'library_duplicate',
            'exception_message': duplicate_notice,
            'captured_at': '2024-01-03T12:00:00Z',
            'filename': 'duplicate.flac',
            'relative_path': 'Artist/Album/duplicate.flac',
            'original_path': '/tmp/input/duplicate.flac',
            'current_file_path': trashed_copy,
            'trash_file_path': trashed_copy,
            'audio_props_json': {'codec': 'FLAC'},
            'original_tags_json': {'title': 'Song'},
            'matched_metadata_json': {'title': 'Song'},
            'duplicate_of_path': '/tmp/output/Artist/Old.flac',
            'dedupe_decision_json': {'comparison_scope': 'library'},
            'library_relative_path': None,
            'library_file_path': None,
            'match_source': 'musicbrainz',
            'match_confidence': 91.2,
            'preview_available': False,
            'available_actions': [],
            'exception_resolution_status': 'open',
            'exception_resolution_json': None,
            'workflow_state': 'open',
            'raw_metadata': {'title': 'Song'},
            'metadata_draft': {'title': 'Song'},
            'effective_metadata': {'title': 'Song', 'artist': 'Artist', 'album_artist': 'Artist'},
            'can_ingest': True,
            'pending_ingest': False,
            'display_title': 'Song',
            'display_reason': duplicate_notice,
            'type_label': '文件重复',
            'preprocess_artifacts_json': None,
            'match_candidates_json': None,
            'match_enrichment_json': None,
            'organize_decision_json': None
        }
if __name__ == '__main__':
unittest.main()
+440
View File
@@ -0,0 +1,440 @@
import os
import tempfile
import unittest
from pathlib import Path
os.environ['MUSIC_WORKSHOP_DB_PATH'] = str(
Path(tempfile.gettempdir()) / f'music_workshop_exception_service_{next(tempfile._get_candidate_names())}.db'
)
from backend.app.exception_service import ExceptionItemNotFoundError, ExceptionService
from backend.app.task_store import TaskStore
class ExceptionServiceTests(unittest.TestCase):
    """Exercise ``ExceptionService`` against a real ``TaskStore`` on a temporary database."""

    def setUp(self):
        # Start from a missing database file so each test sees an empty store.
        self.db_path = Path(os.environ['MUSIC_WORKSHOP_DB_PATH'])
        self.db_path.unlink(missing_ok=True)
        self.task_store = TaskStore(self.db_path)
        self.service = ExceptionService(self.task_store)
        # Every inserted item in these tests is attached to this single task.
        self.task = self.task_store.create_task_if_idle(
            {
                'input': '/tmp/input',
                'output': '/tmp/output',
                'trash': '/tmp/trash'
            }
        )

    def tearDown(self):
        try:
            # Remove the temporary database so test runs do not pile up
            # files in the system temp directory.
            self.db_path.unlink(missing_ok=True)
        except OSError:
            # Best-effort only (e.g. the file may still be held open on some
            # platforms); the next setUp attempts the removal again.
            pass

    def test_empty_summary_list_and_detail_not_found(self):
        """An empty store yields zero counts, an empty page and a not-found error."""
        summary = self.service.get_summary()
        self.assertEqual(summary['total'], 0)
        self.assertEqual(
            summary['counts_by_type'],
            {
                'missing_tags': 0,
                'duplicates': 0,
                'match_failed': 0,
                'low_score': 0,
                'convert_failed': 0,
                'organize_failed': 0
            }
        )
        page = self.service.get_items()
        self.assertEqual(page['items'], [])
        self.assertEqual(page['total'], 0)
        with self.assertRaises(ExceptionItemNotFoundError):
            self.service.get_item(9999)

    def test_maps_exception_types_and_applies_priority(self):
        """Stage statuses map to exception types; an item failing several stages gets one type."""
        missing_tags = self._insert_item(
            filename='missing-tags.flac',
            preprocess_status='warning',
            preprocess_reason='cover_missing,metadata_failed',
            preprocess_message='无法提取有效元数据'
        )
        low_score = self._insert_item(
            filename='low-score.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Likely Match'}
        )
        match_failed = self._insert_item(
            filename='match-failed.flac',
            match_status='not_found',
            match_reason='no_candidate',
            match_message='MusicBrainz 查无此曲'
        )
        convert_failed = self._insert_item(
            filename='convert-failed.flac',
            preprocess_status='failed',
            preprocess_reason='convert_failed',
            preprocess_message='音频转码失败'
        )
        duplicate = self._insert_item(
            filename='duplicate.flac',
            dedupe_status='duplicate_trashed',
            dedupe_reason='library_duplicate',
            dedupe_message='输出库中已存在重复文件,保留库内文件',
            duplicate_of_path='/tmp/output/Artist/Old.flac',
            dedupe_decision_json={
                'comparison_scope': 'library',
                'identity_basis': 'recording_id',
                'compared_candidates': [
                    {'side': 'kept', 'path': '/tmp/output/Artist/Old.flac'},
                    {'side': 'trashed', 'path': '/tmp/input/duplicate.flac'}
                ]
            },
            trash_file_path='/tmp/trash/duplicates/task-1/duplicate.flac'
        )
        organize_failed = self._insert_item(
            filename='organize-failed.flac',
            organize_status='failed',
            organize_reason='target_conflict',
            organize_message='整理入库失败'
        )
        # Problems in every stage at once: the assertions below expect the
        # organize stage to determine the reported type and reason code.
        priority_item = self._insert_item(
            filename='priority.flac',
            preprocess_status='failed',
            preprocess_reason='convert_failed',
            preprocess_message='音频转码失败',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            dedupe_status='failed',
            dedupe_reason='trash_move_failed',
            dedupe_message='重复检测失败',
            organize_status='trashed',
            organize_reason='manual_review',
            organize_message='已移入回收站等待人工处理'
        )
        summary = self.service.get_summary()
        self.assertEqual(summary['total'], 7)
        self.assertEqual(summary['counts_by_type']['missing_tags'], 1)
        self.assertEqual(summary['counts_by_type']['duplicates'], 1)
        self.assertEqual(summary['counts_by_type']['match_failed'], 1)
        self.assertEqual(summary['counts_by_type']['low_score'], 1)
        self.assertEqual(summary['counts_by_type']['convert_failed'], 1)
        self.assertEqual(summary['counts_by_type']['organize_failed'], 2)
        items = self.service.get_items(page_size=20)['items']
        indexed = {item['filename']: item for item in items}
        self.assertEqual(indexed['missing-tags.flac']['exception_type'], 'missing_tags')
        self.assertEqual(
            indexed['missing-tags.flac']['available_actions'],
            ['retry_match', 'edit_metadata', 'save_and_organize', 'ignore_exception', 'delete_file']
        )
        self.assertEqual(indexed['low-score.flac']['display_title'], 'Likely Match')
        self.assertEqual(indexed['low-score.flac']['exception_type'], 'low_score')
        self.assertFalse(indexed['low-score.flac']['can_ingest'])
        self.assertEqual(indexed['low-score.flac']['workflow_state'], 'open')
        self.assertEqual(indexed['match-failed.flac']['exception_type'], 'match_failed')
        self.assertEqual(indexed['convert-failed.flac']['exception_type'], 'convert_failed')
        self.assertEqual(indexed['duplicate.flac']['exception_type'], 'duplicates')
        self.assertEqual(indexed['organize-failed.flac']['exception_type'], 'organize_failed')
        self.assertEqual(indexed['priority.flac']['exception_type'], 'organize_failed')
        self.assertEqual(indexed['priority.flac']['exception_stage'], 'organize')
        self.assertEqual(indexed['priority.flac']['exception_reason_code'], 'manual_review')
        self.assertEqual(
            indexed['priority.flac']['available_actions'],
            ['edit_target_path', 'move_to_review_trash', 'ignore_exception', 'delete_file']
        )
        self.assertEqual(missing_tags['id'], indexed['missing-tags.flac']['exception_id'])
        self.assertEqual(low_score['id'], indexed['low-score.flac']['exception_id'])
        self.assertEqual(match_failed['id'], indexed['match-failed.flac']['exception_id'])
        self.assertEqual(convert_failed['id'], indexed['convert-failed.flac']['exception_id'])
        self.assertEqual(duplicate['id'], indexed['duplicate.flac']['exception_id'])
        self.assertEqual(organize_failed['id'], indexed['organize-failed.flac']['exception_id'])
        self.assertEqual(priority_item['id'], indexed['priority.flac']['exception_id'])

    def test_duplicate_detail_preserves_comparison_data(self):
        """The detail view of a duplicate keeps the stored dedupe decision and paths."""
        duplicate = self._insert_item(
            filename='duplicate.flac',
            dedupe_status='duplicate_trashed',
            dedupe_reason='library_duplicate',
            dedupe_message='输出库中已存在重复文件,保留库内文件',
            duplicate_of_path='/tmp/output/Artist/Old.flac',
            dedupe_decision_json={
                'comparison_scope': 'library',
                'identity_basis': 'recording_id',
                'quality_breakdown': {
                    'kept': {'total': 88.0},
                    'trashed': {'total': 72.0}
                },
                'compared_candidates': [
                    {'side': 'kept', 'path': '/tmp/output/Artist/Old.flac', 'quality_score': 88.0},
                    {'side': 'trashed', 'path': '/tmp/input/duplicate.flac', 'quality_score': 72.0}
                ]
            },
            trash_file_path='/tmp/trash/duplicates/task-1/duplicate.flac'
        )
        detail = self.service.get_item(duplicate['id'])
        self.assertEqual(detail['exception_type'], 'duplicates')
        self.assertEqual(detail['duplicate_of_path'], '/tmp/output/Artist/Old.flac')
        self.assertEqual(detail['trash_file_path'], '/tmp/trash/duplicates/task-1/duplicate.flac')
        self.assertEqual(detail['dedupe_decision_json']['comparison_scope'], 'library')
        self.assertEqual(len(detail['dedupe_decision_json']['compared_candidates']), 2)
        self.assertFalse(detail['preview_available'])

    def test_resolution_filter_hides_resolved_by_default(self):
        """The default listing returns only open items; ``resolved`` must be requested."""
        resolved_item = self._insert_item(
            filename='resolved.flac',
            match_status='not_found',
            match_reason='no_candidate',
            match_message='未找到匹配',
            exception_resolution_status='resolved',
            exception_resolution_json={
                'before_snapshot': {
                    'exception_type': 'match_failed',
                    'exception_stage': 'match',
                    'exception_reason_code': 'no_candidate',
                    'exception_message': '未找到匹配'
                }
            }
        )
        open_item = self._insert_item(
            filename='open.flac',
            match_status='failed',
            match_reason='provider_error',
            match_message='匹配失败'
        )
        open_page = self.service.get_items()
        resolved_page = self.service.get_items(resolution_status='resolved')
        self.assertEqual([item['exception_id'] for item in open_page['items']], [open_item['id']])
        self.assertEqual([item['exception_id'] for item in resolved_page['items']], [resolved_item['id']])

    def test_candidate_selected_item_remains_open_and_pending_ingest(self):
        """An item with a manually selected candidate stays listed and is ready to ingest."""
        item = self._insert_item(
            filename='candidate-selected.flac',
            match_status='matched_fallback',
            match_reason='manual_candidate_selected',
            match_message='已手动确认匹配候选',
            matched_metadata_json={'title': 'Song', 'artist': 'Artist', 'album_artist': 'Artist'},
            exception_resolution_json={
                'workflow_state': 'candidate_selected',
                'metadata_draft': {'title': 'Song', 'artist': 'Artist', 'album_artist': 'Artist'},
                'before_snapshot': {
                    'exception_type': 'low_score',
                    'exception_stage': 'match',
                    'exception_reason_code': 'score_gap_too_small',
                    'exception_message': '匹配候选分数过低'
                }
            }
        )
        open_page = self.service.get_items()
        indexed = {row['filename']: row for row in open_page['items']}
        detail = indexed['candidate-selected.flac']
        self.assertEqual(item['id'], detail['exception_id'])
        self.assertEqual(detail['workflow_state'], 'ready_to_ingest')
        self.assertTrue(detail['pending_ingest'])
        self.assertTrue(detail['can_ingest'])
        self.assertEqual(detail['exception_type'], 'low_score')
        self.assertIn('save_and_organize', detail['available_actions'])

    def test_effective_metadata_derives_album_artist_for_ingest(self):
        """A missing album artist is derived, which makes the item ingestable."""
        item = self._insert_item(
            filename='derived-album-artist.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={
                'title': 'Song',
                'artist': 'Artist A feat. Guest',
                'album': 'Album X'
            }
        )
        # Second track on the same album, credited to the main artist only.
        self._insert_item(
            filename='derived-album-artist-2.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={
                'title': 'Song 2',
                'artist': 'Artist A',
                'album': 'Album X'
            }
        )
        detail = self.service.get_item(item['id'])
        self.assertEqual(detail['effective_metadata']['album_artist'], 'Artist A')
        self.assertEqual(detail['normalization_strategy'], 'main_artist_feat')
        self.assertTrue(detail['can_ingest'])
        self.assertEqual(detail['workflow_state'], 'ready_to_ingest')

    def test_filters_and_paginates_by_captured_at_desc(self):
        """Items are paged newest-first and can be filtered by exception type."""
        older_duplicate = self._insert_item(
            filename='older-duplicate.flac',
            dedupe_status='duplicate_trashed',
            dedupe_reason='library_duplicate',
            dedupe_message='重复文件'
        )
        newest_match_failed = self._insert_item(
            filename='newest-match-failed.flac',
            match_status='failed',
            match_reason='provider_error',
            match_message='匹配服务请求失败'
        )
        middle_convert_failed = self._insert_item(
            filename='middle-convert-failed.flac',
            preprocess_status='failed',
            preprocess_reason='convert_failed',
            preprocess_message='音频转码失败'
        )
        # Force a known ordering that differs from insertion order.
        self._set_updated_at(older_duplicate['id'], '2024-01-01T00:00:00Z')
        self._set_updated_at(middle_convert_failed['id'], '2024-01-02T00:00:00Z')
        self._set_updated_at(newest_match_failed['id'], '2024-01-03T00:00:00Z')
        first_page = self.service.get_items(page=1, page_size=2)
        second_page = self.service.get_items(page=2, page_size=2)
        duplicate_page = self.service.get_items('duplicates', page=1, page_size=10)
        self.assertEqual(first_page['total'], 3)
        self.assertEqual(
            [item['filename'] for item in first_page['items']],
            ['newest-match-failed.flac', 'middle-convert-failed.flac']
        )
        self.assertEqual([item['filename'] for item in second_page['items']], ['older-duplicate.flac'])
        self.assertEqual(duplicate_page['total'], 1)
        self.assertEqual(duplicate_page['items'][0]['filename'], 'older-duplicate.flac')

    def test_summary_counts_without_triggering_metadata_normalization(self):
        """``get_summary`` must produce its counts without calling the normalizer."""
        self._insert_item(
            filename='low-score.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Song'}
        )
        self._insert_item(
            filename='match-failed.flac',
            match_status='failed',
            match_reason='provider_error',
            match_message='匹配服务请求失败'
        )

        def fail_normalize(*args, **kwargs):
            raise AssertionError('get_summary should not normalize metadata')

        # Any normalization attempt now fails the test immediately.
        self.service.metadata_normalizer.normalize_item = fail_normalize
        summary = self.service.get_summary()
        self.assertEqual(summary['total'], 2)
        self.assertEqual(summary['counts_by_type']['low_score'], 1)
        self.assertEqual(summary['counts_by_type']['match_failed'], 1)

    def test_get_items_only_normalizes_current_page(self):
        """Only the items on the requested page are passed to the normalizer."""
        first_item = self._insert_item(
            filename='page-1.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Page 1', 'artist': 'Artist A', 'album': 'Album X'}
        )
        second_item = self._insert_item(
            filename='page-2.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Page 2', 'artist': 'Artist A', 'album': 'Album X'}
        )
        third_item = self._insert_item(
            filename='page-3.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Page 3', 'artist': 'Artist A', 'album': 'Album X'}
        )
        self._set_updated_at(first_item['id'], '2030-01-03T00:00:00Z')
        self._set_updated_at(second_item['id'], '2030-01-02T00:00:00Z')
        self._set_updated_at(third_item['id'], '2030-01-01T00:00:00Z')
        calls = []
        original_normalize_item = self.service.metadata_normalizer.normalize_item

        def tracked_normalize(item, metadata_patch=None, cache=None):
            # Record which item was normalized, then defer to the real method.
            calls.append(item['id'])
            return original_normalize_item(item, metadata_patch, cache)

        self.service.metadata_normalizer.normalize_item = tracked_normalize
        page = self.service.get_items(page=1, page_size=1)
        self.assertEqual(page['total'], 3)
        self.assertEqual([item['filename'] for item in page['items']], ['page-1.flac'])
        self.assertEqual(calls, [first_item['id']])

    def test_get_items_reuses_task_level_normalization_cache_within_page(self):
        """Two items of one task on the same page cause a single ``list_all_task_items`` call."""
        list_all_calls = []
        original_list_all_task_items = self.task_store.list_all_task_items

        def tracked_list_all_task_items(task_id, active_only=True):
            list_all_calls.append((task_id, active_only))
            return original_list_all_task_items(task_id, active_only=active_only)

        self.task_store.list_all_task_items = tracked_list_all_task_items
        self._insert_item(
            filename='shared-1.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Shared 1', 'artist': 'Artist A feat. Guest', 'album': 'Album X'}
        )
        self._insert_item(
            filename='shared-2.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Shared 2', 'artist': 'Artist A', 'album': 'Album X'}
        )
        page = self.service.get_items(page=1, page_size=2)
        self.assertEqual(len(page['items']), 2)
        self.assertEqual(len(list_all_calls), 1)
        self.assertTrue(all(item['can_ingest'] for item in page['items']))

    def _insert_item(self, **overrides):
        """Insert a task item for ``self.task`` and return what the store returns.

        ``filename`` defaults to a random ``item-<hex>.flac`` name; every other
        keyword is forwarded to ``TaskStore.insert_task_item`` unchanged.
        """
        # os.urandom is a public API, unlike tempfile's private name generator.
        filename = overrides.pop('filename', f'item-{os.urandom(4).hex()}.flac')
        extension = Path(filename).suffix or '.flac'
        return self.task_store.insert_task_item(
            self.task['task_id'],
            original_path=f'/tmp/input/{filename}',
            current_file_path=f'/tmp/input/{filename}',
            relative_path=f'Artist/Album/{filename}',
            filename=filename,
            extension=extension,
            size_bytes=123456,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            **overrides
        )

    def _set_updated_at(self, item_id: int, timestamp: str):
        """Overwrite ``updated_at`` of one ``task_items`` row directly in the database."""
        with self.task_store._connect() as connection:
            connection.execute(
                'UPDATE task_items SET updated_at = ? WHERE id = ?',
                (timestamp, item_id)
            )
            connection.commit()
if __name__ == '__main__':
unittest.main()
+200
View File
@@ -0,0 +1,200 @@
import os
import tempfile
import unittest
from pathlib import Path
os.environ['MUSIC_WORKSHOP_DB_PATH'] = str(
Path(tempfile.gettempdir()) / f'music_workshop_library_api_{next(tempfile._get_candidate_names())}.db'
)
from backend.app.storage import ConfigStore
from backend.app.task_store import TaskConflictError
try:
from backend.app.library_service import LibraryTrackNotFoundError
from backend.app.schemas import (
LibraryMoveToExceptionResponse,
LibrarySummaryPayload,
LibraryTracksPageResponse
)
import backend.app.main as main_module
except ModuleNotFoundError as error:
main_module = None
LibraryTrackNotFoundError = None
LibraryMoveToExceptionResponse = None
LibrarySummaryPayload = None
LibraryTracksPageResponse = None
FASTAPI_IMPORT_ERROR = error
else:
FASTAPI_IMPORT_ERROR = None
@unittest.skipIf(main_module is None, f'api deps unavailable: {FASTAPI_IMPORT_ERROR}')
class LibraryApiTests(unittest.TestCase):
    """Drive the library endpoints of ``main_module`` against a recording fake service."""

    def setUp(self):
        # Start from a missing database file so each test sees a fresh store.
        self.db_path = Path(os.environ['MUSIC_WORKSHOP_DB_PATH'])
        self.db_path.unlink(missing_ok=True)
        self.store = ConfigStore(self.db_path)
        # Persist a known output directory; the tests assert it is forwarded.
        config = self.store.get_config()
        config['output'] = '/tmp/library-output'
        self.store.save_config(config)
        self.previous_store = main_module.store
        self.previous_service = main_module.library_service
        self.fake_service = _FakeLibraryService()
        main_module.store = self.store
        main_module.library_service = self.fake_service

    def tearDown(self):
        # Restore the globals first so they are reinstated even if cleanup fails.
        main_module.store = self.previous_store
        main_module.library_service = self.previous_service
        try:
            # Remove the temporary database so test runs do not pile up
            # files in the system temp directory.
            self.db_path.unlink(missing_ok=True)
        except OSError:
            # Best-effort only (e.g. the file may still be held open on some
            # platforms); the next setUp attempts the removal again.
            pass

    def test_get_library_summary_uses_current_output_config(self):
        """The summary endpoint queries the service with the configured output directory."""
        response = main_module.get_library_summary()
        payload = LibrarySummaryPayload.model_validate(response)
        self.assertEqual(payload.total_tracks, 12)
        self.assertEqual(payload.total_albums, 3)
        self.assertEqual(payload.total_artists, 2)
        self.assertEqual(self.fake_service.summary_calls, ['/tmp/library-output'])

    def test_get_library_tracks_passes_pagination_filters_and_serializes_provenance(self):
        """All filter, sort and paging arguments reach the service unchanged."""
        response = main_module.get_library_tracks(
            q='echoes',
            artist='Artist A',
            album='Album A',
            format='FLAC',
            has_provenance=True,
            page=2,
            page_size=25,
            sort_by='filename',
            sort_order='asc'
        )
        payload = LibraryTracksPageResponse.model_validate(response)
        self.assertEqual(payload.page, 2)
        self.assertEqual(payload.page_size, 25)
        self.assertEqual(payload.total, 1)
        self.assertEqual(payload.items[0].track_id, 'track-1')
        self.assertEqual(payload.items[0].ingest_provenance.task_id, 'task-123')
        self.assertEqual(
            self.fake_service.track_calls,
            [
                {
                    'output_dir': '/tmp/library-output',
                    'q': 'echoes',
                    'artist': 'Artist A',
                    'album': 'Album A',
                    'format': 'FLAC',
                    'has_provenance': True,
                    'page': 2,
                    'page_size': 25,
                    'sort_by': 'filename',
                    'sort_order': 'asc'
                }
            ]
        )

    def test_move_library_track_to_exception_uses_current_config(self):
        """The move endpoint hands the stored output and trash paths to the service."""
        response = main_module.move_library_track_to_exception('track-1')
        payload = LibraryMoveToExceptionResponse.model_validate(response)
        self.assertEqual(payload.exception_id, 123)
        self.assertEqual(payload.library_relative_path, 'A/Artist A/Album A/01 - Echoes.flac')
        # 'trash' was never overridden in setUp, so this is the value the
        # store returned for a fresh database.
        self.assertEqual(
            self.fake_service.move_calls,
            [{'output': '/tmp/library-output', 'trash': '/volume1/docker/navidrome/trash', 'track_id': 'track-1'}]
        )

    def test_move_library_track_to_exception_maps_conflict_to_409(self):
        """A ``TaskConflictError`` from the service becomes a 409 response."""
        self.fake_service.move_error = TaskConflictError('active-task')
        response = main_module.move_library_track_to_exception('track-1')
        self.assertEqual(response.status_code, 409)

    def test_library_track_not_found_handler_returns_404(self):
        """The not-found handler maps ``LibraryTrackNotFoundError`` to a 404 response."""
        response = main_module.library_track_not_found_error_handler(
            None,
            LibraryTrackNotFoundError('missing-track')
        )
        self.assertEqual(response.status_code, 404)
class _FakeLibraryService:
    """Recording stand-in for the library service that returns canned data."""

    def __init__(self):
        # Set to an exception instance to make move_track_to_exception raise it.
        self.move_error = None
        self.move_calls: list[dict] = []
        self.track_calls: list[dict] = []
        self.summary_calls: list[str] = []

    def get_summary(self, output_dir: str) -> dict:
        """Log the requested output directory and return fixed library totals."""
        self.summary_calls.append(output_dir)
        totals = {'total_tracks': 12, 'total_albums': 3, 'total_artists': 2}
        totals['suspected_duplicates'] = 1
        totals['scanned_at'] = '2024-01-03T12:00:00Z'
        return totals

    def get_tracks_page(self, output_dir: str, **kwargs) -> dict:
        """Log the query and return a single-track page echoing ``page``/``page_size``."""
        recorded_query = {'output_dir': output_dir}
        recorded_query.update(kwargs)
        self.track_calls.append(recorded_query)
        provenance = {
            'task_id': 'task-123',
            'organized_at': '2024-01-03T12:00:00Z',
            'match_source': 'musicbrainz',
            'match_confidence': 95.2,
            'dedupe_status': 'unique'
        }
        track = {
            'track_id': 'track-1',
            'library_relative_path': 'A/Artist A/Album A/01 - Echoes.flac',
            'library_file_path': '/tmp/library-output/A/Artist A/Album A/01 - Echoes.flac',
            'filename': '01 - Echoes.flac',
            'title': 'Echoes',
            'artist': 'Artist A',
            'album': 'Album A',
            'album_artist': 'Artist A',
            'track_number': 1,
            'disc_number': 1,
            'year': 2024,
            'duration_seconds': 301.4,
            'format': 'FLAC',
            'codec': 'FLAC',
            'bitrate': 980000,
            'sample_rate': 96000,
            'bit_depth': 24,
            'channels': 2,
            'size_bytes': 12345678,
            'modified_at': '2024-01-02T12:00:00Z',
            'ingest_provenance': provenance
        }
        return {
            'items': [track],
            'page': kwargs['page'],
            'page_size': kwargs['page_size'],
            'total': 1
        }

    def move_track_to_exception(self, config_snapshot: dict, track_id: str) -> dict:
        """Raise ``move_error`` when set; otherwise log the call and return a canned result."""
        if self.move_error:
            raise self.move_error
        recorded_move = {
            'output': config_snapshot['output'],
            'trash': config_snapshot['trash'],
            'track_id': track_id
        }
        self.move_calls.append(recorded_move)
        return {
            'exception_id': 123,
            'library_relative_path': 'A/Artist A/Album A/01 - Echoes.flac',
            'trash_file_path': '/tmp/trash/match_failed/task-1/01 - Echoes.flac',
            'message': '已移入异常中心,等待重新匹配'
        }
if __name__ == '__main__':
unittest.main()
+464
View File
@@ -0,0 +1,464 @@
import shutil
import tempfile
import unittest
from pathlib import Path
from backend.app.library_postprocess import DedupeRunner, OrganizeRunner
from backend.app.task_constants import create_empty_task_stats
from backend.app.task_store import TaskStore
from backend.app.task_stream import TaskStreamManager
class DedupeRunnerTests(unittest.TestCase):
def setUp(self):
    """Create a scratch directory tree, a task store inside it and a ``DedupeRunner``.

    The runner's audio-probe and tag-reading hooks are replaced with lookups
    into two dictionaries that individual tests fill in.
    """
    # Canned probe/tag results; absent keys resolve to an empty dict.
    self.library_audio_props = {}
    self.library_tags = {}

    scratch_root = Path(tempfile.mkdtemp())
    self.root = scratch_root
    self.input_dir = scratch_root / 'input'
    self.output_dir = scratch_root / 'output'
    self.trash_dir = scratch_root / 'trash'
    for directory in (self.input_dir, self.output_dir, self.trash_dir):
        directory.mkdir()

    self.task_store = TaskStore(scratch_root / 'music_workshop.db')
    self.runner = DedupeRunner(self.task_store, _NoopPreprocessor(), TaskStreamManager())

    def canned_audio_props(file_path):
        return self.library_audio_props.get(file_path, {})

    def canned_library_tags(file_path):
        return self.library_tags.get(file_path, {})

    self.runner._safe_probe_audio = canned_audio_props
    self.runner._safe_read_library_tags = canned_library_tags
def test_trashes_lower_quality_batch_duplicate(self):
task = self._create_task()
first_path = self._write_source('Artist/Album/01.flac')
second_path = self._write_source('Artist/Album/01-copy.flac')
first_item = self._insert_matched_item(
task['task_id'],
first_path,
recording_id='recording-1',
confidence=88.0,
audio_props={'codec': 'FLAC', 'bit_depth': 16, 'sample_rate': 44100, 'bitrate': 900000, 'channels': 2, 'duration_seconds': 201}
)
second_item = self._insert_matched_item(
task['task_id'],
second_path,
recording_id='recording-1',
confidence=95.0,
audio_props={'codec': 'FLAC', 'bit_depth': 24, 'sample_rate': 96000, 'bitrate': 1500000, 'channels': 2, 'duration_seconds': 201}
)
stats = create_empty_task_stats()
self.runner.run(task['task_id'], stats, self._config())
first_item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
second_item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][1]
self.assertEqual(first_item['dedupe_status'], 'duplicate_trashed')
self.assertFalse(first_item['is_active'])
self.assertEqual(first_item['duplicate_of_item_id'], second_item['id'])
self.assertTrue(Path(first_item['trash_file_path']).exists())
self.assertEqual(second_item['dedupe_status'], 'unique')
self.assertEqual(stats['dedupe']['batch_duplicates'], 1)
self.assertEqual(stats['dedupe']['kept_items'], 1)
log_types = {
log['event_type']
for log in self.task_store.list_task_logs(task['task_id'], 1, 50)['logs']
}
self.assertIn('dedupe.lookup_started', log_types)
self.assertIn('dedupe.item_duplicate', log_types)
self.assertIn('dedupe.item_unique', log_types)
def test_keeps_existing_library_file_by_default(self):
task = self._create_task()
source_path = self._write_source('Artist/Album/01.flac')
library_path = self._write_library('A/Artist/Album/01 - Song.flac')
self.library_audio_props[str(library_path)] = {
'codec': 'FLAC',
'bit_depth': 16,
'sample_rate': 44100,
'bitrate': 700000,
'channels': 2,
'duration_seconds': 201
}
self.library_tags[str(library_path)] = {
'title': 'Song',
'artist': 'Artist',
'album': 'Album',
'albumartist': 'Artist',
'tracknumber': '1',
'discnumber': '1',
'musicbrainzrecordingid': 'recording-1',
'musicbrainzalbumid': 'release-1',
'date': '2024-01-01'
}
item = self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
stats = create_empty_task_stats()
self.runner.run(task['task_id'], stats, self._config())
item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
self.assertEqual(item['dedupe_status'], 'duplicate_trashed')
self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
self.assertTrue(Path(item['trash_file_path']).exists())
self.assertEqual(stats['dedupe']['library_duplicates'], 1)
self.assertEqual(stats['dedupe']['replaced_library_items'], 0)
def test_replaces_lower_quality_library_file_when_enabled(self):
task = self._create_task(replace=True)
source_path = self._write_source('Artist/Album/01.flac')
library_path = self._write_library('A/Artist/Album/01 - Song.flac')
self.library_audio_props[str(library_path)] = {
'codec': 'MP3',
'bit_depth': 16,
'sample_rate': 44100,
'bitrate': 128000,
'channels': 2,
'duration_seconds': 201
}
self.library_tags[str(library_path)] = {
'title': 'Song',
'artist': 'Artist',
'album': 'Album',
'albumartist': 'Artist',
'tracknumber': '1',
'discnumber': '1',
'musicbrainzrecordingid': 'recording-1',
'musicbrainzalbumid': 'release-1',
'date': '2024-01-01'
}
item = self._insert_matched_item(
task['task_id'],
source_path,
recording_id='recording-1',
confidence=96.0,
audio_props={'codec': 'FLAC', 'bit_depth': 24, 'sample_rate': 96000, 'bitrate': 1600000, 'channels': 2, 'duration_seconds': 201}
)
stats = create_empty_task_stats()
self.runner.run(task['task_id'], stats, self._config(replace=True))
item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
self.assertEqual(item['dedupe_status'], 'duplicate_replaced')
self.assertTrue(Path(item['current_file_path']).exists())
self.assertFalse(library_path.exists())
self.assertEqual(item['duplicate_of_path'], str(library_path.resolve(strict=False)))
self.assertEqual(stats['dedupe']['replaced_library_items'], 1)
self.assertEqual(stats['dedupe']['kept_items'], 1)
def test_version_mismatch_does_not_dedupe_on_text_key(self):
task = self._create_task()
source_path = self._write_source('Artist/Album/01.flac')
library_path = self._write_library('A/Artist/Singles/2024 - Song/01 - Song.flac')
self.library_audio_props[str(library_path)] = {
'codec': 'FLAC',
'bit_depth': 16,
'sample_rate': 44100,
'bitrate': 700000,
'channels': 2,
'duration_seconds': 201
}
self.library_tags[str(library_path)] = {
'title': 'Song',
'artist': 'Artist',
'albumartist': 'Artist',
'date': '2024-01-01'
}
item = self._insert_matched_item(
task['task_id'],
source_path,
recording_id=None,
release_id=None,
title='Song (Live)',
duration_seconds=201
)
stats = create_empty_task_stats()
self.runner.run(task['task_id'], stats, self._config())
item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
self.assertEqual(item['dedupe_status'], 'unique')
self.assertEqual(stats['dedupe']['library_duplicates'], 0)
def test_marks_item_failed_when_duplicate_source_file_is_missing(self):
task = self._create_task()
source_path = self._write_source('Artist/Album/01.flac')
library_path = self._write_library('A/Artist/Album/01 - Song.flac')
self.library_audio_props[str(library_path)] = {
'codec': 'FLAC',
'bit_depth': 16,
'sample_rate': 44100,
'bitrate': 700000,
'channels': 2,
'duration_seconds': 201
}
self.library_tags[str(library_path)] = {
'title': 'Song',
'artist': 'Artist',
'album': 'Album',
'albumartist': 'Artist',
'tracknumber': '1',
'discnumber': '1',
'musicbrainzrecordingid': 'recording-1'
}
item = self._insert_matched_item(task['task_id'], source_path, recording_id='recording-1')
Path(source_path).unlink()
stats = create_empty_task_stats()
self.runner.run(task['task_id'], stats, self._config())
item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
self.assertEqual(item['dedupe_status'], 'failed')
self.assertEqual(item['dedupe_reason'], 'source_missing')
self.assertEqual(stats['dedupe']['failed_items'], 1)
def _create_task(self, replace: bool = False) -> dict:
return self.task_store.create_task_if_idle(self._config(replace=replace))
def _config(self, replace: bool = False) -> dict:
return {
'input': str(self.input_dir),
'output': str(self.output_dir),
'trash': str(self.trash_dir),
'advancedStrategy': {
'replaceLowQualityDuplicates': replace
}
}
def _write_source(self, relative_path: str) -> str:
path = self.input_dir / relative_path
path.parent.mkdir(parents=True, exist_ok=True)
path.write_bytes(b'audio')
return str(path.resolve(strict=False))
def _write_library(self, relative_path: str) -> Path:
path = self.output_dir / relative_path
path.parent.mkdir(parents=True, exist_ok=True)
path.write_bytes(b'library-audio')
return path
def _insert_matched_item(
self,
task_id: str,
source_path: str,
*,
recording_id: str | None = 'recording-1',
release_id: str | None = 'release-1',
title: str = 'Song',
duration_seconds: int = 201,
confidence: float = 92.0,
audio_props: dict | None = None
) -> dict:
path = Path(source_path)
return self.task_store.insert_task_item(
task_id,
original_path=source_path,
current_file_path=source_path,
relative_path=path.relative_to(self.input_dir).as_posix(),
filename=path.name,
extension=path.suffix.lower(),
size_bytes=path.stat().st_size,
modified_at='2024-01-01T00:00:00Z',
local_cover=None,
local_lyric=None,
scan_status='queued',
scan_reason=None,
scan_message=None,
preprocess_status='completed',
match_status='matched',
match_reason='authoritative_auto_match',
match_message='matched',
match_source='musicbrainz',
match_confidence=confidence,
match_is_authoritative=1,
audio_props_json=audio_props or {
'codec': 'FLAC',
'bit_depth': 16,
'sample_rate': 44100,
'bitrate': 700000,
'channels': 2,
'duration_seconds': duration_seconds
},
matched_metadata_json={
'title': title,
'artist': 'Artist',
'artists': ['Artist'],
'album': 'Album',
'album_artist': 'Artist',
'track_number': 1,
'disc_number': 1,
'release_date': '2024-01-01',
'year': 2024,
'duration_seconds': duration_seconds,
'recording_id': recording_id,
'release_id': release_id,
'release_group_id': 'group-1',
'source_ids': {'musicbrainz_recording_id': recording_id} if recording_id else {}
}
)
class OrganizeRunnerTests(unittest.TestCase):
    """Exercise ``OrganizeRunner`` path planning and file moves in a temporary tree."""

    def setUp(self):
        self.root = Path(tempfile.mkdtemp())
        # Registered right away so the directory is removed even when the rest
        # of setUp, or the test itself, fails.
        self.addCleanup(shutil.rmtree, self.root, ignore_errors=True)
        self.input_dir = self.root / 'input'
        self.output_dir = self.root / 'output'
        self.trash_dir = self.root / 'trash'
        self.input_dir.mkdir()
        self.output_dir.mkdir()
        self.trash_dir.mkdir()
        self.task_store = TaskStore(self.root / 'music_workshop.db')
        self.runner = OrganizeRunner(self.task_store, TaskStreamManager())

    def test_builds_single_disc_album_path(self):
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
        self.assertEqual(item['organize_status'], 'organized')
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song.flac')
        self.assertTrue(Path(item['library_file_path']).exists())
        log_types = {
            log['event_type']
            for log in self.task_store.list_task_logs(task['task_id'], 1, 50)['logs']
        }
        self.assertIn('organize.path_planned', log_types)
        self.assertIn('organize.item_organized', log_types)

    def test_places_multi_disc_release_under_disc_folder(self):
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac', disc_number=2, track_number=7)
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/Disc 2/07 - Song.flac')

    def test_places_missing_album_track_under_singles(self):
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/source.flac', album=None, title='Loose Song', year=2023)
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Singles/2023 - Loose Song/01 - Loose Song.flac')

    def test_places_non_ascii_album_artist_under_hash_bucket(self):
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/source.flac', album_artist='周杰伦')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
        self.assertTrue(item['library_relative_path'].startswith('#/周杰伦/Album/'))

    def test_resolves_target_collisions_with_suffix(self):
        task = self._create_task()
        # Occupy the planned destination so the runner has to pick another name.
        target = self.output_dir / 'A' / 'Artist' / 'Album' / '01 - Song.flac'
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(b'existing')
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()

        self.runner.run(task['task_id'], stats, self._config())

        item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
        self.assertEqual(item['library_relative_path'], 'A/Artist/Album/01 - Song (2).flac')
        self.assertEqual(stats['organize']['collision_resolved'], 1)

    def test_moves_failed_item_to_organize_trash(self):
        task = self._create_task()
        self._insert_organize_item(task['task_id'], 'Artist/Album/source.flac')
        stats = create_empty_task_stats()
        output_root = self.output_dir.resolve(strict=False)
        original_move = self.runner._move_file

        def failing_move(source: Path, destination: Path):
            # Fail only moves that land under the output tree; any other move
            # is delegated to the real implementation.
            if output_root in destination.resolve(strict=False).parents:
                raise OSError('blocked')
            return original_move(source, destination)

        self.runner._move_file = failing_move

        self.runner.run(task['task_id'], stats, self._config())

        item = self.task_store.list_task_items(task['task_id'], None, 1, 10)['items'][0]
        self.assertEqual(item['organize_status'], 'trashed')
        self.assertTrue(Path(item['trash_file_path']).exists())
        self.assertEqual(stats['organize']['failed_items'], 1)
        self.assertEqual(stats['organize']['trashed_items'], 1)

    def _create_task(self) -> dict:
        """Create a task in the store using the config built by ``_config``."""
        return self.task_store.create_task_if_idle(self._config())

    def _config(self) -> dict:
        """Build the config dict pointing at this test's temporary directories."""
        return {
            'input': str(self.input_dir),
            'output': str(self.output_dir),
            'trash': str(self.trash_dir)
        }

    def _insert_organize_item(
        self,
        task_id: str,
        relative_path: str,
        *,
        title: str = 'Song',
        album: str | None = 'Album',
        album_artist: str = 'Artist',
        track_number: int = 1,
        disc_number: int = 1,
        year: int = 2024
    ) -> dict:
        """Write a placeholder source file and insert a matched, unique, organize-pending item for it."""
        path = self.input_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio')
        return self.task_store.insert_task_item(
            task_id,
            original_path=str(path.resolve(strict=False)),
            current_file_path=str(path.resolve(strict=False)),
            relative_path=relative_path,
            filename=path.name,
            extension=path.suffix.lower(),
            size_bytes=path.stat().st_size,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            dedupe_status='unique',
            organize_status='pending',
            matched_metadata_json={
                'title': title,
                'artist': album_artist,
                'artists': [album_artist],
                'album': album,
                'album_artist': album_artist,
                'track_number': track_number,
                'disc_number': disc_number,
                'release_date': f'{year}-01-01',
                'year': year,
                'duration_seconds': 201,
                'recording_id': 'recording-1',
                'release_id': 'release-1',
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': 'recording-1'}
            }
        )
class _NoopPreprocessor:
def probe_audio(self, _file_path: str) -> dict:
return {}
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
unittest.main()
+385
View File
@@ -0,0 +1,385 @@
import shutil
import tempfile
import unittest
from datetime import datetime, timezone
import os
from pathlib import Path
from backend.app.exception_service import ExceptionService
from backend.app.library_service import LibraryService, LibraryTrackNotFoundError
from backend.app.task_store import TaskStore
class LibraryServiceTests(unittest.TestCase):
    """Exercise ``LibraryService`` against a temporary output directory and task store."""

    def setUp(self):
        self.root = Path(tempfile.mkdtemp())
        # addCleanup rather than tearDown: tearDown is skipped when setUp
        # raises, which would leak the directory created above.
        self.addCleanup(shutil.rmtree, self.root)
        self.output_dir = self.root / 'output'
        self.output_dir.mkdir()
        self.task_store = TaskStore(self.root / 'music_workshop.db')
        self.preprocessor = _FakePreprocessor()
        self.service = LibraryService(
            self.task_store,
            self.preprocessor,
            read_tags=self.preprocessor.read_tags
        )

    def test_empty_output_dir_returns_empty_summary_and_tracks(self):
        summary = self.service.get_summary(str(self.output_dir))
        page = self.service.get_tracks_page(str(self.output_dir))

        self.assertEqual(summary['total_tracks'], 0)
        self.assertEqual(summary['total_albums'], 0)
        self.assertEqual(summary['total_artists'], 0)
        self.assertEqual(summary['suspected_duplicates'], 0)
        self.assertEqual(page['items'], [])
        self.assertEqual(page['total'], 0)

    def test_scans_metadata_audio_and_filters_tracks(self):
        first_path = self._write_library_file('A/Artist A/Album A/01 - Echoes.flac', _timestamp(2024, 1, 1))
        second_path = self._write_library_file('B/Artist B/Album B/03 - Neon.mp3', _timestamp(2024, 1, 2))
        self.preprocessor.audio_props[str(first_path)] = {
            'format': 'FLAC',
            'codec': 'FLAC',
            'bitrate': 980000,
            'sample_rate': 96000,
            'bit_depth': 24,
            'channels': 2,
            'duration_seconds': 301.4
        }
        self.preprocessor.tags[str(first_path)] = {
            'title': 'Echoes',
            'artist': 'Artist A',
            'album': 'Album A',
            'albumartist': 'Artist A',
            'tracknumber': '1',
            'discnumber': '1',
            'date': '2024-01-01'
        }
        self.preprocessor.audio_props[str(second_path)] = {
            'format': 'MP3',
            'codec': 'MP3',
            'bitrate': 320000,
            'sample_rate': 44100,
            'bit_depth': 16,
            'channels': 2,
            'duration_seconds': 240.1
        }
        self.preprocessor.tags[str(second_path)] = {
            'title': 'Neon',
            'artist': 'Artist B',
            'album': 'Album B',
            'albumartist': 'Artist B',
            'tracknumber': '3',
            'discnumber': '1',
            'date': '2023-12-01'
        }
        # Only the first file gets a provenance row.
        task = self._create_completed_task()
        self._insert_provenance_item(
            task['task_id'],
            library_file_path=str(first_path),
            library_relative_path='A/Artist A/Album A/01 - Echoes.flac',
            updated_at='2024-01-03T09:00:00Z',
            match_source='musicbrainz',
            match_confidence=94.5,
            dedupe_status='unique'
        )

        page = self.service.get_tracks_page(
            str(self.output_dir),
            q='echo',
            artist='Artist A',
            album='Album A',
            format='flac',
            has_provenance=True
        )

        self.assertEqual(page['total'], 1)
        track = page['items'][0]
        self.assertEqual(track['filename'], '01 - Echoes.flac')
        self.assertEqual(track['title'], 'Echoes')
        self.assertEqual(track['artist'], 'Artist A')
        self.assertEqual(track['album'], 'Album A')
        self.assertEqual(track['format'], 'FLAC')
        self.assertEqual(track['codec'], 'FLAC')
        self.assertEqual(track['bit_depth'], 24)
        self.assertEqual(track['sample_rate'], 96000)
        self.assertEqual(track['ingest_provenance']['task_id'], task['task_id'])
        self.assertEqual(track['ingest_provenance']['match_source'], 'musicbrainz')
        self.assertEqual(track['ingest_provenance']['dedupe_status'], 'unique')

    def test_default_sort_prefers_organized_at_and_falls_back_to_modified_at(self):
        newest_path = self._write_library_file('N/Newest/Album/01 - Fresh.flac', _timestamp(2024, 1, 3))
        organized_path = self._write_library_file('O/Organized/Album/01 - Sorted.flac', _timestamp(2024, 1, 1))
        oldest_path = self._write_library_file('Z/Oldest/Album/01 - Archive.flac', _timestamp(2023, 12, 31))
        for path, title, artist in (
            (newest_path, 'Fresh', 'Newest'),
            (organized_path, 'Sorted', 'Organized'),
            (oldest_path, 'Archive', 'Oldest')
        ):
            self.preprocessor.audio_props[str(path)] = {'format': 'FLAC', 'codec': 'FLAC'}
            self.preprocessor.tags[str(path)] = {
                'title': title,
                'artist': artist,
                'album': 'Album',
                'albumartist': artist,
                'tracknumber': '1',
                'discnumber': '1'
            }
        # The provenance timestamp (Jan 2) is newer than this file's mtime
        # (Jan 1) but older than the newest file's mtime (Jan 3).
        task = self._create_completed_task()
        self._insert_provenance_item(
            task['task_id'],
            library_file_path=str(organized_path),
            library_relative_path='O/Organized/Album/01 - Sorted.flac',
            updated_at='2024-01-02T12:00:00Z'
        )

        page = self.service.get_tracks_page(str(self.output_dir))

        ordered_titles = [item['title'] for item in page['items']]
        self.assertEqual(ordered_titles, ['Fresh', 'Sorted', 'Archive'])

    def test_provenance_prefers_absolute_path_then_falls_back_to_relative_path(self):
        exact_path = self._write_library_file('A/Artist/Album/01 - Exact.flac', _timestamp(2024, 1, 1))
        fallback_path = self._write_library_file('B/Artist/Album/02 - Fallback.flac', _timestamp(2024, 1, 1))
        for path, title in ((exact_path, 'Exact'), (fallback_path, 'Fallback')):
            self.preprocessor.audio_props[str(path)] = {'format': 'FLAC', 'codec': 'FLAC'}
            self.preprocessor.tags[str(path)] = {
                'title': title,
                'artist': 'Artist',
                'album': 'Album',
                'albumartist': 'Artist',
                'tracknumber': '1',
                'discnumber': '1'
            }
        # Older row whose absolute path matches the file on disk.
        old_task = self._create_completed_task()
        self._insert_provenance_item(
            old_task['task_id'],
            library_file_path=str(exact_path),
            library_relative_path='A/Artist/Album/01 - Exact.flac',
            updated_at='2024-01-01T08:00:00Z',
            match_source='exact-source'
        )
        # Newer row for the same relative path but a different absolute path.
        newer_task = self._create_completed_task()
        self._insert_provenance_item(
            newer_task['task_id'],
            library_file_path='/legacy/output/A/Artist/Album/01 - Exact.flac',
            library_relative_path='A/Artist/Album/01 - Exact.flac',
            updated_at='2024-01-03T08:00:00Z',
            match_source='relative-source'
        )
        # Row that can only be matched through its relative path.
        fallback_task = self._create_completed_task()
        self._insert_provenance_item(
            fallback_task['task_id'],
            library_file_path='/legacy/output/B/Artist/Album/02 - Fallback.flac',
            library_relative_path='B/Artist/Album/02 - Fallback.flac',
            updated_at='2024-01-04T08:00:00Z',
            match_source='fallback-source'
        )

        page = self.service.get_tracks_page(str(self.output_dir), sort_by='filename', sort_order='asc')

        exact_track = next(item for item in page['items'] if item['title'] == 'Exact')
        fallback_track = next(item for item in page['items'] if item['title'] == 'Fallback')
        self.assertEqual(exact_track['ingest_provenance']['match_source'], 'exact-source')
        self.assertEqual(fallback_track['ingest_provenance']['match_source'], 'fallback-source')

    def test_summary_counts_suspected_duplicates_without_false_live_match(self):
        duplicate_one = self._write_library_file('A/Artist/Album/01 - Song.flac', _timestamp(2024, 1, 1))
        duplicate_two = self._write_library_file('A/Artist/Album/01 - Song Copy.flac', _timestamp(2024, 1, 2))
        studio = self._write_library_file('S/Artist/Singles/2024 - Ballad/01 - Ballad.flac', _timestamp(2024, 1, 3))
        live = self._write_library_file('S/Artist/Singles/2024 - Ballad Live/01 - Ballad Live.flac', _timestamp(2024, 1, 4))
        for path in (duplicate_one, duplicate_two, studio, live):
            self.preprocessor.audio_props[str(path)] = {
                'format': 'FLAC',
                'codec': 'FLAC',
                'duration_seconds': 201
            }
        self.preprocessor.tags[str(duplicate_one)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1'
        }
        self.preprocessor.tags[str(duplicate_two)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1',
            'discnumber': '1',
            'musicbrainzrecordingid': 'recording-1'
        }
        self.preprocessor.tags[str(studio)] = {
            'title': 'Ballad',
            'artist': 'Artist',
            'albumartist': 'Artist'
        }
        self.preprocessor.tags[str(live)] = {
            'title': 'Ballad (Live)',
            'artist': 'Artist',
            'albumartist': 'Artist'
        }

        summary = self.service.get_summary(str(self.output_dir))

        self.assertEqual(summary['total_tracks'], 4)
        self.assertEqual(summary['suspected_duplicates'], 1)

    def test_move_track_to_exception_moves_file_and_creates_match_failed_item(self):
        trash_dir = self.root / 'trash'
        library_path = self._write_library_file('A/Artist/Album/01 - Song.flac', _timestamp(2024, 1, 5))
        self.preprocessor.audio_props[str(library_path)] = {
            'format': 'FLAC',
            'codec': 'FLAC',
            'duration_seconds': 180.25
        }
        self.preprocessor.tags[str(library_path)] = {
            'title': 'Song',
            'artist': 'Artist',
            'album': 'Album',
            'albumartist': 'Artist',
            'tracknumber': '1'
        }
        self.preprocessor.fingerprints[str(library_path)] = {
            'fingerprint': 'abc123',
            'duration_seconds': 180.0
        }
        track = self.service.get_tracks_page(str(self.output_dir))['items'][0]

        response = self.service.move_track_to_exception(
            {
                'input': str(self.root / 'input'),
                'output': str(self.output_dir),
                'trash': str(trash_dir)
            },
            track['track_id']
        )

        # The file has left the library and now lives at the reported trash path.
        trash_path = Path(response['trash_file_path'])
        self.assertFalse(library_path.exists())
        self.assertTrue(trash_path.exists())
        self.assertEqual(response['library_relative_path'], 'A/Artist/Album/01 - Song.flac')
        self.assertEqual(self.service.get_tracks_page(str(self.output_dir))['total'], 0)

        # A matching entry is visible through the exception service.
        exception_service = ExceptionService(self.task_store)
        page = exception_service.get_items('match_failed')
        self.assertEqual(page['total'], 1)
        exception_item = page['items'][0]
        self.assertEqual(exception_item['exception_id'], response['exception_id'])
        self.assertEqual(exception_item['exception_type'], 'match_failed')
        self.assertEqual(exception_item['exception_reason_code'], 'manual_library_requeue')
        self.assertEqual(exception_item['trash_file_path'], str(trash_path))
        self.assertEqual(exception_item['library_file_path'], str(library_path))
        self.assertIn('retry_match', exception_item['available_actions'])

        # The source item carries the tags, audio properties and fingerprint.
        source_item = self.task_store.get_exception_source_item(response['exception_id'])
        self.assertEqual(source_item['current_file_path'], str(trash_path))
        self.assertEqual(source_item['original_tags_json']['title'], 'Song')
        self.assertEqual(source_item['audio_props_json']['codec'], 'FLAC')
        self.assertEqual(source_item['acoustic_fingerprint'], 'abc123')

    def test_move_track_to_exception_rejects_unknown_track_id(self):
        with self.assertRaises(LibraryTrackNotFoundError):
            self.service.move_track_to_exception(
                {
                    'input': str(self.root / 'input'),
                    'output': str(self.output_dir),
                    'trash': str(self.root / 'trash')
                },
                'unknown-track-id'
            )

    def _write_library_file(self, relative_path: str, modified_at_timestamp: int) -> Path:
        """Create a placeholder file under the output dir with the given access/modified time."""
        path = self.output_dir / relative_path
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(b'audio-data')
        os.utime(path, (float(modified_at_timestamp), float(modified_at_timestamp)))
        return path

    def _create_completed_task(self) -> dict:
        """Create a task and immediately mark it completed; returns the dict from creation."""
        task = self.task_store.create_task_if_idle(
            {'input': '', 'output': str(self.output_dir), 'trash': ''}
        )
        self.task_store.update_task(
            task['task_id'],
            status='completed',
            completed_at='2024-01-01T00:00:00Z'
        )
        return task

    def _insert_provenance_item(
        self,
        task_id: str,
        *,
        library_file_path: str,
        library_relative_path: str,
        updated_at: str,
        match_source: str | None = None,
        match_confidence: float | None = None,
        dedupe_status: str = 'unique'
    ):
        """Insert an organized task item for a library file, then pin its ``updated_at``."""
        item = self.task_store.insert_task_item(
            task_id,
            original_path=library_file_path,
            current_file_path=library_file_path,
            relative_path=library_relative_path,
            filename=Path(library_file_path).name,
            extension=Path(library_file_path).suffix.lower(),
            size_bytes=123,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_source=match_source,
            match_confidence=match_confidence,
            dedupe_status=dedupe_status,
            organize_status='organized',
            library_relative_path=library_relative_path,
            library_file_path=library_file_path
        )
        # insert_task_item is not given an updated_at here, so the column is
        # overwritten directly to give each row a known timestamp.
        with self.task_store._connect() as connection:
            connection.execute(
                'UPDATE task_items SET updated_at = ? WHERE id = ?',
                (updated_at, item['id'])
            )
            connection.commit()
class _FakePreprocessor:
def __init__(self):
self.audio_props: dict[str, dict] = {}
self.tags: dict[str, dict] = {}
self.fingerprints: dict[str, dict] = {}
def probe_audio(self, file_path: str) -> dict:
return self.audio_props.get(file_path, {})
def read_tags(self, file_path: str) -> dict:
return self.tags.get(file_path, {})
def calculate_fingerprint(self, file_path: str) -> dict:
return self.fingerprints.get(file_path, {})
def _timestamp(year: int, month: int, day: int) -> int:
return int(datetime(year, month, day, tzinfo=timezone.utc).timestamp())
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
unittest.main()
+186
View File
@@ -0,0 +1,186 @@
import json
import time
import unittest
from unittest.mock import patch
from urllib import error
from backend.app.matcher import MatchHttpClient, MusicBrainzProvider, SpotifyProvider
class MatchProviderTests(unittest.TestCase):
    """Network-free checks of the matcher HTTP client and metadata providers.

    Every test patches ``backend.app.matcher.request.urlopen`` so no real
    request is issued.
    """

    def test_match_http_client_retries_url_errors(self):
        # First attempt raises URLError, second attempt succeeds.
        client = MatchHttpClient()
        scripted_outcomes = [
            error.URLError('temporary dns error'),
            FakeResponse({'ok': True})
        ]
        with patch('backend.app.matcher.request.urlopen', side_effect=scripted_outcomes) as mock_urlopen:
            payload = client.request_json('test', 'https://example.com', retries=1)
        self.assertEqual(payload['ok'], True)
        self.assertEqual(mock_urlopen.call_count, 2)

    def test_match_http_client_retries_timeout(self):
        # First attempt raises TimeoutError, second attempt succeeds.
        client = MatchHttpClient()
        scripted_outcomes = [
            TimeoutError('timeout'),
            FakeResponse({'ok': True})
        ]
        with patch('backend.app.matcher.request.urlopen', side_effect=scripted_outcomes) as mock_urlopen:
            payload = client.request_json('test', 'https://example.com', retries=1)
        self.assertEqual(payload['ok'], True)
        self.assertEqual(mock_urlopen.call_count, 2)

    def test_musicbrainz_requests_use_user_agent_and_throttle(self):
        client = MatchHttpClient()
        provider = MusicBrainzProvider(client)
        observed_headers = []

        def fake_urlopen(req, timeout):
            # Capture the outgoing headers so the User-Agent can be checked.
            observed_headers.append(dict(req.header_items()))
            return FakeResponse({'recordings': []})

        # The clock is scripted so the two requests appear 0.1 s apart.
        with patch(
            'backend.app.matcher.request.urlopen', side_effect=fake_urlopen
        ) as mock_urlopen, patch(
            'backend.app.matcher.time.sleep'
        ) as mock_sleep, patch(
            'backend.app.matcher.time.monotonic', side_effect=[0.0, 0.0, 0.1, 0.1]
        ):
            for _ in range(2):
                provider._request_json(
                    'musicbrainz',
                    'https://musicbrainz.org/ws/2/recording',
                    params={'fmt': 'json'}
                )
        self.assertEqual(mock_urlopen.call_count, 2)
        self.assertTrue(any('User-agent' in headers or 'User-Agent' in headers for headers in observed_headers))
        self.assertTrue(mock_sleep.called)

    def test_spotify_provider_refreshes_expired_token(self):
        provider = SpotifyProvider(MatchHttpClient())
        config = {
            'metadata': {
                'spotifyUrl': 'https://api.spotify.com/v1',
                'spotifyClientId': 'spotify-id',
                'spotifySecret': 'spotify-secret'
            }
        }
        observed_authorization = []
        token_counter = {'value': 0}

        def fake_urlopen(req, timeout):
            # Token endpoint: hand out a new numbered token on every call.
            if 'api/token' in req.full_url:
                token_counter['value'] += 1
                return FakeResponse(
                    {
                        'access_token': f'token-{token_counter["value"]}',
                        'expires_in': 3600
                    }
                )
            # Search endpoint: record which bearer token was presented.
            observed_authorization.append(req.headers.get('Authorization'))
            return FakeResponse(
                {
                    'tracks': {
                        'items': [
                            {
                                'id': 'track-1',
                                'name': 'Song Title',
                                'artists': [{'name': 'Song Artist'}],
                                'album': {
                                    'id': 'album-1',
                                    'name': 'Album Name',
                                    'release_date': '2024-01-01',
                                    'images': []
                                },
                                'track_number': 1,
                                'disc_number': 1,
                                'duration_ms': 201000
                            }
                        ]
                    }
                }
            )

        def run_search():
            # A fresh query dict per call, as in two separate literal calls.
            return provider.search(
                {
                    'title': 'Song Title',
                    'artist': 'Song Artist',
                    'album': 'Album Name'
                },
                config
            )

        with patch('backend.app.matcher.request.urlopen', side_effect=fake_urlopen):
            run_search()
            # Expire the cached token so the second search must fetch a new one.
            cache_key = 'spotify-id:spotify-secret'
            provider._token_cache[cache_key]['expires_at'] = time.time() - 1
            run_search()
        self.assertEqual(token_counter['value'], 2)
        self.assertEqual(observed_authorization, ['Bearer token-1', 'Bearer token-2'])

    def test_spotify_provider_skips_when_credentials_are_missing(self):
        provider = SpotifyProvider(MatchHttpClient())
        config = {
            'metadata': {
                'spotifyUrl': 'https://api.spotify.com/v1',
                'spotifyClientId': '',
                'spotifySecret': ''
            }
        }
        query = {
            'title': 'Song Title',
            'artist': 'Song Artist',
            'album': 'Album Name'
        }
        with patch('backend.app.matcher.request.urlopen') as mock_urlopen:
            candidates = provider.search(query, config)
        self.assertEqual(candidates, [])
        self.assertEqual(mock_urlopen.call_count, 0)
class FakeHeaders:
    """Header stand-in that only reports the response charset."""

    def get_content_charset(self):
        """Return the fixed charset used by the fake responses."""
        charset = 'utf-8'
        return charset
class FakeResponse:
    """Context-manager stand-in for a ``urlopen`` response carrying a JSON body."""

    def __init__(self, payload):
        # Serialise once up front; read() returns these same bytes on every call.
        encoded_body = json.dumps(payload).encode('utf-8')
        self.headers = FakeHeaders()
        self._payload = encoded_body

    def __enter__(self):
        """Enter the ``with`` block, yielding the response itself."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Leave the ``with`` block without suppressing any exception."""
        return False

    def read(self):
        """Return the JSON body as UTF-8 encoded bytes."""
        return self._payload
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
unittest.main()
+461
View File
@@ -0,0 +1,461 @@
import copy
import unittest
from backend.app.matcher import MatchProviderError, Matcher
# Baseline configuration handed to Matcher.match_item by the tests below:
# metadata fallback is enabled, asset downloads are disabled, and each metadata
# provider has an endpoint entry. Only the AcoustID and Spotify credentials are
# non-empty; the Discogs, Last.fm and Genius tokens/keys are blank.
DEFAULT_CONFIG = {
'advancedStrategy': {
'metadataFallback': True,
'downloadAssets': False
},
'metadata': {
'acoustidUrl': 'https://api.acoustid.org/v2',
'acoustidClientKey': 'client-key',
'musicbrainz': 'https://musicbrainz.org/ws/2/',
'netease': 'http://localhost:3000',
'qq': 'http://localhost:3300',
'spotifyUrl': 'https://api.spotify.com/v1',
'spotifyClientId': 'spotify-id',
'spotifySecret': 'spotify-secret',
'discogsUrl': 'https://api.discogs.com',
'discogsToken': '',
'lastfmUrl': 'https://ws.audioscrobbler.com/2.0/',
'lastfmKey': '',
'geniusUrl': 'https://api.genius.com',
'geniusToken': ''
}
}
class MatcherTests(unittest.TestCase):
    """Drive ``Matcher.match_item`` with stubbed providers and check its verdicts."""

    @staticmethod
    def _spotify_candidate():
        """Return the non-authoritative Spotify candidate reused by several tests."""
        return build_candidate(
            provider='spotify',
            is_authoritative=False,
            search_confidence=0.9,
            source_ids={
                'spotify_track_id': 'track-1',
                'spotify_album_id': 'album-1',
            },
        )

    @staticmethod
    def _warning_providers(result):
        """List the provider of every warning attached to ``result``, in order."""
        return [warning['provider'] for warning in result['provider_warnings']]

    def test_matches_authoritative_acoustid_candidate(self):
        """An authoritative AcoustID candidate is reported as ``matched`` with confidence >= 85."""
        track = build_item()
        fingerprint_hit = build_candidate(
            provider='acoustid',
            is_authoritative=True,
            fingerprint_confidence=0.98,
        )
        matcher = build_matcher(acoustid_candidates=[fingerprint_hit])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched')
        self.assertEqual(outcome['source'], 'acoustid')
        self.assertTrue(outcome['is_authoritative'])
        self.assertGreaterEqual(outcome['confidence'], 85)

    def test_matches_musicbrainz_text_candidate_without_fingerprint(self):
        """A MusicBrainz text-search candidate alone is enough for a ``matched`` verdict."""
        track = build_item()
        text_hit = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.92,
        )
        matcher = build_matcher(musicbrainz_candidates=[text_hit])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched')
        self.assertEqual(outcome['source'], 'musicbrainz')
        self.assertTrue(outcome['is_authoritative'])
        self.assertEqual(outcome['matched_metadata_json']['release_id'], 'release-main')

    def test_matches_fallback_candidate_when_authoritative_missing(self):
        """With only a Spotify candidate available the verdict is ``matched_fallback``."""
        track = build_item()
        matcher = build_matcher(spotify_candidates=[self._spotify_candidate()])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched_fallback')
        self.assertEqual(outcome['source'], 'spotify')
        self.assertFalse(outcome['is_authoritative'])

    def test_respects_repair_provider_scope(self):
        """``repair_provider_scope`` limited to Spotify must win over a MusicBrainz candidate."""
        track = build_item()
        musicbrainz_hit = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.95,
        )
        matcher = build_matcher(
            musicbrainz_candidates=[musicbrainz_hit],
            spotify_candidates=[self._spotify_candidate()],
        )
        # Work on a copy so the shared module-level config is left untouched.
        spotify_only_config = copy.deepcopy(DEFAULT_CONFIG)
        spotify_only_config['repair_provider_scope'] = ['spotify']

        outcome = matcher.match_item(track, [track], spotify_only_config)

        self.assertEqual(outcome['status'], 'matched_fallback')
        self.assertEqual(outcome['source'], 'spotify')
        self.assertFalse(outcome['is_authoritative'])

    def test_returns_low_score_when_gap_is_too_small(self):
        """Two near-identical MusicBrainz candidates must yield ``low_score``."""
        track = build_item()
        leader = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.95,
            release_id='release-a',
        )
        runner_up = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.94,
            release_id='release-b',
            source_ids={
                'musicbrainz_recording_id': 'recording-b',
                'musicbrainz_release_id': 'release-b',
                'musicbrainz_release_group_id': 'release-group-b',
            },
        )
        matcher = build_matcher(musicbrainz_candidates=[leader, runner_up])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'low_score')
        self.assertEqual(outcome['reason'], 'score_gap_too_small')
        self.assertEqual(len(outcome['match_candidates_json']), 2)

    def test_returns_not_found_when_no_candidates_exist(self):
        """Providers that return nothing lead to ``not_found`` with empty payloads."""
        track = build_item()

        outcome = build_matcher().match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'not_found')
        self.assertIsNone(outcome['matched_metadata_json'])
        self.assertEqual(outcome['match_candidates_json'], [])

    def test_skips_acoustid_error_and_matches_musicbrainz_text_candidate(self):
        """An AcoustID failure becomes a single warning; MusicBrainz still matches."""
        track = build_item()
        text_hit = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.92,
        )
        matcher = build_matcher(
            acoustid_error=MatchProviderError('acoustid', 'acoustid failed'),
            musicbrainz_candidates=[text_hit],
        )

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched')
        self.assertEqual(outcome['source'], 'musicbrainz')
        self.assertEqual(len(outcome['provider_warnings']), 1)
        self.assertEqual(outcome['provider_warnings'][0]['provider'], 'acoustid')

    def test_skips_musicbrainz_error_and_uses_fallback_candidate(self):
        """A failing MusicBrainz stub is recorded twice while Spotify supplies the match."""
        track = build_item()
        matcher = build_matcher(
            musicbrainz_error=MatchProviderError('musicbrainz', 'musicbrainz failed'),
            spotify_candidates=[self._spotify_candidate()],
        )

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched_fallback')
        self.assertEqual(outcome['source'], 'spotify')
        self.assertEqual(
            self._warning_providers(outcome),
            ['musicbrainz', 'musicbrainz'],
        )

    def test_returns_not_found_when_all_providers_fail(self):
        """When every search provider raises, the verdict is ``not_found`` plus one warning each."""
        track = build_item()
        matcher = build_matcher(
            acoustid_error=MatchProviderError('acoustid', 'acoustid failed'),
            musicbrainz_error=MatchProviderError('musicbrainz', 'musicbrainz failed'),
            netease_error=MatchProviderError('netease', 'netease failed'),
            qq_error=MatchProviderError('qq', 'qq failed'),
            spotify_error=MatchProviderError('spotify', 'spotify failed'),
        )

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'not_found')
        self.assertEqual(
            self._warning_providers(outcome),
            ['acoustid', 'musicbrainz', 'netease', 'qq', 'spotify'],
        )

    def test_album_context_converges_to_single_release(self):
        """Both tracks of one album must resolve to ``release-a``.

        The MusicBrainz stub offers two equally confident releases per track:
        ``release-a`` lists the same titles and durations as the local tracks,
        ``release-b`` lists unrelated ones.
        """
        opening_track = build_item(title='Song Title', track_number=1, duration_seconds=201)
        closing_track = build_item(
            title='Song Title',
            track_number=2,
            duration_seconds=233,
            relative_path='Artist/Album/02.flac',
            filename='02.flac',
        )
        album = [opening_track, closing_track]

        def tracklist_row(title, track_number, duration_seconds):
            # One entry of a release tracklist; every track sits on disc 1.
            return {
                'title': title,
                'track_number': track_number,
                'disc_number': 1,
                'duration_seconds': duration_seconds,
            }

        def dynamic_musicbrainz_candidates(item_metadata, _config, **_kwargs):
            # Echo the queried track's number and duration into both candidates
            # so that only the release tracklists tell them apart.
            track_number = item_metadata.get('track_number')
            duration_seconds = item_metadata.get('duration_seconds')
            shared = {
                'provider': 'musicbrainz',
                'is_authoritative': True,
                'search_confidence': 0.91,
                'track_number': track_number,
                'duration_seconds': duration_seconds,
            }
            release_a = build_candidate(
                **shared,
                recording_id=f'recording-a-{track_number}',
                release_id='release-a',
                release_group_id='group-a',
                release_tracklist=[
                    tracklist_row('Song Title', 1, 201),
                    tracklist_row('Song Title', 2, 233),
                ],
                source_ids={
                    'musicbrainz_recording_id': f'recording-a-{track_number}',
                    'musicbrainz_release_id': 'release-a',
                    'musicbrainz_release_group_id': 'group-a',
                },
            )
            release_b = build_candidate(
                **shared,
                recording_id=f'recording-b-{track_number}',
                release_id='release-b',
                release_group_id='group-b',
                release_tracklist=[
                    tracklist_row('Track Zero', 1, 120),
                    tracklist_row('Track Extra', 2, 310),
                ],
                source_ids={
                    'musicbrainz_recording_id': f'recording-b-{track_number}',
                    'musicbrainz_release_id': 'release-b',
                    'musicbrainz_release_group_id': 'group-b',
                },
            )
            return [release_a, release_b]

        matcher = build_matcher(musicbrainz_candidates=dynamic_musicbrainz_candidates)

        opening_outcome = matcher.match_item(opening_track, album, DEFAULT_CONFIG)
        closing_outcome = matcher.match_item(closing_track, album, DEFAULT_CONFIG)

        self.assertEqual(opening_outcome['status'], 'matched')
        self.assertEqual(closing_outcome['status'], 'matched')
        self.assertEqual(opening_outcome['matched_metadata_json']['release_id'], 'release-a')
        self.assertEqual(closing_outcome['matched_metadata_json']['release_id'], 'release-a')
def build_matcher(
    *,
    acoustid_candidates=None,
    musicbrainz_candidates=None,
    aligned_candidate=None,
    netease_candidates=None,
    qq_candidates=None,
    spotify_candidates=None,
    acoustid_error=None,
    musicbrainz_error=None,
    netease_error=None,
    qq_error=None,
    spotify_error=None
):
    """Assemble a ``Matcher`` whose providers are all in-memory stubs.

    Each ``*_candidates`` argument is the canned result (or a callable that
    produces it) for that search provider, and each ``*_error`` argument is an
    exception the matching stub raises instead of answering.
    ``aligned_candidate`` is what the MusicBrainz stub returns from
    ``align_candidate``.  Discogs, Last.fm and Genius always use a stub whose
    ``enrich`` returns ``None``.
    """
    musicbrainz_stub = StaticMusicBrainzProvider(
        musicbrainz_candidates,
        aligned_candidate=aligned_candidate,
        error=musicbrainz_error,
    )
    providers = {
        'acoustid_provider': StaticSearchProvider(acoustid_candidates, error=acoustid_error),
        'musicbrainz_provider': musicbrainz_stub,
        'netease_provider': StaticSearchProvider(netease_candidates, error=netease_error),
        'qq_provider': StaticSearchProvider(qq_candidates, error=qq_error),
        'spotify_provider': StaticSearchProvider(spotify_candidates, error=spotify_error),
        'discogs_provider': StaticEnrichmentProvider(),
        'lastfm_provider': StaticEnrichmentProvider(),
        'genius_provider': StaticEnrichmentProvider(),
    }
    return Matcher(**providers)
def build_item(
    *,
    title='Song Title',
    artist='Song Artist',
    album='Album Name',
    track_number=1,
    disc_number=1,
    duration_seconds=201,
    relative_path='Artist/Album/01.flac',
    filename='01.flac',
    item_id=1
):
    """Build a task-item dict shaped like the input of ``Matcher.match_item``.

    Args:
        title: Track title stored in the original tags.
        artist: Used for both ``artist`` and ``album_artist`` in the tags.
        album: Album name stored in the original tags.
        track_number: Track number; stored as a string in the tags.
        disc_number: Disc number; stored as a string in the tags.
        duration_seconds: Used for both the audio properties and the
            fingerprint duration.
        relative_path: Value of the ``relative_path`` field.
        filename: File name; also determines the ``/tmp/...`` source paths.
        item_id: Value of the ``id`` field.  Defaults to ``1`` so existing
            callers are unaffected; pass distinct values when several items
            of the same group must be told apart by id.

    Returns:
        A new dict on every call, so callers may mutate it freely.
    """
    source_path = f'/tmp/{filename}'
    return {
        'id': item_id,
        'task_id': 'task-1',
        'original_path': source_path,
        'current_file_path': source_path,
        'relative_path': relative_path,
        'filename': filename,
        'original_tags_json': {
            'title': title,
            'artist': artist,
            'album': album,
            'album_artist': artist,
            # Numbers are kept as strings inside the tag snapshot.
            'track_number': str(track_number),
            'disc_number': str(disc_number)
        },
        'audio_props_json': {
            'duration_seconds': duration_seconds
        },
        'acoustic_fingerprint': 'fingerprint',
        'fingerprint_duration_seconds': duration_seconds,
        'scan_status': 'queued',
        'preprocess_status': 'completed'
    }
def build_candidate(
    *,
    provider,
    is_authoritative,
    title='Song Title',
    artist='Song Artist',
    album='Album Name',
    track_number=1,
    disc_number=1,
    duration_seconds=201,
    recording_id='recording-main',
    release_id='release-main',
    release_group_id='release-group-main',
    fingerprint_confidence=None,
    search_confidence=None,
    release_tracklist=None,
    source_ids=None
):
    """Build a provider candidate dict for the provider stubs to return.

    Args:
        provider: Provider name stored under ``provider``.
        is_authoritative: Flag stored under ``is_authoritative``.
        title, artist, album: Descriptive fields; ``artist`` also fills
            ``artists`` (as a one-element list) and ``album_artist``.
        track_number, disc_number, duration_seconds: Position and length.
        recording_id, release_id, release_group_id: Identifier fields, also
            used to build the default ``source_ids``.
        fingerprint_confidence, search_confidence: Stored as given.
        release_tracklist: Tracklist of the release.  ``None`` means a
            single-entry tracklist describing this candidate.
        source_ids: Provider-specific ids.  ``None`` means MusicBrainz ids
            derived from the three identifier arguments.

    Returns:
        A new candidate dict.

    Only ``None`` triggers the defaults, so an explicitly empty
    ``source_ids`` or ``release_tracklist`` is passed through unchanged.
    """
    if source_ids is None:
        source_ids = {
            'musicbrainz_recording_id': recording_id,
            'musicbrainz_release_id': release_id,
            'musicbrainz_release_group_id': release_group_id
        }
    if release_tracklist is None:
        release_tracklist = [
            {
                'title': title,
                'track_number': track_number,
                'disc_number': disc_number,
                'duration_seconds': duration_seconds
            }
        ]
    return {
        'provider': provider,
        'is_authoritative': is_authoritative,
        'title': title,
        'artist': artist,
        'artists': [artist],
        'album': album,
        'album_artist': artist,
        'track_number': track_number,
        'disc_number': disc_number,
        'release_date': '2024-01-01',
        'year': 2024,
        'duration_seconds': duration_seconds,
        'recording_id': recording_id,
        'release_id': release_id,
        'release_group_id': release_group_id,
        'source_ids': source_ids,
        'fingerprint_confidence': fingerprint_confidence,
        'search_confidence': search_confidence,
        'release_tracklist': release_tracklist
    }
class StaticSearchProvider:
    """Search-provider stub that replays canned candidates or raises a canned error.

    ``candidates`` may be a list, or a callable invoked with the arguments of
    ``search``.  A falsy value is stored as an empty list.
    """

    def __init__(self, candidates=None, *, error=None):
        self.error = error
        self.candidates = candidates if candidates else []

    def search(self, *args, **kwargs):
        """Raise the configured error if any, otherwise return a deep copy of the candidates."""
        if self.error:
            raise self.error
        source = self.candidates
        produced = source(*args, **kwargs) if callable(source) else source
        # Copy so callers can mutate results without touching the fixture.
        return copy.deepcopy(produced)
class StaticMusicBrainzProvider:
    """MusicBrainz stub exposing ``search_text`` and ``align_candidate``.

    ``candidates`` may be a list, or a callable invoked with the arguments of
    ``search_text``.  A falsy value is stored as an empty list.  When
    ``error`` is set, both methods raise it instead of answering.
    """

    def __init__(self, candidates=None, *, aligned_candidate=None, error=None):
        self.error = error
        self.aligned_candidate = aligned_candidate
        self.candidates = candidates if candidates else []

    def _raise_if_failing(self):
        # Shared guard: a configured error pre-empts every provider call.
        if self.error:
            raise self.error

    def search_text(self, *args, **kwargs):
        """Return a deep copy of the canned (or computed) text-search candidates."""
        self._raise_if_failing()
        source = self.candidates
        produced = source(*args, **kwargs) if callable(source) else source
        return copy.deepcopy(produced)

    def align_candidate(self, *args, **kwargs):
        """Return a deep copy of ``aligned_candidate``; the arguments are ignored."""
        self._raise_if_failing()
        return copy.deepcopy(self.aligned_candidate)
class StaticEnrichmentProvider:
    """Enrichment-provider stub that never contributes any data."""

    def enrich(self, *args, **kwargs):
        """Accept any arguments and return ``None``."""
        return None
if __name__ == '__main__':
unittest.main()
@@ -0,0 +1,95 @@
import os
import tempfile
import unittest
from pathlib import Path
# Point MUSIC_WORKSHOP_DB_PATH at a randomly named database file inside the
# system temp directory.
# NOTE(review): this assignment sits above the backend imports, presumably
# because those modules read the variable at import time; confirm before
# reordering.
# NOTE(review): tempfile._get_candidate_names() is a private, undocumented
# helper of the tempfile module.
os.environ['MUSIC_WORKSHOP_DB_PATH'] = str(
Path(tempfile.gettempdir()) / f'music_workshop_metadata_normalization_{next(tempfile._get_candidate_names())}.db'
)
from backend.app.metadata_normalization import MetadataNormalizationService, can_ingest_metadata, parse_artist_string
from backend.app.task_store import TaskStore
class MetadataNormalizationTests(unittest.TestCase):
    """Cover artist-string parsing and album-artist normalization."""

    def setUp(self):
        """Start from a fresh database file that holds a single task."""
        db_file = Path(os.environ['MUSIC_WORKSHOP_DB_PATH'])
        self.db_path = db_file
        if db_file.exists():
            db_file.unlink()
        self.task_store = TaskStore(db_file)
        directories = {
            'input': '/tmp/input',
            'output': '/tmp/output',
            'trash': '/tmp/trash',
        }
        self.task = self.task_store.create_task_if_idle(directories)
        self.service = MetadataNormalizationService(self.task_store)

    def test_parse_artist_string_supports_common_delimiters(self):
        """Each supported delimiter splits ``A`` and ``B`` into two tokens."""
        for raw in ('A / B', 'A; B', 'A & B', 'A feat. B', 'A、B'):
            self.assertEqual(parse_artist_string(raw)['tokens'], ['A', 'B'])

    def test_single_artist_album_derives_album_artist(self):
        """An album whose tracks share one artist takes that artist as album artist."""
        first = self._insert_item(
            'track-01.flac',
            {'title': 'Song 1', 'artist': 'Artist A', 'album': 'Album X'},
        )
        self._insert_item(
            'track-02.flac',
            {'title': 'Song 2', 'artist': 'Artist A', 'album': 'Album X'},
        )

        result = self.service.normalize_item(first)

        self.assertEqual(result['album_artist'], 'Artist A')
        self.assertEqual(result['normalization_strategy'], 'single_artist')
        self.assertTrue(can_ingest_metadata({**result, 'title': 'Song 1'}))

    def test_feat_album_uses_dominant_primary_artist(self):
        """Featured and joint credits do not displace the artist common to all tracks."""
        first = self._insert_item(
            'track-01.flac',
            {'title': 'Song 1', 'artist': 'Artist A feat. Guest', 'album': 'Album X'},
        )
        self._insert_item(
            'track-02.flac',
            {'title': 'Song 2', 'artist': 'Artist A', 'album': 'Album X'},
        )
        self._insert_item(
            'track-03.flac',
            {'title': 'Song 3', 'artist': 'Artist A & Another', 'album': 'Album X'},
        )

        result = self.service.normalize_item(first)

        self.assertEqual(result['album_artist'], 'Artist A')
        self.assertEqual(result['normalization_strategy'], 'main_artist_feat')

    def test_compilation_album_sets_various_artists(self):
        """Three different track artists on one album produce a compilation."""
        first = self._insert_item(
            'track-01.flac',
            {'title': 'Song 1', 'artist': 'Artist A', 'album': 'Top Hits 2025'},
        )
        self._insert_item(
            'track-02.flac',
            {'title': 'Song 2', 'artist': 'Artist B', 'album': 'Top Hits 2025'},
        )
        self._insert_item(
            'track-03.flac',
            {'title': 'Song 3', 'artist': 'Artist C', 'album': 'Top Hits 2025'},
        )

        result = self.service.normalize_item(first)

        self.assertEqual(result['album_artist'], 'Various Artists')
        self.assertEqual(result['compilation'], 1)
        self.assertEqual(result['normalization_strategy'], 'compilation')

    def test_existing_album_artist_is_preserved(self):
        """An album artist already present in the metadata is kept as is."""
        tagged = {
            'title': 'Song 1',
            'artist': '阿信',
            'album': 'Solo Album',
            'album_artist': '五月天',
        }
        only_item = self._insert_item('track-01.flac', tagged)

        result = self.service.normalize_item(only_item)

        self.assertEqual(result['album_artist'], '五月天')
        self.assertEqual(result['normalization_strategy'], 'source_preserved')

    def _insert_item(self, filename: str, matched_metadata_json: dict):
        """Insert a ``matched_fallback`` item for ``filename`` into this test's task.

        Returns whatever ``TaskStore.insert_task_item`` returns.
        """
        source_path = f'/tmp/input/{filename}'
        return self.task_store.insert_task_item(
            self.task['task_id'],
            original_path=source_path,
            current_file_path=source_path,
            relative_path=f'Artist/Album/{filename}',
            filename=filename,
            extension='.flac',
            size_bytes=123456,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            match_status='matched_fallback',
            matched_metadata_json=matched_metadata_json,
        )
if __name__ == '__main__':
unittest.main()
+75
View File
@@ -0,0 +1,75 @@
import os
import tempfile
import unittest
from pathlib import Path
from backend.app.scanner import probe_local_assets
class ScannerAssetProbeTests(unittest.TestCase):
    """Check which cover and lyric files ``probe_local_assets`` reports for an audio file."""

    def setUp(self):
        """Create a throw-away library directory for the test."""
        self.temp_dir = tempfile.TemporaryDirectory()
        self.library_dir = Path(self.temp_dir.name)

    def tearDown(self):
        """Remove the library directory and everything created inside it."""
        self.temp_dir.cleanup()

    def test_probe_local_assets_supports_cover_jpeg(self):
        """``cover.jpeg`` next to the track is reported as cover; no lyric is found."""
        track = self._create_file('album/track.flac')
        cover = self._create_file('album/cover.jpeg')

        found = probe_local_assets(track)

        self.assertEqual(found['local_cover'], str(cover.resolve()))
        self.assertIsNone(found['local_lyric'])

    def test_probe_local_assets_supports_folder_webp(self):
        """``folder.webp`` next to the track is reported as cover."""
        track = self._create_file('album/track.flac')
        cover = self._create_file('album/folder.webp')

        found = probe_local_assets(track)

        self.assertEqual(found['local_cover'], str(cover.resolve()))

    def test_probe_local_assets_matches_track_name_case_insensitively(self):
        """Files sharing the track's stem are matched regardless of letter case."""
        track = self._create_file('album/Track.FLAC')
        cover = self._create_file('album/track.JPEG')
        lyric = self._create_file('album/TRACK.LRC', '[00:00.00] lyric')

        found = probe_local_assets(track)

        self.assertEqual(found['local_cover'], str(cover.resolve()))
        self.assertEqual(found['local_lyric'], str(lyric.resolve()))

    def test_probe_local_assets_respects_cover_priority(self):
        """``cover.png`` wins over ``song.jpg``, ``folder.jpg`` and ``cover.webp``."""
        track = self._create_file('album/song.flac')
        self._create_file('album/song.jpg')
        self._create_file('album/folder.jpg')
        expected_cover = self._create_file('album/cover.png')
        self._create_file('album/cover.webp')

        found = probe_local_assets(track)

        self.assertEqual(found['local_cover'], str(expected_cover.resolve()))

    def test_probe_local_assets_ignores_symlink_covers(self):
        """A symlinked ``cover.jpeg`` is skipped in favour of a regular ``folder.webp``."""
        track = self._create_file('album/song.flac')
        regular_cover = self._create_file('album/folder.webp')
        link_target = self._create_file('targets/real-cover.jpeg')
        link = self.library_dir / 'album' / 'cover.jpeg'
        link.parent.mkdir(parents=True, exist_ok=True)
        os.symlink(link_target, link)

        found = probe_local_assets(track)

        self.assertEqual(found['local_cover'], str(regular_cover.resolve()))

    def _create_file(self, relative_path: str, content: str = '') -> Path:
        """Write ``content`` to ``relative_path`` under the library dir and return the path.

        Missing parent directories are created first.
        """
        target = self.library_dir.joinpath(relative_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding='utf-8')
        return target
if __name__ == '__main__':
unittest.main()
+122
View File
@@ -0,0 +1,122 @@
import os
import tempfile
import time
import unittest
from pathlib import Path
from backend.app.scanner import Scanner
from backend.app.task_store import TaskConflictError, TaskStore
class ScannerTaskStoreTests(unittest.TestCase):
    """Exercise ``Scanner.scan`` and ``TaskStore`` task bookkeeping on temporary directories."""

    def _make_temp_dir(self) -> Path:
        """Create a temporary directory that is deleted when the test finishes.

        The tests used to call ``tempfile.mkdtemp()``, which left every
        directory (and the files and databases inside it) behind after the run.
        """
        temp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(temp_dir.cleanup)
        return Path(temp_dir.name)

    def test_scanner_queues_stable_audio_and_binds_local_assets(self):
        """An audio file with an old mtime is queued together with its cover and lyric."""
        items = []
        root = self._make_temp_dir()
        album_dir = root / 'Artist' / 'Album'
        album_dir.mkdir(parents=True)

        audio_file = album_dir / '01.flac'
        cover_file = album_dir / 'cover.jpeg'
        lyric_file = album_dir / '01.lrc'
        audio_file.write_bytes(b'abc')
        cover_file.write_bytes(b'img')
        lyric_file.write_text('[00:00.00] lyric', encoding='utf-8')

        # Back-date the audio file by two minutes so it is not treated as
        # recently modified (compare the test below, which skips this step).
        stable_timestamp = time.time() - 120
        os.utime(audio_file, (stable_timestamp, stable_timestamp))

        stats = Scanner().scan(str(root), on_item=lambda item: items.append(item.to_dict()))

        self.assertEqual(stats['queued'], 1)
        self.assertEqual(items[0]['scan_status'], 'queued')
        self.assertTrue(items[0]['local_cover'].endswith('cover.jpeg'))
        self.assertTrue(items[0]['local_lyric'].endswith('01.lrc'))

    def test_scanner_skips_recently_modified_audio(self):
        """A freshly written audio file is reported as ``skipped_locked``."""
        items = []
        root = self._make_temp_dir()
        album_dir = root / 'Artist'
        album_dir.mkdir(parents=True)

        audio_file = album_dir / 'new.flac'
        audio_file.write_bytes(b'abc')

        stats = Scanner().scan(str(root), on_item=lambda item: items.append(item.to_dict()))

        self.assertEqual(stats['skipped_locked'], 1)
        self.assertEqual(items[0]['scan_status'], 'skipped_locked')
        self.assertEqual(items[0]['scan_reason'], 'recent_mtime')

    def test_task_store_repairs_stale_tasks_on_startup(self):
        """``fail_stale_active_tasks`` marks a task left in ``running`` as failed."""
        db_path = self._make_temp_dir() / 'scanner_tasks.db'
        store = TaskStore(db_path)
        task = store.create_task_if_idle(
            {
                'input': '/tmp/input',
                'output': '/tmp/output',
                'trash': '/tmp/trash'
            }
        )
        store.update_task(
            task['task_id'],
            status='running',
            current_stage='scan',
            stage_states={
                'scan': 'running',
                'preprocess': 'pending',
                'match': 'pending',
                'dedupe': 'pending',
                'organize': 'pending',
                'complete': 'pending'
            },
            stats={
                'scan': {
                    'total_found': 10,
                    'queued': 8,
                    'skipped_locked': 1,
                    'skipped_invalid': 1,
                    'ignored_non_audio': 5
                },
                'preprocess': {
                    'input_items': 0,
                    'output_items': 0,
                    'split_parents': 0,
                    'generated_children': 0,
                    'converted_items': 0,
                    'metadata_snapshots': 0,
                    'fingerprints_ok': 0,
                    'fingerprints_failed': 0,
                    'failed_items': 0,
                    'warning_items': 0
                }
            }
        )

        repaired_task_ids = store.fail_stale_active_tasks()
        repaired_task = store.get_task(task['task_id'])

        self.assertIn(task['task_id'], repaired_task_ids)
        self.assertEqual(repaired_task['status'], 'failed')
        self.assertEqual(repaired_task['error_message'], 'Service restarted unexpectedly')

    def test_task_store_isolates_active_ingest_and_repair_tasks(self):
        """One default task and one ``repair`` task may be active together; a second repair may not."""
        db_path = self._make_temp_dir() / 'scanner_tasks_repair.db'
        store = TaskStore(db_path)

        ingest = store.create_task_if_idle(
            {'input': '/tmp/input', 'output': '/tmp/output', 'trash': '/tmp/trash'}
        )
        repair = store.create_task_if_idle(
            {'input': '/tmp/input', 'output': '/tmp/output', 'trash': '/tmp/trash'},
            task_type='repair'
        )

        self.assertEqual(store.get_active_task()['task_id'], ingest['task_id'])
        self.assertEqual(store.get_active_task('repair')['task_id'], repair['task_id'])
        with self.assertRaises(TaskConflictError):
            store.create_task_if_idle(
                {'input': '/tmp/input', 'output': '/tmp/output', 'trash': '/tmp/trash'},
                task_type='repair'
            )
if __name__ == '__main__':
unittest.main()
+394
View File
@@ -0,0 +1,394 @@
import os
import tempfile
import unittest
from pathlib import Path
# Point MUSIC_WORKSHOP_DB_PATH at a randomly named database file inside the
# system temp directory.
# NOTE(review): this assignment sits above the backend imports, presumably
# because those modules read the variable at import time; confirm before
# reordering.
# NOTE(review): tempfile._get_candidate_names() is a private, undocumented
# helper of the tempfile module.
os.environ['MUSIC_WORKSHOP_DB_PATH'] = str(
Path(tempfile.gettempdir()) / f'music_workshop_task_api_{next(tempfile._get_candidate_names())}.db'
)
from backend.app.task_store import TaskStore
try:
from backend.app.schemas import TaskHistoryListResponse
import backend.app.main as main_module
except ModuleNotFoundError as error:
main_module = None
TaskHistoryListResponse = None
FASTAPI_IMPORT_ERROR = error
else:
FASTAPI_IMPORT_ERROR = None
class TaskStoreTests(unittest.TestCase):
    """Exercise ``TaskStore`` item filtering and task-history listing.

    Every test works on a store backed by the file named in
    ``MUSIC_WORKSHOP_DB_PATH``; ``setUp`` deletes that file first.
    """

    def setUp(self):
        """Recreate the store on an empty database file and reset the item counter."""
        db_file = Path(os.environ['MUSIC_WORKSHOP_DB_PATH'])
        self.db_path = db_file
        if db_file.exists():
            db_file.unlink()
        self.task_store = TaskStore(db_file)
        self._item_index = 0

    def _new_task(self) -> dict:
        """Create a task with the standard input/output/trash directories."""
        return self.task_store.create_task_if_idle(
            {
                'input': '/tmp/input',
                'output': '/tmp/output',
                'trash': '/tmp/trash',
            }
        )

    def _history_task_ids(self, **paging) -> list:
        """Return the task ids of one ``list_task_history`` page, in listing order."""
        listing = self.task_store.list_task_history(**paging)
        return [entry['task_id'] for entry in listing['items']]

    def _assert_history_counts(self, entry, *, total, success, exception, report_status):
        """Assert the four aggregate fields of one history entry."""
        self.assertEqual(entry['total_items'], total)
        self.assertEqual(entry['success_items'], success)
        self.assertEqual(entry['exception_items'], exception)
        self.assertEqual(entry['report_status'], report_status)

    def test_get_task_items_filters_new_status_fields_and_serializes_postprocess_fields(self):
        """Filtering by match/dedupe/organize status returns only the fully processed item."""
        task_id = self._new_task()['task_id']
        organized_relative_path = 'S/Song Artist/Album Name/01 - Song Title.flac'
        matched_metadata = {
            'title': 'Song Title',
            'artist': 'Song Artist',
            'artists': ['Song Artist'],
            'album': 'Album Name',
            'album_artist': 'Song Artist',
            'track_number': 1,
            'disc_number': 1,
            'release_date': '2024-01-01',
            'year': 2024,
            'duration_seconds': 201,
            'recording_id': 'recording-1',
            'release_id': 'release-1',
            'release_group_id': 'group-1',
            'source_ids': {'musicbrainz_recording_id': 'recording-1'},
        }
        match_candidates = [
            {
                'provider': 'musicbrainz',
                'score': 92.5,
                'score_breakdown': {'title': 20},
                'is_authoritative': True,
                'recording_id': 'recording-1',
                'release_id': 'release-1',
                'release_group_id': 'group-1',
                'source_ids': {'musicbrainz_recording_id': 'recording-1'},
            }
        ]
        match_enrichment = {
            'cover': {'selected_source': None, 'candidates': []},
            'lyrics': {'selected_source': None, 'candidates': []},
            'genres': {'selected_source': None, 'candidates': []},
            'tags': {'selected_source': None, 'candidates': []},
        }
        dedupe_decision = {
            'comparison_scope': 'library',
            'identity_basis': 'recording_id',
            'kept_side': 'batch',
        }
        organize_decision = {
            'source_path': '/tmp/source-1.flac',
            'final_relative_path': organized_relative_path,
            'final_action': 'organized',
        }

        # First item: matched, deduplicated and organized.
        self.task_store.insert_task_item(
            task_id,
            original_path='/tmp/source-1.flac',
            current_file_path='/tmp/source-1.flac',
            relative_path='Artist/Album/01.flac',
            filename='01.flac',
            extension='.flac',
            size_bytes=123,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='matched',
            match_reason='authoritative_auto_match',
            match_message='matched',
            match_source='musicbrainz',
            match_confidence=92.5,
            match_is_authoritative=1,
            matched_metadata_json=matched_metadata,
            match_candidates_json=match_candidates,
            match_enrichment_json=match_enrichment,
            dedupe_status='unique',
            dedupe_reason=None,
            dedupe_message='kept',
            dedupe_group_key='recording-1',
            dedupe_decision_json=dedupe_decision,
            organize_status='organized',
            organize_reason=None,
            organize_message='organized',
            library_relative_path=organized_relative_path,
            library_file_path='/tmp/output/S/Song Artist/Album Name/01 - Song Title.flac',
            trash_file_path=None,
            organize_decision_json=organize_decision,
        )
        # Second item: only reached the match stage, with a low score.
        self.task_store.insert_task_item(
            task_id,
            original_path='/tmp/source-2.flac',
            current_file_path='/tmp/source-2.flac',
            relative_path='Artist/Album/02.flac',
            filename='02.flac',
            extension='.flac',
            size_bytes=123,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            preprocess_status='completed',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='low score',
        )

        listing = self.task_store.list_task_items(
            task_id,
            scan_status=None,
            preprocess_status=None,
            match_status='matched',
            dedupe_status='unique',
            organize_status='organized',
            page=1,
            page_size=10,
            active_only=False,
        )

        self.assertEqual(listing['total'], 1)
        row = listing['items'][0]
        self.assertEqual(row['match_status'], 'matched')
        self.assertEqual(row['match_source'], 'musicbrainz')
        self.assertEqual(row['matched_metadata_json']['release_id'], 'release-1')
        self.assertEqual(row['match_candidates_json'][0]['provider'], 'musicbrainz')
        self.assertEqual(row['dedupe_status'], 'unique')
        self.assertEqual(row['dedupe_decision_json']['identity_basis'], 'recording_id')
        self.assertEqual(row['organize_status'], 'organized')
        self.assertEqual(
            row['organize_decision_json']['final_relative_path'],
            organized_relative_path,
        )

    def test_list_task_history_returns_empty_when_no_terminal_tasks(self):
        """An empty store yields an empty page that echoes the paging arguments."""
        listing = self.task_store.list_task_history(page=1, page_size=8)

        self.assertEqual(listing, {'items': [], 'page': 1, 'page_size': 8, 'total': 0})

    def test_list_task_history_only_returns_completed_and_failed_tasks(self):
        """A task still in ``running`` state is left out of the history."""
        completed = self._create_terminal_task('completed', '2024-01-01T08:00:00Z')
        failed = self._create_terminal_task('failed', '2024-01-02T08:00:00Z')
        still_running = self._new_task()
        self.task_store.update_task(still_running['task_id'], status='running')

        listing = self.task_store.list_task_history(page=1, page_size=10)

        self.assertEqual(listing['total'], 2)
        self.assertEqual(
            [entry['task_id'] for entry in listing['items']],
            [failed['task_id'], completed['task_id']],
        )

    def test_list_task_history_orders_by_started_at_desc(self):
        """History entries come back newest ``started_at`` first."""
        oldest = self._create_terminal_task('completed', '2024-01-01T08:00:00Z')
        middle = self._create_terminal_task('completed', '2024-01-01T12:00:00Z')
        newest = self._create_terminal_task('failed', '2024-01-02T09:30:00Z')

        self.assertEqual(
            self._history_task_ids(page=1, page_size=10),
            [newest['task_id'], middle['task_id'], oldest['task_id']],
        )

    def test_list_task_history_aggregates_counts_and_report_status(self):
        """Per-task totals, success/exception counts and report status are aggregated."""
        # Completed task whose two items all succeeded.
        all_success = self._create_terminal_task('completed', '2024-01-01T08:00:00Z')
        self._insert_history_item(all_success['task_id'])
        self._insert_history_item(all_success['task_id'])

        # Completed task with one good item, one low-score item and one
        # item that failed preprocessing.
        partial_success = self._create_terminal_task('completed', '2024-01-02T08:00:00Z')
        self._insert_history_item(partial_success['task_id'])
        self._insert_history_item(
            partial_success['task_id'],
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配分过低',
            dedupe_status='pending',
            organize_status='pending',
        )
        self._insert_history_item(
            partial_success['task_id'],
            preprocess_status='failed',
            preprocess_reason='convert_failed',
            preprocess_message='音频转码失败',
            match_status='pending',
            dedupe_status='pending',
            organize_status='pending',
        )

        # Failed task whose only item succeeded.
        failed_task = self._create_terminal_task('failed', '2024-01-03T08:00:00Z')
        self._insert_history_item(failed_task['task_id'])

        listing = self.task_store.list_task_history(page=1, page_size=10)
        by_task_id = {entry['task_id']: entry for entry in listing['items']}

        self._assert_history_counts(
            by_task_id[all_success['task_id']],
            total=2, success=2, exception=0, report_status='success',
        )
        self._assert_history_counts(
            by_task_id[partial_success['task_id']],
            total=3, success=1, exception=2, report_status='warning',
        )
        self._assert_history_counts(
            by_task_id[failed_task['task_id']],
            total=1, success=1, exception=0, report_status='warning',
        )

    def test_list_task_history_paginates_results(self):
        """Three tasks split into pages of two and one, with ``total`` reported on both."""
        first = self._create_terminal_task('completed', '2024-01-01T08:00:00Z')
        second = self._create_terminal_task('completed', '2024-01-02T08:00:00Z')
        third = self._create_terminal_task('failed', '2024-01-03T08:00:00Z')

        page_one = self.task_store.list_task_history(page=1, page_size=2)
        page_two = self.task_store.list_task_history(page=2, page_size=2)

        self.assertEqual(page_one['total'], 3)
        self.assertEqual(page_one['page'], 1)
        self.assertEqual(page_one['page_size'], 2)
        self.assertEqual(len(page_one['items']), 2)
        self.assertEqual(
            [entry['task_id'] for entry in page_one['items']],
            [third['task_id'], second['task_id']],
        )
        self.assertEqual(page_two['total'], 3)
        self.assertEqual(page_two['page'], 2)
        self.assertEqual(page_two['page_size'], 2)
        self.assertEqual(
            [entry['task_id'] for entry in page_two['items']],
            [first['task_id']],
        )

    def _create_terminal_task(self, status: str, started_at: str) -> dict:
        """Create a task, move it to ``status`` and pin all its timestamps to ``started_at``.

        The timestamps are written straight to ``task_runs`` so that tests
        control the ordering of the history. Returns the task as re-read from
        the store.
        """
        task_id = self._new_task()['task_id']
        self.task_store.update_task(task_id, status=status, completed_at=started_at)
        timestamps = (started_at, started_at, started_at)
        with self.task_store._connect() as connection:
            connection.execute(
                '''
                UPDATE task_runs
                SET started_at = ?, updated_at = ?, completed_at = ?
                WHERE id = ?
                ''',
                (*timestamps, task_id)
            )
            connection.commit()
        return self.task_store.get_task(task_id)

    def _insert_history_item(self, task_id: str, **overrides) -> dict:
        """Insert an item that passed every stage, with ``overrides`` applied on top.

        A per-test counter gives each item a distinct source path and filename.
        """
        self._item_index += 1
        sequence = self._item_index
        defaults = {
            'original_path': f'/tmp/source-{sequence}.flac',
            'relative_path': f'Artist/Album/{sequence:02d}.flac',
            'filename': f'{sequence:02d}.flac',
            'extension': '.flac',
            'size_bytes': 123,
            'modified_at': '2024-01-01T00:00:00Z',
            'local_cover': None,
            'local_lyric': None,
            'scan_status': 'queued',
            'scan_reason': None,
            'scan_message': None,
            'preprocess_status': 'completed',
            'preprocess_reason': None,
            'preprocess_message': None,
            'match_status': 'matched',
            'match_reason': None,
            'match_message': None,
            'dedupe_status': 'unique',
            'dedupe_reason': None,
            'dedupe_message': '未发现重复项',
            'organize_status': 'organized',
            'organize_reason': None,
            'organize_message': '已按标准路径入库',
        }
        return self.task_store.insert_task_item(task_id, **{**defaults, **overrides})
@unittest.skipIf(main_module is None, f'api deps unavailable: {FASTAPI_IMPORT_ERROR}')
class TaskHistoryApiTests(unittest.TestCase):
    """Check that ``main_module.get_tasks`` forwards paging to the store and returns its payload."""

    def setUp(self):
        """Remember the module-level task store so ``tearDown`` can restore it."""
        self.previous_task_store = main_module.task_store

    def tearDown(self):
        """Put the original task store back on the module."""
        main_module.task_store = self.previous_task_store

    def _fetch(self, canned_response, *, page, page_size):
        """Serve ``canned_response`` from a fake store.

        Returns the validated response together with the fake store.
        """
        fake_store = _FakeTaskStore(canned_response)
        main_module.task_store = fake_store
        raw = main_module.get_tasks(page=page, page_size=page_size)
        return TaskHistoryListResponse.model_validate(raw), fake_store

    def test_get_tasks_returns_paginated_history_payload(self):
        """A two-entry page validates against the schema and keeps its paging fields."""
        canned_response = {
            'items': [
                {
                    'task_id': 'task-2',
                    'started_at': '2024-01-03T12:00:00Z',
                    'status': 'failed',
                    'total_items': 5,
                    'success_items': 3,
                    'exception_items': 2,
                    'report_status': 'warning',
                },
                {
                    'task_id': 'task-1',
                    'started_at': '2024-01-02T12:00:00Z',
                    'status': 'completed',
                    'total_items': 4,
                    'success_items': 4,
                    'exception_items': 0,
                    'report_status': 'success',
                },
            ],
            'page': 2,
            'page_size': 2,
            'total': 7,
        }

        payload, fake_store = self._fetch(canned_response, page=2, page_size=2)

        self.assertEqual(payload.page, 2)
        self.assertEqual(payload.page_size, 2)
        self.assertEqual(payload.total, 7)
        self.assertEqual(payload.items[0].task_id, 'task-2')
        self.assertEqual(payload.items[0].report_status, 'warning')
        self.assertEqual(payload.items[1].report_status, 'success')
        self.assertEqual(fake_store.calls, [{'page': 2, 'page_size': 2}])

    def test_get_tasks_returns_empty_payload(self):
        """An empty history page validates and the store is queried exactly once."""
        payload, fake_store = self._fetch(
            {'items': [], 'page': 1, 'page_size': 8, 'total': 0},
            page=1,
            page_size=8,
        )

        self.assertEqual(payload.items, [])
        self.assertEqual(payload.total, 0)
        self.assertEqual(fake_store.calls, [{'page': 1, 'page_size': 8}])
class _FakeTaskStore:
    """Task-store stand-in that returns one canned history response and logs each call."""

    def __init__(self, response: dict):
        # ``calls`` collects one ``{'page': ..., 'page_size': ...}`` dict per
        # ``list_task_history`` invocation, in call order.
        self.calls: list[dict] = []
        self.response = response

    def list_task_history(self, page: int, page_size: int) -> dict:
        """Record the paging arguments and return the canned response object itself."""
        recorded = {'page': page, 'page_size': page_size}
        self.calls.append(recorded)
        return self.response
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,540 @@
import math
import os
import struct
import tempfile
import unittest
import wave
from pathlib import Path
from unittest.mock import patch
from backend.app.matcher import MatchProviderError
from backend.app.preprocessor import PreprocessDependencyError, Preprocessor
from backend.app.scanner import Scanner
from backend.app.task_runner import TaskRunner
from backend.app.task_store import TaskStore
from backend.app.task_stream import TaskStreamManager
class TaskRunnerPreprocessTests(unittest.TestCase):
    """Pipeline tests that run ``TaskRunner`` against throwaway directories."""

    # ------------------------------------------------------------------
    # Shared fixtures
    # ------------------------------------------------------------------

    def _make_workspace(self):
        """Create a temporary root holding ``input``, ``output`` and ``trash``.

        The root is registered for removal when the test finishes, so repeated
        runs do not leave audio files and databases behind in the system
        temporary directory.

        Returns:
            A ``(root, input_dir, output_dir, trash_dir)`` tuple of paths.
        """
        temp_root = tempfile.TemporaryDirectory()
        self.addCleanup(temp_root.cleanup)
        root = Path(temp_root.name)
        input_dir = root / 'input'
        output_dir = root / 'output'
        trash_dir = root / 'trash'
        for directory in (input_dir, output_dir, trash_dir):
            directory.mkdir()
        return root, input_dir, output_dir, trash_dir

    @staticmethod
    def _config_snapshot(input_dir, output_dir, trash_dir):
        """Build the config dict handed to the task store and the runner."""
        return {
            'input': str(input_dir),
            'output': str(output_dir),
            'trash': str(trash_dir)
        }

    @staticmethod
    def _backdate(path):
        """Shift the file's access and modification times 120 seconds back."""
        # NOTE(review): presumably keeps the scanner from treating the file as
        # still being written — confirm against Scanner.
        stable_timestamp = path.stat().st_mtime - 120
        os.utime(path, (stable_timestamp, stable_timestamp))

    def _make_wave_source(self, source_file):
        """Write an 8-second WAV at ``source_file`` and backdate it."""
        source_file.parent.mkdir(parents=True)
        _write_wave_file(source_file, duration_seconds=8)
        self._backdate(source_file)
        return source_file

    @staticmethod
    def _make_runner(task_store, matcher, preprocessor=None):
        """Build a ``TaskRunner`` wired with fresh collaborators.

        A new ``Preprocessor`` is created unless one is supplied (tests that
        patch the preprocessor need to keep a handle on the instance).
        """
        return TaskRunner(
            task_store,
            Scanner(),
            Preprocessor() if preprocessor is None else preprocessor,
            TaskStreamManager(),
            matcher=matcher
        )

    @staticmethod
    def _insert_item(task_store, task_id, **fields):
        """Insert a task item, filling in the scan fields every test shares.

        ``fields`` override or extend the shared defaults below.
        """
        payload = {
            'modified_at': None,
            'local_cover': None,
            'local_lyric': None,
            'scan_status': 'queued',
            'scan_reason': None,
            'scan_message': None,
            'preprocess_status': 'completed'
        }
        payload.update(fields)
        return task_store.insert_task_item(task_id, **payload)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_task_runner_completes_full_pipeline(self):
        root, input_dir, output_dir, trash_dir = self._make_workspace()
        self._make_wave_source(input_dir / 'Artist' / 'Album' / '01.wav')
        task_store = TaskStore(root / 'music_workshop.db')
        task_runner = self._make_runner(task_store, StaticMatcher())
        config_snapshot = self._config_snapshot(input_dir, output_dir, trash_dir)

        task = task_store.create_task_if_idle(config_snapshot)
        task_runner.start_task(task['task_id'], config_snapshot)

        persisted_task = task_store.get_task(task['task_id'])
        persisted_items = task_store.list_task_items(task['task_id'], None, 1, 20)['items']
        self.assertEqual(persisted_task['status'], 'completed')
        self.assertEqual(persisted_task['current_stage'], 'complete')
        self.assertEqual(persisted_task['stage_states']['preprocess'], 'completed')
        self.assertEqual(persisted_task['stage_states']['match'], 'completed')
        self.assertEqual(persisted_task['stage_states']['dedupe'], 'completed')
        self.assertEqual(persisted_task['stage_states']['organize'], 'completed')
        self.assertEqual(persisted_task['stats']['scan']['queued'], 1)
        self.assertEqual(persisted_task['stats']['preprocess']['input_items'], 1)
        self.assertEqual(persisted_task['stats']['preprocess']['converted_items'], 1)
        self.assertEqual(persisted_task['stats']['preprocess']['output_items'], 1)
        self.assertEqual(persisted_task['stats']['match']['matched_authoritative'], 1)
        self.assertEqual(persisted_task['stats']['dedupe']['kept_items'], 1)
        self.assertEqual(persisted_task['stats']['organize']['moved_items'], 1)
        self.assertEqual(len(persisted_items), 1)

        item = persisted_items[0]
        self.assertEqual(item['preprocess_status'], 'completed')
        self.assertEqual(item['match_status'], 'matched')
        self.assertEqual(item['dedupe_status'], 'unique')
        self.assertEqual(item['organize_status'], 'organized')
        self.assertTrue(item['current_file_path'].endswith('.flac'))
        self.assertTrue(Path(item['current_file_path']).exists())
        self.assertIn(str(output_dir), item['current_file_path'])
        self.assertEqual(item['audio_props_json']['codec'], 'FLAC')
        self.assertTrue(item['acoustic_fingerprint'])
        self.assertEqual(item['matched_metadata_json']['release_id'], 'release-1')
        self.assertEqual(item['library_relative_path'], 'M/Matched Artist/Matched Album/01 - Matched Song.flac')

    def test_task_runner_fails_fast_when_preprocess_dependencies_are_missing(self):
        root, input_dir, output_dir, trash_dir = self._make_workspace()
        source_file = input_dir / 'single.flac'
        source_file.write_bytes(b'not-real-audio')
        self._backdate(source_file)
        task_store = TaskStore(root / 'music_workshop.db')
        preprocessor = Preprocessor()
        task_runner = self._make_runner(task_store, StaticMatcher(), preprocessor)
        config_snapshot = self._config_snapshot(input_dir, output_dir, trash_dir)

        task = task_store.create_task_if_idle(config_snapshot)
        with patch.object(
            preprocessor,
            'check_dependencies',
            side_effect=PreprocessDependencyError('missing preprocess dependencies')
        ):
            task_runner.start_task(task['task_id'], config_snapshot)

        persisted_task = task_store.get_task(task['task_id'])
        self.assertEqual(persisted_task['status'], 'failed')
        self.assertEqual(persisted_task['current_stage'], 'preprocess')
        self.assertEqual(persisted_task['stage_states']['preprocess'], 'failed')
        self.assertEqual(
            persisted_task['error_message'],
            'missing preprocess dependencies'
        )

    def test_task_runner_marks_match_item_failed_when_provider_errors(self):
        root, input_dir, output_dir, trash_dir = self._make_workspace()
        source_file = self._make_wave_source(input_dir / 'Artist' / 'Album' / '01.wav')
        task_store = TaskStore(root / 'music_workshop.db')
        task_runner = self._make_runner(task_store, ErrorMatcher())
        config_snapshot = self._config_snapshot(input_dir, output_dir, trash_dir)

        task = task_store.create_task_if_idle(config_snapshot)
        task_runner.start_task(task['task_id'], config_snapshot)

        persisted_task = task_store.get_task(task['task_id'])
        persisted_items = task_store.list_task_items(task['task_id'], None, 1, 20)['items']
        self.assertEqual(persisted_task['status'], 'completed')
        self.assertEqual(persisted_task['stats']['match']['failed_items'], 1)
        self.assertEqual(persisted_items[0]['match_status'], 'failed')
        self.assertEqual(persisted_items[0]['match_reason'], 'provider_error')
        self.assertFalse(source_file.exists())
        self.assertFalse(persisted_items[0]['is_active'])
        self.assertTrue(Path(persisted_items[0]['current_file_path']).exists())
        self.assertTrue(Path(persisted_items[0]['trash_file_path']).exists())
        self.assertIn('/trash/match_failed/', persisted_items[0]['current_file_path'])

    def test_task_runner_quarantines_historical_exception_before_scan(self):
        root, input_dir, output_dir, trash_dir = self._make_workspace()
        source_file = input_dir / 'failed.flac'
        source_file.write_bytes(b'audio')
        self._backdate(source_file)
        task_store = TaskStore(root / 'music_workshop.db')
        config_snapshot = self._config_snapshot(input_dir, output_dir, trash_dir)

        # Seed a finished earlier task whose only item failed matching.
        previous_task = task_store.create_task_if_idle(config_snapshot)
        previous_item = self._insert_item(
            task_store,
            previous_task['task_id'],
            original_path=str(source_file),
            current_file_path=str(source_file),
            relative_path='failed.flac',
            filename='failed.flac',
            extension='.flac',
            size_bytes=source_file.stat().st_size,
            match_status='failed',
            match_reason='provider_error',
            match_message='provider exploded'
        )
        task_store.update_task(previous_task['task_id'], status='completed', current_stage='complete')

        task = task_store.create_task_if_idle(config_snapshot)
        task_runner = self._make_runner(task_store, StaticMatcher())
        task_runner.start_task(task['task_id'], config_snapshot)

        persisted_task = task_store.get_task(task['task_id'])
        quarantined_item = task_store.get_exception_source_item(previous_item['id'])
        self.assertEqual(persisted_task['status'], 'completed')
        self.assertEqual(persisted_task['stats']['scan']['total_found'], 0)
        self.assertEqual(persisted_task['stats']['scan']['queued'], 0)
        self.assertFalse(source_file.exists())
        self.assertFalse(quarantined_item['is_active'])
        self.assertTrue(Path(quarantined_item['trash_file_path']).exists())
        self.assertIn('/trash/match_failed/', quarantined_item['trash_file_path'])

    def test_task_runner_skips_exception_with_existing_trash_path(self):
        root, input_dir, output_dir, trash_dir = self._make_workspace()
        source_file = input_dir / 'duplicate.flac'
        source_file.write_bytes(b'audio')
        existing_trash = trash_dir / 'duplicates' / 'old-task' / '1_duplicate.flac'
        existing_trash.parent.mkdir(parents=True)
        existing_trash.write_bytes(b'audio')
        task_store = TaskStore(root / 'music_workshop.db')
        config_snapshot = self._config_snapshot(input_dir, output_dir, trash_dir)

        task = task_store.create_task_if_idle(config_snapshot)
        item = self._insert_item(
            task_store,
            task['task_id'],
            original_path=str(source_file),
            current_file_path=str(existing_trash),
            relative_path='duplicate.flac',
            filename='duplicate.flac',
            extension='.flac',
            size_bytes=source_file.stat().st_size,
            match_status='matched',
            dedupe_status='duplicate_trashed',
            trash_file_path=str(existing_trash)
        )
        task_runner = self._make_runner(task_store, StaticMatcher())
        task_runner._quarantine_exception_items(task['task_id'], config_snapshot, scope='current')

        persisted_item = task_store.get_exception_source_item(item['id'])
        self.assertTrue(source_file.exists())
        self.assertTrue(existing_trash.exists())
        self.assertEqual(persisted_item['trash_file_path'], str(existing_trash))

    def test_task_runner_quarantines_converted_exception_and_original_source(self):
        root, input_dir, output_dir, trash_dir = self._make_workspace()
        workspace_dir = root / 'workspace'
        workspace_dir.mkdir()
        source_file = input_dir / 'source.wav'
        converted_file = workspace_dir / 'source.flac'
        source_file.write_bytes(b'wav')
        converted_file.write_bytes(b'flac')
        task_store = TaskStore(root / 'music_workshop.db')
        config_snapshot = self._config_snapshot(input_dir, output_dir, trash_dir)

        task = task_store.create_task_if_idle(config_snapshot)
        item = self._insert_item(
            task_store,
            task['task_id'],
            original_path=str(source_file),
            current_file_path=str(converted_file),
            relative_path='source.wav',
            filename='source.flac',
            extension='.flac',
            size_bytes=converted_file.stat().st_size,
            match_status='low_score',
            match_reason='score_below_threshold',
            match_message='候选分数不足'
        )
        task_runner = self._make_runner(task_store, StaticMatcher())
        task_runner._quarantine_exception_items(task['task_id'], config_snapshot, scope='current')

        quarantined_item = task_store.get_exception_source_item(item['id'])
        moved_paths = sorted((trash_dir / 'low_score' / task['task_id']).glob('*'))
        self.assertFalse(source_file.exists())
        self.assertFalse(converted_file.exists())
        self.assertEqual(len(moved_paths), 2)
        self.assertFalse(quarantined_item['is_active'])
        self.assertTrue(Path(quarantined_item['current_file_path']).exists())
        self.assertTrue(Path(quarantined_item['trash_file_path']).exists())
        self.assertEqual(
            Path(quarantined_item['current_file_path']).name,
            f'{item["id"]}_source.flac'
        )

    def test_task_runner_logs_provider_warnings_and_continues_matching(self):
        root, input_dir, output_dir, trash_dir = self._make_workspace()
        self._make_wave_source(input_dir / 'Artist' / 'Album' / '01.wav')
        task_store = TaskStore(root / 'music_workshop.db')
        task_runner = self._make_runner(task_store, WarningMatcher())
        config_snapshot = self._config_snapshot(input_dir, output_dir, trash_dir)

        task = task_store.create_task_if_idle(config_snapshot)
        task_runner.start_task(task['task_id'], config_snapshot)

        persisted_task = task_store.get_task(task['task_id'])
        persisted_items = task_store.list_task_items(task['task_id'], None, 1, 20)['items']
        persisted_logs = task_store.list_task_logs(task['task_id'], 1, 50)['logs']
        self.assertEqual(persisted_task['status'], 'completed')
        self.assertEqual(persisted_task['stats']['match']['matched_fallback'], 1)
        self.assertEqual(persisted_task['stats']['match']['provider_warnings'], 2)
        self.assertEqual(persisted_task['stats']['match']['failed_items'], 0)
        self.assertEqual(persisted_items[0]['match_status'], 'matched_fallback')
        self.assertEqual(
            [log['event_type'] for log in persisted_logs if log['event_type'] == 'match.provider_skipped'],
            ['match.provider_skipped', 'match.provider_skipped']
        )
class StaticMatcher:
    """Matcher stub that always reports the same authoritative MusicBrainz match."""

    def match_item(self, item, album_group, config):
        """Return a freshly built, fixed match result; all arguments are ignored."""
        provider = 'musicbrainz'
        score = 93.5
        identifiers = {
            'recording_id': 'recording-1',
            'release_id': 'release-1',
            'release_group_id': 'release-group-1'
        }

        metadata = {
            'title': 'Matched Song',
            'artist': 'Matched Artist',
            'artists': ['Matched Artist'],
            'album': 'Matched Album',
            'album_artist': 'Matched Artist',
            'track_number': 1,
            'disc_number': 1,
            'release_date': '2024-01-01',
            'year': 2024,
            'duration_seconds': 8.0
        }
        metadata.update(identifiers)
        metadata['source_ids'] = {'musicbrainz_recording_id': 'recording-1'}

        candidate = {
            'provider': provider,
            'score': score,
            'score_breakdown': {'title': 20},
            'is_authoritative': True
        }
        candidate.update(identifiers)
        candidate['source_ids'] = {'musicbrainz_recording_id': 'recording-1'}

        # One independent empty enrichment slot per category.
        enrichment = {
            category: {'selected_source': None, 'candidates': []}
            for category in ('cover', 'lyrics', 'genres', 'tags')
        }

        return {
            'status': 'matched',
            'reason': 'authoritative_auto_match',
            'message': '静态测试匹配成功',
            'source': provider,
            'confidence': score,
            'is_authoritative': True,
            'matched_metadata_json': metadata,
            'match_candidates_json': [candidate],
            'match_enrichment_json': enrichment,
            'provider_warnings': []
        }
class ErrorMatcher:
    """Matcher stub whose lookups always fail with a provider error."""

    def match_item(self, item, album_group, config):
        """Raise ``MatchProviderError`` unconditionally; arguments are ignored."""
        failure = MatchProviderError('musicbrainz', 'provider exploded')
        raise failure
class WarningMatcher:
    """Matcher stub returning a fixed fallback match plus two provider warnings."""

    def match_item(self, item, album_group, config):
        """Return a freshly built, fixed fallback result; all arguments are ignored."""
        provider = 'qq'
        score = 88.0
        # The fallback result carries no recording/release identifiers.
        blank_identifiers = dict.fromkeys(
            ('recording_id', 'release_id', 'release_group_id')
        )

        metadata = {
            'title': 'Matched Song',
            'artist': 'Matched Artist',
            'artists': ['Matched Artist'],
            'album': 'Matched Album',
            'album_artist': 'Matched Artist',
            'track_number': 1,
            'disc_number': 1,
            'release_date': '2024-01-01',
            'year': 2024,
            'duration_seconds': 8.0
        }
        metadata.update(blank_identifiers)
        metadata['source_ids'] = {'qq_song_mid': 'song-1'}

        candidate = {
            'provider': provider,
            'score': score,
            'score_breakdown': {'title': 20},
            'is_authoritative': False
        }
        candidate.update(blank_identifiers)
        candidate['source_ids'] = {'qq_song_mid': 'song-1'}

        # One independent empty enrichment slot per category.
        enrichment = {
            category: {'selected_source': None, 'candidates': []}
            for category in ('cover', 'lyrics', 'genres', 'tags')
        }

        warnings = [
            {
                'provider': 'acoustid',
                'message': 'acoustid 请求失败 (HTTP 400) {"error":{"code":4,"message":"invalid API key"}}'
            },
            {
                'provider': 'spotify',
                'message': 'spotify 请求失败 (HTTP 403) Active premium subscription required for the owner of the app.'
            }
        ]

        return {
            'status': 'matched_fallback',
            'reason': 'fallback_auto_match',
            'message': 'Fallback 候选自动匹配成功,得分 88.0',
            'source': provider,
            'confidence': score,
            'is_authoritative': False,
            'matched_metadata_json': metadata,
            'match_candidates_json': [candidate],
            'match_enrichment_json': enrichment,
            'provider_warnings': warnings
        }
def _write_wave_file(path: Path, *, duration_seconds: int):
    """Write a mono, 16-bit, 44.1 kHz WAV file containing a 440 Hz sine tone.

    Args:
        path: Destination file; it is opened for writing in binary mode.
        duration_seconds: Length of the generated tone in whole seconds.
    """
    sample_rate = 44100
    frequency = 440.0
    amplitude = 16000
    frame_count = sample_rate * duration_seconds
    # Same left-to-right product as ``2.0 * math.pi * frequency * index``.
    angular_rate = 2.0 * math.pi * frequency
    pack_sample = struct.Struct('<h').pack  # little-endian signed 16-bit

    with wave.open(str(path), 'wb') as writer:
        writer.setnchannels(1)
        writer.setsampwidth(2)
        writer.setframerate(sample_rate)
        payload = b''.join(
            pack_sample(int(amplitude * math.sin((angular_rate * position) / sample_rate)))
            for position in range(frame_count)
        )
        writer.writeframes(payload)
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()