Add MusicWorkshop application

This commit is contained in:
liumangmang
2026-04-30 14:34:28 +08:00
parent 4cb403c956
commit 796f19990f
62 changed files with 21614 additions and 2168 deletions
+440
View File
@@ -0,0 +1,440 @@
import os
import tempfile
import unittest
import uuid
from pathlib import Path
# Point the backend at a throwaway, per-run database file in the system temp
# directory. This is set before the backend imports that follow -- presumably
# because they read the variable at import time (NOTE(review): confirm).
# uuid4 supplies the unique suffix instead of the private, undocumented
# ``tempfile._get_candidate_names`` helper.
os.environ['MUSIC_WORKSHOP_DB_PATH'] = str(
    Path(tempfile.gettempdir()) / f'music_workshop_exception_service_{uuid.uuid4().hex}.db'
)
from backend.app.exception_service import ExceptionItemNotFoundError, ExceptionService
from backend.app.task_store import TaskStore
class ExceptionServiceTests(unittest.TestCase):
    """Tests for ``ExceptionService`` running against a fresh ``TaskStore``.

    Every test starts from an empty database file (path taken from the
    ``MUSIC_WORKSHOP_DB_PATH`` environment variable) containing a single task,
    and inserts the task items it needs through ``_insert_item``.
    """

    def setUp(self):
        """Recreate the database, the service under test and one task."""
        self.db_path = Path(os.environ['MUSIC_WORKSHOP_DB_PATH'])
        if self.db_path.exists():
            self.db_path.unlink()
        # Without this the uniquely named database file of every run would be
        # left behind in the temp directory.
        self.addCleanup(self._discard_database)
        self.task_store = TaskStore(self.db_path)
        self.service = ExceptionService(self.task_store)
        self.task = self.task_store.create_task_if_idle(
            {
                'input': '/tmp/input',
                'output': '/tmp/output',
                'trash': '/tmp/trash'
            }
        )

    def _discard_database(self):
        """Best-effort removal of the database file after a test."""
        try:
            self.db_path.unlink()
        except OSError:
            # Deliberately ignored: the file may already be gone or still be
            # held open, and cleanup must not turn a passing test into an error.
            pass

    def test_empty_summary_list_and_detail_not_found(self):
        """With no items: zeroed summary, empty page, and a not-found error."""
        summary = self.service.get_summary()
        self.assertEqual(summary['total'], 0)
        self.assertEqual(
            summary['counts_by_type'],
            {
                'missing_tags': 0,
                'duplicates': 0,
                'match_failed': 0,
                'low_score': 0,
                'convert_failed': 0,
                'organize_failed': 0
            }
        )
        page = self.service.get_items()
        self.assertEqual(page['items'], [])
        self.assertEqual(page['total'], 0)
        with self.assertRaises(ExceptionItemNotFoundError):
            self.service.get_item(9999)

    def test_maps_exception_types_and_applies_priority(self):
        """Each stage status maps to its exception type; organize wins ties.

        ``priority.flac`` carries failing statuses for several stages at once
        and is expected to be reported as an ``organize`` stage exception.
        """
        missing_tags = self._insert_item(
            filename='missing-tags.flac',
            preprocess_status='warning',
            preprocess_reason='cover_missing,metadata_failed',
            preprocess_message='无法提取有效元数据'
        )
        low_score = self._insert_item(
            filename='low-score.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Likely Match'}
        )
        match_failed = self._insert_item(
            filename='match-failed.flac',
            match_status='not_found',
            match_reason='no_candidate',
            match_message='MusicBrainz 查无此曲'
        )
        convert_failed = self._insert_item(
            filename='convert-failed.flac',
            preprocess_status='failed',
            preprocess_reason='convert_failed',
            preprocess_message='音频转码失败'
        )
        duplicate = self._insert_item(
            filename='duplicate.flac',
            dedupe_status='duplicate_trashed',
            dedupe_reason='library_duplicate',
            dedupe_message='输出库中已存在重复文件,保留库内文件',
            duplicate_of_path='/tmp/output/Artist/Old.flac',
            dedupe_decision_json={
                'comparison_scope': 'library',
                'identity_basis': 'recording_id',
                'compared_candidates': [
                    {'side': 'kept', 'path': '/tmp/output/Artist/Old.flac'},
                    {'side': 'trashed', 'path': '/tmp/input/duplicate.flac'}
                ]
            },
            trash_file_path='/tmp/trash/duplicates/task-1/duplicate.flac'
        )
        organize_failed = self._insert_item(
            filename='organize-failed.flac',
            organize_status='failed',
            organize_reason='target_conflict',
            organize_message='整理入库失败'
        )
        priority_item = self._insert_item(
            filename='priority.flac',
            preprocess_status='failed',
            preprocess_reason='convert_failed',
            preprocess_message='音频转码失败',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            dedupe_status='failed',
            dedupe_reason='trash_move_failed',
            dedupe_message='重复检测失败',
            organize_status='trashed',
            organize_reason='manual_review',
            organize_message='已移入回收站等待人工处理'
        )

        summary = self.service.get_summary()
        self.assertEqual(summary['total'], 7)
        self.assertEqual(summary['counts_by_type']['missing_tags'], 1)
        self.assertEqual(summary['counts_by_type']['duplicates'], 1)
        self.assertEqual(summary['counts_by_type']['match_failed'], 1)
        self.assertEqual(summary['counts_by_type']['low_score'], 1)
        self.assertEqual(summary['counts_by_type']['convert_failed'], 1)
        # organize-failed.flac plus priority.flac.
        self.assertEqual(summary['counts_by_type']['organize_failed'], 2)

        items = self.service.get_items(page_size=20)['items']
        indexed = {item['filename']: item for item in items}
        self.assertEqual(indexed['missing-tags.flac']['exception_type'], 'missing_tags')
        self.assertEqual(
            indexed['missing-tags.flac']['available_actions'],
            ['retry_match', 'edit_metadata', 'save_and_organize', 'ignore_exception', 'delete_file']
        )
        self.assertEqual(indexed['low-score.flac']['display_title'], 'Likely Match')
        self.assertEqual(indexed['low-score.flac']['exception_type'], 'low_score')
        self.assertFalse(indexed['low-score.flac']['can_ingest'])
        self.assertEqual(indexed['low-score.flac']['workflow_state'], 'open')
        self.assertEqual(indexed['match-failed.flac']['exception_type'], 'match_failed')
        self.assertEqual(indexed['convert-failed.flac']['exception_type'], 'convert_failed')
        self.assertEqual(indexed['duplicate.flac']['exception_type'], 'duplicates')
        self.assertEqual(indexed['organize-failed.flac']['exception_type'], 'organize_failed')
        self.assertEqual(indexed['priority.flac']['exception_type'], 'organize_failed')
        self.assertEqual(indexed['priority.flac']['exception_stage'], 'organize')
        self.assertEqual(indexed['priority.flac']['exception_reason_code'], 'manual_review')
        self.assertEqual(
            indexed['priority.flac']['available_actions'],
            ['edit_target_path', 'move_to_review_trash', 'ignore_exception', 'delete_file']
        )

        # The exception id exposed by the service is the task item's own id.
        self.assertEqual(missing_tags['id'], indexed['missing-tags.flac']['exception_id'])
        self.assertEqual(low_score['id'], indexed['low-score.flac']['exception_id'])
        self.assertEqual(match_failed['id'], indexed['match-failed.flac']['exception_id'])
        self.assertEqual(convert_failed['id'], indexed['convert-failed.flac']['exception_id'])
        self.assertEqual(duplicate['id'], indexed['duplicate.flac']['exception_id'])
        self.assertEqual(organize_failed['id'], indexed['organize-failed.flac']['exception_id'])
        self.assertEqual(priority_item['id'], indexed['priority.flac']['exception_id'])

    def test_duplicate_detail_preserves_comparison_data(self):
        """Detail of a duplicate keeps its paths and dedupe decision payload."""
        duplicate = self._insert_item(
            filename='duplicate.flac',
            dedupe_status='duplicate_trashed',
            dedupe_reason='library_duplicate',
            dedupe_message='输出库中已存在重复文件,保留库内文件',
            duplicate_of_path='/tmp/output/Artist/Old.flac',
            dedupe_decision_json={
                'comparison_scope': 'library',
                'identity_basis': 'recording_id',
                'quality_breakdown': {
                    'kept': {'total': 88.0},
                    'trashed': {'total': 72.0}
                },
                'compared_candidates': [
                    {'side': 'kept', 'path': '/tmp/output/Artist/Old.flac', 'quality_score': 88.0},
                    {'side': 'trashed', 'path': '/tmp/input/duplicate.flac', 'quality_score': 72.0}
                ]
            },
            trash_file_path='/tmp/trash/duplicates/task-1/duplicate.flac'
        )
        detail = self.service.get_item(duplicate['id'])
        self.assertEqual(detail['exception_type'], 'duplicates')
        self.assertEqual(detail['duplicate_of_path'], '/tmp/output/Artist/Old.flac')
        self.assertEqual(detail['trash_file_path'], '/tmp/trash/duplicates/task-1/duplicate.flac')
        self.assertEqual(detail['dedupe_decision_json']['comparison_scope'], 'library')
        self.assertEqual(len(detail['dedupe_decision_json']['compared_candidates']), 2)
        self.assertFalse(detail['preview_available'])

    def test_resolution_filter_hides_resolved_by_default(self):
        """Default listing omits resolved items; the filter returns only them."""
        resolved_item = self._insert_item(
            filename='resolved.flac',
            match_status='not_found',
            match_reason='no_candidate',
            match_message='未找到匹配',
            exception_resolution_status='resolved',
            exception_resolution_json={
                'before_snapshot': {
                    'exception_type': 'match_failed',
                    'exception_stage': 'match',
                    'exception_reason_code': 'no_candidate',
                    'exception_message': '未找到匹配'
                }
            }
        )
        open_item = self._insert_item(
            filename='open.flac',
            match_status='failed',
            match_reason='provider_error',
            match_message='匹配失败'
        )
        open_page = self.service.get_items()
        resolved_page = self.service.get_items(resolution_status='resolved')
        self.assertEqual([item['exception_id'] for item in open_page['items']], [open_item['id']])
        self.assertEqual([item['exception_id'] for item in resolved_page['items']], [resolved_item['id']])

    def test_candidate_selected_item_remains_open_and_pending_ingest(self):
        """A manually selected candidate stays listed and is ready to ingest."""
        item = self._insert_item(
            filename='candidate-selected.flac',
            match_status='matched_fallback',
            match_reason='manual_candidate_selected',
            match_message='已手动确认匹配候选',
            matched_metadata_json={'title': 'Song', 'artist': 'Artist', 'album_artist': 'Artist'},
            exception_resolution_json={
                'workflow_state': 'candidate_selected',
                'metadata_draft': {'title': 'Song', 'artist': 'Artist', 'album_artist': 'Artist'},
                'before_snapshot': {
                    'exception_type': 'low_score',
                    'exception_stage': 'match',
                    'exception_reason_code': 'score_gap_too_small',
                    'exception_message': '匹配候选分数过低'
                }
            }
        )
        open_page = self.service.get_items()
        indexed = {row['filename']: row for row in open_page['items']}
        detail = indexed['candidate-selected.flac']
        self.assertEqual(item['id'], detail['exception_id'])
        self.assertEqual(detail['workflow_state'], 'ready_to_ingest')
        self.assertTrue(detail['pending_ingest'])
        self.assertTrue(detail['can_ingest'])
        # The type is expected to match the one stored in ``before_snapshot``.
        self.assertEqual(detail['exception_type'], 'low_score')
        self.assertIn('save_and_organize', detail['available_actions'])

    def test_effective_metadata_derives_album_artist_for_ingest(self):
        """A missing album artist is derived, making the item ingestable.

        Two items share ``Album X``; the one credited to
        ``Artist A feat. Guest`` must resolve to album artist ``Artist A``.
        """
        item = self._insert_item(
            filename='derived-album-artist.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={
                'title': 'Song',
                'artist': 'Artist A feat. Guest',
                'album': 'Album X'
            }
        )
        self._insert_item(
            filename='derived-album-artist-2.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={
                'title': 'Song 2',
                'artist': 'Artist A',
                'album': 'Album X'
            }
        )
        detail = self.service.get_item(item['id'])
        self.assertEqual(detail['effective_metadata']['album_artist'], 'Artist A')
        self.assertEqual(detail['normalization_strategy'], 'main_artist_feat')
        self.assertTrue(detail['can_ingest'])
        self.assertEqual(detail['workflow_state'], 'ready_to_ingest')

    def test_filters_and_paginates_by_captured_at_desc(self):
        """Items page newest-first and can be filtered by exception type.

        The ordering is controlled here through the ``updated_at`` column,
        which ``_set_updated_at`` overwrites for each inserted item.
        """
        older_duplicate = self._insert_item(
            filename='older-duplicate.flac',
            dedupe_status='duplicate_trashed',
            dedupe_reason='library_duplicate',
            dedupe_message='重复文件'
        )
        newest_match_failed = self._insert_item(
            filename='newest-match-failed.flac',
            match_status='failed',
            match_reason='provider_error',
            match_message='匹配服务请求失败'
        )
        middle_convert_failed = self._insert_item(
            filename='middle-convert-failed.flac',
            preprocess_status='failed',
            preprocess_reason='convert_failed',
            preprocess_message='音频转码失败'
        )
        self._set_updated_at(older_duplicate['id'], '2024-01-01T00:00:00Z')
        self._set_updated_at(middle_convert_failed['id'], '2024-01-02T00:00:00Z')
        self._set_updated_at(newest_match_failed['id'], '2024-01-03T00:00:00Z')

        first_page = self.service.get_items(page=1, page_size=2)
        second_page = self.service.get_items(page=2, page_size=2)
        duplicate_page = self.service.get_items('duplicates', page=1, page_size=10)

        self.assertEqual(first_page['total'], 3)
        self.assertEqual(
            [item['filename'] for item in first_page['items']],
            ['newest-match-failed.flac', 'middle-convert-failed.flac']
        )
        self.assertEqual([item['filename'] for item in second_page['items']], ['older-duplicate.flac'])
        self.assertEqual(duplicate_page['total'], 1)
        self.assertEqual(duplicate_page['items'][0]['filename'], 'older-duplicate.flac')

    def test_summary_counts_without_triggering_metadata_normalization(self):
        """``get_summary`` must count items without calling the normalizer."""
        self._insert_item(
            filename='low-score.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Song'}
        )
        self._insert_item(
            filename='match-failed.flac',
            match_status='failed',
            match_reason='provider_error',
            match_message='匹配服务请求失败'
        )

        def fail_normalize(*args, **kwargs):
            raise AssertionError('get_summary should not normalize metadata')

        # Any normalization attempt now fails the test immediately.
        self.service.metadata_normalizer.normalize_item = fail_normalize
        summary = self.service.get_summary()
        self.assertEqual(summary['total'], 2)
        self.assertEqual(summary['counts_by_type']['low_score'], 1)
        self.assertEqual(summary['counts_by_type']['match_failed'], 1)

    def test_get_items_only_normalizes_current_page(self):
        """Only the item on the requested page is passed to the normalizer."""
        first_item = self._insert_item(
            filename='page-1.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Page 1', 'artist': 'Artist A', 'album': 'Album X'}
        )
        second_item = self._insert_item(
            filename='page-2.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Page 2', 'artist': 'Artist A', 'album': 'Album X'}
        )
        third_item = self._insert_item(
            filename='page-3.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Page 3', 'artist': 'Artist A', 'album': 'Album X'}
        )
        self._set_updated_at(first_item['id'], '2030-01-03T00:00:00Z')
        self._set_updated_at(second_item['id'], '2030-01-02T00:00:00Z')
        self._set_updated_at(third_item['id'], '2030-01-01T00:00:00Z')

        calls = []
        original_normalize_item = self.service.metadata_normalizer.normalize_item

        def tracked_normalize(item, metadata_patch=None, cache=None):
            # Record which item ids get normalized, then delegate unchanged.
            calls.append(item['id'])
            return original_normalize_item(item, metadata_patch, cache)

        self.service.metadata_normalizer.normalize_item = tracked_normalize
        page = self.service.get_items(page=1, page_size=1)
        self.assertEqual(page['total'], 3)
        self.assertEqual([item['filename'] for item in page['items']], ['page-1.flac'])
        self.assertEqual(calls, [first_item['id']])

    def test_get_items_reuses_task_level_normalization_cache_within_page(self):
        """One page of same-task items loads the task's items only once."""
        list_all_calls = []
        original_list_all_task_items = self.task_store.list_all_task_items

        def tracked_list_all_task_items(task_id, active_only=True):
            # Count the calls while preserving the wrapped method's behaviour.
            list_all_calls.append((task_id, active_only))
            return original_list_all_task_items(task_id, active_only=active_only)

        self.task_store.list_all_task_items = tracked_list_all_task_items
        self._insert_item(
            filename='shared-1.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Shared 1', 'artist': 'Artist A feat. Guest', 'album': 'Album X'}
        )
        self._insert_item(
            filename='shared-2.flac',
            match_status='low_score',
            match_reason='score_gap_too_small',
            match_message='匹配候选分数过低',
            matched_metadata_json={'title': 'Shared 2', 'artist': 'Artist A', 'album': 'Album X'}
        )
        page = self.service.get_items(page=1, page_size=2)
        self.assertEqual(len(page['items']), 2)
        self.assertEqual(len(list_all_calls), 1)
        self.assertTrue(all(item['can_ingest'] for item in page['items']))

    def _insert_item(self, **overrides):
        """Insert a task item for ``self.task`` and return the store's result.

        Args:
            **overrides: Extra keyword arguments forwarded to
                ``TaskStore.insert_task_item``. ``filename`` is consumed here
                and also used to build the path fields; when omitted a unique
                ``item-<hex>.flac`` name is generated.

        Returns:
            Whatever ``insert_task_item`` returns; callers read ``['id']``.
        """
        # uuid4 replaces the private ``tempfile._get_candidate_names`` helper.
        filename = overrides.pop('filename', f'item-{uuid.uuid4().hex}.flac')
        extension = Path(filename).suffix or '.flac'
        return self.task_store.insert_task_item(
            self.task['task_id'],
            original_path=f'/tmp/input/{filename}',
            current_file_path=f'/tmp/input/{filename}',
            relative_path=f'Artist/Album/{filename}',
            filename=filename,
            extension=extension,
            size_bytes=123456,
            modified_at='2024-01-01T00:00:00Z',
            local_cover=None,
            local_lyric=None,
            scan_status='queued',
            scan_reason=None,
            scan_message=None,
            **overrides
        )

    def _set_updated_at(self, item_id: int, timestamp: str) -> None:
        """Overwrite ``updated_at`` of one ``task_items`` row.

        Args:
            item_id: Primary key of the row to update.
            timestamp: Value stored verbatim in the ``updated_at`` column.
        """
        # NOTE(review): relies on the store's private ``_connect``; whether the
        # context manager also closes the connection is not visible from here.
        with self.task_store._connect() as connection:
            connection.execute(
                'UPDATE task_items SET updated_at = ? WHERE id = ?',
                (timestamp, item_id)
            )
            connection.commit()
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()