Add MusicWorkshop application
This commit is contained in:
@@ -0,0 +1,397 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .metadata_normalization import (
|
||||
MetadataNormalizationService,
|
||||
can_ingest_metadata,
|
||||
merge_metadata_layers,
|
||||
normalize_metadata_shape
|
||||
)
|
||||
from .task_constants import current_timestamp
|
||||
|
||||
|
||||
# Display label for every supported exception type; the label is exposed as
# ``type_label`` on each built exception item.
EXCEPTION_TYPE_LABELS = dict(
    missing_tags='元数据缺失',
    duplicates='文件重复',
    match_failed='匹配失败',
    low_score='匹配分过低',
    convert_failed='转码失败',
    organize_failed='入库失败',
)

# Supported exception type keys, in declaration order.
EXCEPTION_TYPES = tuple(EXCEPTION_TYPE_LABELS)

# Action list for items that can no longer be acted on (resolved / ignored),
# and the fallback for exception types without an entry in ACTION_RULES.
READ_ONLY_ACTIONS: list[str] = []

# Actions offered per exception type before any per-item filtering.
ACTION_RULES = {
    'missing_tags': [
        'retry_match',
        'edit_metadata',
        'save_and_organize',
        'ignore_exception',
        'delete_file',
    ],
    'match_failed': [
        'retry_match',
        'edit_metadata',
        'save_and_organize',
        'ignore_exception',
        'delete_file',
    ],
    'low_score': [
        'retry_match',
        'select_match_candidate',
        'edit_metadata',
        'save_and_organize',
        'ignore_exception',
        'delete_file',
    ],
    'duplicates': [
        'keep_existing',
        'replace_existing',
        'keep_both_with_rename',
        'ignore_exception',
        'delete_file',
    ],
    'convert_failed': [
        'retry_preprocess',
        'move_to_review_trash',
        'ignore_exception',
        'delete_file',
    ],
    'organize_failed': [
        'retry_organize',
        'edit_target_path',
        'move_to_review_trash',
        'ignore_exception',
        'delete_file',
    ],
}
|
||||
|
||||
|
||||
class ExceptionItemNotFoundError(Exception):
    """Raised when no exception item can be produced for a requested id."""
|
||||
|
||||
|
||||
class ExceptionService:
|
||||
def __init__(self, task_store):
    """Keep the task store and build a metadata normalizer on top of it.

    Args:
        task_store: Store object queried for exception source items.
    """
    self.task_store, self.metadata_normalizer = (
        task_store,
        MetadataNormalizationService(task_store),
    )
|
||||
|
||||
def get_summary(self) -> dict:
    """Count exception items per type across every listed source item.

    Source items are fetched with the ``'all'`` filter; items that cannot
    be classified as an exception are not counted.

    Returns:
        A dict with ``total``, ``counts_by_type`` (one entry per supported
        type, zero when absent) and ``scanned_at``.
    """
    tallies = dict.fromkeys(EXCEPTION_TYPES, 0)

    for record in self.task_store.list_exception_source_items('all'):
        identified = self._identify_exception_item(record)
        if identified is not None:
            tallies[identified['exception_type']] += 1

    return {
        # Each classified item bumps exactly one bucket, so the sum of the
        # buckets equals the number of classified items.
        'total': sum(tallies.values()),
        'counts_by_type': tallies,
        'scanned_at': current_timestamp(),
    }
|
||||
|
||||
def get_items(
    self,
    exception_type: str = 'all',
    page: int = 1,
    page_size: int = 50,
    resolution_status: str = 'open'
) -> dict:
    """Return one page of exception items, optionally filtered by type.

    Args:
        exception_type: ``'all'`` or one of the supported exception types.
        page: 1-based page number.
        page_size: Maximum number of items on the page (0 yields no items
            but still reports ``total``).
        resolution_status: Filter passed to the task store when listing
            source items.

    Returns:
        A dict with ``items``, ``page``, ``page_size`` and ``total`` (the
        number of matching items before paging).

    Raises:
        ValueError: If ``exception_type`` is unsupported, ``page`` is below
            1, or ``page_size`` is negative.
    """
    if exception_type != 'all' and exception_type not in EXCEPTION_TYPES:
        raise ValueError(f'Unsupported exception type: {exception_type}')
    # A non-positive page or a negative page size would produce negative
    # slice bounds, which silently select rows from the END of the list.
    if page < 1:
        raise ValueError(f'page must be >= 1: {page}')
    if page_size < 0:
        raise ValueError(f'page_size must be >= 0: {page_size}')

    indexed_items = []
    for source_item in self.task_store.list_exception_source_items(resolution_status):
        exception_index = self._identify_exception_item(source_item)
        if exception_index is None:
            continue
        if exception_type != 'all' and exception_index['exception_type'] != exception_type:
            continue
        indexed_items.append(exception_index)

    offset = (page - 1) * page_size
    page_indexes = indexed_items[offset:offset + page_size]
    # One cache shared by every item on the page.
    normalization_cache = self.metadata_normalizer.create_cache()

    return {
        'items': [
            self._build_exception_item(
                page_index['source_item'],
                include_detail=False,
                exception_index=page_index,
                normalization_cache=normalization_cache
            )
            for page_index in page_indexes
        ],
        'page': page,
        'page_size': page_size,
        'total': len(indexed_items)
    }
|
||||
|
||||
def get_item(self, exception_id: int) -> dict:
    """Return the detailed exception item for ``exception_id``.

    Raises:
        ExceptionItemNotFoundError: If the store has no such source item,
            or the source item cannot be classified as an exception.
    """
    record = self.task_store.get_exception_source_item(exception_id)
    detail = None
    if record is not None:
        detail = self._build_exception_item(record, include_detail=True)

    if detail is None:
        raise ExceptionItemNotFoundError(exception_id)
    return detail
|
||||
|
||||
def resolve_audio_path(self, exception_id: int) -> Path:
    """Locate an existing audio file for an exception item.

    The item's ``current_file_path`` is tried first, then its
    ``trash_file_path``; the first one that is an existing regular file
    is returned.

    Raises:
        FileNotFoundError: If neither path points to an existing file.
    """
    item = self.get_item(exception_id)

    for field in ('current_file_path', 'trash_file_path'):
        location = item.get(field)
        if location:
            path = Path(location)
            # is_file() is already False for paths that do not exist.
            if path.is_file():
                return path

    raise FileNotFoundError(f'No playable audio found for exception item: {exception_id}')
|
||||
|
||||
def _identify_exception_item(self, source_item: dict) -> dict | None:
|
||||
resolution = source_item.get('exception_resolution_json') or {}
|
||||
exception_state = self._resolve_exception_state(source_item)
|
||||
if exception_state is None:
|
||||
exception_state = self._resolve_from_resolution_snapshot(source_item)
|
||||
if exception_state is None:
|
||||
return None
|
||||
|
||||
exception_type, exception_stage, exception_reason_code, exception_message = exception_state
|
||||
return {
|
||||
'source_item': source_item,
|
||||
'resolution': resolution,
|
||||
'exception_type': exception_type,
|
||||
'exception_stage': exception_stage,
|
||||
'exception_reason_code': exception_reason_code,
|
||||
'exception_message': exception_message,
|
||||
'display_reason': exception_message or self._default_reason(exception_type)
|
||||
}
|
||||
|
||||
def _build_exception_item(
    self,
    source_item: dict,
    *,
    include_detail: bool,
    exception_index: dict | None = None,
    normalization_cache: dict[str, dict[Any, Any]] | None = None
) -> dict | None:
    """Assemble the full payload describing one exception item.

    Args:
        source_item: Raw source item row.
        include_detail: When true, the preprocess/match/organize detail
            blobs are added to the payload.
        exception_index: Previously computed index for this item; it is
            computed here when omitted.
        normalization_cache: Optional cache handed to the metadata
            normalizer.

    Returns:
        The payload dict, or ``None`` when the item is not an exception.
    """
    index = exception_index or self._identify_exception_item(source_item)
    if index is None:
        return None

    resolution = index['resolution']
    raw_metadata = self._normalize_metadata(source_item.get('original_tags_json'))
    matched_metadata = self._normalize_metadata(source_item.get('matched_metadata_json'))
    metadata_draft = self._normalize_metadata(resolution.get('metadata_draft'))
    effective_metadata = self._build_effective_metadata(
        source_item, raw_metadata, matched_metadata, metadata_draft, normalization_cache
    )
    workflow_state = self._resolve_workflow_state(source_item, effective_metadata)
    can_ingest = self._can_ingest(effective_metadata)
    still_open = source_item.get('exception_resolution_status') == 'open'
    pending_ingest = still_open and workflow_state in {'candidate_selected', 'ready_to_ingest'}
    display_title = self._first_non_empty(effective_metadata.get('title'), source_item.get('filename')) or '-'

    # Identity of the item and the task it belongs to.
    payload = {
        'exception_id': source_item['id'],
        'task_id': source_item['task_id'],
        'task_started_at': source_item['task_started_at'],
    }

    # Classification copied from the exception index.
    for key in ('exception_type', 'exception_stage', 'exception_reason_code', 'exception_message'):
        payload[key] = index[key]

    payload['captured_at'] = source_item['updated_at']

    # Fields read with direct indexing (a missing key raises KeyError).
    for key in ('filename', 'relative_path', 'original_path', 'current_file_path'):
        payload[key] = source_item[key]

    # Fields that may be absent from the source item.
    for key in (
        'trash_file_path',
        'audio_props_json',
        'original_tags_json',
        'matched_metadata_json',
        'duplicate_of_path',
        'dedupe_decision_json',
        'library_relative_path',
        'library_file_path',
        'match_source',
        'match_confidence',
    ):
        payload[key] = source_item.get(key)

    # Derived state.
    payload.update({
        'preview_available': False,
        'available_actions': self._available_actions_for(index['exception_type'], source_item, can_ingest),
        'exception_resolution_status': source_item.get('exception_resolution_status') or 'open',
        'exception_resolution_json': resolution,
        'workflow_state': workflow_state,
        'raw_metadata': raw_metadata,
        'metadata_draft': metadata_draft,
        'effective_metadata': effective_metadata,
        'normalization_strategy': effective_metadata.get('normalization_strategy'),
        'album_artist_reason': effective_metadata.get('album_artist_reason'),
        'compilation': int(effective_metadata.get('compilation') or 0),
        'can_ingest': can_ingest,
        'pending_ingest': pending_ingest,
        'display_title': display_title,
        'display_reason': index['display_reason'],
        'type_label': EXCEPTION_TYPE_LABELS[index['exception_type']],
    })

    if include_detail:
        for key in (
            'preprocess_artifacts_json',
            'match_candidates_json',
            'match_enrichment_json',
            'organize_decision_json',
        ):
            payload[key] = source_item.get(key)

    return payload
|
||||
|
||||
def _resolve_from_resolution_snapshot(
    self,
    item: dict
) -> tuple[str, str, str | None, str | None] | None:
    """Recover the exception state from the item's resolution snapshot.

    The ``after_snapshot`` is preferred over the ``before_snapshot``.

    Returns:
        ``(type, stage, reason_code, message)``, or ``None`` when the
        snapshot does not name a supported exception type. The stage
        defaults to ``'organize'`` when the snapshot has none.
    """
    details = item.get('exception_resolution_json') or {}
    snap = details.get('after_snapshot') or details.get('before_snapshot') or {}
    kind = snap.get('exception_type')

    if kind in EXCEPTION_TYPES:
        stage = snap.get('exception_stage') or 'organize'
        return kind, stage, snap.get('exception_reason_code'), snap.get('exception_message')
    return None
|
||||
|
||||
def _resolve_exception_state(
|
||||
self,
|
||||
item: dict
|
||||
) -> tuple[str, str, str | None, str | None] | None:
|
||||
if item.get('organize_status') in {'trashed', 'failed'}:
|
||||
return (
|
||||
'organize_failed',
|
||||
'organize',
|
||||
self._first_non_empty(item.get('organize_reason'), item.get('organize_status')),
|
||||
self._first_non_empty(item.get('organize_message'), self._default_reason('organize_failed'))
|
||||
)
|
||||
|
||||
if item.get('dedupe_status') == 'duplicate_trashed':
|
||||
return (
|
||||
'duplicates',
|
||||
'dedupe',
|
||||
self._first_non_empty(item.get('dedupe_reason'), item.get('dedupe_status')),
|
||||
self._first_non_empty(item.get('dedupe_message'), self._default_reason('duplicates'))
|
||||
)
|
||||
|
||||
if item.get('dedupe_status') == 'failed':
|
||||
return (
|
||||
'duplicates',
|
||||
'dedupe',
|
||||
self._first_non_empty(item.get('dedupe_reason'), item.get('dedupe_status')),
|
||||
self._first_non_empty(item.get('dedupe_message'), self._default_reason('duplicates'))
|
||||
)
|
||||
|
||||
if item.get('match_status') == 'low_score':
|
||||
return (
|
||||
'low_score',
|
||||
'match',
|
||||
self._first_non_empty(item.get('match_reason'), item.get('match_status')),
|
||||
self._first_non_empty(item.get('match_message'), self._default_reason('low_score'))
|
||||
)
|
||||
|
||||
if item.get('match_status') in {'failed', 'not_found'}:
|
||||
return (
|
||||
'match_failed',
|
||||
'match',
|
||||
self._first_non_empty(item.get('match_reason'), item.get('match_status')),
|
||||
self._first_non_empty(item.get('match_message'), self._default_reason('match_failed'))
|
||||
)
|
||||
|
||||
if (
|
||||
item.get('preprocess_status') == 'failed'
|
||||
and item.get('preprocess_reason') == 'convert_failed'
|
||||
):
|
||||
return (
|
||||
'convert_failed',
|
||||
'preprocess',
|
||||
item.get('preprocess_reason'),
|
||||
self._first_non_empty(item.get('preprocess_message'), self._default_reason('convert_failed'))
|
||||
)
|
||||
|
||||
preprocess_reason = item.get('preprocess_reason') or ''
|
||||
if item.get('preprocess_status') == 'warning' and 'metadata_failed' in preprocess_reason:
|
||||
return (
|
||||
'missing_tags',
|
||||
'preprocess',
|
||||
preprocess_reason,
|
||||
self._first_non_empty(item.get('preprocess_message'), self._default_reason('missing_tags'))
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
def _default_reason(self, exception_type: str) -> str:
|
||||
defaults = {
|
||||
'missing_tags': '无法提取有效元数据',
|
||||
'duplicates': '检测到重复文件',
|
||||
'match_failed': '未能完成元数据匹配',
|
||||
'low_score': '匹配候选分数过低',
|
||||
'convert_failed': '音频转码失败',
|
||||
'organize_failed': '整理入库失败'
|
||||
}
|
||||
return defaults[exception_type]
|
||||
|
||||
def _available_actions_for(self, exception_type: str, item: dict, can_ingest: bool) -> list[str]:
    """List the actions that may be offered for an exception item.

    Resolved or ignored items get the read-only action list. Otherwise the
    type's configured actions are returned, minus:

    * ``select_match_candidate`` for a low-score item without candidates;
    * ``retry_organize`` for an organize failure without matched metadata.

    ``can_ingest`` is accepted but does not influence the result.
    """
    status = item.get('exception_resolution_status') or 'open'
    if status in {'resolved', 'ignored'}:
        return READ_ONLY_ACTIONS.copy()

    offered = ACTION_RULES.get(exception_type, READ_ONLY_ACTIONS).copy()

    withheld = set()
    if exception_type == 'low_score' and not (item.get('match_candidates_json') or []):
        withheld.add('select_match_candidate')
    if exception_type == 'organize_failed' and not item.get('matched_metadata_json'):
        withheld.add('retry_organize')

    if not withheld:
        return offered
    return [action for action in offered if action not in withheld]
|
||||
|
||||
def _resolve_workflow_state(self, item: dict, effective_metadata: dict[str, Any]) -> str:
|
||||
resolution_status = item.get('exception_resolution_status') or 'open'
|
||||
resolution = item.get('exception_resolution_json') or {}
|
||||
if resolution_status == 'ignored':
|
||||
return 'ignored'
|
||||
if resolution_status == 'resolved':
|
||||
return 'deleted' if resolution.get('action') == 'delete_file' else 'ingested'
|
||||
|
||||
workflow_state = resolution.get('workflow_state')
|
||||
if workflow_state in {'open', 'candidate_selected', 'ready_to_ingest'}:
|
||||
if workflow_state == 'candidate_selected' and self._can_ingest(effective_metadata):
|
||||
return 'ready_to_ingest'
|
||||
return workflow_state
|
||||
|
||||
return 'ready_to_ingest' if self._can_ingest(effective_metadata) else 'open'
|
||||
|
||||
def _normalize_metadata(self, metadata: dict[str, Any] | None) -> dict[str, Any]:
    """Delegate to ``normalize_metadata_shape`` and return its result."""
    shaped = normalize_metadata_shape(metadata)
    return shaped
|
||||
|
||||
def _build_effective_metadata(
    self,
    source_item: dict[str, Any],
    raw_metadata: dict[str, Any],
    matched_metadata: dict[str, Any],
    metadata_draft: dict[str, Any],
    normalization_cache: dict[str, dict[Any, Any]] | None = None
) -> dict[str, Any]:
    """Combine raw, matched and draft metadata into the effective view.

    Draft entries whose value is ``None`` are not applied. When merging the
    three layers yields nothing, an empty dict is returned without calling
    the normalizer.
    """
    draft_patch = {field: value for field, value in metadata_draft.items() if value is not None}

    # The merged result is only used to decide whether there is anything
    # to normalize at all.
    if not merge_metadata_layers(raw_metadata, matched_metadata, draft_patch):
        return {}

    enriched_item = {
        **source_item,
        'original_tags_json': raw_metadata,
        'matched_metadata_json': matched_metadata
    }
    return self.metadata_normalizer.normalize_item(
        enriched_item,
        draft_patch,
        cache=normalization_cache
    )
|
||||
|
||||
def _can_ingest(self, metadata: dict[str, Any]) -> bool:
    """Delegate to ``can_ingest_metadata`` and return its verdict."""
    verdict = can_ingest_metadata(metadata)
    return verdict
|
||||
|
||||
def _first_non_empty(self, *values: Any) -> Any:
|
||||
for value in values:
|
||||
if isinstance(value, str):
|
||||
if value.strip():
|
||||
return value
|
||||
continue
|
||||
if value is not None:
|
||||
return value
|
||||
return None
|
||||
Reference in New Issue
Block a user