398 lines
15 KiB
Python
398 lines
15 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from .metadata_normalization import (
|
|
MetadataNormalizationService,
|
|
can_ingest_metadata,
|
|
merge_metadata_layers,
|
|
normalize_metadata_shape
|
|
)
|
|
from .task_constants import current_timestamp
|
|
|
|
|
|
# User-facing label for each supported exception type. The insertion order of
# this mapping also defines the order of EXCEPTION_TYPES below.
EXCEPTION_TYPE_LABELS = {
    'missing_tags': '元数据缺失',
    'duplicates': '文件重复',
    'match_failed': '匹配失败',
    'low_score': '匹配分过低',
    'convert_failed': '转码失败',
    'organize_failed': '入库失败',
}

# Canonical tuple of exception type identifiers (iterating a dict yields its keys).
EXCEPTION_TYPES = tuple(EXCEPTION_TYPE_LABELS)

# Action list used when no actions apply; callers copy it before use.
READ_ONLY_ACTIONS: list[str] = []

# Actions offered for an open exception item, keyed by exception type.
ACTION_RULES = {
    'missing_tags': [
        'retry_match',
        'edit_metadata',
        'save_and_organize',
        'ignore_exception',
        'delete_file',
    ],
    'match_failed': [
        'retry_match',
        'edit_metadata',
        'save_and_organize',
        'ignore_exception',
        'delete_file',
    ],
    'low_score': [
        'retry_match',
        'select_match_candidate',
        'edit_metadata',
        'save_and_organize',
        'ignore_exception',
        'delete_file',
    ],
    'duplicates': [
        'keep_existing',
        'replace_existing',
        'keep_both_with_rename',
        'ignore_exception',
        'delete_file',
    ],
    'convert_failed': [
        'retry_preprocess',
        'move_to_review_trash',
        'ignore_exception',
        'delete_file',
    ],
    'organize_failed': [
        'retry_organize',
        'edit_target_path',
        'move_to_review_trash',
        'ignore_exception',
        'delete_file',
    ],
}
|
|
|
|
|
|
class ExceptionItemNotFoundError(Exception):
    """Raised when an exception item cannot be found or is not an exception."""
|
|
|
|
|
|
class ExceptionService:
    """Classify task items into exception types and build payload dicts for them.

    The service reads rows from ``task_store`` (it calls
    ``list_exception_source_items(status)`` and
    ``get_exception_source_item(exception_id)`` on it) and never writes back.
    """

    # Fallback message per exception type, used when the item carries no
    # message of its own. Kept at class level so it is built only once.
    _DEFAULT_REASONS = {
        'missing_tags': '无法提取有效元数据',
        'duplicates': '检测到重复文件',
        'match_failed': '未能完成元数据匹配',
        'low_score': '匹配候选分数过低',
        'convert_failed': '音频转码失败',
        'organize_failed': '整理入库失败',
    }

    def __init__(self, task_store):
        """Keep ``task_store`` and create a metadata normalizer bound to it."""
        self.task_store = task_store
        self.metadata_normalizer = MetadataNormalizationService(task_store)

    def get_summary(self) -> dict:
        """Count exception items per type.

        The store is queried with the ``'all'`` status filter; rows that
        cannot be classified as an exception are skipped.

        Returns:
            A dict with ``total``, ``counts_by_type`` (one entry per known
            exception type, zero when unused) and ``scanned_at``.
        """
        counts_by_type = dict.fromkeys(EXCEPTION_TYPES, 0)
        total = 0

        for source_item in self.task_store.list_exception_source_items('all'):
            exception_index = self._identify_exception_item(source_item)
            if exception_index is None:
                continue
            counts_by_type[exception_index['exception_type']] += 1
            total += 1

        return {
            'total': total,
            'counts_by_type': counts_by_type,
            'scanned_at': current_timestamp(),
        }

    def get_items(
        self,
        exception_type: str = 'all',
        page: int = 1,
        page_size: int = 50,
        resolution_status: str = 'open',
    ) -> dict:
        """Return one page of exception items in list (non-detail) form.

        Args:
            exception_type: ``'all'`` or one of ``EXCEPTION_TYPES``.
            page: 1-based page number; values below 1 are treated as 1.
            page_size: Items per page; negative values are treated as 0.
            resolution_status: Status filter forwarded to the task store.

        Returns:
            A dict with ``items``, the effective ``page`` and ``page_size``,
            and ``total`` (number of matching items before pagination).

        Raises:
            ValueError: If ``exception_type`` is not supported.
        """
        if exception_type != 'all' and exception_type not in EXCEPTION_TYPES:
            raise ValueError(f'Unsupported exception type: {exception_type}')

        # A negative offset or a negative slice end would index from the tail
        # of the list and silently return the wrong rows, so clamp both.
        page = max(1, page)
        page_size = max(0, page_size)

        indexed_items = []
        for source_item in self.task_store.list_exception_source_items(resolution_status):
            exception_index = self._identify_exception_item(source_item)
            if exception_index is None:
                continue
            if exception_type != 'all' and exception_index['exception_type'] != exception_type:
                continue
            indexed_items.append(exception_index)

        offset = (page - 1) * page_size
        page_indexes = indexed_items[offset:offset + page_size]
        # One cache is shared by every item on the page.
        normalization_cache = self.metadata_normalizer.create_cache()

        return {
            'items': [
                self._build_exception_item(
                    page_index['source_item'],
                    include_detail=False,
                    exception_index=page_index,
                    normalization_cache=normalization_cache,
                )
                for page_index in page_indexes
            ],
            'page': page,
            'page_size': page_size,
            'total': len(indexed_items),
        }

    def get_item(self, exception_id: int) -> dict:
        """Return the detailed payload for a single exception item.

        Raises:
            ExceptionItemNotFoundError: If the store has no such row, or the
                row cannot be classified as an exception.
        """
        source_item = self.task_store.get_exception_source_item(exception_id)
        if source_item is None:
            raise ExceptionItemNotFoundError(exception_id)

        exception_item = self._build_exception_item(source_item, include_detail=True)
        if exception_item is None:
            raise ExceptionItemNotFoundError(exception_id)
        return exception_item

    def resolve_audio_path(self, exception_id: int) -> Path:
        """Return the first existing regular file among the item's known paths.

        ``current_file_path`` is tried before ``trash_file_path``.

        Raises:
            ExceptionItemNotFoundError: Propagated from :meth:`get_item`.
            FileNotFoundError: If neither path points at an existing file.
        """
        item = self.get_item(exception_id)

        for candidate in (item.get('current_file_path'), item.get('trash_file_path')):
            if not candidate:
                continue
            path = Path(candidate)
            # is_file() is already False for a path that does not exist.
            if path.is_file():
                return path

        raise FileNotFoundError(f'No playable audio found for exception item: {exception_id}')

    def _identify_exception_item(self, source_item: dict) -> dict | None:
        """Classify ``source_item`` and return a lightweight index entry.

        The live status columns are consulted first; if they do not describe
        an exception, the snapshot stored in ``exception_resolution_json`` is
        used instead. Returns ``None`` when neither yields an exception type.
        """
        resolution = source_item.get('exception_resolution_json') or {}

        exception_state = self._resolve_exception_state(source_item)
        if exception_state is None:
            exception_state = self._resolve_from_resolution_snapshot(source_item)
        if exception_state is None:
            return None

        exception_type, exception_stage, exception_reason_code, exception_message = exception_state
        return {
            'source_item': source_item,
            'resolution': resolution,
            'exception_type': exception_type,
            'exception_stage': exception_stage,
            'exception_reason_code': exception_reason_code,
            'exception_message': exception_message,
            'display_reason': exception_message or self._default_reason(exception_type),
        }

    def _build_exception_item(
        self,
        source_item: dict,
        *,
        include_detail: bool,
        exception_index: dict | None = None,
        normalization_cache: dict[str, dict[Any, Any]] | None = None,
    ) -> dict | None:
        """Build the payload dict for one exception item.

        Args:
            source_item: Row as returned by the task store.
            include_detail: When true, the preprocess/match/organize JSON
                blobs are added to the payload.
            exception_index: Result of :meth:`_identify_exception_item` if the
                caller already has it; computed here otherwise.
            normalization_cache: Optional cache passed through to the
                metadata normalizer.

        Returns:
            The payload, or ``None`` if the row is not an exception.
        """
        exception_index = exception_index or self._identify_exception_item(source_item)
        if exception_index is None:
            return None

        resolution = exception_index['resolution']
        raw_metadata = self._normalize_metadata(source_item.get('original_tags_json'))
        matched_metadata = self._normalize_metadata(source_item.get('matched_metadata_json'))
        metadata_draft = self._normalize_metadata(resolution.get('metadata_draft'))
        effective_metadata = self._build_effective_metadata(
            source_item,
            raw_metadata,
            matched_metadata,
            metadata_draft,
            normalization_cache,
        )
        workflow_state = self._resolve_workflow_state(source_item, effective_metadata)
        can_ingest = self._can_ingest(effective_metadata)
        pending_ingest = (
            source_item.get('exception_resolution_status') == 'open'
            and workflow_state in {'candidate_selected', 'ready_to_ingest'}
        )
        display_title = self._first_non_empty(effective_metadata.get('title'), source_item.get('filename')) or '-'
        exception_type = exception_index['exception_type']

        payload = {
            'exception_id': source_item['id'],
            'task_id': source_item['task_id'],
            'task_started_at': source_item['task_started_at'],
            'exception_type': exception_type,
            'exception_stage': exception_index['exception_stage'],
            'exception_reason_code': exception_index['exception_reason_code'],
            'exception_message': exception_index['exception_message'],
            'captured_at': source_item['updated_at'],
            'filename': source_item['filename'],
            'relative_path': source_item['relative_path'],
            'original_path': source_item['original_path'],
            'current_file_path': source_item['current_file_path'],
            'trash_file_path': source_item.get('trash_file_path'),
            'audio_props_json': source_item.get('audio_props_json'),
            'original_tags_json': source_item.get('original_tags_json'),
            'matched_metadata_json': source_item.get('matched_metadata_json'),
            'duplicate_of_path': source_item.get('duplicate_of_path'),
            'dedupe_decision_json': source_item.get('dedupe_decision_json'),
            'library_relative_path': source_item.get('library_relative_path'),
            'library_file_path': source_item.get('library_file_path'),
            'match_source': source_item.get('match_source'),
            'match_confidence': source_item.get('match_confidence'),
            'preview_available': False,
            'available_actions': self._available_actions_for(exception_type, source_item, can_ingest),
            'exception_resolution_status': source_item.get('exception_resolution_status') or 'open',
            'exception_resolution_json': resolution,
            'workflow_state': workflow_state,
            'raw_metadata': raw_metadata,
            'metadata_draft': metadata_draft,
            'effective_metadata': effective_metadata,
            'normalization_strategy': effective_metadata.get('normalization_strategy'),
            'album_artist_reason': effective_metadata.get('album_artist_reason'),
            'compilation': int(effective_metadata.get('compilation') or 0),
            'can_ingest': can_ingest,
            'pending_ingest': pending_ingest,
            'display_title': display_title,
            'display_reason': exception_index['display_reason'],
            'type_label': EXCEPTION_TYPE_LABELS[exception_type],
        }

        if include_detail:
            payload.update(
                {
                    'preprocess_artifacts_json': source_item.get('preprocess_artifacts_json'),
                    'match_candidates_json': source_item.get('match_candidates_json'),
                    'match_enrichment_json': source_item.get('match_enrichment_json'),
                    'organize_decision_json': source_item.get('organize_decision_json'),
                }
            )

        return payload

    def _resolve_from_resolution_snapshot(
        self,
        item: dict,
    ) -> tuple[str, str, str | None, str | None] | None:
        """Recover the exception state from the stored resolution snapshot.

        ``after_snapshot`` is preferred over ``before_snapshot``. Returns
        ``None`` when the snapshot does not name a known exception type. The
        stage falls back to ``'organize'`` when the snapshot has none.
        """
        resolution = item.get('exception_resolution_json') or {}
        snapshot = resolution.get('after_snapshot') or resolution.get('before_snapshot') or {}
        exception_type = snapshot.get('exception_type')
        if exception_type not in EXCEPTION_TYPES:
            return None
        return (
            exception_type,
            snapshot.get('exception_stage') or 'organize',
            snapshot.get('exception_reason_code'),
            snapshot.get('exception_message'),
        )

    def _status_exception_state(
        self,
        item: dict,
        exception_type: str,
        stage: str,
    ) -> tuple[str, str, str | None, str | None]:
        """Build the state tuple for a stage stored as ``<stage>_status/_reason/_message``.

        The reason code falls back to the stage status, and the message falls
        back to the default reason for ``exception_type``.
        """
        return (
            exception_type,
            stage,
            self._first_non_empty(item.get(f'{stage}_reason'), item.get(f'{stage}_status')),
            self._first_non_empty(item.get(f'{stage}_message'), self._default_reason(exception_type)),
        )

    def _resolve_exception_state(
        self,
        item: dict,
    ) -> tuple[str, str, str | None, str | None] | None:
        """Derive ``(type, stage, reason_code, message)`` from the status columns.

        Checks run in a fixed order and the first match wins: organize,
        dedupe, match (low score before failure), then preprocess (convert
        failure before metadata failure). Returns ``None`` if nothing matches.
        """
        if item.get('organize_status') in {'trashed', 'failed'}:
            return self._status_exception_state(item, 'organize_failed', 'organize')

        # Both dedupe outcomes are reported as the same exception type.
        if item.get('dedupe_status') in ('duplicate_trashed', 'failed'):
            return self._status_exception_state(item, 'duplicates', 'dedupe')

        match_status = item.get('match_status')
        if match_status == 'low_score':
            return self._status_exception_state(item, 'low_score', 'match')
        if match_status in {'failed', 'not_found'}:
            return self._status_exception_state(item, 'match_failed', 'match')

        preprocess_status = item.get('preprocess_status')
        if preprocess_status == 'failed' and item.get('preprocess_reason') == 'convert_failed':
            return (
                'convert_failed',
                'preprocess',
                item.get('preprocess_reason'),
                self._first_non_empty(item.get('preprocess_message'), self._default_reason('convert_failed')),
            )

        # The reason is matched by substring, so a missing reason becomes ''.
        preprocess_reason = item.get('preprocess_reason') or ''
        if preprocess_status == 'warning' and 'metadata_failed' in preprocess_reason:
            return (
                'missing_tags',
                'preprocess',
                preprocess_reason,
                self._first_non_empty(item.get('preprocess_message'), self._default_reason('missing_tags')),
            )

        return None

    def _default_reason(self, exception_type: str) -> str:
        """Return the fallback message for ``exception_type``.

        Raises:
            KeyError: If ``exception_type`` is not a known type.
        """
        return self._DEFAULT_REASONS[exception_type]

    def _available_actions_for(self, exception_type: str, item: dict, can_ingest: bool) -> list[str]:
        """Return the actions currently offered for ``item``.

        Resolved or ignored items get no actions. ``select_match_candidate``
        is dropped for low-score items without candidates, and
        ``retry_organize`` is dropped for organize failures without matched
        metadata. ``can_ingest`` is accepted but not consulted here.
        """
        resolution_status = item.get('exception_resolution_status') or 'open'
        if resolution_status in {'resolved', 'ignored'}:
            return READ_ONLY_ACTIONS.copy()

        # Copy so the module-level rule lists are never mutated or shared.
        actions = ACTION_RULES.get(exception_type, READ_ONLY_ACTIONS).copy()

        if exception_type == 'low_score' and not (item.get('match_candidates_json') or []):
            actions = [action for action in actions if action != 'select_match_candidate']

        if exception_type == 'organize_failed' and not item.get('matched_metadata_json'):
            actions = [action for action in actions if action != 'retry_organize']

        return actions

    def _resolve_workflow_state(self, item: dict, effective_metadata: dict[str, Any]) -> str:
        """Compute the workflow state shown for ``item``.

        Ignored items report ``'ignored'``; resolved items report
        ``'deleted'`` when the recorded action was ``delete_file`` and
        ``'ingested'`` otherwise. For open items a stored workflow state is
        honoured, except that ``candidate_selected`` is promoted to
        ``ready_to_ingest`` once the metadata can be ingested. Without a
        recognised stored state the result depends only on ingestability.
        """
        resolution_status = item.get('exception_resolution_status') or 'open'
        resolution = item.get('exception_resolution_json') or {}

        if resolution_status == 'ignored':
            return 'ignored'
        if resolution_status == 'resolved':
            return 'deleted' if resolution.get('action') == 'delete_file' else 'ingested'

        workflow_state = resolution.get('workflow_state')
        if workflow_state in {'open', 'candidate_selected', 'ready_to_ingest'}:
            if workflow_state == 'candidate_selected' and self._can_ingest(effective_metadata):
                return 'ready_to_ingest'
            return workflow_state

        return 'ready_to_ingest' if self._can_ingest(effective_metadata) else 'open'

    def _normalize_metadata(self, metadata: dict[str, Any] | None) -> dict[str, Any]:
        """Delegate to ``normalize_metadata_shape``."""
        return normalize_metadata_shape(metadata)

    def _build_effective_metadata(
        self,
        source_item: dict[str, Any],
        raw_metadata: dict[str, Any],
        matched_metadata: dict[str, Any],
        metadata_draft: dict[str, Any],
        normalization_cache: dict[str, dict[Any, Any]] | None = None,
    ) -> dict[str, Any]:
        """Combine raw tags, matched metadata and the user draft into one dict.

        Draft entries whose value is ``None`` are not applied. When the merge
        of the three layers is empty, an empty dict is returned and the
        normalizer is not called.
        """
        metadata_patch = {key: value for key, value in metadata_draft.items() if value is not None}
        merged = merge_metadata_layers(raw_metadata, matched_metadata, metadata_patch)
        if not merged:
            return {}

        # The normalizer receives the row with its metadata columns replaced
        # by their normalized forms.
        normalized_source = {
            **source_item,
            'original_tags_json': raw_metadata,
            'matched_metadata_json': matched_metadata,
        }
        return self.metadata_normalizer.normalize_item(
            normalized_source,
            metadata_patch,
            cache=normalization_cache,
        )

    def _can_ingest(self, metadata: dict[str, Any]) -> bool:
        """Delegate to ``can_ingest_metadata``."""
        return can_ingest_metadata(metadata)

    def _first_non_empty(self, *values: Any) -> Any:
        """Return the first usable value, or ``None`` if there is none.

        Strings count only when they contain non-whitespace characters; any
        other value counts when it is not ``None``.
        """
        for value in values:
            if isinstance(value, str):
                if value.strip():
                    return value
                continue
            if value is not None:
                return value
        return None
|