# MusicWorkshop/backend/app/library_postprocess.py

import os
import re
import shutil
import time
import unicodedata
from pathlib import Path

from . import library_index
from .scanner import ALLOWED_AUDIO_EXTENSIONS
from .task_constants import (
  DEDUPE_PROGRESS_BATCH_SIZE,
  DEDUPE_PROGRESS_INTERVAL_SECONDS,
  ORGANIZE_PROGRESS_BATCH_SIZE,
  ORGANIZE_PROGRESS_INTERVAL_SECONDS,
  TASK_STATUS_RUNNING
)


# Extensions that earn the "lossless" bonus in _build_quality_breakdown; shared with library_index.
LOSSLESS_EXTENSIONS = library_index.LOSSLESS_EXTENSIONS
# Re-exported from library_index; not referenced by the code visible in this module.
PRESERVED_VERSION_TOKENS = library_index.PRESERVED_VERSION_TOKENS
# Minimum quality-score lead a batch file needs over the library copy before it may replace it.
REPLACE_SCORE_THRESHOLD = 15.0
# Maximum length, in characters, of one sanitized path component.
MAX_PATH_COMPONENT_LENGTH = 96


class DedupeItemError(Exception):
  """Failure while deduplicating a single task item.

  Attributes:
    reason: Short machine-readable code for the failure.
    message: Human-readable description; also used as the exception text.
  """

  def __init__(self, reason: str, message: str):
    super().__init__(message)
    self.reason, self.message = reason, message


class OrganizeItemError(Exception):
  """Failure while organizing a single task item into the library.

  Attributes:
    reason: Short machine-readable code for the failure.
    message: Human-readable description; also used as the exception text.
  """

  def __init__(self, reason: str, message: str):
    super().__init__(message)
    self.reason, self.message = reason, message


class DedupeRunner:
  def __init__(self, task_store, preprocessor, task_stream):
    self.task_store = task_store
    self.preprocessor = preprocessor
    self.task_stream = task_stream

  def run(self, task_id: str, current_stats: dict, config_snapshot: dict):
    """Run the dedupe stage for one task.

    The stage works in two passes:

    1. Candidates are grouped by shared identity keys; within each group the
       best item is kept as the winner and the others are moved to the trash.
    2. Each winner is compared with the indexed output library. It is either
       kept as unique, trashed in favour of the library copy, or (when
       ``advancedStrategy.replaceLowQualityDuplicates`` is set and its score
       leads by at least ``REPLACE_SCORE_THRESHOLD``) kept while the library
       copy is moved to the trash.

    Every decision is persisted on the item, broadcast on the task stream and
    appended to the task log. ``current_stats['dedupe']`` is updated in place.

    Args:
      task_id: Identifier of the task being processed.
      current_stats: Task stats dict; must contain a ``'dedupe'`` entry.
      config_snapshot: Task configuration; ``'output'`` and ``'trash'`` are
        read, plus the optional ``'advancedStrategy'`` mapping.
    """
    dedupe_stats = current_stats['dedupe'].copy()
    candidates = self.task_store.list_dedupe_candidate_items(task_id)
    dedupe_stats['input_items'] = len(candidates)
    current_stats['dedupe'] = dedupe_stats.copy()
    self._persist_progress(task_id, current_stats, dedupe_stats)

    if not candidates:
      return

    # NOTE: from here on the local name shadows the imported ``library_index``
    # module inside this method; it holds the index dict, not the module.
    library_index = self._index_library_files(config_snapshot['output'])
    dedupe_stats['library_candidates'] = library_index['count']
    current_stats['dedupe'] = dedupe_stats.copy()
    self.task_stream.broadcast_event(
      task_id,
      'dedupe.library_indexed',
      'dedupe',
      {'count': library_index['count']}
    )
    self._append_log(
      task_id,
      level='info',
      event_type='dedupe.library_indexed',
      message=f'已索引输出库音频: {library_index["count"]} 个',
      payload={'count': library_index['count']}
    )

    # Pass 1: resolve duplicates inside the batch itself.
    groups = _group_batch_candidates(candidates)
    winners: list[dict] = []
    processed_count = 0
    last_progress_at = time.monotonic()

    for group in groups:
      # Mark every member as running and clear any previous dedupe decision.
      running_items = [
        self.task_store.update_task_item(
          item['id'],
          dedupe_status='running',
          dedupe_reason=None,
          dedupe_message=None,
          dedupe_group_key=None,
          duplicate_of_path=None,
          duplicate_of_item_id=None,
          dedupe_decision_json=None
        )
        for item in group
      ]
      winner, batch_duplicates, identity_basis, group_key = _select_batch_winner(running_items)

      # Everything in the group except the winner goes to the trash.
      for duplicate_item in batch_duplicates:
        try:
          self.task_stream.broadcast_event(
            task_id,
            'dedupe.lookup_started',
            'dedupe',
            {'item': duplicate_item}
          )
          self._append_log(
            task_id,
            level='info',
            event_type='dedupe.lookup_started',
            message=f'开始比对重复项: {duplicate_item["relative_path"]}',
            payload={'item': duplicate_item}
          )
          # Counted before the move: a failed move increments both
          # batch_duplicates and failed_items.
          dedupe_stats['batch_duplicates'] += 1
          trashed_path = self._move_file_to_trash(
            config_snapshot['trash'],
            'duplicates',
            task_id,
            duplicate_item['id'],
            duplicate_item['current_file_path']
          )
          final_item = self.task_store.update_task_item(
            duplicate_item['id'],
            is_active=0,
            current_file_path=trashed_path,
            trash_file_path=trashed_path,
            dedupe_status='duplicate_trashed',
            dedupe_reason='batch_duplicate',
            dedupe_message='当前批次中存在更优文件，已移入回收站',
            dedupe_group_key=group_key,
            duplicate_of_path=winner['current_file_path'],
            duplicate_of_item_id=winner['id'],
            dedupe_decision_json={
              'comparison_scope': 'batch',
              'identity_basis': identity_basis,
              'quality_breakdown': {
                'kept': _build_quality_breakdown(winner),
                'trashed': _build_quality_breakdown(duplicate_item)
              },
              'kept_side': 'batch',
              'trashed_path': trashed_path,
              'replaced_existing_path': None,
              'compared_candidates': [
                _serialize_compared_candidate('kept', winner),
                _serialize_compared_candidate('trashed', duplicate_item)
              ]
            }
          )
          self.task_stream.broadcast_event(
            task_id,
            'dedupe.item_duplicate',
            'dedupe',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='warning',
            event_type='dedupe.item_duplicate',
            message=f'批次重复已淘汰: {duplicate_item["relative_path"]}',
            payload={'item': final_item}
          )
        except DedupeItemError as error:
          # The file could not be trashed; record the failure on the item and go on.
          dedupe_stats['failed_items'] += 1
          final_item = self.task_store.update_task_item(
            duplicate_item['id'],
            dedupe_status='failed',
            dedupe_reason=error.reason,
            dedupe_message=error.message,
            dedupe_group_key=group_key,
            duplicate_of_path=winner['current_file_path'],
            duplicate_of_item_id=winner['id']
          )
          self.task_stream.broadcast_event(
            task_id,
            'dedupe.item_failed',
            'dedupe',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='error',
            event_type='dedupe.item_failed',
            message=f'重复检测失败: {duplicate_item["relative_path"]}',
            payload={'item': final_item}
          )
        processed_count += 1
        last_progress_at = self._maybe_persist_progress(
          task_id,
          current_stats,
          dedupe_stats,
          processed_count,
          last_progress_at
        )

      # The winner stays 'running' until the library comparison in pass 2.
      winners.append(
        self.task_store.update_task_item(
          winner['id'],
          dedupe_status='running',
          dedupe_group_key=group_key
        )
      )

    replace_enabled = bool(
      (config_snapshot.get('advancedStrategy') or {}).get('replaceLowQualityDuplicates')
    )

    # Pass 2: compare each batch winner with the indexed output library.
    for winner in winners:
      if not winner['is_active']:
        continue

      self.task_stream.broadcast_event(
        task_id,
        'dedupe.lookup_started',
        'dedupe',
        {'item': winner}
      )
      self._append_log(
        task_id,
        level='info',
        event_type='dedupe.lookup_started',
        message=f'开始比对重复项: {winner["relative_path"]}',
        payload={'item': winner}
      )
      identity_basis, identity_key = _choose_primary_identity(winner)
      if not identity_basis or not identity_key:
        # No usable identity: nothing to look up, so the file is kept as unique.
        unique_item = self.task_store.update_task_item(
          winner['id'],
          dedupe_status='unique',
          dedupe_reason=None,
          dedupe_message='未发现重复项',
          dedupe_group_key=winner.get('dedupe_group_key') or f'item:{winner["id"]}',
          dedupe_decision_json={
            'comparison_scope': 'none',
            'identity_basis': None,
            'quality_breakdown': {'kept': _build_quality_breakdown(winner)},
            'kept_side': 'batch',
            'trashed_path': None,
            'replaced_existing_path': None,
            'compared_candidates': [_serialize_compared_candidate('kept', winner)]
          }
        )
        dedupe_stats['kept_items'] += 1
        self.task_stream.broadcast_event(
          task_id,
          'dedupe.item_unique',
          'dedupe',
          {'item': unique_item}
        )
        self._append_log(
          task_id,
          level='success',
          event_type='dedupe.item_unique',
          message=f'未发现重复项，保留文件: {winner["relative_path"]}',
          payload={'item': unique_item}
        )
        processed_count += 1
        last_progress_at = self._maybe_persist_progress(
          task_id,
          current_stats,
          dedupe_stats,
          processed_count,
          last_progress_at
        )
        continue

      # Library entries registered under the same identity basis and key.
      library_candidates = library_index['by_basis'].get(identity_basis, {}).get(identity_key, [])
      if not library_candidates:
        unique_item = self.task_store.update_task_item(
          winner['id'],
          dedupe_status='unique',
          dedupe_reason=None,
          dedupe_message='未发现库内重复项',
          dedupe_group_key=winner.get('dedupe_group_key') or identity_key,
          dedupe_decision_json={
            'comparison_scope': 'library',
            'identity_basis': identity_basis,
            'quality_breakdown': {'kept': _build_quality_breakdown(winner)},
            'kept_side': 'batch',
            'trashed_path': None,
            'replaced_existing_path': None,
            'compared_candidates': [_serialize_compared_candidate('kept', winner)]
          }
        )
        dedupe_stats['kept_items'] += 1
        self.task_stream.broadcast_event(
          task_id,
          'dedupe.item_unique',
          'dedupe',
          {'item': unique_item}
        )
        self._append_log(
          task_id,
          level='success',
          event_type='dedupe.item_unique',
          message=f'未发现库内重复项，保留文件: {winner["relative_path"]}',
          payload={'item': unique_item}
        )
        processed_count += 1
        last_progress_at = self._maybe_persist_progress(
          task_id,
          current_stats,
          dedupe_stats,
          processed_count,
          last_progress_at
        )
        continue

      # Compare against the best-scoring library copy only.
      library_item = max(library_candidates, key=lambda candidate: _build_quality_breakdown(candidate)['total'])
      dedupe_stats['library_duplicates'] += 1
      winner_quality = _build_quality_breakdown(winner)
      library_quality = _build_quality_breakdown(library_item)

      try:
        if replace_enabled and winner_quality['total'] >= library_quality['total'] + REPLACE_SCORE_THRESHOLD:
          # The batch file is clearly better: trash the library copy, keep the winner.
          # NOTE(review): the in-memory index is not updated after the move — confirm
          # no later winner can resolve to the same library entry.
          replaced_path = self._move_file_to_trash(
            config_snapshot['trash'],
            'duplicates',
            task_id,
            winner['id'],
            library_item['file_path']
          )
          final_item = self.task_store.update_task_item(
            winner['id'],
            dedupe_status='duplicate_replaced',
            dedupe_reason='replaced_library_duplicate',
            dedupe_message='当前文件质量明显更高，已替换库内旧文件',
            dedupe_group_key=winner.get('dedupe_group_key') or identity_key,
            duplicate_of_path=library_item['file_path'],
            duplicate_of_item_id=None,
            dedupe_decision_json={
              'comparison_scope': 'library',
              'identity_basis': identity_basis,
              'quality_breakdown': {
                'kept': winner_quality,
                'replaced': library_quality
              },
              'kept_side': 'batch',
              'trashed_path': replaced_path,
              'replaced_existing_path': library_item['file_path'],
              'compared_candidates': [
                _serialize_compared_candidate('kept', winner),
                _serialize_compared_candidate('replaced', library_item)
              ]
            }
          )
          dedupe_stats['replaced_library_items'] += 1
          dedupe_stats['kept_items'] += 1
          self.task_stream.broadcast_event(
            task_id,
            'dedupe.item_replaced',
            'dedupe',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='success',
            event_type='dedupe.item_replaced',
            message=f'已替换库内旧文件: {winner["relative_path"]}',
            payload={'item': final_item}
          )
        else:
          # The library copy wins: the batch file is deactivated and trashed.
          trashed_path = self._move_file_to_trash(
            config_snapshot['trash'],
            'duplicates',
            task_id,
            winner['id'],
            winner['current_file_path']
          )
          final_item = self.task_store.update_task_item(
            winner['id'],
            is_active=0,
            current_file_path=trashed_path,
            trash_file_path=trashed_path,
            dedupe_status='duplicate_trashed',
            dedupe_reason='library_duplicate',
            dedupe_message='输出库中已存在重复文件，保留库内文件',
            dedupe_group_key=winner.get('dedupe_group_key') or identity_key,
            duplicate_of_path=library_item['file_path'],
            duplicate_of_item_id=None,
            dedupe_decision_json={
              'comparison_scope': 'library',
              'identity_basis': identity_basis,
              'quality_breakdown': {
                'kept': library_quality,
                'trashed': winner_quality
              },
              'kept_side': 'library',
              'trashed_path': trashed_path,
              'replaced_existing_path': None,
              'compared_candidates': [
                _serialize_compared_candidate('kept', library_item),
                _serialize_compared_candidate('trashed', winner)
              ]
            }
          )
          self.task_stream.broadcast_event(
            task_id,
            'dedupe.item_duplicate',
            'dedupe',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='warning',
            event_type='dedupe.item_duplicate',
            message=f'输出库已存在重复文件，已淘汰: {winner["relative_path"]}',
            payload={'item': final_item}
          )
      except DedupeItemError as error:
        # Either move above failed; the item is marked failed and stays where it is.
        dedupe_stats['failed_items'] += 1
        final_item = self.task_store.update_task_item(
          winner['id'],
          dedupe_status='failed',
          dedupe_reason=error.reason,
          dedupe_message=error.message,
          dedupe_group_key=winner.get('dedupe_group_key') or identity_key,
          duplicate_of_path=library_item['file_path']
        )
        self.task_stream.broadcast_event(
          task_id,
          'dedupe.item_failed',
          'dedupe',
          {'item': final_item}
        )
        self._append_log(
          task_id,
          level='error',
          event_type='dedupe.item_failed',
          message=f'重复检测失败: {winner["relative_path"]}',
          payload={'item': final_item}
        )

      processed_count += 1
      last_progress_at = self._maybe_persist_progress(
        task_id,
        current_stats,
        dedupe_stats,
        processed_count,
        last_progress_at
      )

    # Final flush so the persisted stats always reflect the completed stage.
    self._persist_progress(task_id, current_stats, dedupe_stats)

  def _index_library_files(self, output_dir: str) -> dict:
    """Build the index of ``output_dir`` via ``library_index.build_library_index``.

    The runner's exception-tolerant probe and tag readers are passed in as
    the ``probe_audio`` and ``read_tags`` callbacks.
    """
    return library_index.build_library_index(
      output_dir,
      probe_audio=self._safe_probe_audio,
      read_tags=self._safe_read_library_tags
    )

  def _safe_probe_audio(self, file_path: str) -> dict:
    try:
      return self.preprocessor.probe_audio(file_path)
    except Exception:
      return {}

  def _safe_read_library_tags(self, file_path: str) -> dict:
    """Read tags for ``file_path`` through ``library_index.safe_read_tags``.

    ``library_index.default_read_library_tags`` is passed as the reader.
    """
    return library_index.safe_read_tags(library_index.default_read_library_tags, file_path)

  def _move_file_to_trash(
    self,
    trash_root: str,
    reason: str,
    task_id: str,
    item_id: int | None,
    source_path: str
  ) -> str:
    """Move a file into ``<trash_root>/<reason>/<task_id>/`` and return its new path.

    The trashed file name is prefixed with ``item_id`` (when given) and gets a
    numeric suffix if the name is already taken.

    Args:
      trash_root: Root directory of the trash.
      reason: Sub-directory name describing why the file is trashed.
      task_id: Task the file belongs to; used as the innermost directory.
      item_id: Item id used as file-name prefix, or ``None`` for no prefix.
      source_path: File to move.

    Returns:
      The resolved destination path as a string.

    Raises:
      DedupeItemError: ``source_missing`` when the source does not exist,
        ``trash_move_failed`` when preparing the trash directory or moving
        the file raises ``OSError``.
    """
    source = Path(source_path)
    if not source.exists():
      raise DedupeItemError('source_missing', f'源文件不存在: {source}')
    try:
      destination = _build_unique_destination(
        Path(trash_root) / reason / task_id,
        _build_prefixed_name(item_id, source.name)
      )
      # Directory creation is inside the try so a failure here becomes a
      # per-item error, like a failed move, rather than aborting the run.
      destination.parent.mkdir(parents=True, exist_ok=True)
      self._move_file(source, destination)
    except OSError as error:
      raise DedupeItemError('trash_move_failed', f'移动到回收站失败: {error}') from error
    return str(destination.resolve(strict=False))

  def _move_file(self, source: Path, destination: Path):
    shutil.move(str(source), str(destination))

  def _persist_progress(self, task_id: str, current_stats: dict, dedupe_stats: dict[str, int]):
    """Store a snapshot of the dedupe counters on the task and broadcast it.

    ``current_stats['dedupe']`` is replaced by a copy of ``dedupe_stats``
    before the task row is updated and the progress event is emitted.
    """
    snapshot = dedupe_stats.copy()
    current_stats['dedupe'] = snapshot
    self.task_store.update_task(
      task_id,
      status=TASK_STATUS_RUNNING,
      current_stage='dedupe',
      stats=current_stats
    )
    progress_payload = {'stats': current_stats}
    self.task_stream.broadcast_event(task_id, 'dedupe.progress', 'dedupe', progress_payload)

  def _append_log(
    self,
    task_id: str,
    *,
    level: str,
    event_type: str,
    message: str,
    payload: dict | None = None
  ):
    persisted_log = self.task_store.append_log(
      task_id,
      'dedupe',
      level,
      event_type,
      message,
      payload
    )
    self.task_stream.broadcast_event(
      task_id,
      'log.appended',
      'dedupe',
      {'log': persisted_log}
    )

  def _maybe_persist_progress(
    self,
    task_id: str,
    current_stats: dict,
    dedupe_stats: dict[str, int],
    processed_count: int,
    last_progress_at: float
  ) -> float:
    """Persist progress when a batch boundary or the time interval is reached.

    Returns:
      The monotonic timestamp of this call when progress was persisted,
      otherwise ``last_progress_at`` unchanged.
    """
    now = time.monotonic()
    if (
      processed_count % DEDUPE_PROGRESS_BATCH_SIZE != 0
      and not (now - last_progress_at >= DEDUPE_PROGRESS_INTERVAL_SECONDS)
    ):
      # Neither trigger fired; keep the previous timestamp.
      return last_progress_at
    self._persist_progress(task_id, current_stats, dedupe_stats)
    return now


class OrganizeRunner:
  def __init__(self, task_store, task_stream):
    self.task_store = task_store
    self.task_stream = task_stream

  def run(self, task_id: str, current_stats: dict, config_snapshot: dict):
    organize_stats = current_stats['organize'].copy()
    candidates = self.task_store.list_organize_candidate_items(task_id)
    organize_stats['input_items'] = len(candidates)
    current_stats['organize'] = organize_stats.copy()
    self._persist_progress(task_id, current_stats, organize_stats)

    if not candidates:
      return

    output_root = Path(config_snapshot['output']).expanduser().resolve(strict=False)
    trash_root = Path(config_snapshot['trash']).expanduser().resolve(strict=False)
    processed_count = 0
    last_progress_at = time.monotonic()

    for original_item in candidates:
      item = self.task_store.update_task_item(
        original_item['id'],
        organize_status='running',
        organize_reason=None,
        organize_message=None,
        library_relative_path=None,
        library_file_path=None,
        organize_decision_json=None
      )

      try:
        plan = _build_organize_plan(output_root, item)
        self.task_stream.broadcast_event(
          task_id,
          'organize.path_planned',
          'organize',
          {
            'item': item,
            'planned_relative_path': plan['planned_relative_path']
          }
        )
        self._append_log(
          task_id,
          level='info',
          event_type='organize.path_planned',
          message=f'已规划入库路径: {item["relative_path"]}',
          payload={
            'item': item,
            'planned_relative_path': plan['planned_relative_path']
          }
        )
        final_path, collision_count = self._resolve_destination(
          output_root / plan['planned_relative_path'],
          Path(item['current_file_path'])
        )
        final_path.parent.mkdir(parents=True, exist_ok=True)
        source_path = Path(item['current_file_path'])
        if not source_path.exists():
          raise OrganizeItemError('source_missing', f'源文件不存在: {source_path}')
        self._move_file(source_path, final_path)
        final_relative_path = final_path.relative_to(output_root).as_posix()
        renamed = Path(item['current_file_path']).name != final_path.name
        moved = Path(item['current_file_path']).resolve(strict=False) != final_path.resolve(strict=False)

        if moved:
          organize_stats['moved_items'] += 1
        if renamed:
          organize_stats['renamed_items'] += 1
        if collision_count > 1:
          organize_stats['collision_resolved'] += 1

        final_item = self.task_store.update_task_item(
          item['id'],
          current_file_path=str(final_path.resolve(strict=False)),
          filename=final_path.name,
          organize_status='organized',
          organize_reason=None,
          organize_message='已按标准路径入库',
          library_relative_path=final_relative_path,
          library_file_path=str(final_path.resolve(strict=False)),
          organize_decision_json={
            'source_path': item['current_file_path'],
            'planned_relative_path': plan['planned_relative_path'],
            'final_relative_path': final_relative_path,
            'collision_strategy': 'suffix' if collision_count > 1 else 'none',
            'trashed_on_failure': None,
            'final_action': 'organized'
          }
        )
        self.task_stream.broadcast_event(
          task_id,
          'organize.item_organized',
          'organize',
          {'item': final_item}
        )
        self._append_log(
          task_id,
          level='success',
          event_type='organize.item_organized',
          message=f'文件已入库: {final_relative_path}',
          payload={'item': final_item}
        )
      except OrganizeItemError as error:
        organize_stats['failed_items'] += 1
        final_item = self._handle_failure(task_id, item, error, output_root, trash_root)
        if final_item['organize_status'] == 'trashed':
          organize_stats['trashed_items'] += 1
          self.task_stream.broadcast_event(
            task_id,
            'organize.item_trashed',
            'organize',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='warning',
            event_type='organize.item_trashed',
            message=f'入库失败后已移入回收站: {item["relative_path"]}',
            payload={'item': final_item}
          )
        else:
          self.task_stream.broadcast_event(
            task_id,
            'organize.item_failed',
            'organize',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='error',
            event_type='organize.item_failed',
            message=f'整理入库失败: {item["relative_path"]}',
            payload={'item': final_item}
          )
      except OSError as error:
        organize_stats['failed_items'] += 1
        final_item = self._handle_failure(
          task_id,
          item,
          OrganizeItemError('move_failed', f'整理入库失败: {error}'),
          output_root,
          trash_root
        )
        if final_item['organize_status'] == 'trashed':
          organize_stats['trashed_items'] += 1
          self.task_stream.broadcast_event(
            task_id,
            'organize.item_trashed',
            'organize',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='warning',
            event_type='organize.item_trashed',
            message=f'入库失败后已移入回收站: {item["relative_path"]}',
            payload={'item': final_item}
          )
        else:
          self.task_stream.broadcast_event(
            task_id,
            'organize.item_failed',
            'organize',
            {'item': final_item}
          )
          self._append_log(
            task_id,
            level='error',
            event_type='organize.item_failed',
            message=f'整理入库失败: {item["relative_path"]}',
            payload={'item': final_item}
          )

      processed_count += 1
      last_progress_at = self._maybe_persist_progress(
        task_id,
        current_stats,
        organize_stats,
        processed_count,
        last_progress_at
      )

    self._persist_progress(task_id, current_stats, organize_stats)

  def _handle_failure(
    self,
    task_id: str,
    item: dict,
    error: OrganizeItemError,
    output_root: Path,
    trash_root: Path
  ) -> dict:
    source_path = Path(item['current_file_path'])
    trashed_path = None
    final_status = 'failed'
    message = error.message

    if source_path.exists():
      destination = _build_unique_destination(
        trash_root / 'organize_failed' / task_id,
        _build_prefixed_name(item['id'], source_path.name)
      )
      destination.parent.mkdir(parents=True, exist_ok=True)
      try:
        self._move_file(source_path, destination)
        trashed_path = str(destination.resolve(strict=False))
        final_status = 'trashed'
      except OSError as trash_error:
        message = f'{error.message}; 移入回收站失败: {trash_error}'

    return self.task_store.update_task_item(
      item['id'],
      current_file_path=trashed_path or item['current_file_path'],
      trash_file_path=trashed_path,
      organize_status=final_status,
      organize_reason=error.reason,
      organize_message=message,
      organize_decision_json={
        'source_path': item['current_file_path'],
        'planned_relative_path': None,
        'final_relative_path': None,
        'collision_strategy': 'none',
        'trashed_on_failure': trashed_path,
        'final_action': final_status
      }
    )

  def _resolve_destination(self, desired_path: Path, source_path: Path) -> tuple[Path, int]:
    candidate = desired_path
    collision_index = 1

    while candidate.exists():
      if candidate.resolve(strict=False) == source_path.resolve(strict=False):
        return candidate, collision_index
      collision_index += 1
      candidate = candidate.with_name(
        f'{desired_path.stem} ({collision_index}){desired_path.suffix}'
      )

    return candidate, collision_index

  def _move_file(self, source: Path, destination: Path):
    shutil.move(str(source), str(destination))

  def _persist_progress(self, task_id: str, current_stats: dict, organize_stats: dict[str, int]):
    current_stats['organize'] = organize_stats.copy()
    self.task_store.update_task(
      task_id,
      status=TASK_STATUS_RUNNING,
      current_stage='organize',
      stats=current_stats
    )
    self.task_stream.broadcast_event(
      task_id,
      'organize.progress',
      'organize',
      {'stats': current_stats}
    )

  def _append_log(
    self,
    task_id: str,
    *,
    level: str,
    event_type: str,
    message: str,
    payload: dict | None = None
  ):
    persisted_log = self.task_store.append_log(
      task_id,
      'organize',
      level,
      event_type,
      message,
      payload
    )
    self.task_stream.broadcast_event(
      task_id,
      'log.appended',
      'organize',
      {'log': persisted_log}
    )

  def _maybe_persist_progress(
    self,
    task_id: str,
    current_stats: dict,
    organize_stats: dict[str, int],
    processed_count: int,
    last_progress_at: float
  ) -> float:
    now = time.monotonic()
    if (
      processed_count % ORGANIZE_PROGRESS_BATCH_SIZE == 0
      or now - last_progress_at >= ORGANIZE_PROGRESS_INTERVAL_SECONDS
    ):
      self._persist_progress(task_id, current_stats, organize_stats)
      return now
    return last_progress_at


def _group_batch_candidates(items: list[dict]) -> list[list[dict]]:
  """Partition batch items into groups of likely duplicates.

  An item joins the first existing group registered under any of its
  identity keys; otherwise it starts a new group and registers its own keys.
  Keys of an item that joins an existing group are not registered. Groups
  are returned in order of first appearance.
  """
  ordered_groups: list[list[dict]] = []
  group_by_identity: dict[tuple[str, str], list[dict]] = {}

  for item in items:
    identities = _identity_keys_for_item(item, include_fingerprint=True)
    existing_group = next(
      (
        group_by_identity[(basis, key)]
        for basis, key in identities
        if group_by_identity.get((basis, key)) is not None
      ),
      None
    )
    if existing_group is not None:
      existing_group.append(item)
      continue

    fresh_group = [item]
    ordered_groups.append(fresh_group)
    if not identities:
      # Items without any identity get a private key based on their id.
      group_by_identity[(f'item:{item["id"]}', str(item['id']))] = fresh_group
      continue
    for basis, key in identities:
      group_by_identity.setdefault((basis, key), fresh_group)

  return ordered_groups


def _select_batch_winner(group: list[dict]) -> tuple[dict, list[dict], str | None, str]:
  """Pick the item to keep from one duplicate group.

  Returns:
    ``(winner, losers, identity_basis, group_key)``. ``losers`` are all
    group members whose id differs from the winner's. ``group_key`` is the
    winner's primary identity key, or ``'item:<id>'`` when it has none.
  """
  best = group[0]
  for challenger in group[1:]:
    # A negative comparison means the challenger ranks ahead of the current best.
    best = challenger if _compare_batch_candidates(challenger, best) < 0 else best

  basis, key = _choose_primary_identity(best)
  losers = [member for member in group if member['id'] != best['id']]
  group_key = key or f'item:{best["id"]}'
  return best, losers, basis, group_key


def _compare_batch_candidates(left: dict, right: dict) -> int:
  """Three-way comparison of two batch items; negative means ``left`` ranks first.

  Ties are broken in this order: higher total quality score, then
  authoritative match with higher confidence, then earlier ``created_at``,
  then lower id.

  Returns:
    ``-1`` if ``left`` should be preferred, ``1`` if ``right`` should, and
    ``0`` when every criterion is equal.
  """
  def verdict(left_is_better: bool) -> int:
    return -1 if left_is_better else 1

  left_total = _build_quality_breakdown(left)['total']
  right_total = _build_quality_breakdown(right)['total']
  if left_total != right_total:
    return verdict(left_total > right_total)

  left_rank = (int(bool(left.get('match_is_authoritative'))), left.get('match_confidence') or 0)
  right_rank = (int(bool(right.get('match_is_authoritative'))), right.get('match_confidence') or 0)
  if left_rank != right_rank:
    return verdict(left_rank > right_rank)

  left_stamp = left.get('created_at') or ''
  right_stamp = right.get('created_at') or ''
  if left_stamp != right_stamp:
    return verdict(left_stamp < right_stamp)

  if left['id'] == right['id']:
    return 0
  return verdict(left['id'] < right['id'])


def _build_library_metadata(tags: dict, audio_props: dict, file_path: Path) -> dict:
  """Delegate to ``library_index.build_library_metadata``."""
  return library_index.build_library_metadata(tags, audio_props, file_path)


def _identity_keys_for_item(item: dict, *, include_fingerprint: bool) -> list[tuple[str, str]]:
  """Delegate to ``library_index.identity_keys_for_item``; returns ``(basis, key)`` pairs."""
  return library_index.identity_keys_for_item(item, include_fingerprint=include_fingerprint)


def _choose_primary_identity(item: dict) -> tuple[str | None, str | None]:
  """Delegate to ``library_index.choose_primary_identity``; returns ``(basis, key)``."""
  return library_index.choose_primary_identity(item)


def _build_quality_breakdown(item: dict) -> dict:
  """Score an item's audio quality and return the per-factor breakdown.

  Works for batch items and for library index entries. Factor maxima:
  bit depth 30 (at 24 bit), sample rate 20 (at 96 kHz), bitrate 20
  (at 320000), lossless 15, channels 5 (at 2), size/duration
  consistency 5 and match quality 5.

  Args:
    item: Item dict; ``audio_props_json``, ``matched_metadata_json``,
      ``fingerprint_duration_seconds``, ``size_bytes``, ``extension``,
      ``current_file_path`` / ``file_path``, ``match_is_authoritative`` and
      ``match_confidence`` are read when present.

  Returns:
    Dict with one rounded score per factor plus their sum under ``'total'``.
  """
  audio_props = item.get('audio_props_json') or {}
  duration_seconds = _first_non_empty(
    (item.get('matched_metadata_json') or {}).get('duration_seconds'),
    item.get('fingerprint_duration_seconds'),
    audio_props.get('duration_seconds')
  )
  bit_depth = _safe_float(audio_props.get('bit_depth'))
  sample_rate = _safe_float(audio_props.get('sample_rate'))
  bitrate = _safe_float(audio_props.get('bitrate'))
  channels = _safe_float(audio_props.get('channels'))
  size_bytes = _safe_float(item.get('size_bytes'))

  # Library-side entries expose their location as 'file_path' instead of
  # 'current_file_path'; consider both so the extension fallback (and with
  # it the lossless bonus) also works for them.
  fallback_path = item.get('current_file_path') or item.get('file_path') or ''
  extension = (item.get('extension') or Path(fallback_path).suffix).lower()
  is_lossless = extension in LOSSLESS_EXTENSIONS or str(audio_props.get('codec') or '').upper() == 'FLAC'

  breakdown = {
    'bit_depth': round(min((bit_depth or 0) / 24.0, 1.0) * 30.0, 2),
    'sample_rate': round(min((sample_rate or 0) / 96000.0, 1.0) * 20.0, 2),
    'bitrate': round(min((bitrate or 0) / 320000.0, 1.0) * 20.0, 2),
    'lossless': 15.0 if is_lossless else 0.0,
    'channels': round(min((channels or 0) / 2.0, 1.0) * 5.0, 2),
    'size_duration_consistency': 0.0,
    'match_quality': round(
      (3.0 if item.get('match_is_authoritative') else 0.0)
      + min((_safe_float(item.get('match_confidence')) or 0.0) / 100.0, 1.0) * 2.0,
      2
    )
  }

  # Convert the duration once; an empty or zero duration earns no bonus.
  duration = _safe_float(duration_seconds) if duration_seconds else None
  if size_bytes and duration and size_bytes / duration > 1000:
    breakdown['size_duration_consistency'] = 5.0

  breakdown['total'] = round(sum(breakdown.values()), 2)
  return breakdown


def _serialize_compared_candidate(side: str, item: dict) -> dict:
  """Summarize one side of a comparison for the stored dedupe decision.

  The path is taken from ``file_path`` when set, otherwise from
  ``current_file_path``.
  """
  location = item.get('file_path')
  if not location:
    location = item.get('current_file_path')
  summary = {
    'side': side,
    'item_id': item.get('id'),
    'path': location,
    'relative_path': item.get('relative_path')
  }
  summary['quality_score'] = _build_quality_breakdown(item)['total']
  return summary


def _build_organize_plan(output_root: Path, item: dict) -> dict:
  """Compute the library-relative target path for an item.

  Album tracks go to ``<bucket>/<artist>/<album>[/Disc N]/<NN> - <title><ext>``.
  Items without an album, or whose album is "single"/"singles", go to
  ``<bucket>/<artist>/Singles/<year> - <title>/01 - <title><ext>``.

  Returns:
    Dict with ``'output_root'`` and the POSIX-style ``'planned_relative_path'``.

  Raises:
    OrganizeItemError: ``invalid_target_path`` when no usable artist
      directory or title can be derived.
  """
  tags = item.get('matched_metadata_json') or {}

  artist_dir = _sanitize_path_component(
    _first_non_empty(tags.get('album_artist'), tags.get('artist'), 'Unknown Artist')
  )
  if not artist_dir:
    raise OrganizeItemError('invalid_target_path', '无法生成有效的 Album Artist 目录')

  source_stem = Path(item.get('current_file_path') or item['filename']).stem
  title = _sanitize_path_component(_first_non_empty(tags.get('title'), source_stem))
  if not title:
    raise OrganizeItemError('invalid_target_path', '无法生成有效的标题文件名')

  release_year = _extract_year(_first_non_empty(tags.get('release_date'), tags.get('year')))
  track_no = _parse_track_number(tags.get('track_number')) or 1
  disc_no = _parse_track_number(tags.get('disc_number'))
  suffix = (item.get('extension') or Path(item.get('current_file_path') or '').suffix).lower()
  bucket = _bucket_letter(artist_dir)
  album_dir = _sanitize_path_component(tags.get('album'))

  track_filename = f'{track_no:02d} - {title}{suffix}'
  is_album_release = bool(album_dir) and album_dir.lower() not in {'single', 'singles'}
  if is_album_release:
    segments = [bucket, artist_dir, album_dir]
    if disc_no and disc_no > 1:
      # Only discs after the first get their own sub-directory.
      segments.append(f'Disc {disc_no}')
    segments.append(track_filename)
  else:
    year_label = str(release_year) if release_year else 'Unknown Year'
    single_dir = _sanitize_path_component(f'{year_label} - {title}')
    segments = [bucket, artist_dir, 'Singles', single_dir, f'01 - {title}{suffix}']

  return {
    'output_root': output_root,
    'planned_relative_path': Path(*segments).as_posix()
  }


def _build_prefixed_name(item_id: int | None, filename: str) -> str:
  """Return the sanitized base name of ``filename``, prefixed with ``<item_id>_`` when an id is given."""
  base_name = Path(filename).name
  safe_name = _sanitize_path_component(base_name, fallback='file')
  if item_id is None:
    return safe_name
  return f'{item_id}_{safe_name}'


def _build_unique_destination(directory: Path, filename: str) -> Path:
  candidate = directory / filename
  if not candidate.exists():
    return candidate
  stem = candidate.stem
  suffix = candidate.suffix
  counter = 2
  while True:
    next_candidate = candidate.with_name(f'{stem} ({counter}){suffix}')
    if not next_candidate.exists():
      return next_candidate
    counter += 1


def _bucket_letter(value: str) -> str:
  normalized = unicodedata.normalize('NFKC', value).strip()
  if not normalized:
    return '#'
  first = normalized[0].upper()
  return first if first.isalnum() and first.isascii() else '#'


def _sanitize_path_component(value: str | None, fallback: str | None = None) -> str:
  """Turn arbitrary text into a single safe path component.

  The text is NFKC-normalized, characters that are invalid in file names
  (``\\ / : * ? " < > |`` and control characters) become spaces, whitespace
  runs collapse to one space, and trailing dots and spaces are removed. The
  result is cut to ``MAX_PATH_COMPONENT_LENGTH`` characters.

  Args:
    value: Text to sanitize; a falsy value is replaced by ``fallback``.
    fallback: Used when ``value`` is falsy or nothing usable remains.

  Returns:
    The sanitized component, or an empty string when neither ``value`` nor
    ``fallback`` yields one.
  """
  raw = unicodedata.normalize('NFKC', str(value or fallback or '')).strip()
  cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', ' ', raw)
  cleaned = re.sub(r'\s+', ' ', cleaned).strip().rstrip('. ')
  if not cleaned:
    cleaned = fallback or ''
  if len(cleaned) > MAX_PATH_COMPONENT_LENGTH:
    cleaned = cleaned[:MAX_PATH_COMPONENT_LENGTH].rstrip('. ')
    if not cleaned:
      # The kept prefix consisted only of dots/spaces; without this the
      # caller would get an empty component even though a fallback exists.
      cleaned = (fallback or '')[:MAX_PATH_COMPONENT_LENGTH].rstrip('. ')
  return cleaned


def _normalize_identity_text(value: str | None) -> str:
  """Delegate to ``library_index.normalize_identity_text``."""
  return library_index.normalize_identity_text(value)


def _extract_preserved_version_tokens(value: str | None) -> set[str]:
  """Delegate to ``library_index.extract_preserved_version_tokens``."""
  return library_index.extract_preserved_version_tokens(value)


def _normalize_tag_key(value: str) -> str:
  """Delegate to ``library_index.normalize_tag_key``."""
  return library_index.normalize_tag_key(value)


def _coerce_tag_value(value) -> str | None:
  """Delegate to ``library_index.coerce_tag_value``."""
  return library_index.coerce_tag_value(value)


def _parse_track_number(value) -> int | None:
  """Delegate to ``library_index.parse_track_number``; also used here for disc numbers."""
  return library_index.parse_track_number(value)


def _extract_year(value) -> int | None:
  """Delegate to ``library_index.extract_year``."""
  return library_index.extract_year(value)


def _duration_bucket(value) -> int | None:
  """Delegate to ``library_index.duration_bucket``."""
  return library_index.duration_bucket(value)


def _safe_float(value) -> float | None:
  """Delegate to ``library_index.safe_float``."""
  return library_index.safe_float(value)


def _first_non_empty(*values):
  """Delegate to ``library_index.first_non_empty``."""
  return library_index.first_non_empty(*values)


def _split_artists(value: str | None) -> list[str]:
  """Delegate to ``library_index.split_artists``."""
  return library_index.split_artists(value)