462 lines
14 KiB
Python
462 lines
14 KiB
Python
import copy
|
|
import unittest
|
|
|
|
from backend.app.matcher import MatchProviderError, Matcher
|
|
|
|
|
|
# Baseline configuration handed to ``Matcher.match_item`` by the tests below.
# The stub providers in this file return canned data, so the URLs and
# credentials here are placeholders only.
DEFAULT_CONFIG = {
    'advancedStrategy': {'metadataFallback': True, 'downloadAssets': False},
    'metadata': {
        # AcoustID / MusicBrainz endpoints.
        'acoustidUrl': 'https://api.acoustid.org/v2',
        'acoustidClientKey': 'client-key',
        'musicbrainz': 'https://musicbrainz.org/ws/2/',
        # Locally hosted NetEase / QQ endpoints.
        'netease': 'http://localhost:3000',
        'qq': 'http://localhost:3300',
        # Spotify endpoint and credentials.
        'spotifyUrl': 'https://api.spotify.com/v1',
        'spotifyClientId': 'spotify-id',
        'spotifySecret': 'spotify-secret',
        # Discogs / Last.fm / Genius: tokens and keys are left empty.
        'discogsUrl': 'https://api.discogs.com',
        'discogsToken': '',
        'lastfmUrl': 'https://ws.audioscrobbler.com/2.0/',
        'lastfmKey': '',
        'geniusUrl': 'https://api.genius.com',
        'geniusToken': '',
    },
}
|
|
|
|
|
|
class MatcherTests(unittest.TestCase):
    """Exercise ``Matcher.match_item`` against canned provider stubs.

    Each test wires a ``Matcher`` through ``build_matcher`` so that every
    provider answers with fixed candidates (or raises a fixed error), then
    checks the fields of the dict returned by ``match_item``.
    """

    def test_matches_authoritative_acoustid_candidate(self):
        # One authoritative AcoustID candidate with a 0.98 fingerprint
        # confidence is expected to be accepted as a full match.
        track = build_item()
        acoustid_hit = build_candidate(
            provider='acoustid',
            is_authoritative=True,
            fingerprint_confidence=0.98,
        )
        matcher = build_matcher(acoustid_candidates=[acoustid_hit])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched')
        self.assertEqual(outcome['source'], 'acoustid')
        self.assertTrue(outcome['is_authoritative'])
        self.assertGreaterEqual(outcome['confidence'], 85)

    def test_matches_musicbrainz_text_candidate_without_fingerprint(self):
        # Only the MusicBrainz stub returns a candidate (search confidence
        # 0.92); the default release id must surface in the matched metadata.
        track = build_item()
        text_hit = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.92,
        )
        matcher = build_matcher(musicbrainz_candidates=[text_hit])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched')
        self.assertEqual(outcome['source'], 'musicbrainz')
        self.assertTrue(outcome['is_authoritative'])
        self.assertEqual(outcome['matched_metadata_json']['release_id'], 'release-main')

    def test_matches_fallback_candidate_when_authoritative_missing(self):
        # With no authoritative candidates at all, a non-authoritative
        # Spotify candidate is expected to produce a fallback match.
        track = build_item()
        spotify_ids = {
            'spotify_track_id': 'track-1',
            'spotify_album_id': 'album-1',
        }
        spotify_hit = build_candidate(
            provider='spotify',
            is_authoritative=False,
            search_confidence=0.9,
            source_ids=spotify_ids,
        )
        matcher = build_matcher(spotify_candidates=[spotify_hit])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched_fallback')
        self.assertEqual(outcome['source'], 'spotify')
        self.assertFalse(outcome['is_authoritative'])

    def test_respects_repair_provider_scope(self):
        # Both a MusicBrainz and a Spotify candidate are available, but the
        # config restricts ``repair_provider_scope`` to Spotify, so the
        # Spotify fallback is the expected winner.
        track = build_item()
        musicbrainz_hit = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.95,
        )
        spotify_ids = {
            'spotify_track_id': 'track-1',
            'spotify_album_id': 'album-1',
        }
        spotify_hit = build_candidate(
            provider='spotify',
            is_authoritative=False,
            search_confidence=0.9,
            source_ids=spotify_ids,
        )
        matcher = build_matcher(
            musicbrainz_candidates=[musicbrainz_hit],
            spotify_candidates=[spotify_hit],
        )
        # Work on a deep copy so the shared DEFAULT_CONFIG is never mutated.
        spotify_only_config = {
            **copy.deepcopy(DEFAULT_CONFIG),
            'repair_provider_scope': ['spotify'],
        }

        outcome = matcher.match_item(track, [track], spotify_only_config)

        self.assertEqual(outcome['status'], 'matched_fallback')
        self.assertEqual(outcome['source'], 'spotify')
        self.assertFalse(outcome['is_authoritative'])

    def test_returns_low_score_when_gap_is_too_small(self):
        # Two MusicBrainz candidates for different releases with search
        # confidences 0.95 and 0.94 are expected to be rejected with
        # ``score_gap_too_small`` while both stay listed as candidates.
        track = build_item()
        leader = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.95,
            release_id='release-a',
        )
        runner_up_ids = {
            'musicbrainz_recording_id': 'recording-b',
            'musicbrainz_release_id': 'release-b',
            'musicbrainz_release_group_id': 'release-group-b',
        }
        runner_up = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.94,
            release_id='release-b',
            source_ids=runner_up_ids,
        )
        matcher = build_matcher(musicbrainz_candidates=[leader, runner_up])

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'low_score')
        self.assertEqual(outcome['reason'], 'score_gap_too_small')
        self.assertEqual(len(outcome['match_candidates_json']), 2)

    def test_returns_not_found_when_no_candidates_exist(self):
        # Every stub returns an empty candidate list.
        track = build_item()
        matcher = build_matcher()

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'not_found')
        self.assertIsNone(outcome['matched_metadata_json'])
        self.assertEqual(outcome['match_candidates_json'], [])

    def test_skips_acoustid_error_and_matches_musicbrainz_text_candidate(self):
        # The AcoustID stub raises; the MusicBrainz candidate must still
        # match, and the failure is reported as a single provider warning.
        track = build_item()
        text_hit = build_candidate(
            provider='musicbrainz',
            is_authoritative=True,
            search_confidence=0.92,
        )
        acoustid_failure = MatchProviderError('acoustid', 'acoustid failed')
        matcher = build_matcher(
            acoustid_error=acoustid_failure,
            musicbrainz_candidates=[text_hit],
        )

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched')
        self.assertEqual(outcome['source'], 'musicbrainz')
        self.assertEqual(len(outcome['provider_warnings']), 1)
        self.assertEqual(outcome['provider_warnings'][0]['provider'], 'acoustid')

    def test_skips_musicbrainz_error_and_uses_fallback_candidate(self):
        # The MusicBrainz stub raises from both of its methods; the Spotify
        # candidate is expected to win as a fallback.
        track = build_item()
        spotify_ids = {
            'spotify_track_id': 'track-1',
            'spotify_album_id': 'album-1',
        }
        spotify_hit = build_candidate(
            provider='spotify',
            is_authoritative=False,
            search_confidence=0.9,
            source_ids=spotify_ids,
        )
        musicbrainz_failure = MatchProviderError('musicbrainz', 'musicbrainz failed')
        matcher = build_matcher(
            musicbrainz_error=musicbrainz_failure,
            spotify_candidates=[spotify_hit],
        )

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'matched_fallback')
        self.assertEqual(outcome['source'], 'spotify')
        # Two MusicBrainz warnings are expected -- presumably one per
        # MusicBrainz call the matcher attempts; confirm against Matcher.
        warned_providers = [entry['provider'] for entry in outcome['provider_warnings']]
        self.assertEqual(warned_providers, ['musicbrainz', 'musicbrainz'])

    def test_returns_not_found_when_all_providers_fail(self):
        # Every search stub raises; the result is ``not_found`` with exactly
        # one warning per provider, in the order asserted below.
        track = build_item()
        acoustid_failure = MatchProviderError('acoustid', 'acoustid failed')
        musicbrainz_failure = MatchProviderError('musicbrainz', 'musicbrainz failed')
        netease_failure = MatchProviderError('netease', 'netease failed')
        qq_failure = MatchProviderError('qq', 'qq failed')
        spotify_failure = MatchProviderError('spotify', 'spotify failed')
        matcher = build_matcher(
            acoustid_error=acoustid_failure,
            musicbrainz_error=musicbrainz_failure,
            netease_error=netease_failure,
            qq_error=qq_failure,
            spotify_error=spotify_failure,
        )

        outcome = matcher.match_item(track, [track], DEFAULT_CONFIG)

        self.assertEqual(outcome['status'], 'not_found')
        warned_providers = [entry['provider'] for entry in outcome['provider_warnings']]
        self.assertEqual(
            warned_providers,
            ['acoustid', 'musicbrainz', 'netease', 'qq', 'spotify'],
        )

    def test_album_context_converges_to_single_release(self):
        # Two tracks from the same album each receive two equally confident
        # MusicBrainz candidates. Only release-a's tracklist mirrors the
        # album's titles and durations, and both tracks are expected to
        # resolve to release-a.
        first_track = build_item(title='Song Title', track_number=1, duration_seconds=201)
        second_track = build_item(
            title='Song Title',
            track_number=2,
            duration_seconds=233,
            relative_path='Artist/Album/02.flac',
            filename='02.flac',
        )
        album = [first_track, second_track]

        def dynamic_musicbrainz_candidates(item_metadata, _config, **_kwargs):
            # Tailor both candidate releases to whichever track is queried.
            track_number = item_metadata.get('track_number')
            duration_seconds = item_metadata.get('duration_seconds')

            tracklist_a = [
                {
                    'title': 'Song Title',
                    'track_number': 1,
                    'disc_number': 1,
                    'duration_seconds': 201,
                },
                {
                    'title': 'Song Title',
                    'track_number': 2,
                    'disc_number': 1,
                    'duration_seconds': 233,
                },
            ]
            ids_a = {
                'musicbrainz_recording_id': f'recording-a-{track_number}',
                'musicbrainz_release_id': 'release-a',
                'musicbrainz_release_group_id': 'group-a',
            }
            release_a = build_candidate(
                provider='musicbrainz',
                is_authoritative=True,
                search_confidence=0.91,
                track_number=track_number,
                duration_seconds=duration_seconds,
                recording_id=f'recording-a-{track_number}',
                release_id='release-a',
                release_group_id='group-a',
                release_tracklist=tracklist_a,
                source_ids=ids_a,
            )

            tracklist_b = [
                {
                    'title': 'Track Zero',
                    'track_number': 1,
                    'disc_number': 1,
                    'duration_seconds': 120,
                },
                {
                    'title': 'Track Extra',
                    'track_number': 2,
                    'disc_number': 1,
                    'duration_seconds': 310,
                },
            ]
            ids_b = {
                'musicbrainz_recording_id': f'recording-b-{track_number}',
                'musicbrainz_release_id': 'release-b',
                'musicbrainz_release_group_id': 'group-b',
            }
            release_b = build_candidate(
                provider='musicbrainz',
                is_authoritative=True,
                search_confidence=0.91,
                track_number=track_number,
                duration_seconds=duration_seconds,
                recording_id=f'recording-b-{track_number}',
                release_id='release-b',
                release_group_id='group-b',
                release_tracklist=tracklist_b,
                source_ids=ids_b,
            )
            return [release_a, release_b]

        matcher = build_matcher(musicbrainz_candidates=dynamic_musicbrainz_candidates)

        first_outcome = matcher.match_item(first_track, album, DEFAULT_CONFIG)
        second_outcome = matcher.match_item(second_track, album, DEFAULT_CONFIG)

        self.assertEqual(first_outcome['status'], 'matched')
        self.assertEqual(second_outcome['status'], 'matched')
        self.assertEqual(first_outcome['matched_metadata_json']['release_id'], 'release-a')
        self.assertEqual(second_outcome['matched_metadata_json']['release_id'], 'release-a')
|
|
|
|
|
|
def build_matcher(
    *,
    acoustid_candidates=None,
    musicbrainz_candidates=None,
    aligned_candidate=None,
    netease_candidates=None,
    qq_candidates=None,
    spotify_candidates=None,
    acoustid_error=None,
    musicbrainz_error=None,
    netease_error=None,
    qq_error=None,
    spotify_error=None
):
    """Assemble a ``Matcher`` whose providers are all in-file stubs.

    Each ``*_candidates`` argument is the canned result (a list, or a
    callable producing one) for that provider's stub, and each ``*_error``
    argument, when given, is raised by that stub instead of returning.
    ``aligned_candidate`` is what the MusicBrainz stub returns from
    ``align_candidate``. The Discogs, Last.fm and Genius providers are always
    stubs whose ``enrich`` returns ``None``.
    """
    acoustid_stub = StaticSearchProvider(acoustid_candidates, error=acoustid_error)
    musicbrainz_stub = StaticMusicBrainzProvider(
        musicbrainz_candidates,
        aligned_candidate=aligned_candidate,
        error=musicbrainz_error,
    )
    netease_stub = StaticSearchProvider(netease_candidates, error=netease_error)
    qq_stub = StaticSearchProvider(qq_candidates, error=qq_error)
    spotify_stub = StaticSearchProvider(spotify_candidates, error=spotify_error)

    return Matcher(
        acoustid_provider=acoustid_stub,
        musicbrainz_provider=musicbrainz_stub,
        netease_provider=netease_stub,
        qq_provider=qq_stub,
        spotify_provider=spotify_stub,
        discogs_provider=StaticEnrichmentProvider(),
        lastfm_provider=StaticEnrichmentProvider(),
        genius_provider=StaticEnrichmentProvider(),
    )
|
|
|
|
|
|
def build_item(
    *,
    title='Song Title',
    artist='Song Artist',
    album='Album Name',
    track_number=1,
    disc_number=1,
    duration_seconds=201,
    relative_path='Artist/Album/01.flac',
    filename='01.flac'
):
    """Return a dict describing one scanned audio file for the matcher.

    The keyword arguments fill in the tag, duration and path fields; every
    other field is a fixed value. ``artist`` is used for both the ``artist``
    and ``album_artist`` tags, and the track and disc numbers are stored in
    the tags as strings.
    """
    # The original and current paths both point at the same /tmp location.
    file_path = f'/tmp/{filename}'
    tags = {
        'title': title,
        'artist': artist,
        'album': album,
        'album_artist': artist,
        'track_number': str(track_number),
        'disc_number': str(disc_number),
    }
    audio_props = {'duration_seconds': duration_seconds}

    return {
        'id': 1,
        'task_id': 'task-1',
        'original_path': file_path,
        'current_file_path': file_path,
        'relative_path': relative_path,
        'filename': filename,
        'original_tags_json': tags,
        'audio_props_json': audio_props,
        'acoustic_fingerprint': 'fingerprint',
        'fingerprint_duration_seconds': duration_seconds,
        'scan_status': 'queued',
        'preprocess_status': 'completed',
    }
|
|
|
|
|
|
def build_candidate(
    *,
    provider,
    is_authoritative,
    title='Song Title',
    artist='Song Artist',
    album='Album Name',
    track_number=1,
    disc_number=1,
    duration_seconds=201,
    recording_id='recording-main',
    release_id='release-main',
    release_group_id='release-group-main',
    fingerprint_confidence=None,
    search_confidence=None,
    release_tracklist=None,
    source_ids=None
):
    """Return a candidate dict shaped like a provider search result.

    Args:
        provider: Provider name stored under ``'provider'``.
        is_authoritative: Flag stored under ``'is_authoritative'``.
        title, artist, album: Text fields; ``artist`` also fills
            ``'artists'`` (as a one-element list) and ``'album_artist'``.
        track_number, disc_number, duration_seconds: Track position and
            length, also used for the default one-entry tracklist.
        recording_id, release_id, release_group_id: Identifiers, also used
            for the default ``source_ids``.
        fingerprint_confidence, search_confidence: Stored as given.
        release_tracklist: Explicit tracklist. ``None`` (the default) means
            a one-entry tracklist describing this candidate's own track.
        source_ids: Explicit id mapping. ``None`` (the default) means the
            three ``musicbrainz_*`` ids derived from the id arguments,
            whatever ``provider`` is.

    Returns:
        A new dict; ``release_date`` and ``year`` are fixed at
        ``'2024-01-01'`` and ``2024``.
    """
    # Only substitute defaults when the caller passed nothing. A plain
    # ``value or default`` would also discard an explicit empty dict/list,
    # making it impossible to build a candidate with no ids or no tracklist.
    if source_ids is None:
        source_ids = {
            'musicbrainz_recording_id': recording_id,
            'musicbrainz_release_id': release_id,
            'musicbrainz_release_group_id': release_group_id,
        }
    if release_tracklist is None:
        release_tracklist = [
            {
                'title': title,
                'track_number': track_number,
                'disc_number': disc_number,
                'duration_seconds': duration_seconds,
            }
        ]

    return {
        'provider': provider,
        'is_authoritative': is_authoritative,
        'title': title,
        'artist': artist,
        'artists': [artist],
        'album': album,
        'album_artist': artist,
        'track_number': track_number,
        'disc_number': disc_number,
        'release_date': '2024-01-01',
        'year': 2024,
        'duration_seconds': duration_seconds,
        'recording_id': recording_id,
        'release_id': release_id,
        'release_group_id': release_group_id,
        'source_ids': source_ids,
        'fingerprint_confidence': fingerprint_confidence,
        'search_confidence': search_confidence,
        'release_tracklist': release_tracklist,
    }
|
|
|
|
|
|
class StaticSearchProvider:
    """Search-provider stub that replays canned candidates or raises.

    ``candidates`` may be a list or a callable; a callable is invoked with
    whatever arguments ``search`` receives. A falsy ``candidates`` value is
    stored as an empty list.
    """

    def __init__(self, candidates=None, *, error=None):
        self.error = error
        self.candidates = candidates if candidates else []

    def search(self, *args, **kwargs):
        """Raise the configured error if set, else return a deep copy of the candidates."""
        if self.error:
            raise self.error
        source = self.candidates
        produced = source(*args, **kwargs) if callable(source) else source
        # Hand out a copy so callers cannot mutate the canned data.
        return copy.deepcopy(produced)
|
|
|
|
|
|
class StaticMusicBrainzProvider:
    """MusicBrainz stub with canned text-search and alignment results.

    ``candidates`` may be a list or a callable; a callable is invoked with
    whatever arguments ``search_text`` receives. When ``error`` is set, both
    ``search_text`` and ``align_candidate`` raise it.
    """

    def __init__(self, candidates=None, *, aligned_candidate=None, error=None):
        self.error = error
        self.aligned_candidate = aligned_candidate
        self.candidates = candidates if candidates else []

    def search_text(self, *args, **kwargs):
        """Raise the configured error if set, else return a deep copy of the candidates."""
        if self.error:
            raise self.error
        source = self.candidates
        produced = source(*args, **kwargs) if callable(source) else source
        # Hand out a copy so callers cannot mutate the canned data.
        return copy.deepcopy(produced)

    def align_candidate(self, *args, **kwargs):
        """Raise the configured error if set, else return a deep copy of ``aligned_candidate``."""
        if self.error:
            raise self.error
        aligned = self.aligned_candidate
        return copy.deepcopy(aligned)
|
|
|
|
|
|
class StaticEnrichmentProvider:
    """Enrichment-provider stub whose ``enrich`` never returns any data."""

    def enrich(self, *args, **kwargs):
        # Accept any call shape and contribute nothing.
        return None
|
|
|
|
|
|
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|