From 98eb5e2cc08f6c613b82b31a4756b39152e4ec9b Mon Sep 17 00:00:00 2001 From: Myned Date: Tue, 24 Sep 2019 22:18:27 -0400 Subject: [PATCH 1/2] Change similarity logic to return False instead of processing everything --- src/utils/scraper.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/utils/scraper.py b/src/utils/scraper.py index ac3fb7c..36d6d80 100644 --- a/src/utils/scraper.py +++ b/src/utils/scraper.py @@ -50,6 +50,10 @@ async def query_kheina(url): iqdbdata = soup.find('data', id='iqdbdata').string iqdbdata = ast.literal_eval(iqdbdata) + similarity = int(float(iqdbdata[0]['similarity'])) + if similarity < 55: + return False + for e in results: if iqdbdata[0]['iqdbid'] in e: match = e @@ -59,9 +63,9 @@ async def query_kheina(url): 'source': match[3], 'artist': match[4], 'thumbnail': f'https://f002.backblazeb2.com/file/kheinacom/{match[1]}.jpg', - 'similarity': str(int(float(iqdbdata[0]['similarity']))), + 'similarity': str(similarity), 'database': 'Kheina' - } + } return result @@ -74,9 +78,9 @@ async def query_saucenao(url): match = content['results'][0] - if 'author_name' in match['data']: - artist = 'author_name' - elif 'member_name' in match['data']: + similarity = int(float(match['header']['similarity'])) + if similarity < 55: + return False artist = 'member_name' elif 'creator' in match['data']: artist = 'creator' @@ -87,7 +91,7 @@ async def query_saucenao(url): 'source': match['data']['ext_urls'][0], 'artist': match['data'][artist], 'thumbnail': match['header']['thumbnail'], - 'similarity': str(int(float(match['header']['similarity']))), + 'similarity': str(similarity), 'database': 'SauceNAO' } @@ -102,9 +106,9 @@ async def get_post(url): raise exc.SizeError(size(filesize, system=alternative)) result = await query_kheina(url) - if int(result['similarity']) < 55: + if not result: result = await query_saucenao(url) - if int(result['similarity']) < 55: + if not result: raise exc.MatchError(re.search('\\/([^\\/]+)$', url).group(1)) return result From cd27cadb52e0a1ae0b87b999b02d68ebf5c1ce7f Mon Sep 17 00:00:00 2001 From: Myned Date: Tue, 24 Sep 2019 22:21:10 -0400 Subject: [PATCH 2/2] Add back e621 preference, refactor artist logic --- src/utils/scraper.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/utils/scraper.py b/src/utils/scraper.py index 36d6d80..05888c9 100644 --- a/src/utils/scraper.py +++ b/src/utils/scraper.py @@ -81,19 +81,30 @@ async def query_saucenao(url): similarity = int(float(match['header']['similarity'])) if similarity < 55: return False - artist = 'member_name' - elif 'creator' in match['data']: - artist = 'creator' - else: - artist = 'imdb_id' + + source = match['data']['ext_urls'][0] + for e in match['data']['ext_urls']: + if 'e621' in e: + source = e + break + + artist = 'Unknown' + for e in ( + 'author_name', + 'member_name', + 'creator' + ): + if e in match['data']: + artist = match['data'][e] + break result = { - 'source': match['data']['ext_urls'][0], - 'artist': match['data'][artist], + 'source': source, + 'artist': artist, 'thumbnail': match['header']['thumbnail'], 'similarity': str(similarity), 'database': 'SauceNAO' - } + } return result