From ce132fbc64777e10f527c3a7414753c593eb6f3d Mon Sep 17 00:00:00 2001 From: Myned Date: Mon, 23 Sep 2019 02:06:13 -0400 Subject: [PATCH] Refactor reverse commands, add Kheina database, and remove Harry.lu --- src/cogs/booru.py | 69 +++++++--------------- src/utils/scraper.py | 133 ++++++++++++++++++++++++++++--------------- 2 files changed, 107 insertions(+), 95 deletions(-) diff --git a/src/cogs/booru.py b/src/cogs/booru.py index bdf50df..cd39919 100644 --- a/src/cogs/booru.py +++ b/src/cogs/booru.py @@ -106,23 +106,6 @@ class MsG(cmds.Cog): return args - def _get_icon(self, score): - if score is 'SauceNAO': - return 'https://d2.alternativeto.net/dist/icons/saucenao_23437.png?width=64&height=64&mode=crop&upscale=false' - elif score < 0: - return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/pouting-face_1f621.png' - elif score == 0: - return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/mozilla/36/pile-of-poo_1f4a9.png' - elif 10 > score > 0: - return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/white-medium-star_2b50.png' - elif 50 > score >= 10: - return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/glowing-star_1f31f.png' - elif 100 > score >= 50: - return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/dizzy-symbol_1f4ab.png' - elif score >= 100: - return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/sparkles_2728.png' - return None - async def _send_hearts(self): while self.hearting: temp = await self.heartqueue.get() @@ -384,7 +367,7 @@ class MsG(cmds.Cog): # Reverse image searches a linked image using the public iqdb @cmds.cooldown(1, 5, cmds.BucketType.member) - @cmds.command(name='reverse', aliases=['rev', 'ris'], brief='Reverse image search from e621', description='NSFW\nReverse-search an image with given URL') + @cmds.command(name='reverse', aliases=['rev', 'ris'], brief='Reverse image search from Kheina and SauceNAO', description='NSFW\nReverse-search an image with given URL') async def reverse(self, ctx, *args): try: kwargs = u.get_kwargs(ctx, args) @@ -400,19 +383,15 @@ class MsG(cmds.Cog): async with ctx.channel.typing(): for url in urls: try: - post, source, similarity = await scraper.get_post(url) + result = await scraper.get_post(url) embed = d.Embed( - title=', '.join(post['artist']), - url=source, + title=result['artist'], + url=result['source'], color=ctx.me.color if isinstance(ctx.channel, d.TextChannel) else u.color) - embed.set_image(url=post['file_url']) - embed.set_author( - name=similarity, - icon_url=ctx.author.avatar_url) - embed.set_footer( - text=post['score'], - icon_url=self._get_icon(post['score'])) + embed.set_image(url=result['thumbnail']) + embed.set_author(name=result['similarity'] + '% Match', icon_url=ctx.author.avatar_url) + embed.set_footer(text=result['database']) await ctx.send(embed=embed) @@ -474,19 +453,15 @@ class MsG(cmds.Cog): for message, urls in links.items(): for url in urls: try: - post, source, similarity = await scraper.get_post(url) + result = await scraper.get_post(url) embed = d.Embed( - title=', '.join(post['artist']), - url=source, + title=result['artist'], + url=result['source'], color=ctx.me.color if isinstance(ctx.channel, d.TextChannel) else u.color) - embed.set_image(url=post['file_url']) - embed.set_author( - name=similarity, - icon_url=message.author.avatar_url) - embed.set_footer( - text=post['score'], - icon_url=self._get_icon(post['score'])) + embed.set_image(url=result['thumbnail']) + embed.set_author(name=result['similarity'] + '% Match', icon_url=ctx.author.avatar_url) + embed.set_footer(text=result['database']) await dest.send(embed=embed) await message.add_reaction('\N{WHITE HEAVY CHECK MARK}') @@ -536,19 +511,15 @@ class MsG(cmds.Cog): async with message.channel.typing(): for url in urls: try: - post, source, similarity = await scraper.get_post(url) + result = await scraper.get_post(url) embed = d.Embed( - title=', '.join(post['artist']), - url=source, - color=message.channel.guild.me.color if isinstance(message.channel, d.TextChannel) else u.color) - embed.set_image(url=post['file_url']) - embed.set_author( - name=similarity, - icon_url=message.author.avatar_url) - embed.set_footer( - text=post['score'], - icon_url=self._get_icon(post['score'])) + title=result['artist'], + url=result['source'], + color=message.me.color if isinstance(message.channel, d.TextChannel) else u.color) + embed.set_image(url=result['thumbnail']) + embed.set_author(name=result['similarity'] + '% Match', icon_url=message.author.avatar_url) + embed.set_footer(text=result['database']) await message.channel.send(embed=embed) diff --git a/src/utils/scraper.py b/src/utils/scraper.py index 28532e1..dae24f5 100644 --- a/src/utils/scraper.py +++ b/src/utils/scraper.py @@ -1,4 +1,5 @@ -import asyncio +import aiohttp +import ast import re from bs4 import BeautifulSoup @@ -9,58 +10,98 @@ from misc import exceptions as exc from utils import utils as u +# async def get_harry(url): +# content = await u.fetch('https://iqdb.harry.lu', params={'url': url}) +# soup = BeautifulSoup(content, 'html.parser') +# +# if soup.find('div', id='show1').string is 'Not the right one? ': +# parent = soup.find('th', string='Probable match:').parent.parent +# +# post = await u.fetch( +# 'https://e621.net/post/show.json', +# params={'id': re.search('show/([0-9]+)', parent.tr.td.a.get('href')).group(1)}, +# json=True) +# if (post['status'] == 'deleted'): +# post = await u.fetch( +# 'https://e621.net/post/show.json', +# params={'id': re.search('#(\\d+)', post['delreason']).group(1)}, +# json=True) +# +# result = { +# 'source': f'https://e621.net/post/show/{post["id"]}', +# 'artist': ', '.join(post['artist']), +# 'thumbnail': parent.td.a.img.get('src'), +# 'similarity': re.search('\\d+', parent.tr[4].td.string).group(0), +# 'database': 'Harry.lu' +# } +# +# return result +# else: +# return False + + +async def get_kheina(url): + content = await u.fetch('https://kheina.com', params={'url': url}) + soup = BeautifulSoup(content, 'html.parser') + + results = ast.literal_eval(soup.find('data', id='results').string)[-1] + iqdbdata = ast.literal_eval(soup.find('data', id='iqdbdata').string)[0] + + result = { + 'source': results[3], + 'artist': results[4], + 'thumbnail': f'https://f002.backblazeb2.com/file/kheinacom/{results[1]}.jpg', + 'similarity': str(int(float(iqdbdata['similarity']))), + 'database': 'Kheina' + } + + return result + + +async def get_saucenao(url): + content = await u.fetch( + 'https://saucenao.com/search.php', + params={'url': url, 'api_key': u.config['saucenao_api'], 'output_type': 2}, + json=True) + results = content['results'][0] + for i in range(len(content['results'])): + if 'e621' in content['results'][i]['header']['index_name']: + results = content['results'][i] + + if 'author_name' in results['data']: + artist = 'author_name' + elif 'member_name' in results['data']: + artist = 'member_name' + else: + artist = 'creator' + + result = { + 'source': results['data']['ext_urls'][0], + 'artist': results['data'][artist], + 'thumbnail': results['header']['thumbnail'], + 'similarity': str(int(float(results['header']['similarity']))), + 'database': 'SauceNAO' + } + + return result + + async def get_post(url): try: - image = await u.fetch(url, response=True) - filesize = int(image.headers['Content-Length']) + content = await u.fetch(url, response=True) + filesize = int(content.headers['Content-Length']) if filesize > 8192 * 1024: raise exc.SizeError(size(filesize, system=alternative)) - content = await u.fetch('http://iqdb.harry.lu', params={'url': url}) - soup = BeautifulSoup(content, 'html.parser') - source = soup.find_all('a', limit=2)[1].get('href') + result = await get_kheina(url) + if int(result['similarity']) < 55: + result = await get_saucenao(url) + if int(result['similarity']) < 55: + raise exc.MatchError(re.search('\\/([^\\/]+)$', url).group(1)) - if source != '#': - ident = re.search('show/([0-9]+)', source).group(1) - post = await u.fetch('http://e621.net/post/show.json', params={'id': ident}, json=True) - if (post['status'] == 'deleted'): - ident = re.search('#(\\d+)', post['delreason']).group(1) - post = await u.fetch('http://e621.net/post/show.json', params={'id': ident}, json=True) - source = f'https://e621.net/post/show/{post["id"]}' - similarity = re.search('\\d+', soup.find(string=re.compile('similarity'))).group(0) + return result - return post, source, similarity + '% Match' - else: - raise IndexError - - except IndexError: - content = await u.fetch( - 'https://saucenao.com/search.php', - params={ - 'url': url, - 'api_key': u.config['saucenao_api'], - 'output_type': 2}, - json=True) - result = content['results'][0] - if 'author_name' in result['data']: - artist = 'author_name' - elif 'member_name' in result['data']: - artist = 'member_name' - else: - artist = 'creator' - post = { - 'file_url': result['header']['thumbnail'], - 'artist': [result['data'][artist]], - 'score': 'SauceNAO'} - source = result['data']['ext_urls'][0] - similarity = re.search('(\\d+)\\.', result['header']['similarity']).group(1) - - if int(similarity) >= 55: - return post, source, similarity + '% Match' - - raise exc.MatchError(re.search('\\/([^\\/]+)$', url).group(1)) - - except (AttributeError, ValueError, KeyError): + except aiohttp.InvalidURL: raise exc.MissingArgument