From ce132fbc64777e10f527c3a7414753c593eb6f3d Mon Sep 17 00:00:00 2001
From: Myned <onemyned@gmail.com>
Date: Mon, 23 Sep 2019 02:06:13 -0400
Subject: [PATCH] Refactor reverse commands, add Kheina database, and remove
 Harry.lu

---
 src/cogs/booru.py    |  69 +++++++---------------
 src/utils/scraper.py | 133 ++++++++++++++++++++++++++++---------------
 2 files changed, 107 insertions(+), 95 deletions(-)

diff --git a/src/cogs/booru.py b/src/cogs/booru.py
index bdf50df..cd39919 100644
--- a/src/cogs/booru.py
+++ b/src/cogs/booru.py
@@ -106,23 +106,6 @@ class MsG(cmds.Cog):
 
         return args
 
-    def _get_icon(self, score):
-        if score is 'SauceNAO':
-            return 'https://d2.alternativeto.net/dist/icons/saucenao_23437.png?width=64&height=64&mode=crop&upscale=false'
-        elif score < 0:
-            return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/pouting-face_1f621.png'
-        elif score == 0:
-            return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/mozilla/36/pile-of-poo_1f4a9.png'
-        elif 10 > score > 0:
-            return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/white-medium-star_2b50.png'
-        elif 50 > score >= 10:
-            return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/glowing-star_1f31f.png'
-        elif 100 > score >= 50:
-            return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/dizzy-symbol_1f4ab.png'
-        elif score >= 100:
-            return 'https://emojipedia-us.s3.amazonaws.com/thumbs/320/twitter/103/sparkles_2728.png'
-        return None
-
     async def _send_hearts(self):
         while self.hearting:
             temp = await self.heartqueue.get()
@@ -384,7 +367,7 @@ class MsG(cmds.Cog):
 
     # Reverse image searches a linked image using the public iqdb
     @cmds.cooldown(1, 5, cmds.BucketType.member)
-    @cmds.command(name='reverse', aliases=['rev', 'ris'], brief='Reverse image search from e621', description='NSFW\nReverse-search an image with given URL')
+    @cmds.command(name='reverse', aliases=['rev', 'ris'], brief='Reverse image search from Kheina and SauceNAO', description='NSFW\nReverse-search an image with given URL')
     async def reverse(self, ctx, *args):
         try:
             kwargs = u.get_kwargs(ctx, args)
@@ -400,19 +383,15 @@ class MsG(cmds.Cog):
             async with ctx.channel.typing():
                 for url in urls:
                     try:
-                        post, source, similarity = await scraper.get_post(url)
+                        result = await scraper.get_post(url)
 
                         embed = d.Embed(
-                            title=', '.join(post['artist']),
-                            url=source,
+                            title=result['artist'],
+                            url=result['source'],
                             color=ctx.me.color if isinstance(ctx.channel, d.TextChannel) else u.color)
-                        embed.set_image(url=post['file_url'])
-                        embed.set_author(
-                            name=similarity,
-                            icon_url=ctx.author.avatar_url)
-                        embed.set_footer(
-                            text=post['score'],
-                            icon_url=self._get_icon(post['score']))
+                        embed.set_image(url=result['thumbnail'])
+                        embed.set_author(name=result['similarity'] + '% Match', icon_url=ctx.author.avatar_url)
+                        embed.set_footer(text=result['database'])
 
                         await ctx.send(embed=embed)
 
@@ -474,19 +453,15 @@ class MsG(cmds.Cog):
                 for message, urls in links.items():
                     for url in urls:
                         try:
-                            post, source, similarity = await scraper.get_post(url)
+                            result = await scraper.get_post(url)
 
                             embed = d.Embed(
-                                title=', '.join(post['artist']),
-                                url=source,
+                                title=result['artist'],
+                                url=result['source'],
                                 color=ctx.me.color if isinstance(ctx.channel, d.TextChannel) else u.color)
-                            embed.set_image(url=post['file_url'])
-                            embed.set_author(
-                                name=similarity,
-                                icon_url=message.author.avatar_url)
-                            embed.set_footer(
-                                text=post['score'],
-                                icon_url=self._get_icon(post['score']))
+                            embed.set_image(url=result['thumbnail'])
+                            embed.set_author(name=result['similarity'] + '% Match', icon_url=message.author.avatar_url)
+                            embed.set_footer(text=result['database'])
 
                             await dest.send(embed=embed)
                             await message.add_reaction('\N{WHITE HEAVY CHECK MARK}')
@@ -536,19 +511,15 @@ class MsG(cmds.Cog):
             async with message.channel.typing():
                 for url in urls:
                     try:
-                        post, source, similarity = await scraper.get_post(url)
+                        result = await scraper.get_post(url)
 
                         embed = d.Embed(
-                            title=', '.join(post['artist']),
-                            url=source,
-                            color=message.channel.guild.me.color if isinstance(message.channel, d.TextChannel) else u.color)
-                        embed.set_image(url=post['file_url'])
-                        embed.set_author(
-                            name=similarity,
-                            icon_url=message.author.avatar_url)
-                        embed.set_footer(
-                            text=post['score'],
-                            icon_url=self._get_icon(post['score']))
+                            title=result['artist'],
+                            url=result['source'],
+                            color=message.channel.guild.me.color if isinstance(message.channel, d.TextChannel) else u.color)
+                        embed.set_image(url=result['thumbnail'])
+                        embed.set_author(name=result['similarity'] + '% Match', icon_url=message.author.avatar_url)
+                        embed.set_footer(text=result['database'])
 
                         await message.channel.send(embed=embed)
 
diff --git a/src/utils/scraper.py b/src/utils/scraper.py
index 28532e1..dae24f5 100644
--- a/src/utils/scraper.py
+++ b/src/utils/scraper.py
@@ -1,4 +1,5 @@
-import asyncio
+import aiohttp
+import ast
 import re
 
 from bs4 import BeautifulSoup
@@ -9,58 +10,98 @@ from misc import exceptions as exc
 from utils import utils as u
 
 
+# async def get_harry(url):
+#     content = await u.fetch('https://iqdb.harry.lu', params={'url': url})
+#     soup = BeautifulSoup(content, 'html.parser')
+#
+#     if soup.find('div', id='show1').string == 'Not the right one? ':
+#         parent = soup.find('th', string='Probable match:').parent.parent
+#
+#         post = await u.fetch(
+#             'https://e621.net/post/show.json',
+#             params={'id': re.search('show/([0-9]+)', parent.tr.td.a.get('href')).group(1)},
+#             json=True)
+#         if (post['status'] == 'deleted'):
+#             post = await u.fetch(
+#                 'https://e621.net/post/show.json',
+#                 params={'id': re.search('#(\\d+)', post['delreason']).group(1)},
+#                 json=True)
+#
+#         result = {
+#             'source': f'https://e621.net/post/show/{post["id"]}',
+#             'artist': ', '.join(post['artist']),
+#             'thumbnail': parent.td.a.img.get('src'),
+#             'similarity': re.search('\\d+', parent.tr[4].td.string).group(0),
+#             'database': 'Harry.lu'
+#             }
+#
+#         return result
+#     else:
+#         return False
+
+
+async def get_kheina(url):
+    """Look up url on Kheina and return its match as a result dict."""
+    page = await u.fetch('https://kheina.com', params={'url': url})
+    soup = BeautifulSoup(page, 'html.parser')
+
+    # The text of both <data> elements is parsed as a Python literal; the
+    # last 'results' entry and the first 'iqdbdata' entry are used
+    match = ast.literal_eval(soup.find('data', id='results').string)[-1]
+    stats = ast.literal_eval(soup.find('data', id='iqdbdata').string)[0]
+
+    return {
+        'source': match[3],
+        'artist': match[4],
+        'thumbnail': f'https://f002.backblazeb2.com/file/kheinacom/{match[1]}.jpg',
+        'similarity': str(int(float(stats['similarity']))),
+        'database': 'Kheina'}
+
+
+async def get_saucenao(url):
+    """Look up url on SauceNAO and return its match as a result dict.
+
+    The last result whose index name contains 'e621' is used when one
+    exists, otherwise the first result. Similarity is a whole-number string.
+    """
+    content = await u.fetch(
+        'https://saucenao.com/search.php',
+        params={'url': url, 'api_key': u.config['saucenao_api'], 'output_type': 2},
+        json=True)
+    matches = content['results']
+    # Later e621 hits override earlier ones; fall back to the first result
+    e621_hits = [match for match in matches if 'e621' in match['header']['index_name']]
+    best = e621_hits[-1] if e621_hits else matches[0]
+    header, data = best['header'], best['data']
+
+    # Use whichever artist field the result carries, defaulting to 'creator'
+    artist = next((key for key in ('author_name', 'member_name') if key in data), 'creator')
+
+    return {
+        'source': data['ext_urls'][0],
+        'artist': data[artist],
+        'thumbnail': header['thumbnail'],
+        'similarity': str(int(float(header['similarity']))),
+        'database': 'SauceNAO'
+        }
+
+
 async def get_post(url):
     try:
-        image = await u.fetch(url, response=True)
-        filesize = int(image.headers['Content-Length'])
+        content = await u.fetch(url, response=True)
+        filesize = int(content.headers['Content-Length'])
         if filesize > 8192 * 1024:
             raise exc.SizeError(size(filesize, system=alternative))
 
-        content = await u.fetch('http://iqdb.harry.lu', params={'url': url})
-        soup = BeautifulSoup(content, 'html.parser')
-        source = soup.find_all('a', limit=2)[1].get('href')
+        result = await get_kheina(url)  # Kheina is queried first
+        if int(result['similarity']) < 55:  # weak Kheina match: fall back to SauceNAO
+            result = await get_saucenao(url)
+        if int(result['similarity']) < 55:  # still weak: report the URL's last path segment as unmatched
+            raise exc.MatchError(re.search('\\/([^\\/]+)$', url).group(1))
 
-        if source != '#':
-            ident = re.search('show/([0-9]+)', source).group(1)
-            post = await u.fetch('http://e621.net/post/show.json', params={'id': ident}, json=True)
-            if (post['status'] == 'deleted'):
-                ident = re.search('#(\\d+)', post['delreason']).group(1)
-                post = await u.fetch('http://e621.net/post/show.json', params={'id': ident}, json=True)
-            source = f'https://e621.net/post/show/{post["id"]}'
-            similarity = re.search('\\d+', soup.find(string=re.compile('similarity'))).group(0)
+        return result
 
-            return post, source, similarity + '% Match'
-        else:
-            raise IndexError
-
-    except IndexError:
-        content = await u.fetch(
-            'https://saucenao.com/search.php',
-            params={
-                'url': url,
-                'api_key': u.config['saucenao_api'],
-                'output_type': 2},
-            json=True)
-        result = content['results'][0]
-        if 'author_name' in result['data']:
-            artist = 'author_name'
-        elif 'member_name' in result['data']:
-            artist = 'member_name'
-        else:
-            artist = 'creator'
-        post = {
-            'file_url': result['header']['thumbnail'],
-            'artist': [result['data'][artist]],
-            'score': 'SauceNAO'}
-        source = result['data']['ext_urls'][0]
-        similarity = re.search('(\\d+)\\.', result['header']['similarity']).group(1)
-
-        if int(similarity) >= 55:
-            return post, source, similarity + '% Match'
-
-        raise exc.MatchError(re.search('\\/([^\\/]+)$', url).group(1))
-
-    except (AttributeError, ValueError, KeyError):
+    except aiohttp.InvalidURL:
         raise exc.MissingArgument