From 9ab357e26f28eeb905c1475254e470de1b93c037 Mon Sep 17 00:00:00 2001 From: Myned Date: Tue, 24 Sep 2019 19:10:56 -0400 Subject: [PATCH 1/4] Add html5lib requirement as bs4 parser --- Pipfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Pipfile b/Pipfile index c08f897..9a61dbb 100644 --- a/Pipfile +++ b/Pipfile @@ -20,6 +20,7 @@ beautifulsoup4 = "*" "discord.py" = {extras = ["voice"],git = "https://github.com/Rapptz/discord.py"} "hurry.filesize" = "*" requests = "*" +html5lib = "*" [dev-packages] lxml = "*" From 3741b0e694101efe3ab7fd67370977369f0364a0 Mon Sep 17 00:00:00 2001 From: Myned Date: Tue, 24 Sep 2019 19:11:29 -0400 Subject: [PATCH 2/4] Add text argument to fetch util --- src/utils/utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/utils/utils.py b/src/utils/utils.py index e907744..b86541e 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -85,14 +85,17 @@ color = d.Color(0x1A1A1A) last_commands = {} -async def fetch(url, *, params={}, json=False, response=False): +async def fetch(url, *, params={}, json=False, response=False, text=False): async with aiohttp.ClientSession() as session: async with session.get(url, params=params, headers={'User-Agent': 'Myned/Modufur'}, ssl=False) as r: - if response: - return r - elif json: + if json: return await r.json() - return await r.read() + elif response: + return r + elif text: + return await r.text() + else: + return await r.read() def generate_embed(ctx, *, title=d.Embed.Empty, kind='rich', description=d.Embed.Empty, url=d.Embed.Empty, timestamp=d.Embed.Empty, colour=color, footer={}, image=d.Embed.Empty, thumbnail=d.Embed.Empty, author={}, fields=[]): From 1262dc2ba78b4cef15749d3bed46ae3083e59b0f Mon Sep 17 00:00:00 2001 From: Myned Date: Tue, 24 Sep 2019 19:12:31 -0400 Subject: [PATCH 3/4] Change package reference to avoid clashes --- src/utils/scraper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/scraper.py 
b/src/utils/scraper.py index dae24f5..1ad5b78 100644 --- a/src/utils/scraper.py +++ b/src/utils/scraper.py @@ -3,7 +3,7 @@ import ast import re from bs4 import BeautifulSoup -from lxml import html +import lxml from hurry.filesize import size, alternative from misc import exceptions as exc @@ -108,7 +108,7 @@ async def get_post(url): async def get_image(url): content = await u.fetch(url) - value = html.fromstring(content).xpath( + value = lxml.html.fromstring(content).xpath( 'string(/html/body/div[@id="content"]/div[@id="post-view"]/div[@class="content"]/div[2]/img/@src)') return value From 5987b4692535af69f8891cb919fbd26a6aa7f28f Mon Sep 17 00:00:00 2001 From: Myned Date: Tue, 24 Sep 2019 19:13:10 -0400 Subject: [PATCH 4/4] Fix Kheina parsing and eval --- src/utils/scraper.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/utils/scraper.py b/src/utils/scraper.py index 1ad5b78..e9590a0 100644 --- a/src/utils/scraper.py +++ b/src/utils/scraper.py @@ -12,7 +12,7 @@ from utils import utils as u # async def get_harry(url): # content = await u.fetch('https://iqdb.harry.lu', params={'url': url}) -# soup = BeautifulSoup(content, 'html.parser') +# soup = BeautifulSoup(content, 'html5lib') # # if soup.find('div', id='show1').string is 'Not the right one? 
': parent = soup.find('th', string='Probable match:').parent.parent @@ -41,17 +41,25 @@ from utils import utils as u async def get_kheina(url): - content = await u.fetch('https://kheina.com', params={'url': url}) - soup = BeautifulSoup(content, 'html.parser') + content = await u.fetch('https://kheina.com', params={'url': url}, text=True) - results = ast.literal_eval(soup.find('data', id='results').string)[-1] - iqdbdata = ast.literal_eval(soup.find('data', id='iqdbdata').string)[0] + content = content.replace('&quot;', 'quot;').replace('&#39;', 'apos;') + soup = BeautifulSoup(content, 'html5lib') + results = soup.find('data', id='results').string.replace('quot;', '&quot;').replace('apos;', '&#39;') + results = ast.literal_eval(results) + iqdbdata = soup.find('data', id='iqdbdata').string + iqdbdata = ast.literal_eval(iqdbdata) + + for e in results: + if iqdbdata[0]['iqdbid'] in e: + match = e + break result = { - 'source': results[3], - 'artist': results[4], - 'thumbnail': f'https://f002.backblazeb2.com/file/kheinacom/{results[1]}.jpg', - 'similarity': str(int(float(iqdbdata['similarity']))), + 'source': match[3], + 'artist': match[4], + 'thumbnail': f'https://f002.backblazeb2.com/file/kheinacom/{match[1]}.jpg', + 'similarity': str(int(float(iqdbdata[0]['similarity']))), 'database': 'Kheina' } @@ -63,6 +71,7 @@ async def get_saucenao(url): 'https://saucenao.com/search.php', params={'url': url, 'api_key': u.config['saucenao_api'], 'output_type': 2}, json=True) + results = content['results'][0] for i in range(len(content['results'])): if 'e621' in content['results'][i]['header']['index_name']: