diff --git a/src/utils/scraper.py b/src/utils/scraper.py index 369e408..99780d7 100644 --- a/src/utils/scraper.py +++ b/src/utils/scraper.py @@ -41,37 +41,21 @@ from utils import utils as u async def query_kheina(url): try: - content = await u.fetch(f'https://kheina.com?url={url}', text=True) + content = await u.fetch(f'https://api.kheina.com/v1/search', post={'url': url}, json=True) - for e in ('"', '''): - content = content.replace(e, '') - content = re.sub('', '', content) - - soup = BeautifulSoup(content, 'html5lib') - - if soup.find('data', id='error'): - return None - - results = soup.find('data', id='results').string - results = ast.literal_eval(results) - iqdbdata = soup.find('data', id='iqdbdata').string - iqdbdata = ast.literal_eval(iqdbdata) - - similarity = int(float(iqdbdata[0]['similarity'])) + similarity = int(content['results'][0]['similarity']) if similarity < 55: return None - for e in results: - if iqdbdata[0]['iqdbid'] in e: - match = e - break + source = re.search('\\d+$', content['results'][0]['sources'][0]['source']).group(0) + export = await u.fetch(f'https://faexport.spangle.org.uk/submission/{source}.json', json=True) result = { - 'source': match[3].replace('\\', ''), - 'artist': match[4], - 'thumbnail': f'https://f002.backblazeb2.com/file/kheinacom/{match[1]}.jpg', + 'source': content['results'][0]['sources'][0]['source'], + 'artist': content['results'][0]['sources'][0]['artist'], + 'thumbnail': '' if isinstance(export, int) and export != 200 else export['full'], 'similarity': str(similarity), - 'database': tld.extract(match[3].replace('\\', '')).domain + 'database': tld.extract(content['results'][0]['sources'][0]['source']).domain } return result diff --git a/src/utils/utils.py b/src/utils/utils.py index 10ef267..f9ab5ea 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -84,19 +84,36 @@ last_commands = {} asession = aiohttp.ClientSession() -async def fetch(url, *, json=False, response=False, text=False): +async def fetch(url, *, post={}, response=False, text=False, json=False): if '.json' in url and ('e621' in url or 'e926' in url): url += f'&login=BotMyned&api_key={config["e621_api"]}' - async with asession.get(url, headers={ - 'User-Agent': 'Myned/Modufur (https://github.com/Myned/Modufur)'}, ssl=False) as r: - if response: - return r - elif json: - return await r.json() - elif text: - return await r.text() - else: - return await r.read() + + if post: + async with asession.post(url, data=post, headers={ + 'User-Agent': 'Myned/Modufur (https://github.com/Myned/Modufur)'}, ssl=False) as r: + if r.status != 200: + return r.status + elif response: + return r + elif text: + return await r.text() + elif json: + return await r.json() + else: + return await r.read() + else: + async with asession.get(url, headers={ + 'User-Agent': 'Myned/Modufur (https://github.com/Myned/Modufur)'}, ssl=False) as r: + if r.status != 200: + return r.status + elif response: + return r + elif text: + return await r.text() + elif json: + return await r.json() + else: + return await r.read() def generate_embed(ctx, *, title=d.Embed.Empty, kind='rich', description=d.Embed.Empty, url=d.Embed.Empty, timestamp=d.Embed.Empty, colour=color, footer={}, image=d.Embed.Empty, thumbnail=d.Embed.Empty, author={}, fields=[]):