2017-10-27 21:31:57 -04:00
|
|
|
import asyncio
|
2017-10-15 15:32:35 -04:00
|
|
|
import re
|
|
|
|
|
2017-09-24 11:05:28 -04:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from lxml import html
|
2017-11-20 02:12:56 -05:00
|
|
|
from hurry.filesize import size, alternative
|
2017-10-12 22:30:40 -04:00
|
|
|
|
2017-09-24 11:05:28 -04:00
|
|
|
from misc import exceptions as exc
|
2017-10-12 22:30:40 -04:00
|
|
|
from utils import utils as u
|
|
|
|
|
2017-09-24 11:05:28 -04:00
|
|
|
|
2017-10-16 02:06:33 -04:00
|
|
|
async def get_post(url):
    """Reverse-search the image at *url* and return its best match.

    The image is first requested with ``u.fetch(url, response=True)`` so its
    ``Content-Length`` header can be checked against an 8192 * 1024 byte
    limit.  The URL is then submitted to iqdb.harry.lu; the ``href`` of the
    second anchor on the result page is taken as the match.  When that page
    has fewer than two anchors, or the href is ``'#'``, the lookup falls back
    to the SauceNAO JSON API.

    Returns:
        A ``(post, source, label)`` tuple.  ``post`` is the e621 JSON for the
        matched post (or, for the SauceNAO fallback, a dict with
        ``file_url``, ``artist`` and ``score`` keys), ``source`` is a URL
        string and ``label`` is a string such as ``'87% Match'``.

    Raises:
        exc.SizeError: the reported Content-Length exceeds the limit.
        exc.MatchError: the SauceNAO similarity is below 55.
        exc.MissingArgument: an AttributeError, ValueError or KeyError was
            raised during the size check or the iqdb/e621 lookup.
    """
    max_bytes = 8192 * 1024
    e621_show = 'http://e621.net/post/show.json'

    try:
        response = await u.fetch(url, response=True)
        byte_count = int(response.headers['Content-Length'])
        if byte_count > max_bytes:
            raise exc.SizeError(size(byte_count, system=alternative))

        page = await u.fetch('http://iqdb.harry.lu', params={'url': url})
        document = BeautifulSoup(page, 'html.parser')
        # Indexing [1] raises IndexError when fewer than two anchors exist,
        # which routes to the SauceNAO fallback below.
        link = document.find_all('a', limit=2)[1].get('href')

        if link == '#':
            # A '#' href is treated as "no match": reuse the fallback path.
            raise IndexError

        post_id = re.search('show/([0-9]+)', link).group(1)
        post = await u.fetch(e621_show, params={'id': post_id}, json=True)
        if post['status'] == 'deleted':
            # Follow the post number quoted in the deletion reason.
            post_id = re.search('#(\\d+)', post['delreason']).group(1)
            post = await u.fetch(e621_show, params={'id': post_id}, json=True)

        link = f'https://e621.net/post/show/{post["id"]}'
        similarity_text = document.find(string=re.compile('similarity'))
        percent = re.search('\\d+', similarity_text).group(0)

        return post, link, f'{percent}% Match'

    except IndexError:
        # NOTE(review): exceptions raised inside this handler are not seen by
        # the sibling ``except`` clause below, so they propagate unchanged.
        query = {
            'url': url,
            'api_key': u.config['saucenao_api'],
            'output_type': 2,
        }
        reply = await u.fetch(
            'https://saucenao.com/search.php', params=query, json=True)
        top_hit = reply['results'][0]
        details = top_hit['data']

        # First available credit field, in order of preference.
        artist_key = next(
            (key for key in ('author_name', 'member_name') if key in details),
            'creator')

        post = {
            'file_url': top_hit['header']['thumbnail'],
            'artist': [details[artist_key]],
            'score': 'SauceNAO',
        }
        link = details['ext_urls'][0]
        # Keep only the integer part of the similarity value.
        percent = re.search(
            '(\\d+)\\.', top_hit['header']['similarity']).group(1)

        if int(percent) < 55:
            # Report the last path segment of the URL as the unmatched item.
            raise exc.MatchError(re.search('\\/([^\\/]+)$', url).group(1))

        return post, link, f'{percent}% Match'

    except (AttributeError, ValueError, KeyError):
        raise exc.MissingArgument
|
2017-10-16 02:06:33 -04:00
|
|
|
|
|
|
|
|
|
|
|
async def get_image(url):
    """Fetch the page at *url* and return the ``src`` of its post image.

    The page body is obtained with ``u.fetch(url)``, parsed with lxml, and
    queried with a fixed XPath that selects the ``src`` attribute of the
    ``<img>`` in the second ``<div>`` under
    ``div#content > div#post-view > div.content``.

    Returns:
        The result of the XPath ``string(...)`` expression.
    """
    src_xpath = 'string(/html/body/div[@id="content"]/div[@id="post-view"]/div[@class="content"]/div[2]/img/@src)'

    page = await u.fetch(url)
    tree = html.fromstring(page)

    return tree.xpath(src_xpath)
|