2019-09-23 02:06:13 -04:00
|
|
|
import aiohttp
|
|
|
|
import ast
|
2017-10-15 15:32:35 -04:00
|
|
|
import re
|
|
|
|
|
2017-09-24 11:05:28 -04:00
|
|
|
from bs4 import BeautifulSoup
|
2019-09-24 19:12:31 -04:00
|
|
|
import lxml
|
2017-11-20 02:12:56 -05:00
|
|
|
from hurry.filesize import size, alternative
|
2017-10-12 22:30:40 -04:00
|
|
|
|
2017-09-24 11:05:28 -04:00
|
|
|
from misc import exceptions as exc
|
2017-10-12 22:30:40 -04:00
|
|
|
from utils import utils as u
|
|
|
|
|
2017-09-24 11:05:28 -04:00
|
|
|
|
2019-09-23 02:06:13 -04:00
|
|
|
# async def get_harry(url):
|
|
|
|
# content = await u.fetch('https://iqdb.harry.lu', params={'url': url})
|
2019-09-24 19:13:10 -04:00
|
|
|
# soup = BeautifulSoup(content, 'html5lib')
|
2019-09-23 02:06:13 -04:00
|
|
|
#
|
|
|
|
# if soup.find('div', id='show1').string is 'Not the right one? ':
|
|
|
|
# parent = soup.find('th', string='Probable match:').parent.parent
|
|
|
|
#
|
|
|
|
# post = await u.fetch(
|
|
|
|
# 'https://e621.net/post/show.json',
|
|
|
|
# params={'id': re.search('show/([0-9]+)', parent.tr.td.a.get('href')).group(1)},
|
|
|
|
# json=True)
|
|
|
|
# if (post['status'] == 'deleted'):
|
|
|
|
# post = await u.fetch(
|
|
|
|
# 'https://e621.net/post/show.json',
|
|
|
|
# params={'id': re.search('#(\\d+)', post['delreason']).group(1)},
|
|
|
|
# json=True)
|
|
|
|
#
|
|
|
|
# result = {
|
|
|
|
# 'source': f'https://e621.net/post/show/{post["id"]}',
|
|
|
|
# 'artist': ', '.join(post['artist']),
|
|
|
|
# 'thumbnail': parent.td.a.img.get('src'),
|
|
|
|
# 'similarity': re.search('\\d+', parent.tr[4].td.string).group(0),
|
|
|
|
# 'database': 'Harry.lu'
|
|
|
|
# }
|
|
|
|
#
|
|
|
|
# return result
|
|
|
|
# else:
|
|
|
|
# return False
|
|
|
|
|
|
|
|
|
2019-09-24 20:03:06 -04:00
|
|
|
async def query_kheina(url):
    """Look up ``url`` on Kheina's reverse image search.

    Fetches the Kheina page for the image, evaluates the literal payloads
    found in its ``<data id="results">`` and ``<data id="iqdbdata">``
    elements, and builds a result from the row that contains the id of the
    first IQDB entry.

    Args:
        url: Address of the image to search for.

    Returns:
        A dict with the keys ``source``, ``artist``, ``thumbnail``,
        ``similarity`` (as a string) and ``database``; or ``False`` when the
        page carries an ``error`` element, a payload is missing or empty,
        the first IQDB entry is below 55 similarity, or no result row
        contains that entry's id.
    """
    content = await u.fetch('https://kheina.com', params={'url': url}, text=True)

    # Strip escaped-quote entities from the raw HTML before it is parsed.
    # NOTE(review): the reviewed copy of this file had these two literals
    # mangled into bare quote characters (a syntax error); the entity names
    # are a reconstruction -- confirm against version control.
    for entity in ('&quot;', '&apos;'):
        content = content.replace(entity, '')
    # Remove Cloudflare email-protection anchors from the markup.
    content = re.sub('<a href="/cdn-cgi/l/email-protection".+</a>', '', content)

    soup = BeautifulSoup(content, 'html5lib')

    if soup.find('data', id='error'):
        return False

    results_tag = soup.find('data', id='results')
    iqdb_tag = soup.find('data', id='iqdbdata')
    # Treat a page without usable payloads as "no match" instead of crashing
    # on None.string / literal_eval(None).
    if results_tag is None or iqdb_tag is None:
        return False
    if results_tag.string is None or iqdb_tag.string is None:
        return False

    # literal_eval only accepts Python literals, so remote content cannot
    # execute code here.
    results = ast.literal_eval(results_tag.string)
    iqdbdata = ast.literal_eval(iqdb_tag.string)
    if not iqdbdata:
        return False

    top = iqdbdata[0]
    similarity = int(float(top['similarity']))
    if similarity < 55:
        return False

    # First result row that mentions the top entry's id; previously a miss
    # left ``match`` unbound and raised UnboundLocalError.
    match = next((row for row in results if top['iqdbid'] in row), None)
    if match is None:
        return False

    return {
        'source': match[3].replace('\\', ''),
        'artist': match[4],
        'thumbnail': f'https://f002.backblazeb2.com/file/kheinacom/{match[1]}.jpg',
        'similarity': str(similarity),
        'database': 'Kheina',
    }
|
|
|
|
|
|
|
|
|
2019-09-24 20:03:06 -04:00
|
|
|
async def query_saucenao(url):
    """Look up ``url`` through the SauceNAO search API.

    Only the first entry of the response's ``results`` list is considered.
    Its source link is the first external URL containing ``e621`` if there
    is one, otherwise the first external URL.

    Args:
        url: Address of the image to search for.

    Returns:
        A dict with the keys ``source``, ``artist``, ``thumbnail``,
        ``similarity`` (as a string) and ``database``; or ``False`` when the
        response has no results, the first result is below 55 similarity, or
        it carries no external URLs.

    Raises:
        exc.ImageError: If the response header message reports that the
            file was denied or that the remote server had a problem.
    """
    content = await u.fetch(
        'https://saucenao.com/search.php',
        params={'url': url, 'api_key': u.config['saucenao_api'], 'output_type': 2},
        json=True)

    if content['header'].get('message', '') in (
            'Access to specified file was denied... ;_;',
            'Problem with remote server...'):
        raise exc.ImageError

    # A response without any results is a miss, not an error.
    results = content.get('results')
    if not results:
        return False
    match = results[0]

    similarity = int(float(match['header']['similarity']))
    if similarity < 55:
        return False

    data = match['data']

    # Without at least one external URL there is nothing to report as source.
    ext_urls = data.get('ext_urls')
    if not ext_urls:
        return False
    source = next((link for link in ext_urls if 'e621' in link), ext_urls[0])

    # The credit field name varies between entries; take the first present.
    artist = next(
        (data[key] for key in ('author_name', 'member_name', 'creator') if key in data),
        'Unknown')

    return {
        'source': source,
        'artist': artist,
        'thumbnail': match['header']['thumbnail'],
        'similarity': str(similarity),
        'database': 'SauceNAO',
    }
|
|
|
|
|
|
|
|
|
2017-10-16 02:06:33 -04:00
|
|
|
async def get_post(url):
    """Find the source post for the image at ``url``.

    Checks the image's reported size, then queries Kheina and, if that
    yields nothing, SauceNAO.

    Args:
        url: Address of the image to search for.

    Returns:
        The result dict produced by ``query_kheina`` or ``query_saucenao``.

    Raises:
        exc.SizeError: If the reported ``Content-Length`` exceeds
            8192 * 1024 bytes; carries the human-readable size.
        exc.MatchError: If neither service produced a result; carries the
            last path segment of ``url`` (or the whole URL when it has no
            such segment).
        exc.MissingArgument: If fetching raised ``aiohttp.InvalidURL``.
    """
    try:
        content = await u.fetch(url, response=True)

        # The header is optional (e.g. chunked responses); only enforce the
        # limit when a length is actually reported.
        length = content.headers.get('Content-Length')
        if length is not None:
            filesize = int(length)
            if filesize > 8192 * 1024:
                raise exc.SizeError(size(filesize, system=alternative))

        result = await query_kheina(url)
        if not result:
            result = await query_saucenao(url)
        if not result:
            # A URL ending in '/' has no trailing segment; fall back to the
            # full URL rather than failing on None.group().
            name = re.search(r'/([^/]+)$', url)
            raise exc.MatchError(name.group(1) if name else url)

        return result

    except aiohttp.InvalidURL as err:
        raise exc.MissingArgument from err
|
2017-10-16 02:06:33 -04:00
|
|
|
|
|
|
|
|
|
|
|
async def get_image(url):
    """Return the image address embedded in the page at ``url``.

    Fetches the page and evaluates an XPath that selects the ``src``
    attribute of the ``<img>`` inside the second ``<div>`` of the
    ``post-view`` content area.

    Args:
        url: Address of the page to inspect.

    Returns:
        The ``src`` value as a string; the XPath ``string()`` function
        yields an empty string when no node matches.
    """
    # ``import lxml`` alone does not load the ``lxml.html`` submodule, so
    # import it explicitly instead of relying on another module doing so.
    import lxml.html

    content = await u.fetch(url)

    value = lxml.html.fromstring(content).xpath(
        'string(/html/body/div[@id="content"]/div[@id="post-view"]/div[@class="content"]/div[2]/img/@src)')

    return value
|