1
0
Fork 0
mirror of https://github.com/myned/modufur.git synced 2024-11-01 13:02:38 +00:00
modufur/tools/scraper.py

73 lines
2.1 KiB
Python
Raw Normal View History

2022-02-21 07:10:57 +00:00
import aiohttp
import tldextract
import lightbulb
import pysaucenao
import config as c
2022-02-21 20:53:53 +00:00
# Lightbulb plugin object for this module; load()/unload() below attach it to
# and detach it from the bot.
plugin = lightbulb.Plugin("scraper")
# Shared SauceNAO client, keyed with the "saucenao" entry of the project config.
# The priority tuple lists SauceNAO index ids in order of preference.
sauce = pysaucenao.SauceNao(api_key=c.config["saucenao"], priority=(29, 40, 41)) # e621 > Fur Affinity > Twitter
2022-02-21 07:10:57 +00:00
2022-03-04 05:59:46 +00:00
# Return list of matches
2022-02-21 07:10:57 +00:00
async def reverse(urls):
    """Look up every URL in ``urls`` and return the matches as a list.

    The URLs are processed one at a time, in order. SauceNAO is queried
    first; Kheina is queried only when SauceNAO gives a falsy result. The
    returned list has one entry per input URL, and an entry is ``None`` when
    neither service produced a match.
    """
    matches = []
    for url in urls:
        match = await saucenao(url)
        if not match:
            # SauceNAO found nothing usable; fall back to Kheina.
            match = await kheina(url)
        matches.append(match)
    return matches
2022-02-21 07:10:57 +00:00
2022-02-21 20:53:53 +00:00
2022-03-04 06:00:06 +00:00
# Query SauceNAO
async def saucenao(url):
    """Query SauceNAO for ``url`` and summarise the first result.

    Returns a dict with the keys ``url``, ``artist``, ``thumbnail``,
    ``similarity`` and ``source`` built from the first result, or ``None``
    when the lookup returns no results.

    ``pysaucenao.FileSizeLimitException``, ``pysaucenao.ImageSizeException``
    and ``pysaucenao.InvalidImageException`` are caught and re-raised as a
    new instance of the same class carrying ``url`` as its only argument.
    """
    try:
        results = await sauce.from_url(url)
    except pysaucenao.FileSizeLimitException:
        raise pysaucenao.FileSizeLimitException(url)
    except pysaucenao.ImageSizeException:
        raise pysaucenao.ImageSizeException(url)
    except pysaucenao.InvalidImageException:
        raise pysaucenao.InvalidImageException(url)

    if not results:
        return None

    top = results[0]

    # When the author is reported as "Unknown", the result's title is used in
    # its place; otherwise the authors are joined, falling back to "Unknown"
    # if the joined string is empty.
    if top.author_name == "Unknown":
        artist = top.title
    else:
        artist = ", ".join(top.authors) or "Unknown"

    return {
        "url": top.url,
        "artist": artist,
        "thumbnail": top.thumbnail,
        "similarity": round(top.similarity),
        "source": tldextract.extract(top.index).domain,
    }
2022-02-21 07:10:57 +00:00
2022-03-04 06:00:06 +00:00
# Query Kheina
async def kheina(url):
    """Query Kheina for ``url`` and summarise the first result.

    Returns a dict with the keys ``url``, ``artist``, ``thumbnail``,
    ``similarity`` and ``source`` built from the first result and its first
    listed source. Returns ``None`` when there is no usable match:

    * the request did not come back with HTTP 200 (``post`` returns ``None``),
    * the response contains no results,
    * the first result's similarity is below 50, or
    * the first result lists no sources.
    """
    content = await post("https://api.kheina.com/v1/search", {"url": url})

    # post() returns None for any non-200 status; subscripting that would
    # raise TypeError, so treat it as "no match".
    if not content:
        return None

    results = content.get("results")
    if not results:
        # Missing or empty result list: nothing to index into.
        return None

    top = results[0]
    if top["similarity"] < 50:
        return None

    sources = top.get("sources")
    if not sources:
        # A result without any source has no URL or artist to report.
        return None

    origin = sources[0]
    return {
        "url": origin["source"],
        "artist": origin["artist"] or "Unknown",
        "thumbnail": f"https://cdn.kheina.com/file/kheinacom/{origin['sha1']}.jpg",
        "similarity": round(top["similarity"]),
        "source": tldextract.extract(origin["source"]).domain,
    }
2022-02-21 07:10:57 +00:00
2022-02-21 20:53:53 +00:00
2022-03-04 06:00:06 +00:00
# Return post response as json
async def post(url, data):
    """POST ``data`` to ``url`` and return the response body decoded as JSON.

    Returns ``None`` when the response status is anything other than 200.
    A new ``aiohttp.ClientSession`` is opened for the call and closed when
    the call finishes.
    """
    async with aiohttp.ClientSession() as session:
        async with session.post(url, data=data) as response:
            if response.status != 200:
                return None
            return await response.json()
def load(bot):
    """Register this module's plugin on ``bot``."""
    bot.add_plugin(plugin)
2022-02-21 20:53:53 +00:00
2022-02-21 07:10:57 +00:00
def unload(bot):
    """Remove this module's plugin from ``bot``."""
    bot.remove_plugin(plugin)