1
0
Fork 0
mirror of https://github.com/myned/modufur.git synced 2024-12-27 07:17:28 +00:00
modufur/src/main/utils/scraper.py

38 lines
880 B
Python
Raw Normal View History

import asyncio
import re
2017-09-24 15:05:28 +00:00
from bs4 import BeautifulSoup
from lxml import html
2017-09-24 15:05:28 +00:00
from misc import exceptions as exc
from utils import utils as u
2017-09-24 15:05:28 +00:00
2017-10-16 06:06:33 +00:00
async def get_post(url):
    """Reverse-search an image URL and return the link of the matched post.

    Waits ``u.RATE_LIMIT`` seconds, then queries ``http://iqdb.harry.lu``
    with ``url`` as the ``url`` query parameter and parses the response as
    HTML.

    Args:
        url: Address of the image to look up.

    Returns:
        The ``href`` attribute of the second ``<a>`` element in the response
        (``None`` if that element has no ``href``).

    Raises:
        exc.MatchError: The response has fewer than two anchors, or the
            second anchor's ``href`` is ``'#'``. The exception argument is
            the text after the last ``/`` in ``url``.
        exc.MissingArgument: No match was found and ``url`` has no non-empty
            segment after its last ``/``.
    """
    await asyncio.sleep(u.RATE_LIMIT)

    content = await u.fetch('http://iqdb.harry.lu', params={'url': url})
    anchors = BeautifulSoup(content, 'html.parser').find_all('a')

    # Only the second anchor is inspected; a '#' href is treated the same
    # as the anchor being absent (i.e. no match).
    if len(anchors) > 1:
        value = anchors[1].get('href')
        if value != '#':
            return value

    # No match: report the trailing path segment of the URL when there is
    # one. Raw string avoids the invalid '\/' escape of the old literal.
    name = re.search(r'/([^/]+)$', url)
    if name is None:
        raise exc.MissingArgument
    raise exc.MatchError(name.group(1))
2017-10-16 06:06:33 +00:00
async def get_image(url):
    """Fetch a post page and return the ``src`` of its main image.

    The page at ``url`` is retrieved with ``u.fetch`` and parsed with lxml;
    the image source is then read from a fixed XPath location under
    ``div#content > div#post-view``.

    Args:
        url: Address of the post page to scrape.

    Returns:
        The result of the XPath ``string(...)`` expression: the ``src``
        attribute of the selected ``<img>``, or an empty string when the
        path selects nothing.
    """
    document = html.fromstring(await u.fetch(url))

    return document.xpath(
        'string(/html/body/div[@id="content"]/div[@id="post-view"]/div[@class="content"]/div[2]/img/@src)')