Source code for libs.urlextractor
import requests
from time import sleep
from lxml import html
# from random import randint
def next_cursor_selenium(page_data: str) -> str:
    '''
    Return the link of the "Next" page found in a deviant page source.

    :param str page_data: raw page source data of the page
    :return: href of the next page (path plus cursor), or ``0`` on failure
    :rtype: str
    :example: "/tag/steamprofile?cursor=MTQwYWI2MjA9NCY1OTBhY2FkMD03MiZkMTc0YjZiYz1OJTJGQQ"

    | Made for the selenium version
    | Parses the page source, looks for any element whose text is exactly
      "Next" and returns the first ``href`` found on or below it
    | Returns 0 when there is no such link, or when the page source is
      empty and therefore cannot be parsed
    '''
    # Imported here so the function does not depend on edits to the
    # module-level import block; lxml is already a dependency of this file.
    from lxml.etree import ParserError

    try:
        tree = html.fromstring(page_data)
    except ParserError:
        # lxml refuses to build a tree from an empty / whitespace-only
        # document; treat that the same as "no next page".
        return 0  # type: ignore

    hrefs = tree.xpath('//*[text()="Next"]//@href')
    if not hrefs:
        # No "Next" link on the page: this is the last page.
        return 0  # type: ignore
    return hrefs[0]
def url_cleaner(steam_url: str) -> str:
    '''
    Normalise a Steam url: force https and drop the filter (query) part.

    :param str steam_url: Steam url
    :return: Steam url without filter tag
    :rtype: str

    | Every ``#``, ``[`` and ``]`` character is deleted from the url
    | Everything before the first ``s`` found at index 6 or later (the start
      of the host for ``http://s...`` and ``https://s...`` urls) is replaced
      by ``https://``, which also converts http to https
    | Anything from the first ``?`` onwards is cut off; if there is no ``?``
      the url is returned as is
    '''
    # One pass that deletes the three unwanted characters.
    stripped = steam_url.translate(str.maketrans("", "", "#]["))

    # Skip past the original scheme by locating the first "s" after it.
    host_start = stripped.find("s", 6)
    normalized = "https://" + stripped[host_start:]

    # partition() yields the whole string as the head when "?" is absent.
    without_filter, _, _ = normalized.partition("?")
    return without_filter
def next_cursor_requests(base_url: str, cookie, timeout: float = 30.0) -> str:
    '''
    Fetch a deviant art page with requests and return its "Next" page link.

    :param str base_url: Deviant art page url
    :param cookie cookie: cookie of the parent page, passed to ``requests.get``
    :param float timeout: seconds to wait for the server before giving up
    :return: href of the next page (path plus cursor), or ``0`` on failure
    :rtype: str
    :example: "/tag/steamprofile?cursor=MTQwYWI2MjA9NCY1OTBhY2FkMD03MiZkMTc0YjZiYz1OJTJGQQ"

    | Made for the requests version
    | Downloads the page, waits half a second, then looks for any element
      whose text is exactly "Next" and returns the first ``href`` found on
      or below it
    | Returns 0 when there is no such link, or when the response body is
      empty and therefore cannot be parsed
    | Network failures (including the timeout) are not swallowed; they
      propagate as the exceptions raised by ``requests``

    .. deprecated:: 0.0.1
        Use :func:`next_cursor_selenium` instead. Outdated since Deviantart
        changed their website and introduced a login system; also the
        requests version doesn't render js.
    '''
    # Imported here so the function does not depend on edits to the
    # module-level import block; lxml is already a dependency of this file.
    from lxml.etree import ParserError

    # Without a timeout requests may block forever on an unresponsive server.
    page = requests.get(base_url, cookies=cookie, timeout=timeout)
    # Short pause after every request so consecutive calls are spaced out.
    sleep(0.5)

    try:
        tree = html.fromstring(page.content)
    except ParserError:
        # lxml refuses to build a tree from an empty body; treat that the
        # same as "no next page".
        return 0  # type: ignore

    hrefs = tree.xpath('//*[text()="Next"]//@href')
    if not hrefs:
        # No "Next" link on the page: this is the last page.
        return 0  # type: ignore
    return hrefs[0]