Source code for libs.urlextractor

import requests
from time import sleep
from lxml import html
# from random import randint



def next_cursor_selenium(page_data: str) -> str:
    """
    Return the link of the "Next" page found in a raw page source.

    :param str page_data: raw page source data of the page
    :return: next page cursor, or ``0`` when it cannot be determined
    :rtype: str
    :example: "/tag/steamprofile?cursor=MTQwYWI2MjA9NCY1OTBhY2FkMD03MiZkMTc0YjZiYz1OJTJGQQ"

    | Made for the selenium version
    | Accepts a deviant page source, searches for the next page cursor, and returns the cursor part
    | Returns 0 if there is no next page, or if the page source is empty or lxml
      rejects it as an empty document
    """
    # Imported locally: only needed to name lxml's "document is empty" error.
    from lxml.etree import ParserError

    # Nothing to parse; lxml would raise on an empty document anyway.
    if not page_data:
        return 0  # type: ignore

    try:
        tree = html.fromstring(page_data)
    except ParserError:
        # Raised by lxml for empty / whitespace-only documents.
        return 0  # type: ignore

    # Collect every href found on, or below, an element whose text is exactly "Next".
    next_links = tree.xpath('//*[text()="Next"]//@href')
    if not next_links:
        return 0  # type: ignore
    return next_links[0]
def url_cleaner(steam_url: str) -> str:
    """
    Normalise a Steam url: drop the filter (query) part and force ``https://``.

    :param str steam_url: Steam url
    :return: Steam url without filter tag
    :rtype: str

    | Accepts a steam url, removes the filter part of the url and returns the url
    | Some urls have a filter part which is not needed for the program to work,
      this function removes the filter part and returns the url
    | If no filter part is found the url is returned without it being shortened
    | Removes the characters ``#``, ``[`` and ``]`` from the url as well
    | Converts http to https; a url given without a scheme gets ``https://`` prepended
    | A leading ``www.`` on the host is dropped
    """
    # Strip the stray characters in a single pass.
    steam_url = steam_url.translate(str.maketrans("", "", "#]["))

    # Cut the filter/query part first so a "://" inside it is never mistaken
    # for the scheme separator below.
    steam_url = steam_url.partition("?")[0]

    # Remove an existing scheme. The text before "://" is only treated as a
    # scheme when it contains neither "/" nor ".", i.e. it is not already part
    # of a host or path.
    prefix, separator, remainder = steam_url.partition("://")
    if separator and "/" not in prefix and "." not in prefix:
        steam_url = remainder

    # Both spellings of the host normalise to the same url.
    if steam_url.startswith("www."):
        steam_url = steam_url[len("www."):]

    return "https://" + steam_url
def next_cursor_requests(base_url: str, cookie, timeout: float = 30.0) -> str:
    """
    Fetch a page with ``requests`` and return the link of its "Next" page.

    :param str base_url: Deviant art page url
    :param cookie cookie: cookie of the parent page
    :param float timeout: seconds handed to ``requests.get`` as its ``timeout``
        so a stalled connection cannot block forever
    :return: next page cursor, or ``0`` when it cannot be determined
    :rtype: str
    :example: "/tag/steamprofile?cursor=MTQwYWI2MjA9NCY1OTBhY2FkMD03MiZkMTc0YjZiYz1OJTJGQQ"

    | Made for the requests version
    | Accepts a deviant url along with its parent cookie, searches for the next page cursor,
      and returns the cursor part
    | Returns 0 if there is no next page, if the request fails or times out, or if
      the response body is empty

    .. role:: raw-html(raw)
        :format: html

    .. deprecated:: 0.0.1
        :raw-html:`<br />`
        **Use :func:`deviantartapi.nextcursor_selenium` instead**
        outdated since Deviantart changed their website and introduced login system,
        also the requests version doesn't render js
    """
    # Imported locally: only needed to name lxml's "document is empty" error.
    from lxml.etree import ParserError

    try:
        response = requests.get(base_url, cookies=cookie, timeout=timeout)
    except requests.RequestException:
        # Connection errors, timeouts, invalid urls, ...: report "no cursor".
        return 0  # type: ignore

    # Fixed pause after each request — presumably to throttle successive calls.
    sleep(0.5)

    try:
        tree = html.fromstring(response.content)
    except ParserError:
        # Raised by lxml for empty / whitespace-only documents.
        return 0  # type: ignore

    # Collect every href found on, or below, an element whose text is exactly "Next".
    next_links = tree.xpath('//*[text()="Next"]//@href')
    if not next_links:
        return 0  # type: ignore
    return next_links[0]