Source code for libs.oldlibs
import json
import os
import requests
from bs4 import BeautifulSoup
from time import sleep
from libs import urlextractor
from json2html import json2html
def htmlgen(jsonfile) -> None:
    """
    :param str jsonfile: JSON file name (path)
    :return: None

    .. role:: raw-html(raw)
       :format: html

    .. deprecated:: 0.0.1
       :raw-html:`<br />`
       **Only kept as a reference, as pandas will use different methods**

    | Generates and opens an HTML file for the given jsonfile.
    | Note: a default browser must be set in Windows.
    """
    if os.path.getsize(jsonfile) > 0:
        with open(jsonfile) as json_file:
            data = json.load(json_file)
        # Wrap each top-level key in an anchor tag so the generated table links out
        latesthtmldict = {}
        for key in data.keys():
            k = f'<a href="{key}">{key}</a>'
            latesthtmldict[k] = data[key]
        # os.path.splitext handles paths containing extra dots safely
        name = os.path.splitext(jsonfile)[0]
        htmltabel = json2html.convert(json=latesthtmldict, escape=False)  # type: ignore
        with open(f"{name}.html", "w") as writefile:
            writefile.write("<center>" + htmltabel + "</center>")
        os.system(f"start {name}.html")  # Windows-only: opens the default browser
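A minimal usage sketch for ``htmlgen``; the file name ``sample.json`` and its contents are purely illustrative:

# Hypothetical usage: "sample.json" is only an example file name.
with open("sample.json", "w") as f:
    json.dump({"https://example.com": {"title": "Example", "views": 42}}, f)

htmlgen("sample.json")  # writes sample.html and opens it in the default browser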
def deviant_scrapper_pages(baseurl: str, nextpage: int = 2) -> list:
    '''
    :param str baseurl: DeviantArt page url
    :param int nextpage: Number of pages to visit
    :return: list of all the art page links
    :rtype: list

    | Accepts a DeviantArt url and searches it for the individual deviation links.
    | The next-page cursor is then looked up and the next page is fetched the same way.
    | Each page yields 24 links, so 24 * nextpage links are returned in total
    | (e.g. if nextpage is 2, then 48 links are returned).

    .. role:: raw-html(raw)
       :format: html

    .. deprecated:: 0.0.1
       :raw-html:`<br />`
       Use :func:`deviantartapi.selenium_scrapper` instead.
       **Outdated since DeviantArt changed their website and introduced a login system.**
    '''
    deviantartpages = []
    NextBtnClicker = 0
    page = requests.get(baseurl)
    page_cookie = page.cookies
    soup = BeautifulSoup(page.content, 'html.parser')
    print(f"Accessing page {NextBtnClicker+1} = {baseurl}....")
    for deviantdata in soup.find_all('div', {'class': "mWr4e"}):
        hrefval = deviantdata.select('a[data-hook="deviation_link"]')[0]['href']
        deviantartpages.append(hrefval)
    # each page yields up to 24 urls
    NextBtnClicker += 1
    nexts = urlextractor.next_cursor_requests(baseurl, page_cookie)
    while NextBtnClicker <= nextpage - 1 and nexts:
        joinedurl = "https://www.deviantart.com" + nexts
        print(f"Accessing page {NextBtnClicker+1} = {joinedurl}....")
        main_page = requests.get(joinedurl, cookies=page_cookie)
        mp_cookie = main_page.cookies
        soup = BeautifulSoup(main_page.content, 'html.parser')
        for deviantdata in soup.find_all('div', {'class': "mWr4e"}):
            hrefval = deviantdata.select('a[data-hook="deviation_link"]')[0]['href']
            deviantartpages.append(hrefval)
        nexts = urlextractor.next_cursor_requests(joinedurl, mp_cookie)
        NextBtnClicker += 1
        sleep(2)  # be polite between page requests
    return deviantartpages
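A usage sketch for ``deviant_scrapper_pages``; the tag URL is illustrative, and since the function is deprecated it may collect nothing against the current DeviantArt site:

# Hypothetical usage: the tag URL is only an example.
links = deviant_scrapper_pages("https://www.deviantart.com/tag/landscape", nextpage=2)
print(f"Collected {len(links)} deviation links")  # expect up to 24 * nextpage links
for url in links[:5]:
    print(url)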