Source code for libs.oldlibs
import json
import os
import requests
from bs4 import BeautifulSoup
from time import sleep
from libs import urlextractor
from json2html import json2html
def htmlgen(jsonfile) -> None:
    """
    :param str jsonfile: JSON file name (path)
    :return: None

    .. role:: raw-html(raw)
       :format: html

    .. deprecated:: 0.0.1
       :raw-html:`<br />`
       **Only kept as a reference, as pandas will use different methods**

    | Generates and opens an HTML file for the given jsonfile.
    | Note: a default browser must be set in Windows.
    """
    if os.path.getsize(jsonfile) > 0:
        with open(jsonfile) as json_file:
            data = json.load(json_file)
        # Wrap each top-level key in an anchor tag so the generated table links out
        latesthtmldict = {}
        for key in data.keys():
            k = f'<a href="{key}">{key}</a>'
            latesthtmldict[k] = data[key]
        # os.path.splitext handles paths containing extra dots safely
        name = os.path.splitext(jsonfile)[0]
        htmltabel = json2html.convert(json=latesthtmldict, escape=False)  # type: ignore
        with open(f"{name}.html", "w") as writefile:
            writefile.write("<center>" + htmltabel + "</center>")
        os.system(f"start {name}.html")  # Windows-only: opens the default browser
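A minimal usage sketch for ``htmlgen``; the file name ``sample.json`` and its contents are purely illustrative:

# Hypothetical usage: "sample.json" is only an example file name.
with open("sample.json", "w") as f:
    json.dump({"https://example.com": {"title": "Example", "views": 42}}, f)

htmlgen("sample.json")  # writes sample.html and opens it in the default browser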
def deviant_scrapper_pages(baseurl: str, nextpage: int = 2) -> list:
    '''
    :param str baseurl: DeviantArt page url
    :param int nextpage: Number of pages to visit
    :return: list of all the art page links
    :rtype: list

    | Accepts a DeviantArt url and searches it for the individual deviation links.
    | The next-page cursor is then looked up and the next page is fetched the same way.
    | Each page yields 24 links, so 24 * nextpage links are returned in total
    | (e.g. if nextpage is 2, then 48 links are returned).

    .. role:: raw-html(raw)
       :format: html

    .. deprecated:: 0.0.1
       :raw-html:`<br />`
       Use :func:`deviantartapi.selenium_scrapper` instead.
       **Outdated since DeviantArt changed their website and introduced a login system.**
    '''
    deviantartpages = []
    NextBtnClicker = 0
    page = requests.get(baseurl)
    page_cookie = page.cookies
    soup = BeautifulSoup(page.content, 'html.parser')
    print(f"Accessing page {NextBtnClicker+1} = {baseurl}....")
    for deviantdata in soup.find_all('div', {'class': "mWr4e"}):
        hrefval = deviantdata.select('a[data-hook="deviation_link"]')[0]['href']
        deviantartpages.append(hrefval)
    # each page yields up to 24 urls
    NextBtnClicker += 1
    nexts = urlextractor.next_cursor_requests(baseurl, page_cookie)
    while NextBtnClicker <= nextpage - 1 and nexts:
        joinedurl = "https://www.deviantart.com" + nexts
        print(f"Accessing page {NextBtnClicker+1} = {joinedurl}....")
        main_page = requests.get(joinedurl, cookies=page_cookie)
        mp_cookie = main_page.cookies
        soup = BeautifulSoup(main_page.content, 'html.parser')
        for deviantdata in soup.find_all('div', {'class': "mWr4e"}):
            hrefval = deviantdata.select('a[data-hook="deviation_link"]')[0]['href']
            deviantartpages.append(hrefval)
        nexts = urlextractor.next_cursor_requests(joinedurl, mp_cookie)
        NextBtnClicker += 1
        sleep(2)  # be polite between page requests
    return deviantartpages
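A usage sketch for ``deviant_scrapper_pages``; the tag URL is illustrative, and since the function is deprecated it may collect nothing against the current DeviantArt site:

# Hypothetical usage: the tag URL is only an example.
links = deviant_scrapper_pages("https://www.deviantart.com/tag/landscape", nextpage=2)
print(f"Collected {len(links)} deviation links")  # expect up to 24 * nextpage links
for url in links[:5]:
    print(url)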