Skip to content

Commit

Permalink
Migrate to PyExecJS from js2py
Browse files Browse the repository at this point in the history
- Fixes lnmtl, rewayatclub
  • Loading branch information
dipu-bd committed Jan 3, 2025
1 parent 36e153a commit 86ac5a0
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 80 deletions.
2 changes: 1 addition & 1 deletion lncrawl/core/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def init_scraper(self, session: Optional[Session] = None):
# debug=True,
# delay=10,
ssl_context=ctx,
interpreter="js2py",
# interpreter="nodejs",
)
except Exception:
logger.exception("Failed to initialize cloudscraper")
Expand Down
2 changes: 1 addition & 1 deletion requirements-app.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ requests>=2.20.0,<3.0.0
python-slugify>=4.0.0,<9.0.0
colorama>=0.4.0,<0.5.0
tqdm>=4.60,<5.0
js2py>=0.71
PyExecJS>=1.5.1,<2.0.0
ebooklib>=0.17.0,<1.0.0
pillow>=6.0.0
cloudscraper>=1.2.71
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ requests>=2.20.0,<3.0.0
python-slugify>=4.0.0,<9.0.0
colorama>=0.4.0,<0.5.0
tqdm>=4.60,<5.0
js2py>=0.71
PyExecJS>=1.5.1,<2.0.0
ebooklib>=0.17.0,<1.0.0
pillow>=6.0.0
cloudscraper>=1.2.71
Expand Down
39 changes: 19 additions & 20 deletions sources/ar/rewayatclub.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import math

import js2py
import execjs
from bs4.element import Tag

from lncrawl.core.crawler import Crawler
Expand All @@ -25,19 +25,11 @@ def read_novel_info(self):
self.is_rtl = True

soup = self.get_soup(self.novel_url)
script = soup.find(
lambda tag: isinstance(tag, Tag)
and tag.name == "script"
and tag.text.startswith("window.__NUXT__")
)
assert isinstance(script, Tag)

data = js2py.eval_js(script.text).to_dict()
assert isinstance(data, dict)
data = self.extract_nuxt_data(soup)

novel_info = data["fetch"][0]["novel"]
pagination = data["fetch"][1]["pagination"]
per_page = len(data["fetch"][1]["chapters"])
pagination = data["fetch"][0]["pagination"]
per_page = len(data["fetch"][0]["chapters"])

self.novel_title = novel_info["arabic"]
logger.info("Novel title: %s", self.novel_title)
Expand Down Expand Up @@ -81,14 +73,7 @@ def read_novel_info(self):

def download_chapter_body(self, chapter):
soup = self.get_soup(chapter["url"])
script = soup.find(
lambda tag: isinstance(tag, Tag)
and tag.name == "script"
and tag.text.startswith("window.__NUXT__")
)
assert isinstance(script, Tag)

data = js2py.eval_js(script.text).to_dict()
data = self.extract_nuxt_data(soup)

contents = data["fetch"][0]["contentParts"]
contents = [x["content"] for y in contents for x in y]
Expand All @@ -97,3 +82,17 @@ def download_chapter_body(self, chapter):
html = html.replace("<span>", "").replace("</span>", "")
body = self.make_soup(html).find("body")
return self.cleaner.extract_contents(body)

def extract_nuxt_data(self, soup) -> dict:
    """Extract and evaluate the ``window.__NUXT__`` payload embedded in a page.

    Finds the first ``<script>`` tag whose text starts with
    ``window.__NUXT__``, removes the leading ``window.__NUXT__=`` assignment
    so only the right-hand expression remains, and evaluates that expression
    with ``execjs``.

    Args:
        soup: Parsed page to search; must provide a ``find`` method that
            accepts a predicate.

    Returns:
        The evaluated payload as a ``dict``.

    Raises:
        AssertionError: If no matching script tag is found, or if the
            evaluated result is not a ``dict``.

    Errors raised by ``execjs.eval`` are not handled here and propagate to
    the caller.
    """
    prefix = "window.__NUXT__="

    script = soup.find(
        lambda tag: isinstance(tag, Tag)
        and tag.name == "script"
        and tag.text.startswith("window.__NUXT__")
    )
    assert isinstance(script, Tag)

    # Remove only the leading assignment. A blanket ``replace`` would also
    # rewrite any occurrence of the marker inside the payload itself.
    script_content = script.text.strip()
    if script_content.startswith(prefix):
        script_content = script_content[len(prefix):]

    # Drop the statement terminator (and surrounding whitespace) if present,
    # instead of blindly cutting the last character, which corrupts the
    # expression when the script does not end in exactly one ``;``.
    script_content = script_content.rstrip().rstrip(";").rstrip()

    # NOTE(review): this evaluates JavaScript taken from a remote page in
    # the local JS runtime; the source site is trusted implicitly here.
    data = execjs.eval(script_content)
    assert isinstance(data, dict)

    return data
9 changes: 5 additions & 4 deletions sources/en/l/lnmtl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from concurrent import futures

import js2py
import execjs
from bs4 import BeautifulSoup

from lncrawl.core.crawler import Crawler
Expand Down Expand Up @@ -84,9 +84,10 @@ def parse_volume_list(self, soup):
script = soup.find(name="main").find_next_sibling(name="script").string

try:
data = js2py.eval_js(
"(function() {" + script + "return window.lnmtl;})()"
).to_dict()
data = execjs.eval(
"(function() {var window = { lnmtl: {} }; var lnmtl = window.lnmtl;" + script + "return window.lnmtl;})()"
)
assert isinstance(data, dict)
for i, vol in enumerate(data["volumes"]):
title = vol.get("title", "") or ""
title = re.sub(r"[^\u0000-\u00FF]", "", title)
Expand Down
4 changes: 2 additions & 2 deletions sources/en/n/novelmao.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import logging
from urllib.parse import urlencode
import js2py
import execjs
from bs4.element import Tag

from lncrawl.core.crawler import Crawler
Expand All @@ -26,7 +26,7 @@ def read_novel_info(self):
assert isinstance(possible_script, Tag)
script_text = possible_script.get_text()

data = js2py.eval_js(script_text)
data = execjs.eval(script_text)
self.novel_title = data[2]["name"]
self.novel_cover = data[2]["image"]
self.novel_author = data[2]["author"]["name"]
Expand Down
8 changes: 3 additions & 5 deletions sources/en/r/ranobes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import logging
import re
import js2py
import execjs
from typing import Generator, Union

from bs4 import BeautifulSoup, Tag
Expand All @@ -25,8 +25,6 @@ class RanobeLibCrawler(SearchableBrowserTemplate):
"https://ranobes.net/",
"http://ranobes.net/",
]
has_manga = False
has_mtl = False

def initialize(self) -> None:
self.cleaner.bad_css.update([".free-support", 'div[id^="adfox_"]'])
Expand Down Expand Up @@ -108,7 +106,7 @@ def parse_chapter_list(
and tag.text.startswith("window.__DATA__")
)
assert isinstance(script, Tag)
data = js2py.eval_js(script.text).to_dict()
data = execjs.eval(script.text)
assert isinstance(data, dict)

pages_count = data["pages_count"]
Expand All @@ -131,7 +129,7 @@ def parse_chapter_list(
)
assert isinstance(script, Tag)

data = js2py.eval_js(script.text).to_dict()
data = execjs.eval(script.text)
assert isinstance(data, dict)

for chapter in reversed(data["chapters"]):
Expand Down
94 changes: 48 additions & 46 deletions sources/en/r/royalroad.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,52 +9,54 @@

class RoyalRoadCrawler(Crawler):
base_url = "https://www.royalroad.com/",
watermark_set = set("This book's true home is on another platform. Check it out there for the real experience.",
"This tale has been unlawfully lifted from Royal Road. If you spot it on Amazon, please report it.",
"This novel's true home is a different platform. Support the author by finding it there.",
"Stolen from its rightful place, this narrative is not meant to be on Amazon; report any sightings.",
"If you discover this tale on Amazon, be aware that it has been stolen. Please report the violation.",
"If you find this story on Amazon, be aware that it has been stolen. Please report the infringement.",
"Enjoying this book? Seek out the original to ensure the author gets credit.",
"Did you know this text is from a different site? Read the official version to support the creator.",
"The tale has been illicitly lifted; should you spot it on Amazon, report the violation.",
"The tale has been taken without authorization; if you see it on Amazon, report the incident.",
"Ensure your favorite authors get the support they deserve. Read this novel on Royal Road.",
"Reading on Amazon or a pirate site? This novel is from Royal Road. Support the author by reading it there.",
"The tale has been stolen; if detected on Amazon, report the violation.",
"A case of content theft: this narrative is not rightfully on Amazon; if you spot it, report the violation.",
"Love this novel? Read it on Royal Road to ensure the author gets credit.",
"The story has been stolen; if detected on Amazon, report the violation.",
"If you come across this story on Amazon, be aware that it has been stolen from Royal Road. Please report it.",
"Stolen from its original source, this story is not meant to be on Amazon; report any sightings.",
"The author's narrative has been misappropriated; report any instances of this story on Amazon.",
"If you come across this story on Amazon, it's taken without permission from the author. Report it.",
"The author's tale has been misappropriated; report any instances of this story on Amazon.",
"Stolen from its rightful author, this tale is not meant to be on Amazon; report any sightings.",
"Stolen content alert: this content belongs on Royal Road. Report any occurrences.",
"Did you know this story is from Royal Road? Read the official version for free and support the author.",
"Unauthorized duplication: this tale has been taken without consent. Report sightings.",
"This narrative has been unlawfully taken from Royal Road. If you see it on Amazon, please report it.",
"Stolen content warning: this content belongs on Royal Road. Report any occurrences.",
"Help support creative writers by finding and reading their stories on the original site.",
"If you stumble upon this narrative on Amazon, it's taken without the author's consent. Report it.",
"If you discover this narrative on Amazon, be aware that it has been stolen. Please report the violation.",
"If you spot this narrative on Amazon, know that it has been stolen. Report the violation.",
"This tale has been unlawfully lifted without the author's consent. Report any appearances on Amazon.",
"If you encounter this tale on Amazon, note that it's taken without the author's consent. Report it.",
"This tale has been pilfered from Royal Road. If found on Amazon, kindly file a report.",
"This story has been stolen from Royal Road. If you read it on Amazon, please report it",
"Enjoying the story? Show your support by reading it on the official site.",
"The genuine version of this novel can be found on another site. Support the author by reading it there.",
"This story is posted elsewhere by the author. Help them out by reading the authentic version.",
"Love what you're reading? Discover and support the author on the platform they originally published on.",
"Stolen story; please report.",
"The narrative has been stolen; if detected on Amazon, report the infringement.",
"Support the creativity of authors by visiting the original site for this novel and more.",
"This tale has been unlawfully obtained from Royal Road. If you discover it on Amazon, kindly report it.",
"Reading on this site? This novel is published elsewhere. Support the author by seeking out the original.",
"Stolen from Royal Road, this story should be reported if encountered on Amazon.",
"This story originates from a different website. Ensure the author gets the support they deserve by reading it there.")
watermark_set = {
"This book's true home is on another platform. Check it out there for the real experience.",
"This tale has been unlawfully lifted from Royal Road. If you spot it on Amazon, please report it.",
"This novel's true home is a different platform. Support the author by finding it there.",
"Stolen from its rightful place, this narrative is not meant to be on Amazon; report any sightings.",
"If you discover this tale on Amazon, be aware that it has been stolen. Please report the violation.",
"If you find this story on Amazon, be aware that it has been stolen. Please report the infringement.",
"Enjoying this book? Seek out the original to ensure the author gets credit.",
"Did you know this text is from a different site? Read the official version to support the creator.",
"The tale has been illicitly lifted; should you spot it on Amazon, report the violation.",
"The tale has been taken without authorization; if you see it on Amazon, report the incident.",
"Ensure your favorite authors get the support they deserve. Read this novel on Royal Road.",
"Reading on Amazon or a pirate site? This novel is from Royal Road. Support the author by reading it there.",
"The tale has been stolen; if detected on Amazon, report the violation.",
"A case of content theft: this narrative is not rightfully on Amazon; if you spot it, report the violation.",
"Love this novel? Read it on Royal Road to ensure the author gets credit.",
"The story has been stolen; if detected on Amazon, report the violation.",
"If you come across this story on Amazon, be aware that it has been stolen from Royal Road. Please report it.",
"Stolen from its original source, this story is not meant to be on Amazon; report any sightings.",
"The author's narrative has been misappropriated; report any instances of this story on Amazon.",
"If you come across this story on Amazon, it's taken without permission from the author. Report it.",
"The author's tale has been misappropriated; report any instances of this story on Amazon.",
"Stolen from its rightful author, this tale is not meant to be on Amazon; report any sightings.",
"Stolen content alert: this content belongs on Royal Road. Report any occurrences.",
"Did you know this story is from Royal Road? Read the official version for free and support the author.",
"Unauthorized duplication: this tale has been taken without consent. Report sightings.",
"This narrative has been unlawfully taken from Royal Road. If you see it on Amazon, please report it.",
"Stolen content warning: this content belongs on Royal Road. Report any occurrences.",
"Help support creative writers by finding and reading their stories on the original site.",
"If you stumble upon this narrative on Amazon, it's taken without the author's consent. Report it.",
"If you discover this narrative on Amazon, be aware that it has been stolen. Please report the violation.",
"If you spot this narrative on Amazon, know that it has been stolen. Report the violation.",
"This tale has been unlawfully lifted without the author's consent. Report any appearances on Amazon.",
"If you encounter this tale on Amazon, note that it's taken without the author's consent. Report it.",
"This tale has been pilfered from Royal Road. If found on Amazon, kindly file a report.",
"This story has been stolen from Royal Road. If you read it on Amazon, please report it",
"Enjoying the story? Show your support by reading it on the official site.",
"The genuine version of this novel can be found on another site. Support the author by reading it there.",
"This story is posted elsewhere by the author. Help them out by reading the authentic version.",
"Love what you're reading? Discover and support the author on the platform they originally published on.",
"Stolen story; please report.",
"The narrative has been stolen; if detected on Amazon, report the infringement.",
"Support the creativity of authors by visiting the original site for this novel and more.",
"This tale has been unlawfully obtained from Royal Road. If you discover it on Amazon, kindly report it.",
"Reading on this site? This novel is published elsewhere. Support the author by seeking out the original.",
"Stolen from Royal Road, this story should be reported if encountered on Amazon.",
"This story originates from a different website. Ensure the author gets the support they deserve by reading it there.",
}

def initialize(self):
self.init_executor(1)
Expand Down

0 comments on commit 86ac5a0

Please sign in to comment.