add setup.py
This commit is contained in:
parent
1d92e0ec65
commit
c6a7d839d9
@ -13,4 +13,5 @@ try:
|
|||||||
vsetup_logging(log_level, logfile='', stream=sys.stderr)
|
vsetup_logging(log_level, logfile='', stream=sys.stderr)
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
iMain(sys.argv[1:], bgui=False)
|
if __name__ == '__main__':
|
||||||
|
iMain(sys.argv[1:], bgui=False)
|
||||||
|
19
lookupdns.py
19
lookupdns.py
@ -1,9 +1,14 @@
|
|||||||
#!/usr/local/bin/python3.sh
|
#!/usr/local/bin/python3.sh
|
||||||
# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*
|
# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*
|
||||||
|
|
||||||
# Looks for urls https://dns.google/resolve?
|
"""
|
||||||
# and parses them to extract a magic field.
|
Looks for urls https://dns.google/resolve?
|
||||||
# https://dns.google/resolve?name=domain.name&type=TXT&cd=true&do=true
|
https://dns.google/resolve?name=domain.name&type=TXT&cd=true&do=true
|
||||||
|
and parses them to extract a magic field.
|
||||||
|
|
||||||
|
A good example of how you can parse json embedded in HTML with phantomjs.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
@ -17,7 +22,7 @@ warnings.filterwarnings('ignore')
|
|||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
class LookFor(Render):
|
class LookFor(Render):
|
||||||
|
|
||||||
def __init__(self, app, do_print=True, do_save=False):
|
def __init__(self, app, do_print=True, do_save=False):
|
||||||
app.lfps = []
|
app.lfps = []
|
||||||
self._app = app
|
self._app = app
|
||||||
@ -37,7 +42,7 @@ class LookFor(Render):
|
|||||||
fp = fp[:i]
|
fp = fp[:i]
|
||||||
# threadsafe?
|
# threadsafe?
|
||||||
self._app.lfps.append(fp)
|
self._app.lfps.append(fp)
|
||||||
|
|
||||||
def _html_callback(self, *args):
|
def _html_callback(self, *args):
|
||||||
"""print(self, QPrinter, Callable[[bool], None])"""
|
"""print(self, QPrinter, Callable[[bool], None])"""
|
||||||
if type(args[0]) is str:
|
if type(args[0]) is str:
|
||||||
@ -72,8 +77,8 @@ class LookFor(Render):
|
|||||||
self.we_run_this_tor_relay = False
|
self.we_run_this_tor_relay = False
|
||||||
LOG.warn(f"BAD {self.uri}")
|
LOG.warn(f"BAD {self.uri}")
|
||||||
return 2
|
return 2
|
||||||
|
|
||||||
def _loadFinished(self, result):
|
def _loadFinished(self, result):
|
||||||
LOG.debug(f"phantom.py: Loading finished {self.uri}")
|
LOG.debug(f"phantom.py: Loading finished {self.uri}")
|
||||||
self.toHtml(self._html_callback)
|
self.toHtml(self._html_callback)
|
||||||
|
|
||||||
|
109
phantompy.py
109
phantompy.py
@ -13,8 +13,8 @@ replacement for other bulky headless browser frameworks.
|
|||||||
|
|
||||||
If you have a display attached:
|
If you have a display attached:
|
||||||
|
|
||||||
./phantom.py [--pdf_output <pdf-file>] [--js_input <javascript-file>] <url-or-html-file>
|
./phantom.py [--pdf_output <pdf-file>] [--js_input <javascript-file>] <url-or-html-file>
|
||||||
|
|
||||||
If you don't have a display attached (i.e. on a remote server), you can use
|
If you don't have a display attached (i.e. on a remote server), you can use
|
||||||
xvfb-run, or don't add --show_gui - it should work without a display.
|
xvfb-run, or don't add --show_gui - it should work without a display.
|
||||||
|
|
||||||
@ -64,7 +64,7 @@ CSS @media types, etc.
|
|||||||
Installation of dependencies in Debian Stretch is easy:
|
Installation of dependencies in Debian Stretch is easy:
|
||||||
|
|
||||||
apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit
|
apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit
|
||||||
|
|
||||||
Finding the equivalent for other OSes is an exercise that I leave to you.
|
Finding the equivalent for other OSes is an exercise that I leave to you.
|
||||||
|
|
||||||
|
|
||||||
@ -80,16 +80,16 @@ Given the following file /tmp/test.html
|
|||||||
document.getElementById('id1').innerHTML = "bar";
|
document.getElementById('id1').innerHTML = "bar";
|
||||||
</script>
|
</script>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
... and the following file /tmp/test.js:
|
... and the following file /tmp/test.js:
|
||||||
|
|
||||||
document.getElementById('id2').innerHTML = "baz";
|
document.getElementById('id2').innerHTML = "baz";
|
||||||
console.log("__PHANTOM_PY_DONE__");
|
console.log("__PHANTOM_PY_DONE__");
|
||||||
|
|
||||||
... and running this script (without attached display) ...
|
... and running this script (without attached display) ...
|
||||||
|
|
||||||
xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js
|
xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js
|
||||||
|
|
||||||
... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz".
|
... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz".
|
||||||
|
|
||||||
Note that the second occurrence of "foo" has been replaced by the web page's own
|
Note that the second occurrence of "foo" has been replaced by the web page's own
|
||||||
@ -130,8 +130,6 @@ from PyQt5.QtWidgets import QApplication
|
|||||||
from PyQt5.QtPrintSupport import QPrinter
|
from PyQt5.QtPrintSupport import QPrinter
|
||||||
from PyQt5.QtWebEngineWidgets import QWebEnginePage
|
from PyQt5.QtWebEngineWidgets import QWebEnginePage
|
||||||
|
|
||||||
from support_phantompy import vsetup_logging
|
|
||||||
|
|
||||||
global LOG
|
global LOG
|
||||||
import logging
|
import logging
|
||||||
import warnings
|
import warnings
|
||||||
@ -161,19 +159,19 @@ def prepare(sdir='/tmp'):
|
|||||||
</html>
|
</html>
|
||||||
""")
|
""")
|
||||||
LOG.debug(f"wrote {sfile} ")
|
LOG.debug(f"wrote {sfile} ")
|
||||||
|
|
||||||
class Render(QWebEnginePage):
|
class Render(QWebEnginePage):
|
||||||
def __init__(self, app, do_print=False, do_save=True):
|
def __init__(self, app, do_print=False, do_save=True):
|
||||||
app.ldone = []
|
app.ldone = []
|
||||||
self._app = app
|
self._app = app
|
||||||
self.do_print = do_print
|
self.do_print = do_print
|
||||||
self.do_save = do_save
|
self.do_save = do_save
|
||||||
self.percent = 0
|
self.percent = 0
|
||||||
self.uri = None
|
self.uri = None
|
||||||
self.jsfile = None
|
self.jsfile = None
|
||||||
self.htmlfile = None
|
self.htmlfile = None
|
||||||
self.pdffile = None
|
self.pdffile = None
|
||||||
QWebEnginePage.__init__(self)
|
QWebEnginePage.__init__(self)
|
||||||
|
|
||||||
def run(self, url, pdffile, htmlfile, jsfile):
|
def run(self, url, pdffile, htmlfile, jsfile):
|
||||||
self._app.lstart.append(id(self))
|
self._app.lstart.append(id(self))
|
||||||
@ -184,64 +182,65 @@ class Render(QWebEnginePage):
|
|||||||
self.pdffile = pdffile
|
self.pdffile = pdffile
|
||||||
self.outfile = pdffile or htmlfile
|
self.outfile = pdffile or htmlfile
|
||||||
LOG.debug(f"phantom.py: URL={url} OUTFILE={outfile} JSFILE={jsfile}")
|
LOG.debug(f"phantom.py: URL={url} OUTFILE={outfile} JSFILE={jsfile}")
|
||||||
qurl = QUrl.fromUserInput(url)
|
qurl = QUrl.fromUserInput(url)
|
||||||
|
|
||||||
# The PDF generation only happens when the special string __PHANTOM_PY_DONE__
|
# The PDF generation only happens when the special string __PHANTOM_PY_DONE__
|
||||||
# is sent to console.log(). The following JS string will be executed by
|
# is sent to console.log(). The following JS string will be executed by
|
||||||
# default, when no external JavaScript file is specified.
|
# default, when no external JavaScript file is specified.
|
||||||
self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);";
|
self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);";
|
||||||
|
|
||||||
if jsfile:
|
if jsfile:
|
||||||
try:
|
try:
|
||||||
with open(self.jsfile, 'rt') as f:
|
with open(self.jsfile, 'rt') as f:
|
||||||
self.js_contents = f.read()
|
self.js_contents = f.read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.exception(f"error reading jsfile {self.jsfile}")
|
LOG.exception(f"error reading jsfile {self.jsfile}")
|
||||||
|
|
||||||
self.loadFinished.connect(self._loadFinished)
|
self.loadFinished.connect(self._loadFinished)
|
||||||
self.percent = 20
|
self.percent = 20
|
||||||
self.load(qurl)
|
self.load(qurl)
|
||||||
self.javaScriptConsoleMessage = self._onConsoleMessage
|
self.javaScriptConsoleMessage = self._onConsoleMessage
|
||||||
LOG.debug(f"phantom.py: loading 10")
|
LOG.debug(f"phantom.py: loading 10")
|
||||||
|
|
||||||
def _onConsoleMessage(self, *args):
|
def _onConsoleMessage(self, *args):
|
||||||
if len(args) > 3:
|
if len(args) > 3:
|
||||||
level, txt, lineno, filename = args
|
level, txt, lineno, filename = args
|
||||||
else:
|
else:
|
||||||
level = 1
|
level = 1
|
||||||
txt, lineno, filename = args
|
txt, lineno, filename = args
|
||||||
LOG.debug(f"CONSOLE {lineno} {txt} {filename}")
|
LOG.debug(f"CONSOLE {lineno} {txt} {filename}")
|
||||||
if "__PHANTOM_PY_DONE__" in txt:
|
if "__PHANTOM_PY_DONE__" in txt:
|
||||||
self.percent = 40
|
self.percent = 40
|
||||||
# If we get this magic string, it means that the external JS is done
|
# If we get this magic string, it means that the external JS is done
|
||||||
if self.do_save:
|
if self.do_save:
|
||||||
self.toHtml(self._html_callback)
|
self.toHtml(self._html_callback)
|
||||||
return
|
return
|
||||||
# drop through
|
# drop through
|
||||||
txt = "__PHANTOM_PY_SAVED__"
|
txt = "__PHANTOM_PY_SAVED__"
|
||||||
if "__PHANTOM_PY_SAVED__" in txt:
|
if "__PHANTOM_PY_SAVED__" in txt:
|
||||||
self.percent = 50
|
self.percent = 50
|
||||||
if self.do_print:
|
if self.do_print:
|
||||||
self._print()
|
self._print()
|
||||||
return
|
return
|
||||||
txt = "__PHANTOM_PY_PRINTED__"
|
txt = "__PHANTOM_PY_PRINTED__"
|
||||||
if "__PHANTOM_PY_PRINTED__" in txt:
|
if "__PHANTOM_PY_PRINTED__" in txt:
|
||||||
self.percent = 60
|
self.percent = 60
|
||||||
self._exit(level)
|
self._exit(level)
|
||||||
|
|
||||||
def _loadFinished(self, result):
|
def _loadFinished(self, result):
|
||||||
self.percent = 30
|
# RenderProcessTerminationStatus ?
|
||||||
LOG.info(f"phantom.py: _loadFinished {result} {self.percent}")
|
self.percent = 30
|
||||||
LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}")
|
LOG.info(f"phantom.py: _loadFinished {result} {self.percent}")
|
||||||
self.runJavaScript("document.documentElement.contentEditable=true")
|
LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}")
|
||||||
self.runJavaScript(self.js_contents)
|
self.runJavaScript("document.documentElement.contentEditable=true")
|
||||||
|
self.runJavaScript(self.js_contents)
|
||||||
|
|
||||||
def _html_callback(self, *args):
|
def _html_callback(self, *args):
|
||||||
"""print(self, QPrinter, Callable[[bool], None])"""
|
"""print(self, QPrinter, Callable[[bool], None])"""
|
||||||
if type(args[0]) is str:
|
if type(args[0]) is str:
|
||||||
self._save(args[0])
|
self._save(args[0])
|
||||||
self._onConsoleMessage(0, "__PHANTOM_PY_SAVED__", 0 , '')
|
self._onConsoleMessage(0, "__PHANTOM_PY_SAVED__", 0 , '')
|
||||||
|
|
||||||
def _save(self, html):
|
def _save(self, html):
|
||||||
sfile = self.htmlfile
|
sfile = self.htmlfile
|
||||||
# CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat
|
# CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat
|
||||||
@ -267,7 +266,7 @@ class Render(QWebEnginePage):
|
|||||||
printer.setOutputFileName(sfile)
|
printer.setOutputFileName(sfile)
|
||||||
self.print(printer, self._printer_callback)
|
self.print(printer, self._printer_callback)
|
||||||
LOG.debug("phantom.py: Printed")
|
LOG.debug("phantom.py: Printed")
|
||||||
|
|
||||||
def _exit(self, val):
|
def _exit(self, val):
|
||||||
self.percent = 100
|
self.percent = 100
|
||||||
LOG.debug(f"phantom.py: Exiting with val {val}")
|
LOG.debug(f"phantom.py: Exiting with val {val}")
|
||||||
|
@ -13,6 +13,7 @@ from PyQt5.QtWidgets import (QProgressBar, QWidget, QVBoxLayout)
|
|||||||
|
|
||||||
from phantompy import Render
|
from phantompy import Render
|
||||||
# from lookupdns import LookFor as Render
|
# from lookupdns import LookFor as Render
|
||||||
|
from support_phantompy import vsetup_logging, omain_argparser
|
||||||
|
|
||||||
global LOG
|
global LOG
|
||||||
import logging
|
import logging
|
||||||
@ -35,7 +36,7 @@ class Widget(QtWidgets.QWidget):
|
|||||||
i = len(asyncio.all_tasks())
|
i = len(asyncio.all_tasks())
|
||||||
self._label.setText(str(i))
|
self._label.setText(str(i))
|
||||||
self.progress.setValue(int(text))
|
self.progress.setValue(int(text))
|
||||||
|
|
||||||
class ContextManager:
|
class ContextManager:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._seconds = 0
|
self._seconds = 0
|
||||||
@ -63,25 +64,22 @@ async def main(widget, app, ilen):
|
|||||||
app.exit()
|
app.exit()
|
||||||
# raise asyncio.CancelledError
|
# raise asyncio.CancelledError
|
||||||
return
|
return
|
||||||
LOG.debug(f"{app.ldone} {perc} {seconds}")
|
LOG.debug(f"{app.ldone} {seconds}")
|
||||||
except asyncio.CancelledError as ex:
|
except asyncio.CancelledError as ex:
|
||||||
LOG.debug("Task cancelled")
|
LOG.debug("Task cancelled")
|
||||||
|
|
||||||
def iMain(largs):
|
def iMain(largs):
|
||||||
parser = oMainArgparser()
|
parser = omain_argparser()
|
||||||
oargs = parser.parse_args(lArgs)
|
oargs = parser.parse_args(largs)
|
||||||
bgui=oargs.show_gui
|
bgui=oargs.show_gui
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from support_phantompy import vsetup_logging
|
|
||||||
d = int(os.environ.get('DEBUG', 0))
|
d = int(os.environ.get('DEBUG', 0))
|
||||||
if d > 0:
|
if d > 0:
|
||||||
vsetup_logging(10, stream=sys.stderr)
|
oargs.log_level = 10
|
||||||
else:
|
vsetup_logging(oargs.log_level, logfile='', stream=sys.stderr)
|
||||||
vsetup_logging(oargs.log_level, stream=sys.stderr)
|
|
||||||
vsetup_logging(log_level, logfile='', stream=sys.stderr)
|
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
app = QtWidgets.QApplication([])
|
app = QtWidgets.QApplication([])
|
||||||
app.lstart = []
|
app.lstart = []
|
||||||
if bgui:
|
if bgui:
|
||||||
@ -90,7 +88,7 @@ def iMain(largs):
|
|||||||
widget.show()
|
widget.show()
|
||||||
else:
|
else:
|
||||||
widget = None
|
widget = None
|
||||||
|
|
||||||
loop = qasync.QEventLoop(app)
|
loop = qasync.QEventLoop(app)
|
||||||
asyncio.set_event_loop(loop)
|
asyncio.set_event_loop(loop)
|
||||||
|
|
||||||
@ -105,9 +103,9 @@ def iMain(largs):
|
|||||||
uri = url.strip()
|
uri = url.strip()
|
||||||
r.run(uri, pdffile, htmlfile, jsfile)
|
r.run(uri, pdffile, htmlfile, jsfile)
|
||||||
LOG.debug(f"{r.percent} {app.lstart}")
|
LOG.debug(f"{r.percent} {app.lstart}")
|
||||||
|
|
||||||
LOG.info(f"queued {len(app.lstart)} urls")
|
LOG.info(f"queued {len(app.lstart)} urls")
|
||||||
|
|
||||||
task = loop.create_task(main(widget, app, 1))
|
task = loop.create_task(main(widget, app, 1))
|
||||||
loop.run_forever()
|
loop.run_forever()
|
||||||
|
|
||||||
@ -117,6 +115,6 @@ def iMain(largs):
|
|||||||
loop.run_until_complete(asyncio.gather(*tasks))
|
loop.run_until_complete(asyncio.gather(*tasks))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
iMain(sys.argv[1:])
|
iMain(sys.argv[1:])
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ def vsetup_logging(log_level, logfile='', stream=sys.stdout):
|
|||||||
'NOTSET': logging.NOTSET,
|
'NOTSET': logging.NOTSET,
|
||||||
}
|
}
|
||||||
|
|
||||||
def omain__argparser(_=None):
|
def omain_argparser(_=None):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from OpenSSL import SSL
|
from OpenSSL import SSL
|
||||||
@ -106,9 +106,9 @@ def omain__argparser(_=None):
|
|||||||
help="Operate on the HTML file with javascript")
|
help="Operate on the HTML file with javascript")
|
||||||
parser.add_argument('--html_output', type=str, default='',
|
parser.add_argument('--html_output', type=str, default='',
|
||||||
help="Write loaded and javascripted result to a HTML file")
|
help="Write loaded and javascripted result to a HTML file")
|
||||||
parser.add_argument('--pdf_output', type=str, default=''),
|
parser.add_argument('--pdf_output', type=str, default='',
|
||||||
help="Write loaded and javascripted result to a PDF file")
|
help="Write loaded and javascripted result to a PDF file")
|
||||||
parser.add_argument('--show_gui', type=bool, store_action=True),
|
parser.add_argument('--show_gui', type=bool, default=False, store_action=True),
|
||||||
help="show a progress meter that doesn't work")
|
help="show a progress meter that doesn't work")
|
||||||
parser.add_argument('html_url', type=str, nargs='?',
|
parser.add_argument('html_url', type=str, nargs='?',
|
||||||
required=True,
|
required=True,
|
||||||
|
Loading…
Reference in New Issue
Block a user