From 1cb4e53ccead1c6d803813abcace69f5089438e3 Mon Sep 17 00:00:00 2001 From: emdee Date: Wed, 16 Nov 2022 15:33:50 +0000 Subject: [PATCH] added argparse --- README.md | 2 +- phantompy.py | 52 +++++++++++++++----------------------------- qasync_phantompy.py | 41 +++++++++++++++++++--------------- support_phantompy.py | 35 +++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index ce62184..249992a 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Qt picks up proxies from the environment, so this will respect * Python3 * PyQt5 (this should work with PySide2 and PyQt6 - let us know.) * [qasnyc](https://github.com/CabbageDevelopment/qasync) for the - standalone program ```qasync_lookup.py``` + standalone program ```qasync_phantompy.py``` ## Standalone diff --git a/phantompy.py b/phantompy.py index 5506f46..a0931f1 100644 --- a/phantompy.py +++ b/phantompy.py @@ -13,17 +13,18 @@ replacement for other bulky headless browser frameworks. If you have a display attached: - ./phantom.py [] + ./phantom.py [--pdf_output ] [--js_input ] -If you don't have a display attached (i.e. on a remote server): - - xvfb-run ./phantom.py [] +If you don't have a display attached (i.e. on a remote server), you can use +xvfb-run, or don't add --show_gui - it should work without a display. Arguments: +[--pdf_output ] (optional) Path and name of PDF file to generate +[--html_output ] (optional) Path and name of HTML file to generate +[--js_input ] (optional) Path and name of a JavaScript file to execute +--log_level 10=debug 20=info 30=warn 40=error Can be a http(s) URL or a path to a local file - Path and name of PDF file to generate -[] (optional) Path and name of a JavaScript file to execute ## Features @@ -55,6 +56,9 @@ CSS @media types, etc. 
* Python3 * PyQt5 +* [qasync](https://github.com/CabbageDevelopment/qasync) for the + standalone program ```qasync_phantompy.py``` + * xvfb (optional for display-less machines) Installation of dependencies in Debian Stretch is easy: @@ -167,15 +171,18 @@ class Render(QWebEnginePage): self.percent = 0 self.uri = None self.jsfile = None - self.outfile = None + self.htmlfile = None + self.pdffile = None QWebEnginePage.__init__(self) - def run(self, url, outfile, jsfile): + def run(self, url, pdffile, htmlfile, jsfile): self._app.lstart.append(id(self)) self.percent = 10 self.uri = url self.jsfile = jsfile - self.outfile = outfile + self.htmlfile = htmlfile + self.pdffile = pdffile + outfile = self.outfile = pdffile or htmlfile # local name is still read by the debug log below LOG.debug(f"phantom.py: URL={url} OUTFILE={outfile} JSFILE={jsfile}") qurl = QUrl.fromUserInput(url) @@ -236,7 +243,7 @@ class Render(QWebEnginePage): self._onConsoleMessage(0, "__PHANTOM_PY_SAVED__", 0 , '') def _save(self, html): - sfile = self.outfile.replace('.pdf','.html') + sfile = self.htmlfile # CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat with open(sfile, 'wt') as ofd: ofd.write(html) @@ -244,7 +251,6 @@ class Render(QWebEnginePage): def _printer_callback(self, *args): """print(self, QPrinter, Callable[[bool], None])""" - # print(f"_printer_callback {self.outfile} {args}") if args[0] is False: i = 1 else: @@ -252,7 +258,7 @@ class Render(QWebEnginePage): self._onConsoleMessage(i, "__PHANTOM_PY_PRINTED__", 0 , '') def _print(self): - sfile = self.outfile.replace('.html', '.pdf') + sfile = self.pdffile printer = QPrinter() printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter) printer.setPaperSize(QPrinter.A4) @@ -268,25 +274,3 @@ class Render(QWebEnginePage): # threadsafe?
self._app.ldone.append(self.uri) -def omain(app, largs): - if (len(largs) < 2): - LOG.info("USAGE: ./phantom.py []") - return -1 - - url = largs[0] - outfile = largs[1] - jsfile = largs[2] if len(largs) > 2 else None - ilen = 1 - - r = Render(app, do_print=False, do_save=True) - r.run(url, outfile, jsfile) - for i in range(1, 120): - app.processEvents() - print(f"{app.ldone} {i}") - if len(app.ldone) == ilen: - print(f"{app.ldone} found {ilen}") - app.exit() - return r - time.sleep(1) - return r - diff --git a/qasync_phantompy.py b/qasync_phantompy.py index 3a5ebf3..f510f8a 100644 --- a/qasync_phantompy.py +++ b/qasync_phantompy.py @@ -67,7 +67,21 @@ async def main(widget, app, ilen): except asyncio.CancelledError as ex: LOG.debug("Task cancelled") -def iMain(largs, bgui=True): +def iMain(largs): + """Parse largs (argv without the program name), set up logging, then start rendering the URL.""" + from support_phantompy import omain__argparser + oargs = omain__argparser().parse_args(largs) + bgui = oargs.show_gui + + # best effort: a failure to configure logging must not stop the run + try: + from support_phantompy import vsetup_logging + # DEBUG in the environment overrides --log_level + ilevel = 10 if int(os.environ.get('DEBUG', 0)) > 0 else oargs.log_level + vsetup_logging(ilevel, stream=sys.stderr) + except Exception as e: + LOG.warning(f"logging setup failed: {e}") + app = QtWidgets.QApplication([]) app.lstart = [] if bgui: @@ -80,14 +94,16 @@ def iMain(largs, bgui=True): loop = qasync.QEventLoop(app) asyncio.set_event_loop(loop) - largs = sys.argv[1:] - url = largs[0] - outfile = largs[1] - jsfile = largs[2] if len(largs) > 2 else None + url = oargs.html_url + htmlfile = oargs.html_output + pdffile = oargs.pdf_output + jsfile = oargs.js_input # run only starts the url loading - r = Render(app, do_print=False, do_save=True) + r = Render(app, + do_print=bool(pdffile), + do_save=bool(htmlfile)) uri = url.strip() - r.run(uri, outfile, jsfile) + r.run(uri, pdffile, htmlfile, jsfile) LOG.debug(f"{r.percent} {app.lstart}") LOG.info(f"queued {len(app.lstart)} urls") @@ -101,15 +117,6 @@ def iMain(largs, bgui=True):
loop.run_until_complete(asyncio.gather(*tasks)) if __name__ == '__main__': - try: - from exclude_badExits import vsetup_logging - d = int(os.environ.get('DEBUG', 0)) - if d > 0: - vsetup_logging(10, stream=sys.stderr) - else: - vsetup_logging(20, stream=sys.stderr) - vsetup_logging(log_level, logfile='', stream=sys.stderr) - except: pass - iMain(sys.argv[1:], bgui=False) + iMain(sys.argv[1:]) diff --git a/support_phantompy.py b/support_phantompy.py index 8d688bd..7286c36 100644 --- a/support_phantompy.py +++ b/support_phantompy.py @@ -3,6 +3,7 @@ import sys import os +import argparse try: if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ: @@ -79,3 +80,37 @@ def vsetup_logging(log_level, logfile='', stream=sys.stdout): 'NOTSET': logging.NOTSET, } +def omain__argparser(_=None): + """Return the argparse parser for the phantompy command line; the argument is ignored.""" + try: + from OpenSSL import SSL + lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS + except (ImportError, AttributeError): + lCAfs = [] + + CAfs = [] + for elt in lCAfs: + if os.path.exists(elt): + CAfs.append(elt) + if not CAfs: + CAfs = [''] + + parser = argparse.ArgumentParser(add_help=True, + epilog=__doc__) + parser.add_argument('--https_cafile', type=str, + help="Certificate Authority file (in PEM) (unused)", + default=CAfs[0]) + parser.add_argument('--log_level', type=int, default=20, + help="10=debug 20=info 30=warn 40=error") + parser.add_argument('--js_input', type=str, default='', + help="Operate on the HTML file with javascript") + parser.add_argument('--html_output', type=str, default='', + help="Write loaded and javascripted result to a HTML file") + parser.add_argument('--pdf_output', type=str, default='', + help="Write loaded and javascripted result to a PDF file") + parser.add_argument('--show_gui', action='store_true', default=False, + help="show a progress meter that doesn't work") + parser.add_argument('html_url', + type=str, + help='html file or url') + return parser