added argparse

This commit is contained in:
emdee 2022-11-16 15:33:50 +00:00
parent 71672da7af
commit 1cb4e53cce
4 changed files with 78 additions and 52 deletions

View File

@ -41,7 +41,7 @@ Qt picks up proxies from the environment, so this will respect
* Python3 * Python3
* PyQt5 (this should work with PySide2 and PyQt6 - let us know.) * PyQt5 (this should work with PySide2 and PyQt6 - let us know.)
* [qasnyc](https://github.com/CabbageDevelopment/qasync) for the * [qasnyc](https://github.com/CabbageDevelopment/qasync) for the
standalone program ```qasync_lookup.py``` standalone program ```qasync_phantompy.py```
## Standalone ## Standalone

View File

@ -13,17 +13,18 @@ replacement for other bulky headless browser frameworks.
If you have a display attached: If you have a display attached:
./phantom.py <url> <pdf-file> [<javascript-file>] ./phantom.py [--pdf_output <pdf-file>] [--js_input <javascript-file>] <url-or-html-file>
If you don't have a display attached (i.e. on a remote server): If you don't have a display attached (i.e. on a remote server), you can use
xvfb-run, or don't add --show_gui - it should work without a display.
xvfb-run ./phantom.py <url> <pdf-file> [<javascript-file>]
Arguments: Arguments:
[--pdf_output <pdf-file>] (optional) Path and name of PDF file to generate
[--html_output <html-file>] (optional) Path and name of HTML file to generate
[--js_input <javascript-file>] (optional) Path and name of a JavaScript file to execute
--log_level 10=debug 20=info 30=warn 40=error
<url> Can be a http(s) URL or a path to a local file <url> Can be a http(s) URL or a path to a local file
<pdf-file> Path and name of PDF file to generate
[<javascript-file>] (optional) Path and name of a JavaScript file to execute
## Features ## Features
@ -55,6 +56,9 @@ CSS @media types, etc.
* Python3 * Python3
* PyQt5 * PyQt5
* [qasync](https://github.com/CabbageDevelopment/qasync) for the
standalone program ```qasync_phantompy.py```
* xvfb (optional for display-less machines) * xvfb (optional for display-less machines)
Installation of dependencies in Debian Stretch is easy: Installation of dependencies in Debian Stretch is easy:
@ -167,15 +171,18 @@ class Render(QWebEnginePage):
self.percent = 0 self.percent = 0
self.uri = None self.uri = None
self.jsfile = None self.jsfile = None
self.outfile = None self.htmlfile = None
self.pdffile = None
QWebEnginePage.__init__(self) QWebEnginePage.__init__(self)
def run(self, url, outfile, jsfile): def run(self, url, pdffile, htmlfile, jsfile):
self._app.lstart.append(id(self)) self._app.lstart.append(id(self))
self.percent = 10 self.percent = 10
self.uri = url self.uri = url
self.jsfile = jsfile self.jsfile = jsfile
self.outfile = outfile self.htmlfile = htmlfile
self.pdffile = pdffile
self.outfile = pdffile or htmlfile
LOG.debug(f"phantom.py: URL={url} OUTFILE={outfile} JSFILE={jsfile}") LOG.debug(f"phantom.py: URL={url} OUTFILE={outfile} JSFILE={jsfile}")
qurl = QUrl.fromUserInput(url) qurl = QUrl.fromUserInput(url)
@ -236,7 +243,7 @@ class Render(QWebEnginePage):
self._onConsoleMessage(0, "__PHANTOM_PY_SAVED__", 0 , '') self._onConsoleMessage(0, "__PHANTOM_PY_SAVED__", 0 , '')
def _save(self, html): def _save(self, html):
sfile = self.outfile.replace('.pdf','.html') sfile = self.htmlfile
# CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat # CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat
with open(sfile, 'wt') as ofd: with open(sfile, 'wt') as ofd:
ofd.write(html) ofd.write(html)
@ -244,7 +251,6 @@ class Render(QWebEnginePage):
def _printer_callback(self, *args): def _printer_callback(self, *args):
"""print(self, QPrinter, Callable[[bool], None])""" """print(self, QPrinter, Callable[[bool], None])"""
# print(f"_printer_callback {self.outfile} {args}")
if args[0] is False: if args[0] is False:
i = 1 i = 1
else: else:
@ -252,7 +258,7 @@ class Render(QWebEnginePage):
self._onConsoleMessage(i, "__PHANTOM_PY_PRINTED__", 0 , '') self._onConsoleMessage(i, "__PHANTOM_PY_PRINTED__", 0 , '')
def _print(self): def _print(self):
sfile = self.outfile.replace('.html', '.pdf') sfile = self.pdffile
printer = QPrinter() printer = QPrinter()
printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter) printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter)
printer.setPaperSize(QPrinter.A4) printer.setPaperSize(QPrinter.A4)
@ -268,25 +274,3 @@ class Render(QWebEnginePage):
# threadsafe? # threadsafe?
self._app.ldone.append(self.uri) self._app.ldone.append(self.uri)
def omain(app, largs):
    """Render a single URL and poll the application until it is reported done.

    Args:
        app: the Qt application object; it must provide ``processEvents()``,
            ``exit()`` and an ``ldone`` list that ``Render`` appends to when
            a page has finished.
        largs: positional arguments ``[url, outfile]`` with an optional
            third element naming a JavaScript file.

    Returns:
        -1 when fewer than two arguments are supplied, otherwise the
        ``Render`` instance (whether or not the page finished in time).
    """
    argc = len(largs)
    if argc < 2:
        LOG.info("USAGE: ./phantom.py <url> <pdf-file> [<javascript-file>]")
        return -1

    url, outfile = largs[0], largs[1]
    jsfile = None
    if argc > 2:
        jsfile = largs[2]

    # Only one page is queued, so one entry in app.ldone means we are done.
    expected = 1
    renderer = Render(app, do_print=False, do_save=True)
    renderer.run(url, outfile, jsfile)

    # Pump the event loop roughly once per second, for at most 119 rounds.
    for attempt in range(1, 120):
        app.processEvents()
        print(f"{app.ldone} {attempt}")
        if len(app.ldone) == expected:
            print(f"{app.ldone} found {expected}")
            app.exit()
            break
        time.sleep(1)
    return renderer

View File

@ -67,7 +67,21 @@ async def main(widget, app, ilen):
except asyncio.CancelledError as ex: except asyncio.CancelledError as ex:
LOG.debug("Task cancelled") LOG.debug("Task cancelled")
def iMain(largs, bgui=True): def iMain(largs):
parser = oMainArgparser()
oargs = parser.parse_args(lArgs)
bgui=oargs.show_gui
try:
from support_phantompy import vsetup_logging
d = int(os.environ.get('DEBUG', 0))
if d > 0:
vsetup_logging(10, stream=sys.stderr)
else:
vsetup_logging(oargs.log_level, stream=sys.stderr)
vsetup_logging(log_level, logfile='', stream=sys.stderr)
except: pass
app = QtWidgets.QApplication([]) app = QtWidgets.QApplication([])
app.lstart = [] app.lstart = []
if bgui: if bgui:
@ -80,14 +94,16 @@ def iMain(largs, bgui=True):
loop = qasync.QEventLoop(app) loop = qasync.QEventLoop(app)
asyncio.set_event_loop(loop) asyncio.set_event_loop(loop)
largs = sys.argv[1:] url = oargs.html_url
url = largs[0] htmlfile = oargs.html_output
outfile = largs[1] pdffile = oargs.html_output
jsfile = largs[2] if len(largs) > 2 else None jsfile = oargs.js_input
# run only starts the url loading # run only starts the url loading
r = Render(app, do_print=False, do_save=True) r = Render(app,
do_print=True if pdffile else False,
do_save=True if htmlfile else False)
uri = url.strip() uri = url.strip()
r.run(uri, outfile, jsfile) r.run(uri, pdffile, htmlfile, jsfile)
LOG.debug(f"{r.percent} {app.lstart}") LOG.debug(f"{r.percent} {app.lstart}")
LOG.info(f"queued {len(app.lstart)} urls") LOG.info(f"queued {len(app.lstart)} urls")
@ -101,15 +117,6 @@ def iMain(largs, bgui=True):
loop.run_until_complete(asyncio.gather(*tasks)) loop.run_until_complete(asyncio.gather(*tasks))
if __name__ == '__main__': if __name__ == '__main__':
try:
from exclude_badExits import vsetup_logging
d = int(os.environ.get('DEBUG', 0))
if d > 0:
vsetup_logging(10, stream=sys.stderr)
else:
vsetup_logging(20, stream=sys.stderr)
vsetup_logging(log_level, logfile='', stream=sys.stderr)
except: pass
iMain(sys.argv[1:], bgui=False) iMain(sys.argv[1:])

View File

@ -3,6 +3,7 @@
import sys import sys
import os import os
import argparse
try: try:
if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ: if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ:
@ -79,3 +80,37 @@ def vsetup_logging(log_level, logfile='', stream=sys.stdout):
'NOTSET': logging.NOTSET, 'NOTSET': logging.NOTSET,
} }
def omain__argparser(_=None):
    """Build the command-line parser for the phantompy programs.

    Options:
        --https_cafile  Certificate Authority file in PEM format (unused).
                        Defaults to the first existing path listed by
                        pyOpenSSL, or '' when none is found or pyOpenSSL
                        is not installed.
        --log_level     10=debug 20=info 30=warn 40=error (default 20).
        --js_input      JavaScript file to run against the loaded page.
        --html_output   HTML file to write the result to.
        --pdf_output    PDF file to write the result to.
        --show_gui      flag; show the progress meter.
        html_url        required positional: an HTML file or a URL.

    Args:
        _: ignored; accepted only for call compatibility.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    try:
        from OpenSSL import SSL
        # Private pyOpenSSL attribute; it may be absent in some versions.
        lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS
    except Exception:
        # Best effort only: fall back to "no CA file known".
        lCAfs = []

    CAfs = [elt for elt in lCAfs if os.path.exists(elt)] or ['']

    parser = argparse.ArgumentParser(add_help=True,
                                     epilog=__doc__)
    parser.add_argument('--https_cafile', type=str,
                        help="Certificate Authority file (in PEM) (unused)",
                        default=CAfs[0])
    parser.add_argument('--log_level', type=int, default=20,
                        help="10=debug 20=info 30=warn 40=error")
    parser.add_argument('--js_input', type=str, default='',
                        help="Operate on the HTML file with javascript")
    parser.add_argument('--html_output', type=str, default='',
                        help="Write loaded and javascripted result to a HTML file")
    parser.add_argument('--pdf_output', type=str, default='',
                        help="Write loaded and javascripted result to a PDF file")
    # A boolean switch must use action='store_true'; type=bool would treat
    # any non-empty string (including "False") as True.
    parser.add_argument('--show_gui', action='store_true', default=False,
                        help="show a progress meter that doesn't work")
    # Positionals are required by default; argparse rejects required=True
    # on them, and nargs='?' would have made the URL optional.
    parser.add_argument('html_url', type=str,
                        help='html file or url')
    return parser