336 lines
12 KiB
Python
336 lines
12 KiB
Python
|
# $Id: universal.py 9502 2023-12-14 22:39:08Z milde $
|
||
|
# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
|
||
|
# Maintainer: docutils-develop@lists.sourceforge.net
|
||
|
# Copyright: This module has been placed in the public domain.
|
||
|
|
||
|
"""
|
||
|
Transforms needed by most or all documents:
|
||
|
|
||
|
- `Decorations`: Generate a document's header & footer.
|
||
|
- `ExposeInternals`: Expose internal attributes.
|
||
|
- `Messages`: Placement of system messages generated after parsing.
|
||
|
- `FilterMessages`: Remove system messages below verbosity threshold.
|
||
|
- `TestMessages`: Like `Messages`, used on test runs.
|
||
|
- `StripComments`: Remove comment elements from the document tree.
|
||
|
- `StripClassesAndElements`: Remove elements with classes
|
||
|
in `self.document.settings.strip_elements_with_classes`
|
||
|
and class values in `self.document.settings.strip_classes`.
|
||
|
- `SmartQuotes`: Replace ASCII quotation marks with typographic form.
|
||
|
"""
|
||
|
|
||
|
__docformat__ = 'reStructuredText'
|
||
|
|
||
|
import re
|
||
|
import time
|
||
|
from docutils import nodes, utils
|
||
|
from docutils.transforms import Transform
|
||
|
from docutils.utils import smartquotes
|
||
|
|
||
|
|
||
|
class Decorations(Transform):

    """
    Populate a document's decoration element (header, footer).
    """

    default_priority = 820

    def apply(self):
        """Insert the generated header and footer nodes into the document.

        The decoration element is only requested from the document when
        `generate_header()` or `generate_footer()` returned something truthy.
        """
        header_nodes = self.generate_header()
        if header_nodes:
            decoration = self.document.get_decoration()
            header = decoration.get_header()
            header.extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            decoration = self.document.get_decoration()
            footer = decoration.get_footer()
            footer.extend(footer_nodes)

    def generate_header(self):
        """Return the nodes for the document header.

        This implementation has no header content and always returns None.
        """
        return None

    def generate_footer(self):
        """Return the nodes for the document footer.

        Depending on the `source_link`/`source_url`, `datestamp` and
        `generator` settings, collect a "View document source" link,
        a "Generated on" line and a "Generated by" line.

        Return a list with one paragraph holding the collected nodes,
        or None if there is nothing to show (so that `apply()` does not
        create an empty footer).
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        text = []

        # Link to the source: an explicit URL wins over the relative path
        # computed from the destination to the source.
        if (settings.source_link and settings._source) or settings.source_url:
            if settings.source_url:
                source = settings.source_url
            else:
                source = utils.relative_path(settings._destination,
                                             settings._source)
            text.extend([
                nodes.reference('', 'View document source',
                                refuri=source),
                nodes.Text('.\n')])

        # `settings.datestamp` is used as a `time.strftime()` format string;
        # the time stamp is taken in UTC.
        if settings.datestamp:
            datestamp = time.strftime(settings.datestamp, time.gmtime())
            text.append(nodes.Text('Generated on: ' + datestamp + '.\n'))

        if settings.generator:
            text.extend([
                nodes.Text('Generated by '),
                nodes.reference('', 'Docutils',
                                refuri='https://docutils.sourceforge.io/'),
                nodes.Text(' from '),
                nodes.reference('', 'reStructuredText',
                                refuri='https://docutils.sourceforge.io/'
                                'rst.html'),
                nodes.Text(' source.\n')])

        if not text:
            # E.g. `source_link` is set but the source path is unknown:
            # do not return an empty paragraph.
            return None
        return [nodes.paragraph('', '', *text)]
|
||
|
|
||
|
|
||
|
class ExposeInternals(Transform):

    """
    Expose internal attributes if ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node):
        """Return True unless `node` is a `nodes.Text` instance."""
        is_text = isinstance(node, nodes.Text)
        return not is_text

    def apply(self):
        """Copy the configured internal attributes to node attributes.

        For every non-Text node and every name listed in the
        ``expose_internals`` setting, store the value of the Python
        attribute (if it exists and is not None) under the key
        ``internal:<name>``.
        """
        exposed = self.document.settings.expose_internals
        if not exposed:
            return
        for element in self.document.findall(self.not_Text):
            for name in exposed:
                internal = getattr(element, name, None)
                if internal is None:
                    continue
                element['internal:' + name] = internal
|
||
|
|
||
|
|
||
|
class Messages(Transform):

    """
    Place any system messages generated after parsing into a dedicated section
    of the document.
    """

    default_priority = 860

    def apply(self):
        """Move parentless transform messages into a new section.

        Nothing happens if all messages in `document.transform_messages`
        already have a parent.  Otherwise a section with class
        "system-messages" is appended to the document and the list of
        transform messages is emptied.
        """
        pending = self.document.transform_messages
        orphans = [message for message in pending if not message.parent]
        if not orphans:
            return
        container = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        container.append(nodes.title('', 'Docutils System Messages'))
        container.extend(orphans)
        # empty the list in place
        del pending[:]
        self.document.append(container)
|
||
|
|
||
|
|
||
|
class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.

    Also convert <problematic> nodes referencing removed messages
    to <Text> nodes and remove "System Messages" section if empty.
    """

    default_priority = 870

    def apply(self):
        """Filter the document tree in three passes.

        1. Remove system messages whose level is below the reporter's
           `report_level`, together with their entry in `document.ids`.
        2. Replace <problematic> nodes whose "refid" is no longer a key
           of `document.ids` with their plain text.
        3. Remove sections with class "system-messages" that contain
           just one child.

        Each pass collects its matches in a tuple before changing the
        tree: removing or replacing a node while the lazy `findall()`
        traversal is still running can disturb the traversal.
        """
        document = self.document
        report_level = document.reporter.report_level

        for message in tuple(document.findall(nodes.system_message)):
            if message['level'] < report_level:
                message.parent.remove(message)
                try:  # also remove id-entry
                    del document.ids[message['ids'][0]]
                except IndexError:  # message has no id
                    pass

        for problematic in tuple(document.findall(nodes.problematic)):
            if problematic['refid'] not in document.ids:
                problematic.parent.replace(
                    problematic, nodes.Text(problematic.astext()))

        for section in tuple(document.findall(nodes.section)):
            # presumably the single remaining child is the section title
            if "system-messages" in section['classes'] and len(section) == 1:
                section.parent.remove(section)
|
||
|
|
||
|
|
||
|
class TestMessages(Transform):

    """
    Append all post-parse system messages to the end of the document.

    Used for testing purposes.
    """

    # marker for pytest to ignore this class during test discovery
    __test__ = False

    default_priority = 880

    def apply(self):
        """Append every parentless transform message to the document."""
        root = self.document
        for message in root.transform_messages:
            if message.parent:
                # already placed somewhere in the tree
                continue
            root.append(message)
|
||
|
|
||
|
|
||
|
class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self):
        """Detach all comment nodes if ``strip_comments`` is set."""
        if not self.document.settings.strip_comments:
            return
        # Collect the matches first, then modify the tree.
        comments = tuple(self.document.findall(nodes.comment))
        for comment in comments:
            comment.parent.remove(comment)
|
||
|
|
||
|
|
||
|
class StripClassesAndElements(Transform):

    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self):
        """Strip the configured elements, then the configured class values."""
        settings = self.document.settings

        if settings.strip_elements_with_classes:
            self.strip_elements = set(settings.strip_elements_with_classes)
            # Collect the matches in a tuple before removing anything:
            # removing the current node corrupts the running iterator.
            doomed = tuple(self.document.findall(self.check_classes))
            for element in doomed:
                element.parent.remove(element)

        unwanted = settings.strip_classes
        if not unwanted:
            return
        for element in self.document.findall(nodes.Element):
            classes = element['classes']
            for class_value in unwanted:
                # drop (the first occurrence of) the value, if present
                if class_value in classes:
                    classes.remove(class_value)

    def check_classes(self, node):
        """Return True if `node` is an Element with a class to be stripped.

        Relies on `self.strip_elements`, which is set by `apply()`.
        """
        if isinstance(node, nodes.Element):
            return any(class_value in self.strip_elements
                       for class_value in node['classes'])
        return False
|
||
|
|
||
|
|
||
|
class SmartQuotes(Transform):

    """
    Replace ASCII quotation marks with typographic form.

    Also replace multiple dashes with em-dash/en-dash characters.
    """

    default_priority = 855

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Do not apply "smartquotes" to instances of these block-level nodes."""

    literal_nodes = (nodes.FixedTextElement, nodes.Special,
                     nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Do not apply smartquotes to instances of these inline nodes."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode):
        super().__init__(document, startnode=startnode)
        # languages for which a "no smart quotes" warning was already issued
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """Yield a ``(texttype, nodetext)`` tuple for each node in `txtnodes`.

        Interface to ``smartquotes.educate_tokens()``.  The text type is
        'literal' if the parent or grandparent of the Text node is one of
        `self.literal_nodes`, else 'plain'.
        """
        for txtnode in txtnodes:
            container = txtnode.parent
            if (isinstance(container, self.literal_nodes)
                    or isinstance(container.parent, self.literal_nodes)):
                yield 'literal', str(txtnode)
                continue
            # SmartQuotes uses backslash escapes instead of null-escapes
            # Insert backslashes before escaped "active" characters.
            escaped = re.sub('(?<=\x00)([-\\\'".`])', r'\\\1', str(txtnode))
            yield 'plain', escaped

    def apply(self):
        """Educate quotes in all text blocks if ``smart_quotes`` is set."""
        settings = self.document.settings
        smart_quotes = settings.setdefault('smart_quotes', False)
        if not smart_quotes:
            return
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            # setting value has no `startswith()` method (not a string)
            alternative = False

        document_language = settings.language_code
        lc_smartquotes = settings.smartquotes_locales
        if lc_smartquotes:
            smartquotes.smartchars.quotes.update(dict(lc_smartquotes))
        known_quotes = smartquotes.smartchars.quotes

        # "Educate" quotes in normal text. Handle each block of text
        # (TextElement node) as a unit to keep context around inline nodes:
        for block in self.document.findall(nodes.TextElement):
            # skip preformatted text blocks and special elements;
            # nested TextElements are not "block-level" elements:
            if (isinstance(block, self.nodes_to_skip)
                    or isinstance(block.parent, nodes.TextElement)):
                continue

            # list of text nodes in the "text block":
            txtnodes = [
                txtnode for txtnode in block.findall(nodes.Text)
                if not isinstance(txtnode.parent, nodes.option_string)]

            # language: use typographical quotes for language "lang"
            lang = block.get_language_code(document_language)
            # use alternative form if `smart-quotes` setting starts with "alt":
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang = lang + '-x-altquot'
            # drop unsupported subtags (take the first supported variant):
            supported = next(
                (tag for tag in utils.normalize_language_tag(lang)
                 if tag in known_quotes),
                None)
            if supported is not None:
                lang = supported
            else:  # language not supported -- keep ASCII quotes
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=block)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # (see "utils/smartquotes.py" for the attribute setting)
            teacher = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action, language=lang)

            for old_text, new_text in zip(txtnodes, teacher):
                old_text.parent.replace(old_text, nodes.Text(new_text))

        self.unsupported_languages.clear()
|