1655 lines
64 KiB
Python
1655 lines
64 KiB
Python
"""
|
|
pygments.lexers.markup
|
|
~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Lexers for non-HTML markup languages.
|
|
|
|
:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
import re
|
|
|
|
from pygments.lexers.html import XmlLexer
|
|
from pygments.lexers.javascript import JavascriptLexer
|
|
from pygments.lexers.css import CssLexer
|
|
from pygments.lexers.lilypond import LilyPondLexer
|
|
from pygments.lexers.data import JsonLexer
|
|
|
|
from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
|
|
using, this, do_insertions, default, words
|
|
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
|
|
Number, Punctuation, Generic, Other, Whitespace, Literal
|
|
from pygments.util import get_bool_opt, ClassNotFound
|
|
|
|
__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
|
|
'MozPreprocHashLexer', 'MozPreprocPercentLexer',
|
|
'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
|
|
'MozPreprocCssLexer', 'MarkdownLexer', 'OrgLexer', 'TiddlyWiki5Lexer',
|
|
'WikitextLexer']
|
|
|
|
|
|
class BBCodeLexer(RegexLexer):
    """
    Lexer for BBCode and BBCode-like bracket markup.

    Text outside of ``[...]`` is emitted as plain text; inside a tag the
    tag name, attribute names, ``=`` signs and values are highlighted.
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']
    url = 'https://www.bbcode.org/'
    version_added = '0.6'

    tokens = {
        # Outside of any tag.
        'root': [
            # everything up to the next opening bracket
            (r'[^[]+', Text),
            # "[name" or "[/name" opens a tag and switches to the tag state
            (r'\[/?\w+', Keyword, 'tag'),
            # a "[" that does not start a tag is ordinary text
            (r'\[', Text),
        ],
        # Between the tag name and the closing "]".
        'tag': [
            (r'\s+', Text),
            # name=value pair
            (
                r'(\w+)(=)("?[^\s"\]]+"?)',
                bygroups(Name.Attribute, Operator, String),
            ),
            # bare "=value" argument, as in [color=green]
            (
                r'(=)("?[^\s"\]]+"?)',
                bygroups(Operator, String),
            ),
            # "]" closes the tag and returns to the previous state
            (r'\]', Keyword, '#pop'),
        ],
    }
|
|
|
|
|
|
class MoinWikiLexer(RegexLexer):
    """
    Lexer for the wiki markup used by MoinMoin and Trac.
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    url = 'https://moinmo.in'
    version_added = '0.7'

    # ``^``/``$`` anchor at line boundaries; matching ignores case.
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            # a line starting with "#" is a comment
            (r'^#.*$', Comment),
            # "!" escapes the word that follows it
            (r'(!)(\S+)', bygroups(Keyword, Text)),
            # "= Title =" headings, optionally followed by "#anchor";
            # the title text itself is lexed recursively
            (
                r'^(=+)([^=]+)(=+)(\s*#.+)?$',
                bygroups(Generic.Heading, using(this), Generic.Heading, String),
            ),
            # "{{{" opens a literal block, optionally with a "#!" line
            (
                r'(\{\{\{)(\n#!.+)?',
                bygroups(Name.Builtin, Name.Namespace),
                'codeblock',
            ),
            # inline formatting markers
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),
            # indented bullet list items
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            # indented items labelled with up to five letters and a dot
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # [[Macro(...)]]
            (r'\[\[\w+.*?\]\]', Keyword),
            # [target optional label]
            (
                r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
                bygroups(Keyword, String, Keyword),
            ),
            # horizontal rule: four or more dashes on their own line
            (r'^----+$', Keyword),
            # runs of characters that cannot start any rule above
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            # single-character fallback
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # Trac permits nested literal blocks (MoinMoin does not)
            (r'\{\{\{', Text, '#push'),
            # bulk content without braces
            (r'[^{}]+', Comment.Preproc),
            # a lone "{" or "}" inside the block
            (r'.', Comment.Preproc),
        ],
    }
|
|
|
|
|
|
class RstLexer(RegexLexer):
    """
    For reStructuredText markup.

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    url = 'https://docutils.sourceforge.io/rst.html'
    aliases = ['restructuredtext', 'rst', 'rest']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    version_added = '0.7'
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """
        Callback for ``sourcecode``/``code``/``code-block`` directives.

        Groups 1-7 are the directive header (marker, spacing, directive
        name, ``::``, spacing, language name, blank line); group 8 is the
        indentation of the first code line and groups 9-11 are the code.

        Yields ``(index, tokentype, value)`` tuples whose indices are
        positions in the lexed document.
        """
        from pygments.lexers import get_lexer_by_name

        # directive header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # look up a lexer for the named language, if requested and available
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))
        # groups 8-11 are contiguous, so the code starts where group 8 does
        code_start = match.start(8)

        # no lexer for this language: treat the block as a plain string
        if lexer is None:
            yield code_start, String, code
            return

        # Strip the common indentation before handing the code to the
        # sub-lexer and remember where to re-insert it as plain text.
        ins = []
        stripped = []
        stripped_len = 0
        for line in code.splitlines(True):
            if len(line) > indention_size:
                ins.append((stripped_len, [(0, Text, line[:indention_size])]))
                line = line[indention_size:]
            stripped.append(line)
            stripped_len += len(line)

        # The merged stream is numbered from 0; shift it so that the
        # indices refer to the document rather than to the code block.
        merged = do_insertions(
            ins, lexer.get_tokens_unprocessed(''.join(stripped)))
        for index, token, value in merged:
            yield code_start + index, token, value

    # from docutils.parsers.rst.states
    closers = '\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (rf'((?=$)|(?=[-/:.,; \n\x00{re.escape(unicode_delimiters)}{re.escape(closers)}]))')

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment),
            # Field list marker
            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
             bygroups(Text, Name.Class, Text)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),   # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        """Read the ``handlecodeblocks`` option, then initialise the lexer."""
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        """
        Guess whether *text* is reStructuredText.

        Returns 0.3 when the text starts with ``..`` (but not ``...``) and
        0.5 when the first two lines look like a title with a ``-`` or ``=``
        underline of the same length; otherwise returns ``None``.
        """
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and              # has two lines
                p1 * 2 + 1 == p2 and     # they are the same length
                text[p1+1] in '-=' and   # the next line both starts and ends with
                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5
|
|
|
|
|
|
class TexLexer(RegexLexer):
    """
    Lexer for TeX and LaTeX sources.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']
    url = 'https://tug.org'
    version_added = ''

    tokens = {
        # Rules shared by text mode and math mode.
        'general': [
            # "%" comment up to and including the newline
            (r'%.*?\n', Comment),
            # grouping braces
            (r'[{}]', Name.Builtin),
            # alignment tab, subscript and superscript characters
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            # math delimiters: \[ and $$ open display math, \( and $ inline
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            # control word or control symbol; may take [...] and "*"
            (r'\\([a-zA-Z@_:]+|\S?)', Keyword, 'command'),
            # backslash at the very end of a line
            (r'\\$', Keyword),
            include('general'),
            # anything that is not special in text mode
            (r'[^\\$%&_^{}]+', Text),
        ],
        # Rules shared by inline and display math.
        'math': [
            (r'\\([a-zA-Z]+|\S?)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            # a single "$" does not end display math
            (r'\$', Name.Builtin),
            include('math'),
        ],
        # Directly after a command name.
        'command': [
            # optional argument in square brackets
            (r'\[.*?\]', Name.Attribute),
            # starred variant
            (r'\*', Keyword),
            # anything else ends the command
            default('#pop'),
        ],
    }

    def analyse_text(text):
        """Return ``True`` if *text* starts with a typical TeX preamble command."""
        openers = ("\\documentclass", "\\input", "\\documentstyle", "\\relax")
        if text.startswith(openers):
            return True
|
|
|
|
|
|
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1-9]', '*.man', '*.1p', '*.3pm']
    mimetypes = ['application/x-troff', 'text/troff']
    url = 'https://www.gnu.org/software/groff'
    version_added = '0.6'

    tokens = {
        'root': [
            # a request line: "." followed by the request name
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        """
        Guess whether *text* is roff source.

        Returns ``False`` when the text does not start with ``.``, ``True``
        when it starts with a ``.\\"`` comment or a ``.TH `` request, and
        0.9 when the dot is followed by two alphanumeric characters and a
        whitespace character; otherwise returns ``None``.
        """
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Slice instead of indexing: ``text[3]`` raises IndexError for
        # inputs shorter than four characters (e.g. ".ab").
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9
|
|
|
|
|
|
class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor directives introduced by ``#``.

    Only the directive lines are highlighted; every other line is emitted
    as `Other`.
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    # Directive names accepted directly after the marker.
    _directives = (
        'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
        'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
        'include', 'includesubst', 'error')

    tokens = {
        'root': [
            # marker at the start of a line: lex the directive name first
            # ('exprstart'), then its expression ('expr')
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            # all other content passes through untouched
            (r'.+', Other),
        ],
        'exprstart': [
            # "literal" takes the rest of the line verbatim and leaves
            # both directive states
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words(_directives), Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            # __NAME__ style variables
            (r'__\w+?__', Name.Variable),
            # @NAME@ style substitutions
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            # end of line ends the directive
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }
|
|
|
|
|
|
class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor directives introduced by ``%``.

    Only the directive lines are highlighted; every other line is emitted
    as `Other`.
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    tokens = {
        # Only the root state is redefined here, with "%" as the marker.
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }
|
|
|
|
|
|
class MozPreprocXulLexer(DelegatingLexer):
    """
    Delegating lexer that runs `MozPreprocHashLexer` over the input and
    highlights the data it leaves unlexed with the `XmlLexer`.
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        """Pair `XmlLexer` (root) with `MozPreprocHashLexer` (language)."""
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
|
|
|
|
|
|
class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Delegating lexer that runs `MozPreprocHashLexer` over the input and
    highlights the data it leaves unlexed with the `JavascriptLexer`.
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        """Pair `JavascriptLexer` (root) with `MozPreprocHashLexer` (language)."""
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
|
|
|
|
|
|
class MozPreprocCssLexer(DelegatingLexer):
    """
    Delegating lexer that runs `MozPreprocPercentLexer` over the input and
    highlights the data it leaves unlexed with the `CssLexer`.
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []
    url = 'https://firefox-source-docs.mozilla.org/build/buildsystem/preprocessor.html'
    version_added = '2.0'

    def __init__(self, **options):
        """Pair `CssLexer` (root) with `MozPreprocPercentLexer` (language)."""
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
|
|
|
|
|
|
class MarkdownLexer(RegexLexer):
    """
    For Markdown markup.

    The boolean option ``handlecodeblocks`` (default ``True``) controls
    whether fenced code blocks that name a language are highlighted with
    the lexer for that language.
    """
    name = 'Markdown'
    url = 'https://daringfireball.net/projects/markdown/'
    aliases = ['markdown', 'md']
    filenames = ['*.md', '*.markdown']
    mimetypes = ["text/x-markdown"]
    version_added = '2.2'
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        Callback for fenced code blocks that name a language.

        Uses the named groups ``initial``, ``lang``, ``afterlang``
        (``whitespace`` + ``extra``), ``newline``, ``code`` and
        ``terminator`` of the matching rule.  Yields
        ``(index, tokentype, value)`` tuples whose indices are positions
        in the lexed document.
        """
        from pygments.lexers import get_lexer_by_name

        yield match.start('initial'), String.Backtick, match.group('initial')
        yield match.start('lang'), String.Backtick, match.group('lang')
        if match.group('afterlang') is not None:
            yield match.start('whitespace'), Whitespace, match.group('whitespace')
            yield match.start('extra'), Text, match.group('extra')
        yield match.start('newline'), Whitespace, match.group('newline')

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group('lang').strip())
            except ClassNotFound:
                pass
        code = match.group('code')
        code_start = match.start('code')
        if lexer is None:
            # no lexer for this language: emit the code as a plain string
            yield code_start, String, code
        else:
            # The sub-lexer numbers its tokens from 0; shift them so the
            # indices refer to the document, like every other token here.
            for index, token, value in lexer.get_tokens_unprocessed(code):
                yield code_start + index, token, value

        yield match.start('terminator'), String.Backtick, match.group('terminator')

    tokens = {
        'root': [
            # heading with '#' prefix (atx-style)
            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
            # subheading with '#' prefix (atx-style)
            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
            # heading with '=' underlines (Setext-style)
            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # subheading with '-' underlines (Setext-style)
            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Whitespace, Keyword, Keyword, using(this, state='inline'))),
            # bulleted list
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Whitespace, Keyword, Whitespace, using(this, state='inline'))),
            # numbered list
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Whitespace, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # code block fenced by 3 backticks
            (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick),
            # code block with language
            # Some tools include extra stuff after the language name, just
            # highlight that as text. For example: https://docs.enola.dev/use/execmd
            (r'''(?x)
              ^(?P<initial>\s*```)
              (?P<lang>[\w\-]+)
              (?P<afterlang>
                 (?P<whitespace>[^\S\n]+)
                 (?P<extra>.*))?
              (?P<newline>\n)
              (?P<code>(.|\n)*?)
              (?P<terminator>^\s*```$\n)
              ''',
             _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # inline code
            (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)),
            # warning: the following rules eat outer tags.
            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
            # bold fenced by '**'
            (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)),
            # bold fenced by '__'
            (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)),
            # italics fenced by '*'
            (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)),
            # italics fenced by '_'
            (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)),
            # strikethrough
            (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
            # reference-style links, e.g.:
            #   [an example][id]
            #   [id]: http://example.com/
            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
             bygroups(Text, Name.Label, Text, Name.Attribute)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        """Read the ``handlecodeblocks`` option, then initialise the lexer."""
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
|
|
|
|
class OrgLexer(RegexLexer):
    """
    Lexer for Org Mode documents.
    """
    name = 'Org Mode'
    url = 'https://orgmode.org'
    aliases = ['org', 'orgmode', 'org-mode']
    filenames = ['*.org']
    mimetypes = ["text/org"]
    version_added = '2.18'

    def _inline(start, end):
        """
        Build the pattern for inline markup delimited by *start* and *end*.

        The delimiters must not touch a word character on the outside, and
        the content may span lines but not a blank line.
        """
        return rf'(?<!\w){start}(.|\n(?!\n))+?{end}(?!\w)'

    tokens = {
        'root': [
            # "# " comment line
            (r'^# .*', Comment.Single),

            # Headings carrying a COMMENT / DONE / TODO keyword; one star
            # is a heading, two or more stars a subheading
            (r'^(\* )(COMMENT)( .*)',
             bygroups(Generic.Heading, Comment.Preproc, Generic.Heading)),
            (r'^(\*\*+ )(COMMENT)( .*)',
             bygroups(Generic.Subheading, Comment.Preproc, Generic.Subheading)),
            (r'^(\* )(DONE)( .*)',
             bygroups(Generic.Heading, Generic.Deleted, Generic.Heading)),
            (r'^(\*\*+ )(DONE)( .*)',
             bygroups(Generic.Subheading, Generic.Deleted, Generic.Subheading)),
            (r'^(\* )(TODO)( .*)',
             bygroups(Generic.Heading, Generic.Error, Generic.Heading)),
            (r'^(\*\*+ )(TODO)( .*)',
             bygroups(Generic.Subheading, Generic.Error, Generic.Subheading)),

            # Plain headings with optional trailing :tags:
            (r'^(\* .+?)( :[a-zA-Z0-9_@:]+:)?$',
             bygroups(Generic.Heading, Generic.Emph)),
            (r'^(\*\*+ .+?)( :[a-zA-Z0-9_@:]+:)?$',
             bygroups(Generic.Subheading, Generic.Emph)),

            # Bullet items, with optional checkbox and "term ::" part
            (r'^(?:( *)([+-] )|( +)(\* ))(\[[ X-]\])?(.+ ::)?',
             bygroups(Whitespace, Keyword, Whitespace, Keyword,
                      Generic.Prompt, Name.Label)),

            # Numbered items, with optional "[@n]" counter
            (r'^( *)([0-9]+[.)])( \[@[0-9]+\])?',
             bygroups(Whitespace, Keyword, Generic.Emph)),

            # "#+begin: ... #+end:" dynamic blocks; body lexed recursively
            (r'(?i)^( *#\+begin: *)((?:.|\n)*?)(^ *#\+end: *$)',
             bygroups(Operator.Word, using(this), Operator.Word)),

            # "#+begin_comment ... #+end_comment"
            (r'(?i)^( *#\+begin_comment *\n)((?:.|\n)*?)(^ *#\+end_comment *$)',
             bygroups(Operator.Word, Comment.Multiline, Operator.Word)),

            # "#+begin_src ... #+end_src"
            # TODO: language-dependent syntax highlighting (see Markdown lexer)
            (r'(?i)^( *#\+begin_src .*)((?:.|\n)*?)(^ *#\+end_src *$)',
             bygroups(Operator.Word, Text, Operator.Word)),

            # Any other "#+begin_x ... #+end_x" block
            (r'(?i)^( *#\+begin_\w+)( *\n)((?:.|\n)*?)(^ *#\+end_\w+)( *$)',
             bygroups(Operator.Word, Whitespace, Text, Operator.Word, Whitespace)),

            # "#+KEYWORD: value" lines
            (r'^(#\+\w+:)(.*)$', bygroups(Name.Namespace, Text)),

            # ":NAME: ... :END:" drawers
            (r'(?i)^( *:\w+: *\n)((?:.|\n)*?)(^ *:end: *$)',
             bygroups(Name.Decorator, Comment.Special, Name.Decorator)),

            # "\\" at the end of a line
            (r'\\\\$', Operator),

            # Planning lines
            (r'(?i)^( *(?:DEADLINE|SCHEDULED): )(<.+?> *)$',
             bygroups(Generic.Error, Literal.Date)),
            (r'(?i)^( *CLOSED: )(\[.+?\] *)$',
             bygroups(Generic.Deleted, Literal.Date)),

            # Inline emphasis
            (_inline(r'\*', r'\*+'), Generic.Strong),     # *bold*
            (_inline(r'/', r'/'), Generic.Emph),          # /italic/
            (_inline(r'=', r'='), String),                # =verbatim=  (TODO token)
            (_inline(r'~', r'~'), String),                # ~code~
            (_inline(r'\+', r'\+'), Generic.Deleted),     # +strikethrough+
            (_inline(r'_', r'_+'), Generic.EmphStrong),   # _underline_

            # <timestamps>
            (r'<.+?>', Literal.Date),
            # {{{macros}}}
            (r'\{\{\{.+?\}\}\}', Comment.Preproc),
            # [fn:footnotes]
            (r'(?<!\[)\[fn:.+?\]', Name.Tag),
            # [[target][description]], [[target]] and <<anchor>>
            (r'(?s)(\[\[)(.*?)(\]\[)(.*?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation, Name.Tag,
                      Punctuation)),
            (r'(?s)(\[\[)(.+?)(\]\])',
             bygroups(Punctuation, Name.Attribute, Punctuation)),
            (r'(<<)(.+?)(>>)',
             bygroups(Punctuation, Name.Attribute, Punctuation)),

            # Table rows
            (r'^( *)(\|[ -].*?[ -]\|)$', bygroups(Whitespace, String)),

            # Fallback: runs of unremarkable characters, then one
            # special character at a time
            (r'[^#*+\-0-9:\\/=~_<{\[|\n]+', Text),
            (r'[#*+\-0-9:\\/=~_<{\[|\n]', Text),
        ],
    }
|
|
|
|
class TiddlyWiki5Lexer(RegexLexer):
    """
    For TiddlyWiki5 markup.

    The boolean option ``handlecodeblocks`` (default ``True``) controls
    whether fenced code blocks and ``<style>`` blocks are highlighted with
    the lexer for the embedded language.
    """
    name = 'tiddler'
    url = 'https://tiddlywiki.com/#TiddlerFiles'
    aliases = ['tid']
    filenames = ['*.tid']
    mimetypes = ["text/vnd.tiddlywiki"]
    version_added = '2.7'
    flags = re.MULTILINE

    def _lex_embedded(self, lang, code, start):
        """
        Yield tokens for *code*, which begins at document position *start*.

        When ``handlecodeblocks`` is enabled and a lexer named *lang*
        exists, *code* is lexed with it; otherwise it is emitted as one
        `String` token.  All indices are shifted by *start*.
        """
        from pygments.lexers import get_lexer_by_name

        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(lang)
            except ClassNotFound:
                pass

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield start, String, code
            return

        # the sub-lexer numbers its tokens from 0
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield start + index, token, value

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        yield from self._lex_embedded(
            match.group(2).strip(), match.group(4), match.start(4))

        # The closing fence is emitted on every path; returning early
        # without it would drop text from the token stream.
        yield match.start(5), String, match.group(5)

    def _handle_cssblock(self, match):
        """
        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
        """
        # section header
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)

        yield from self._lex_embedded('css', match.group(3), match.start(3))

        # The closing tag is emitted on every path (see _handle_codeblock).
        yield match.start(4), String, match.group(4)

    tokens = {
        'root': [
            # title in metadata section
            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
            # headings
            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # bulleted or numbered lists or single-line block quotes
            # (can be mixed)
            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # multi-line block quotes
            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
            # table header
            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
            # table footer or caption
            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
            # table class
            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
            # definitions
            (r'^(;.*)$', bygroups(Generic.Strong)),
            # text block
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
            # CSS style block
            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),

            include('keywords'),
            include('inline'),
        ],
        'keywords': [
            (words((
                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
                'title', 'type'), prefix=r'^', suffix=r'\b'),
             Keyword),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # created or modified date
            (r'\d{17}', Number.Integer),
            # italics
            (r'(\s)(//[^/]+//)((?=\W|\n))',
             bygroups(Text, Generic.Emph, Text)),
            # superscript
            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
            # subscript
            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
            # underscore
            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
            # bold
            (r"(\s)(''[^']+'')((?=\W|\n))",
             bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # TiddlyWiki variables
            (r'<<[^>]+>>', Name.Tag),
            (r'\$\$[^$]+\$\$', Name.Tag),
            (r'\$\([^)]+\)\$', Name.Tag),
            # TiddlyWiki style or class
            (r'^@@.*$', Name.Tag),
            # HTML tags
            (r'</?[^>]+>', Name.Tag),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # HTML escaped symbols
            (r'&\S*?;', String.Regex),
            # Wiki links
            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
            # External links
            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
            # Transclusion
            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
            # URLs
            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),

            # general text, must come last!
            (r'[\w]+', Text),
            (r'.', Text)
        ],
    }

    def __init__(self, **options):
        """Read the ``handlecodeblocks`` option, then initialise the lexer."""
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
|
|
|
|
|
|
class WikitextLexer(RegexLexer):
|
|
"""
|
|
For MediaWiki Wikitext.
|
|
|
|
Parsing Wikitext is tricky, and results vary between different MediaWiki
|
|
installations, so we only highlight common syntaxes (built-in or from
|
|
popular extensions), and also assume templates produce no unbalanced
|
|
syntaxes.
|
|
"""
|
|
name = 'Wikitext'
|
|
url = 'https://www.mediawiki.org/wiki/Wikitext'
|
|
aliases = ['wikitext', 'mediawiki']
|
|
filenames = []
|
|
mimetypes = ['text/x-wiki']
|
|
version_added = '2.15'
|
|
flags = re.MULTILINE
|
|
|
|
def nowiki_tag_rules(tag_name):
    """
    Build the state rules for the body of a nowiki-like tag *tag_name*.

    The closing tag pops the state; everything before it is lexed with
    the 'entity' and 'text' rule sets.
    """
    closing_tag = (
        rf'(?i)(</)({tag_name})(\s*)(>)',
        bygroups(Punctuation, Name.Tag, Whitespace, Punctuation),
        '#pop',
    )
    return [closing_tag, include('entity'), include('text')]
|
|
|
|
def plaintext_tag_rules(tag_name):
    """
    Build the state rules for a tag *tag_name* whose body is plain text.

    A single rule consumes everything up to and including the closing
    tag and pops the state.
    """
    pattern = rf'(?si)(.*?)(</)({tag_name})(\s*)(>)'
    action = bygroups(Text, Punctuation, Name.Tag, Whitespace, Punctuation)
    return [(pattern, action, '#pop')]
|
|
|
|
def delegate_tag_rules(tag_name, lexer, **lexer_kwargs):
    """
    Build the state rules for a tag *tag_name* whose body is lexed by *lexer*.

    The closing tag pops the state; the text in front of it is handed to
    *lexer* (created with *lexer_kwargs*).
    """
    closing_tag = (
        rf'(?i)(</)({tag_name})(\s*)(>)',
        bygroups(Punctuation, Name.Tag, Whitespace, Punctuation),
        '#pop',
    )
    body = (
        rf'(?si).+?(?=</{tag_name}\s*>)',
        using(lexer, **lexer_kwargs),
    )
    return [closing_tag, body]
|
|
|
|
def text_rules(token):
    """
    Build rules that emit any text as *token*.

    Words and runs of non-newline whitespace are consumed in bulk; any
    other character (including a newline) is consumed one at a time.
    """
    patterns = (r'\w+', r'[^\S\n]+', r'(?s).')
    return [(pattern, token) for pattern in patterns]
|
|
|
|
def handle_syntaxhighlight(self, match, ctx):
    """
    Callback for the attributes and body of a syntaxhighlight-style tag.

    ``match.group()`` holds the text from after the tag name up to the
    closing tag: attributes, ``>``, then the content.  The attributes are
    lexed in the 'attr' state; the content is lexed with the lexer named
    by the last ``lang=`` attribute, or emitted as `Text` when there is
    none or it is unknown.

    Yields ``(index, tokentype, value)`` tuples whose indices are
    positions in the lexed document.
    """
    attr_content = match.group()
    base = match.start()

    # Find the ">" that ends the opening tag, skipping any ">" that closes
    # a comment ("-->").  The -1 check must come first: with index == -1
    # the slice below reads the end of the string, and if that happens to
    # be "--" the search would restart from 0 and never terminate.
    start = 0
    while True:
        index = attr_content.find('>', start)
        if index == -1 or attr_content[index-2:index] != '--':
            break
        start = index + 1

    if index == -1:
        # No tag end
        for pos, token, value in self.get_tokens_unprocessed(
                attr_content, stack=['root', 'attr']):
            yield base + pos, token, value
        return

    attr = attr_content[:index]
    for pos, token, value in self.get_tokens_unprocessed(
            attr, stack=['root', 'attr']):
        yield base + pos, token, value
    yield base + index, Punctuation, '>'

    content = attr_content[index+1:]
    content_start = base + index + 1
    lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)

    lexer = None
    if lang_match:
        # imported here so it is only needed when a language is named
        from pygments.lexers import get_lexer_by_name

        # Pick the last match in case of multiple matches
        lang = lang_match[-1][1]
        try:
            lexer = get_lexer_by_name(lang)
        except ClassNotFound:
            pass

    if lexer is None:
        yield content_start, Text, content
    else:
        # the sub-lexer numbers its tokens from 0
        for pos, token, value in lexer.get_tokens_unprocessed(content):
            yield content_start + pos, token, value
|
|
|
|
def handle_score(self, match, ctx):
    """
    Lex the attributes and body of a ``<score>`` tag.

    `match` holds the text between the tag name and the closing tag, i.e.
    ``attributes>content``.  The attributes are lexed with this lexer's
    'attr' state.  The content is handed to `LilyPondLexer` unless the last
    ``lang`` attribute names something other than ``lilypond``, in which
    case it is emitted as a single `Text` token.

    Yields ``(position, tokentype, value)`` tuples; positions are offset by
    ``match.start()``.
    """
    attr_content = match.group()
    base = match.start()

    # Find the '>' ending the opening tag, skipping comment ends ('-->').
    # The loop has to stop as soon as find() reports -1: slicing with a
    # negative index would inspect the tail of the text, and a tail such
    # as '--x' would look like a comment end and restart the search forever.
    index = attr_content.find('>')
    while index >= 2 and attr_content[index-2:index] == '--':
        index = attr_content.find('>', index + 1)

    if index == -1:
        # No tag end
        for pos, token, value in self.get_tokens_unprocessed(
                attr_content, stack=['root', 'attr']):
            yield base + pos, token, value
        return

    attr = attr_content[:index]
    content = attr_content[index+1:]
    content_start = base + index + 1

    for pos, token, value in self.get_tokens_unprocessed(attr, stack=['root', 'attr']):
        yield base + pos, token, value
    yield base + index, Punctuation, '>'

    lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
    # Pick the last match in case of multiple matches
    lang = lang_match[-1][1] if lang_match else 'lilypond'

    if lang == 'lilypond':  # Case sensitive
        for pos, token, value in LilyPondLexer().get_tokens_unprocessed(content):
            yield content_start + pos, token, value
    else:  # ABC
        # FIXME: Use ABC lexer in the future
        yield content_start, Text, content
|
|
|
|
# Regex fragments and keyword sets that are interpolated into the patterns
# of the `tokens` table below.

# Characters allowed in page titles, for use inside a character class.
# a-z removed to prevent linter from complaining, REMEMBER to use (?i)
title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'
# A non-newline space: tab, the `&nbsp;` entity and its numeric forms, or a
# literal space character.  '#' is escaped because this fragment is also
# embedded in verbose (?x) patterns.
nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'
# First component of a link target (used right after the protocol).
link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'
# Any further character of a link target.
link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'
# Magic words of the form __WORD__ that are matched case-insensitively.
double_slashes_i = {
    '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
    '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
}
# Magic words of the form __WORD__ that are matched case-sensitively.
double_slashes = {
    '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
    '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
    '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
}
# URL prefixes recognised in raw and bracketed external links.
protocols = {
    'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
    'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
    'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
    'worldwind://', 'xmpp:', '//',
}
# Same as `protocols`, without the protocol-relative '//' prefix.
non_relative_protocols = protocols - {'//'}
# HTML tag names that get the ordinary tag treatment.
html_tags = {
    'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
    'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
    'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
    'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
    'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
}
# Parser (extension) tag names; those without a dedicated rule in `tokens`
# are handled by the generic "Other parser tags" rule.
parser_tags = {
    'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
    'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
    'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
    'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
    'maplink', 'ce', 'references',
}
# Language variant codes accepted in LanguageConverter markup (-{ ... }-).
variant_langs = {
    # ZhConverter.php
    'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
    # WuuConverter.php
    'wuu', 'wuu-hans', 'wuu-hant',
    # UzConverter.php
    'uz', 'uz-latn', 'uz-cyrl',
    # TlyConverter.php
    'tly', 'tly-cyrl',
    # TgConverter.php
    'tg', 'tg-latn',
    # SrConverter.php
    'sr', 'sr-ec', 'sr-el',
    # ShiConverter.php
    'shi', 'shi-tfng', 'shi-latn',
    # ShConverter.php
    'sh-latn', 'sh-cyrl',
    # KuConverter.php
    'ku', 'ku-arab', 'ku-latn',
    # IuConverter.php
    'iu', 'ike-cans', 'ike-latn',
    # GanConverter.php
    'gan', 'gan-hans', 'gan-hant',
    # EnConverter.php
    'en', 'en-x-piglatin',
    # CrhConverter.php
    'crh', 'crh-cyrl', 'crh-latn',
    # BanConverter.php
    'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
}
# Magic variables ({{NAME}}) matched case-insensitively.
magic_vars_i = {
    'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
}
# Magic variables ({{NAME}}) matched case-sensitively.
magic_vars = {
    '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
    'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
    'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
    'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
    'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
    'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
    'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
    'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
    'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
    'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
    'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
    'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
    'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
    'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
    'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
}
# Parser functions ({{NAME:...}}) matched case-insensitively.  Entries are
# joined into a regex verbatim, hence the pre-escaped '#'.
parser_functions_i = {
    'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
    'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
    'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
    'URLENCODE',
}
# Parser functions ({{NAME:...}}) matched case-sensitively.
parser_functions = {
    'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
    'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
    'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
    'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
    'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
    'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
    'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
    'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
    'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
    'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
    'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
}
|
|
|
|
# State table of the lexer.  'root' handles line-level constructs and
# includes 'list', 'inline' and 'text'; the other states cover the inside
# of links, templates, tables, tags, attribute values and quote markup.
tokens = {
    'root': [
        # Redirects
        (r"""(?xi)
            (\A\s*?)(\#REDIRECT:?) # may contain a colon
            (\s+)(\[\[) (?=[^\]\n]* \]\]$)
        """,
         bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
        # Subheadings
        (r'^(={2,6})(.+?)(\1)(\s*$\n)',
         bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
        # Headings
        (r'^(=.+?=)(\s*$\n)',
         bygroups(Generic.Heading, Whitespace)),
        # Double-slashed magic words
        (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
        (words(double_slashes), Name.Function.Magic),
        # Raw URLs
        (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
         link_address, link_char_class), Name.Label),
        # Magic links
        (rf'\b(?:RFC|PMID){nbsp_char}+[0-9]+\b',
         Name.Function.Magic),
        (r"""(?x)
            \bISBN {nbsp_char}
            (?: 97[89] {nbsp_dash}? )?
            (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
            [0-9Xx]\b
        """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
        include('list'),
        include('inline'),
        include('text'),
    ],
    'redirect-inner': [
        (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
        (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
        (rf'(?i)[{title_char}]+', Name.Tag),
    ],
    'list': [
        # Description lists
        (r'^;', Keyword, 'dt'),
        # Ordered lists, unordered lists and indents
        (r'^[#:*]+', Keyword),
        # Horizontal rules
        (r'^-{4,}', Keyword),
    ],
    'inline': [
        # Signatures
        (r'~{3,5}', Keyword),
        # Entities
        include('entity'),
        # Bold & italic
        (r"('')(''')(?!')", bygroups(Generic.Emph,
         Generic.EmphStrong), 'inline-italic-bold'),
        (r"'''(?!')", Generic.Strong, 'inline-bold'),
        (r"''(?!')", Generic.Emph, 'inline-italic'),
        # Comments & parameters & templates
        include('replaceable'),
        # Media links
        (
            r"""(?xi)
            (\[\[)
                (File|Image) (:)
                ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*)
                (?: (\#) ([{}]*?) )?
            """.format(title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label),
            'medialink-inner'
        ),
        # Wikilinks
        (
            r"""(?xi)
            (\[\[)(?!{}) # Should not contain URLs
                (?: ([{}]*) (:))?
                ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([{}]*?) )?
            (\]\])
            """.format('|'.join(protocols), title_char.replace('/', ''),
                       title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation)
        ),
        (
            r"""(?xi)
            (\[\[)(?!{})
                (?: ([{}]*) (:))?
                ((?: [{}] | \{{{{2,3}}[^{{}}]*?\}}{{2,3}} | <!--[\s\S]*?--> )*?)
                (?: (\#) ([{}]*?) )?
                (\|)
            """.format('|'.join(protocols), title_char.replace('/', ''),
                       title_char, f'{title_char}#'),
            bygroups(Punctuation, Name.Namespace, Punctuation,
                     using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation),
            'wikilink-inner'
        ),
        # External links
        (
            r"""(?xi)
            (\[)
                ((?:{}) {} {}*)
                (\s*)
            """.format('|'.join(protocols), link_address, link_char_class),
            bygroups(Punctuation, Name.Label, Whitespace),
            'extlink-inner'
        ),
        # Tables
        (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
         Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
        # HTML tags
        (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
         bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        # <nowiki>
        (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
         Name.Tag), ('tag-nowiki', 'tag-inner')),
        # <pre>
        (r'(?i)(<)(pre)\b', bygroups(Punctuation,
         Name.Tag), ('tag-pre', 'tag-inner')),
        # <categorytree>
        (r'(?i)(<)(categorytree)\b', bygroups(
            Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
        # <hiero>
        (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
         Name.Tag), ('tag-hiero', 'tag-inner')),
        # <math>
        (r'(?i)(<)(math)\b', bygroups(Punctuation,
         Name.Tag), ('tag-math', 'tag-inner')),
        # <chem>
        (r'(?i)(<)(chem)\b', bygroups(Punctuation,
         Name.Tag), ('tag-chem', 'tag-inner')),
        # <ce>
        (r'(?i)(<)(ce)\b', bygroups(Punctuation,
         Name.Tag), ('tag-ce', 'tag-inner')),
        # <charinsert>
        (r'(?i)(<)(charinsert)\b', bygroups(
            Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
        # <templatedata>
        (r'(?i)(<)(templatedata)\b', bygroups(
            Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
        # <gallery>
        (r'(?i)(<)(gallery)\b', bygroups(
            Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
        # <graph>
        # (must match 'graph'; a second 'gallery' pattern here would be
        # shadowed by the rule above and 'tag-graph' could never be entered)
        (r'(?i)(<)(graph)\b', bygroups(
            Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
        # <dynamicpagelist>
        (r'(?i)(<)(dynamicpagelist)\b', bygroups(
            Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
        # <inputbox>
        (r'(?i)(<)(inputbox)\b', bygroups(
            Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
        # <rss>
        (r'(?i)(<)(rss)\b', bygroups(
            Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
        # <imagemap>
        (r'(?i)(<)(imagemap)\b', bygroups(
            Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
        # <syntaxhighlight>
        (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
        # <syntaxhighlight>: Fallback case for self-closing tags
        (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # <source>
        (r'(?i)(</)(source)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
        # <source>: Fallback case for self-closing tags
        (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # <score>
        (r'(?i)(</)(score)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
         bygroups(Punctuation, Name.Tag, handle_score)),
        # <score>: Fallback case for self-closing tags
        (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
            Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
        # Other parser tags
        (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
         bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
        # LanguageConverter markups
        (
            r"""(?xi)
            (-\{{) # Use {{ to escape format()
                ([^|]) (\|)
                (?:
                    (?: ([^;]*?) (=>))?
                    (\s* (?:{variants}) \s*) (:)
                )?
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, Keyword, Punctuation,
                     using(this, state=['root', 'lc-raw']),
                     Operator, Name.Label, Punctuation),
            'lc-inner'
        ),
        # LanguageConverter markups: composite conversion grammar
        (
            r"""(?xi)
            (-\{)
                ([a-z\s;-]*?) (\|)
            """,
            bygroups(Punctuation,
                     using(this, state=['root', 'lc-flag']),
                     Punctuation),
            'lc-raw'
        ),
        # LanguageConverter markups: fallbacks
        (
            r"""(?xi)
            (-\{{) (?!\{{) # Use {{ to escape format()
                (?: (\s* (?:{variants}) \s*) (:))?
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, Name.Label, Punctuation),
            'lc-inner'
        ),
    ],
    'wikilink-name': [
        include('replaceable'),
        (r'[^{<]+', Name.Tag),
        (r'(?s).', Name.Tag),
    ],
    'wikilink-inner': [
        # Quit in case of another wikilink
        (r'(?=\[\[)', Punctuation, '#pop'),
        (r'\]\]', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'medialink-inner': [
        (r'\]\]', Punctuation, '#pop'),
        (r'(\|)([^\n=|]*)(=)',
         bygroups(Punctuation, Name.Attribute, Operator)),
        (r'\|', Punctuation),
        include('inline'),
        include('text'),
    ],
    'quote-common': [
        # Quit in case of link/template endings
        (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
        (r'\n', Text, '#pop'),
    ],
    'inline-italic': [
        include('quote-common'),
        (r"('')(''')(?!')", bygroups(Generic.Emph,
         Generic.Strong), ('#pop', 'inline-bold')),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')),
        (r"''(?!')", Generic.Emph, '#pop'),
        include('inline'),
        include('text-italic'),
    ],
    'inline-bold': [
        include('quote-common'),
        (r"(''')('')(?!')", bygroups(
            Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
        (r"'''(?!')", Generic.Strong, '#pop'),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')),
        include('inline'),
        include('text-bold'),
    ],
    'inline-bold-italic': [
        include('quote-common'),
        (r"('')(''')(?!')", bygroups(Generic.EmphStrong,
         Generic.Strong), '#pop'),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
        include('inline'),
        include('text-bold-italic'),
    ],
    'inline-italic-bold': [
        include('quote-common'),
        (r"(''')('')(?!')", bygroups(
            Generic.EmphStrong, Generic.Emph), '#pop'),
        (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')),
        (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')),
        include('inline'),
        include('text-bold-italic'),
    ],
    'lc-flag': [
        (r'\s+', Whitespace),
        (r';', Punctuation),
        *text_rules(Keyword),
    ],
    'lc-inner': [
        (
            r"""(?xi)
            (;)
            (?: ([^;]*?) (=>))?
            (\s* (?:{variants}) \s*) (:)
            """.format(variants='|'.join(variant_langs)),
            bygroups(Punctuation, using(this, state=['root', 'lc-raw']),
                     Operator, Name.Label, Punctuation)
        ),
        (r';?\s*?\}-', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'lc-raw': [
        (r'\}-', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'replaceable': [
        # Comments
        (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
        # Parameters
        (
            r"""(?x)
            (\{{3})
                ([^|]*?)
                (?=\}{3}|\|)
            """,
            bygroups(Punctuation, Name.Variable),
            'parameter-inner',
        ),
        # Magic variables
        (r'(?i)(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars_i)),
         bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
        (r'(\{{\{{)(\s*)({})(\s*)(\}}\}})'.format('|'.join(magic_vars)),
         bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
        # Parser functions & templates
        (r'\{\{', Punctuation, 'template-begin-space'),
        # <tvar> legacy syntax
        (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
         Name.Tag, Punctuation, String, Punctuation)),
        (r'</>', Punctuation, '#pop'),
        # <tvar>
        (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
        (r'(?i)(</)(tvar)\b(\s*)(>)',
         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
    ],
    'parameter-inner': [
        (r'\}{3}', Punctuation, '#pop'),
        (r'\|', Punctuation),
        include('inline'),
        include('text'),
    ],
    'template-begin-space': [
        # Templates allow line breaks at the beginning, and due to how MediaWiki handles
        # comments, an extra state is required to handle things like {{\n<!---->\n name}}
        (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
        (r'\s+', Whitespace),
        # Parser functions
        (
            r'(?i)(\#[{}]*?|{})(:)'.format(title_char,
                                           '|'.join(parser_functions_i)),
            bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
        ),
        (
            r'({})(:)'.format('|'.join(parser_functions)),
            bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
        ),
        # Templates
        (
            rf'(?i)([{title_char}]*?)(:)',
            bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
        ),
        default(('#pop', 'template-name'),),
    ],
    'template-name': [
        (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
        (r'\}\}', Punctuation, '#pop'),
        (r'\n', Text, '#pop'),
        include('replaceable'),
        *text_rules(Name.Tag),
    ],
    'template-inner': [
        (r'\}\}', Punctuation, '#pop'),
        (r'\|', Punctuation),
        (
            r"""(?x)
                (?<=\|)
                ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
                (=)
            """,
            bygroups(Name.Label, Operator)
        ),
        include('inline'),
        include('text'),
    ],
    'table': [
        # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
        # Endings
        (r'^([ \t\n\r\0\x0B]*?)(\|\})',
         bygroups(Whitespace, Punctuation), '#pop'),
        # Table rows
        (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
         using(this, state=['root', 'attr']))),
        # Captions
        (
            r"""(?x)
            ^([ \t\n\r\0\x0B]*?)(\|\+)
            # Exclude links, template and tags
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
            (.*?)$
            """,
            bygroups(Whitespace, Punctuation, using(this, state=[
                     'root', 'attr']), Punctuation, Generic.Heading),
        ),
        # Table data
        (
            r"""(?x)
            ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
            """,
            bygroups(Punctuation, using(this, state=[
                     'root', 'attr']), Punctuation),
        ),
        # Table headers
        (
            r"""(?x)
            ( ^(?:[ \t\n\r\0\x0B]*?)! )
            (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
            """,
            bygroups(Punctuation, using(this, state=[
                     'root', 'attr']), Punctuation),
            'table-header',
        ),
        include('list'),
        include('inline'),
        include('text'),
    ],
    'table-header': [
        # Requires another state for || handling inside headers
        (r'\n', Text, '#pop'),
        (
            r"""(?x)
            (!!|\|\|)
            (?:
                ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
                (\|)(?!\|)
            )?
            """,
            bygroups(Punctuation, using(this, state=[
                     'root', 'attr']), Punctuation)
        ),
        *text_rules(Generic.Subheading),
    ],
    'entity': [
        (r'&\S*?;', Name.Entity),
    ],
    'dt': [
        (r'\n', Text, '#pop'),
        include('inline'),
        (r':', Keyword, '#pop'),
        include('text'),
    ],
    'extlink-inner': [
        (r'\]', Punctuation, '#pop'),
        include('inline'),
        include('text'),
    ],
    'nowiki-ish': [
        include('entity'),
        include('text'),
    ],
    'attr': [
        include('replaceable'),
        (r'\s+', Whitespace),
        (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
        (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
        (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
        (r'[\w:-]+', Name.Attribute),

    ],
    'attr-val-0': [
        (r'\s', Whitespace, '#pop'),
        include('replaceable'),
        *text_rules(String),
    ],
    'attr-val-1': [
        (r"'", String.Single, '#pop'),
        include('replaceable'),
        *text_rules(String.Single),
    ],
    'attr-val-2': [
        (r'"', String.Double, '#pop'),
        include('replaceable'),
        *text_rules(String.Double),
    ],
    'tag-inner-ordinary': [
        (r'/?\s*>', Punctuation, '#pop'),
        include('tag-attr'),
    ],
    'tag-inner': [
        # Return to root state for self-closing tags
        (r'/\s*>', Punctuation, '#pop:2'),
        (r'\s*>', Punctuation, '#pop'),
        include('tag-attr'),
    ],
    # These states below are just like their non-tag variants, the key difference is
    # they forcibly quit when encountering tag closing markup
    'tag-attr': [
        include('replaceable'),
        (r'\s+', Whitespace),
        (r'(=)(\s*)(")', bygroups(Operator,
         Whitespace, String.Double), 'tag-attr-val-2'),
        (r"(=)(\s*)(')", bygroups(Operator,
         Whitespace, String.Single), 'tag-attr-val-1'),
        (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
        (r'[\w:-]+', Name.Attribute),

    ],
    'tag-attr-val-0': [
        (r'\s', Whitespace, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String),
    ],
    'tag-attr-val-1': [
        (r"'", String.Single, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String.Single),
    ],
    'tag-attr-val-2': [
        (r'"', String.Double, '#pop'),
        (r'/?>', Punctuation, '#pop:2'),
        include('replaceable'),
        *text_rules(String.Double),
    ],
    'tag-nowiki': nowiki_tag_rules('nowiki'),
    'tag-pre': nowiki_tag_rules('pre'),
    'tag-categorytree': plaintext_tag_rules('categorytree'),
    'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
    'tag-hiero': plaintext_tag_rules('hiero'),
    'tag-inputbox': plaintext_tag_rules('inputbox'),
    'tag-imagemap': plaintext_tag_rules('imagemap'),
    'tag-charinsert': plaintext_tag_rules('charinsert'),
    'tag-timeline': plaintext_tag_rules('timeline'),
    'tag-gallery': plaintext_tag_rules('gallery'),
    'tag-graph': plaintext_tag_rules('graph'),
    'tag-rss': plaintext_tag_rules('rss'),
    'tag-math': delegate_tag_rules('math', TexLexer, state='math'),
    'tag-chem': delegate_tag_rules('chem', TexLexer, state='math'),
    'tag-ce': delegate_tag_rules('ce', TexLexer, state='math'),
    'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
    'text-italic': text_rules(Generic.Emph),
    'text-bold': text_rules(Generic.Strong),
    'text-bold-italic': text_rules(Generic.EmphStrong),
    'text': text_rules(Text),
}
|