9578053 Jan 22 2022 distfiles.gentoo.org/distfiles/gajim-1.3.3-2.tar.gz
This commit is contained in:
parent
a5b3822651
commit
4c1b226bff
1045 changed files with 753037 additions and 18 deletions
88
gajim/common/regex.py
Normal file
88
gajim/common/regex.py
Normal file
|
@ -0,0 +1,88 @@
|
|||
import re
|
||||
|
||||
def _get_link_pattern():
|
||||
# regexp meta characters are: . ^ $ * + ? { } [ ] \ | ( )
|
||||
# one escapes the metachars with \
|
||||
# \S matches anything but ' ' '\t' '\n' '\r' '\f' and '\v'
|
||||
# \s matches any whitespace character
|
||||
# \w any alphanumeric character
|
||||
# \W any non-alphanumeric character
|
||||
# \b means word boundary. This is a zero-width assertion that
|
||||
# matches only at the beginning or end of a word.
|
||||
# ^ matches at the beginning of lines
|
||||
#
|
||||
# * means 0 or more times
|
||||
# + means 1 or more times
|
||||
# ? means 0 or 1 time
|
||||
# | means or
|
||||
# [^*] anything but '*' (inside [] you don't have to escape metachars)
|
||||
# [^\s*] anything but whitespaces and '*'
|
||||
# (?<!\S) is a one char lookbehind assertion and asks for any leading
|
||||
# whitespace
|
||||
# and matches beginning of lines so we have correct formatting detection
|
||||
# even if the text is just '*foo*'
|
||||
# (?!\S) is the same thing but it's a lookahead assertion
|
||||
# \S*[^\s\W] --> in the matching string don't match ? or ) etc.. if at
|
||||
# the end
|
||||
# so http://be) will match http://be and http://be)be) will match
|
||||
# http://be)be
|
||||
|
||||
legacy_prefixes = r"((?<=\()(www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$"\
|
||||
r"&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+(?=\)))"\
|
||||
r"|((www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]"\
|
||||
r"|%[A-Fa-f0-9]{2})+"\
|
||||
r"\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+)"
|
||||
# NOTE: it's ok to catch www.gr such stuff exist!
|
||||
|
||||
# FIXME: recognize xmpp: and treat it specially
|
||||
links = r"((?<=\()[A-Za-z][A-Za-z0-9\+\.\-]*:"\
|
||||
r"([\w\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+"\
|
||||
r"(?=\)))|(\w[\w\+\.\-]*:([^<>\s]|%[A-Fa-f0-9]{2})+)"
|
||||
|
||||
# 2nd one: at_least_one_char@at_least_one_char.at_least_one_char
|
||||
mail = r'\bmailto:\S*[^\s\W]|' r'\b\S+@\S+\.\S*[^\s\W]'
|
||||
|
||||
link_pattern = links + '|' + mail + '|' + legacy_prefixes
|
||||
return link_pattern
|
||||
|
||||
def _get_basic_pattern():
|
||||
basic_pattern = _get_link_pattern()
|
||||
# detects eg. *b* *bold* *bold bold* test *bold* *bold*! (*bold*)
|
||||
# doesn't detect (it's a feature :P) * bold* *bold * * bold * test*bold*
|
||||
formatting = r'|(?<!\w)' r'\*[^\s*]' r'([^*]*[^\s*])?' r'\*(?!\w)|'\
|
||||
r'(?<!\S)' r'~[^\s~]' r'([^~]*[^\s~])?' r'~(?!\S)|'\
|
||||
r'(?<!\w)' r'_[^\s_]' r'([^_]*[^\s_])?' r'_(?!\w)'
|
||||
return basic_pattern + formatting
|
||||
|
||||
def _get_emot_and_basic_pattern(use_ascii_formatting=True):
|
||||
from gajim.gui.emoji_data import emoji_data
|
||||
# because emoticons match later (in the string) they need to be after
|
||||
# basic matches that may occur earlier
|
||||
emoticons = emoji_data.get_regex()
|
||||
|
||||
if use_ascii_formatting:
|
||||
pattern = _get_basic_pattern()
|
||||
else:
|
||||
pattern = _get_link_pattern()
|
||||
|
||||
return '%s|%s' % (pattern, emoticons)
|
||||
|
||||
LINK_REGEX = re.compile(_get_link_pattern(), re.I | re.U)
|
||||
|
||||
# link pattern + ASCII formatting
|
||||
BASIC_REGEX = re.compile(_get_basic_pattern(), re.IGNORECASE)
|
||||
|
||||
# emoticons + link pattern
|
||||
EMOT_AND_LINK_REGEX = re.compile(_get_emot_and_basic_pattern(False),
|
||||
re.IGNORECASE)
|
||||
|
||||
# emoticons + link pattern + ASCII formatting
|
||||
EMOT_AND_BASIC_REGEX = re.compile(_get_emot_and_basic_pattern(True),
|
||||
re.IGNORECASE)
|
||||
|
||||
INVALID_XML_CHARS_REGEX = re.compile(
|
||||
'[\x00-\x08]|[\x0b-\x0c]|[\x0e-\x1f]|[\ud800-\udfff]|[\ufffe-\uffff]')
|
||||
|
||||
# at least one character in 3 parts (before @, after @, after .)
|
||||
STH_AT_STH_DOT_STH_REGEX = re.compile(
|
||||
r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$')
|
Loading…
Add table
Add a link
Reference in a new issue