From d08b34fd57984337c0dea8179201019f8f7d3513 Mon Sep 17 00:00:00 2001 From: emdee Date: Tue, 29 Nov 2022 12:54:36 +0000 Subject: [PATCH] Added notice_log --- exclude_badExits.bash | 20 +- exclude_badExits.py | 499 +++++++++++++++++++++++++----------------- support_onions.py | 26 +++ 3 files changed, 335 insertions(+), 210 deletions(-) diff --git a/exclude_badExits.bash b/exclude_badExits.bash index fd8eafb..6120fc9 100644 --- a/exclude_badExits.bash +++ b/exclude_badExits.bash @@ -4,21 +4,22 @@ PROG=exclude_badExits.py SOCKS_PORT=9050 CAFILE=/etc/ssl/certs/ca-certificates.crt +# you may have a special python for installed packages +EXE=`which python3.bash` +$EXE exclude_badExits.py --help > exclude_badExits.hlp & # an example of running exclude_badExits with full debugging # expected to take an hour or so declare -a LARGS LARGS=( - --log_level 10 - ) -# you may have a special python for installed packages -EXE=`which python3.bash` -LARGS+=( - --strict_nodes 1 + # --strict_nodes 1 --points_timeout 120 + --log_level 10 + --https_cafile $CAFILE + ) +LARGS+=( --proxy-host 127.0.0.1 --proxy-port $SOCKS_PORT - --https_cafile $CAFILE ) if [ -f '/run/tor/control' ] ; then @@ -34,8 +35,9 @@ LARGS+=( --white_onions $ddg ) # you may need to be the tor user to read /run/tor/control grep -q ^debian-tor /etc/group && TORU=debian-tor || { grep -q ^tor /etc/group && TORU=tor -} -sudo -u $TORU $EXE exclude_badExits.py "${LARGS[@]}" \ + } +# --saved_only +sudo -u $TORU $EXE exclude_badExits.py "${LARGS[@]}" "$@" \ 2>&1|tee exclude_badExits6.log # The DEBUG statements contain the detail of why the relay was considered bad. diff --git a/exclude_badExits.py b/exclude_badExits.py index ad72d6d..6e4fc5e 100644 --- a/exclude_badExits.py +++ b/exclude_badExits.py @@ -37,7 +37,7 @@ By default all sections of the goodnodes.yaml are used as a whitelist. 
BadNodes: ExcludeExitNodes: BadExit: - # $0000000000000000000000000000000000000007 + - 0000000000000000000000000000000000000007 ``` That part requires [PyYAML](https://pyyaml.org/wiki/PyYAML) https://github.com/yaml/pyyaml/ or ```ruamel```: do @@ -99,7 +99,9 @@ For usage, do ```python3 exclude_badExits.py --help` import argparse import os import json +import re import sys +import tempfile import time from io import StringIO @@ -157,24 +159,30 @@ try: except ImportError: oPARSER = None +oCONTACT_RE = re.compile(r'([^:]*)(\s+)(email|url|proof|ciissversion|abuse|gpg):') + ETC_DIR = '/usr/local/etc/tor/yaml' -aTRUST_DB = {} -aTRUST_DB_INDEX = {} +aGOOD_CONTACTS_DB = {} +aGOOD_CONTACTS_FPS = {} +aBAD_CONTACTS_DB = {} aRELAYS_DB = {} aRELAYS_DB_INDEX = {} aFP_EMAIL = {} aDOMAIN_FPS = {} sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/" # You can call this while bootstrapping -sEXCLUDE_EXIT_KEY = 'ExcludeNodes' +sEXCLUDE_EXIT_GROUP = 'ExcludeNodes' sINCLUDE_EXIT_KEY = 'ExitNodes' oBAD_ROOT = 'BadNodes' -oBAD_NODES = safe_load(""" +aBAD_NODES = safe_load(""" BadNodes: ExcludeDomains: [] ExcludeNodes: + # BadExit will be overwritten BadExit: [] + # list MyBadExit in --bad_sections if you want it used + MyBadExit: [] """) sGOOD_ROOT = 'GoodNodes' @@ -193,30 +201,32 @@ GoodNodes: lKNOWN_NODNS = [] tMAYBE_NODNS = set() def lYamlBadNodes(sFile, - section=sEXCLUDE_EXIT_KEY, - lWanted=['BadExit']): - global oBAD_NODES + section=sEXCLUDE_EXIT_GROUP, + tWanted=None): + global aBAD_NODES global lKNOWN_NODNS global tMAYBE_NODNS + l = [] + if tWanted is None: tWanted = {'BadExit'} if not yaml: - return [] + return l if os.path.exists(sFile): with open(sFile, 'rt') as oFd: - oBAD_NODES = safe_load(oFd) + aBAD_NODES = safe_load(oFd) - # BROKEN -# root = sEXCLUDE_EXIT_KEY + root = sEXCLUDE_EXIT_GROUP # for elt in o[oBAD_ROOT][root][section].keys(): -# if lWanted and elt not in lWanted: continue +# if tWanted and elt not in tWanted: continue # # l += o[oBAD_ROOT][root][section][elt] - l = oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] + for sub in tWanted: + l += aBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_GROUP][sub] tMAYBE_NODNS = set(safe_load(StringIO(yKNOWN_NODNS))) root = sEXCLUDE_DOMAINS - if root in oBAD_NODES[oBAD_ROOT] and oBAD_NODES[oBAD_ROOT][root]: - tMAYBE_NODNS.extend(oBAD_NODES[oBAD_ROOT][root]) + if sEXCLUDE_DOMAINS in aBAD_NODES[oBAD_ROOT] and aBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS]: + tMAYBE_NODNS.update(set(aBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS])) return l def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'): @@ -252,9 +262,19 @@ def bdomain_is_bad(domain, fp): tBAD_URLS = set() lAT_REPS = ['[]', ' at ', '(at)', '[at]', '', '(att)', '_at_', '~at~', '.at.', '!at!', 't', '<(a)>', '|__at-|', '<:at:>', - '[__at ]', '"a t"', 'removeme at '] + '[__at ]', '"a t"', 'removeme at ', ' a7 ', '{at-}' + '[at}', 'atsign', '-at-', '(at_sign)', 'a.t', + 'atsignhere', ' _a_ ', ' (at-sign) ', "'at sign'", + '(a)', ' atsign ', '(at symbol)', ' anat ', '=at=', + '-at-', '-dot-', ' [a] ','(at)', '', '[at sign]', + '"at"', '{at}', '-----symbol for email----', '[at@]', + '(at sign here)', '==at', '|=dot|','/\t', + ] lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>', - '<:dot:>', '|dot--|', + '<:dot:>', '|dot--|', ' d07 ', '', '(dot]', '{dot)', + 'd.t', "'dot'", '(d)', '-dot-', ' adot ', + '(d)', ' . 
', '[punto]', '(point)', '"dot"', '{.}', + '--separator--', '|=dot|', ' period ', ')dot(', ] lNO_EMAIL = [ '', @@ -279,18 +299,26 @@ lNO_EMAIL = [ 'your@email.com', r'', ] +# +lMORONS = ['hoster:Quintex Alliance Consulting '] + def sCleanEmail(s): s = s.lower() for elt in lAT_REPS: - s = s.replace(' ' + elt + ' ', '@').replace(elt, '@') + if not elt.startswith(' '): + s = s.replace(' ' + elt + ' ', '@') + s = s.replace(elt, '@') for elt in lDOT_REPS: + if not elt.startswith(' '): + s = s.replace(' ' + elt + ' ', '.') s = s.replace(elt, '.') s = s.replace('(dash)', '-') + s = s.replace('hyphen ', '-') for elt in lNO_EMAIL: - s = s.replace(elt, '') + s = s.replace(elt, '?') return s -lATS = ['abuse', 'email'] +lEMAILS = ['abuse', 'email'] lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory'] lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone', 'sandbox', 'offlinemasterkey'] @@ -305,7 +333,7 @@ def aCleanContact(a): a[elt] = True else: a[elt] = False - for elt in lATS: + for elt in lEMAILS: if elt not in a: continue a[elt] = sCleanEmail(a[elt]) if 'url' in a.keys(): @@ -324,8 +352,8 @@ def bVerifyContact(a=None, fp=None, https_cafile=None): global aFP_EMAIL global tBAD_URLS global lKNOWN_NODNS - global aTRUST_DB - global aTRUST_DB_INDEX + global aGOOD_CONTACTS_DB + global aGOOD_CONTACTS_FPS assert a assert fp assert https_cafile @@ -346,10 +374,10 @@ def bVerifyContact(a=None, fp=None, https_cafile=None): LOG.warn(f"{fp} 'proof' not in {keys}") return a - if aTRUST_DB_INDEX and fp in aTRUST_DB_INDEX.keys(): - aCachedContact = aTRUST_DB_INDEX[fp] + if aGOOD_CONTACTS_FPS and fp in aGOOD_CONTACTS_FPS.keys(): + aCachedContact = aGOOD_CONTACTS_FPS[fp] if aCachedContact['email'] == a['email']: - LOG.info(f"{fp} in aTRUST_DB_INDEX") + LOG.info(f"{fp} in aGOOD_CONTACTS_FPS") return aCachedContact if 'url' not in keys: @@ -377,53 +405,16 @@ def bVerifyContact(a=None, fp=None, https_cafile=None): lKNOWN_NODNS.append(domain) return a - if a['proof'] in ['dns-rsa']: - # only support uri for now - if False and ub_ctx: - fp_domain = fp + '.' + domain - if idns_validate(fp_domain, - libunbound_resolv_file='resolv.conf', - dnssec_DS_file='dnssec-root-trust', - ) == 0: - pass - LOG.warn(f"{fp} proof={a['proof']} - assumed good") - a['fps'] = [fp] - aTRUST_DB_INDEX[fp] = a - return a return True -# async -# If we keep a cache of FPs that we have gotten by downloading a URL -# we can avoid re-downloading the URL of other FP in the list of relays. -# If we paralelize the gathering of the URLs, we may have simultaneous -# gathers of the same URL from different relays, defeating the advantage -# of going parallel. The cache is global aDOMAIN_FPS. 
-def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None): - global aFP_EMAIL - global tBAD_URLS - global lKNOWN_NODNS - global aDOMAIN_FPS - - assert a - assert fp - assert https_cafile - - r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile) - if r is not True: - return r - - domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/') - if domain in aDOMAIN_FPS.keys(): - a['fps'] = aDOMAIN_FPS[domain] - return a - -# LOG.debug(f"{len(keys)} contact fields for {fp}") - url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt" - if url in aDOMAIN_FPS.keys(): - a['fps'] = aDOMAIN_FPS[url] - return a +def oVerifyUrl(url, domain, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None): if bAreWeConnected() is False: raise SystemExit("we are not connected") + if url in tBAD_URLS: + LOG.debug(f"BC Known bad url from {domain} for {fp}") + return None + + o = None try: if httpx: LOG.debug(f"Downloading from {domain} for {fp}") @@ -438,35 +429,99 @@ def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0 content_type='text/plain') # requests response: text "reason", "status_code" except AttributeError as e: - LOG.exception(f"AttributeError downloading from {domain} {e}") + LOG.exception(f"BC AttributeError downloading from {domain} {e}") + tBAD_URLS.add(url) except CertificateError as e: - LOG.warn(f"CertificateError downloading from {domain} {e}") - tBAD_URLS.add(a['url']) + LOG.warn(f"BC CertificateError downloading from {domain} {e}") + tBAD_URLS.add(url) except TrustorError as e: if e.args == "HTTP Errorcode 404": aFP_EMAIL[fp] = a['email'] - LOG.warn(f"TrustorError 404 from {domain} {e.args}") + LOG.warn(f"BC TrustorError 404 from {domain} {e.args}") else: - LOG.warn(f"TrustorError downloading from {domain} {e.args}") - tBAD_URLS.add(a['url']) + LOG.warn(f"BC TrustorError downloading from {domain} {e.args}") + tBAD_URLS.add(url) except (urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError,) as e: # noqa # # maybe offline - not bad - LOG.warn(f"MaxRetryError downloading from {domain} {e}") + LOG.warn(f"BC MaxRetryError downloading from {domain} {e}") except (BaseException) as e: - LOG.error(f"Exception {type(e)} downloading from {domain} {e}") + LOG.error(f"BC Exception {type(e)} downloading from {domain} {e}") else: - a = aContactFps(oargs, a, o, domain) - LOG.debug(f"Downloaded from {domain} {len(a['fps'])} FPs for {fp}") - aDOMAIN_FPS[domain] = a['fps'] + return o + return None + +# async +# If we keep a cache of FPs that we have gotten by downloading a URL +# we can avoid re-downloading the URL of other FP in the list of relays. +# If we paralelize the gathering of the URLs, we may have simultaneous +# gathers of the same URL from different relays, defeating the advantage +# of going parallel. The cache is global aDOMAIN_FPS. 
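The comment block above describes the caching scheme that the new aVerifyContact/oVerifyUrl split relies on: fingerprint lists are keyed by contact domain in the global aDOMAIN_FPS so the proof page is fetched only once no matter how many relays share a contact. A minimal sketch of that idea, outside the patch (the helper names here are illustrative, not part of this change):

```python
# Sketch of the domain -> fingerprints cache described above; `fetch` is any
# callable(url) -> text, e.g. a SOCKS-aware HTTP download.
_domain_fps = {}  # domain -> list of 40-char fingerprints already downloaded

def lcached_fps(domain, fetch):
    if domain in _domain_fps:        # a second relay on the same domain: no re-download
        return _domain_fps[domain]
    url = f"https://{domain}/.well-known/tor-relay/rsa-fingerprint.txt"
    text = fetch(url)
    fps = [line.strip() for line in text.splitlines()
           if len(line.strip()) == 40 and not line.strip().startswith('#')]
    _domain_fps[domain] = fps
    return fps
```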
+def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None): + global aFP_EMAIL + global tBAD_URLS + global lKNOWN_NODNS + global aDOMAIN_FPS + global aBAD_CONTACTS_DB + + assert a + assert fp + assert https_cafile + + domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/') + a['url'] = 'https://' + domain + if domain in aDOMAIN_FPS.keys(): + a['fps'] = aDOMAIN_FPS[domain] + return a + + r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile) + if r is not True: + return r + if a['url'] in tBAD_URLS: + a['fps'] = [] + return a + + if a['proof'] == 'dns-rsa': + if ub_ctx: + fp_domain = fp + '.' + domain + if idns_validate(fp_domain, + libunbound_resolv_file='resolv.conf', + dnssec_DS_file='dnssec-root-trust', + ) == 0: + LOG.warn(f"{fp} proof={a['proof']} - validated good") + a['fps'] = [fp] + aGOOD_CONTACTS_FPS[fp] = a + else: + a['fps'] = [] + return a + # only test url for now drop through url = a['url'] - aDOMAIN_FPS[url] = a['fps'] + else: + url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt" + o = oVerifyUrl(url, domain, fp=fp, https_cafile=https_cafile, timeout=timeout, host=host, port=port, oargs=oargs) + if not o: + LOG.warn(f"BC Failed Download from {url} ") + a['fps'] = [] + tBAD_URLS.add(url) + aBAD_CONTACTS_DB[fp] = a + elif a['proof'] == 'dns-rsa': + # well let the test of the URL be enough for now + LOG.debug(f"Downloaded from {url} ") + a['fps'] = [fp] + aDOMAIN_FPS[domain] = a['fps'] + elif a['proof'] == 'uri-rsa': + a = aContactFps(oargs, a, o, domain) + if a['fps']: + LOG.debug(f"Downloaded from {url} {len(a['fps'])} FPs for {fp}") + else: + aBAD_CONTACTS_DB[fp] = a + LOG.debug(f"BC Downloaded from {url} NO FPs for {fp}") + aDOMAIN_FPS[domain] = a['fps'] return a def aContactFps(oargs, a, o, domain): global aFP_EMAIL global tBAD_URLS - global lKNOWN_NODNS global aDOMAIN_FPS if hasattr(o, 'status'): @@ -496,7 +551,7 @@ def aContactFps(oargs, a, o, domain): with open(sfile, 'wt') as oFd: oFd.write(data) except Exception as e: - LOG.warn(f"Error wirting {sfile} {e}") + LOG.warn(f"Error writing {sfile} {e}") a['modified'] = int(time.time()) if not l: @@ -506,7 +561,6 @@ def aContactFps(oargs, a, o, domain): and len(elt) == 40 \ and not elt.startswith('#')] LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs") - aDOMAIN_FPS[domain] = a['fps'] return a def aParseContact(contact, fp): @@ -516,23 +570,33 @@ def aParseContact(contact, fp): """ a = {} if not contact: - LOG.warn(f"null contact for {fp}") + LOG.warn(f"BC null contact for {fp}") LOG.debug(f"{fp} {contact}") return {} + + contact = contact.split(r'\n')[0] + for elt in lMORONS: + contact = contact.replace(elt) + m = oCONTACT_RE.match(contact) + # 450 matches! + if m and m.groups and len(m.groups(0)) > 2 and m.span()[1] > 0: + i = len(m.groups(0)[0]) + len(m.groups(0)[1]) + contact = contact[i:] + # shlex? 
lelts = contact.split(' ') if not lelts: - LOG.warn(f"empty contact for {fp}") + LOG.warn(f"BC empty contact for {fp}") LOG.debug(f"{fp} {contact}") return {} + for elt in lelts: if ':' not in elt: - if elt == 'DFRI': - # oddball - continue # hoster:Quintex Alliance Consulting - LOG.warn(f"no : in {elt} for {contact} in {fp}") - return {} + LOG.warn(f"BC no : in {elt} for {contact} in {fp}") + # return {} + # try going with what we have + break (key , val,) = elt.split(':', 1) if key == '': continue @@ -599,12 +663,14 @@ def oMainArgparser(_=None): parser.add_argument('--bad_nodes', type=str, default=os.path.join(ETC_DIR, 'badnodes.yaml'), help="Yaml file of bad nodes that should also be excluded") - parser.add_argument('--bad_on', type=str, default='Empty,NotGood', + parser.add_argument('--bad_on', type=str, default='Empty,NoEmail,NotGood', help="comma sep list of conditions - Empty,NoEmail,NotGood") parser.add_argument('--bad_contacts', type=str, default=os.path.join(ETC_DIR, 'badcontacts.yaml'), help="Yaml file of bad contacts that bad FPs are using") - + parser.add_argument('--saved_only', default=False, + action='store_true', + help="Just use the info in the last *.yaml files without querying the Tor controller") parser.add_argument('--strict_nodes', type=str, default=0, choices=['0', '1'], help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable") @@ -615,14 +681,17 @@ def oMainArgparser(_=None): parser.add_argument('--log_level', type=int, default=20, help="10=debug 20=info 30=warn 40=error") parser.add_argument('--bad_sections', type=str, - default='MyBadExit', - help="sections of the badnodes.yaml to use, comma separated, '' BROKEN") + default='BadExit', + help="sections of the badnodes.yaml to use, in addition to BadExit, comma separated") parser.add_argument('--white_onions', type=str, default='', help="comma sep. 
list of onions to whitelist their introduction points - BROKEN") parser.add_argument('--torrc_output', type=str, default=os.path.join(ETC_DIR, 'torrc.new'), help="Write the torrc configuration to a file") + parser.add_argument('--notice_log', type=str, + default='', + help="Parse the notice log for relays and services (not yet)") parser.add_argument('--relays_output', type=str, default=os.path.join(ETC_DIR, 'relays.json'), help="Write the download relays in json to a file") @@ -634,40 +703,43 @@ def oMainArgparser(_=None): return parser def vwrite_good_contacts(oargs): - global aTRUST_DB + global aGOOD_CONTACTS_DB good_contacts_tmp = oargs.good_contacts + '.tmp' with open(good_contacts_tmp, 'wt') as oFYaml: - yaml.dump(aTRUST_DB, oFYaml) + yaml.dump(aGOOD_CONTACTS_DB, oFYaml) oFYaml.close() if os.path.exists(oargs.good_contacts): bak = oargs.good_contacts +'.bak' os.rename(oargs.good_contacts, bak) os.rename(good_contacts_tmp, oargs.good_contacts) - LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oargs.good_contacts}") + LOG.info(f"Wrote {len(list(aGOOD_CONTACTS_DB.keys()))} good contact details to {oargs.good_contacts}") + bad_contacts_tmp = good_contacts_tmp.replace('.tmp', '.bad') + with open(bad_contacts_tmp, 'wt') as oFYaml: + yaml.dump(aBAD_CONTACTS_DB, oFYaml) + oFYaml.close() -def vwrite_badnodes(oargs, oBAD_NODES, slen): - if oargs.bad_nodes: - tmp = oargs.bad_nodes +'.tmp' - bak = oargs.bad_nodes +'.bak' - with open(tmp, 'wt') as oFYaml: - yaml.dump(oBAD_NODES, oFYaml) - LOG.info(f"Wrote {slen} to {oargs.bad_nodes}") - oFYaml.close() - if os.path.exists(oargs.bad_nodes): - os.rename(oargs.bad_nodes, bak) - os.rename(tmp, oargs.bad_nodes) +def vwrite_badnodes(oargs, aBAD_NODES, slen): + if not aBAD_NODES: return + tmp = oargs.bad_nodes +'.tmp' + bak = oargs.bad_nodes +'.bak' + with open(tmp, 'wt') as oFYaml: + yaml.dump(aBAD_NODES, oFYaml) + LOG.info(f"Wrote {slen} to {oargs.bad_nodes}") + oFYaml.close() + if os.path.exists(oargs.bad_nodes): + os.rename(oargs.bad_nodes, bak) + os.rename(tmp, oargs.bad_nodes) def vwrite_goodnodes(oargs, oGOOD_NODES, ilen): - if oargs.good_nodes: - tmp = oargs.good_nodes +'.tmp' - bak = oargs.good_nodes +'.bak' - with open(tmp, 'wt') as oFYaml: - yaml.dump(oGOOD_NODES, oFYaml) - LOG.info(f"Wrote {ilen} good relays to {oargs.good_nodes}") - oFYaml.close() - if os.path.exists(oargs.good_nodes): - os.rename(oargs.good_nodes, bak) - os.rename(tmp, oargs.good_nodes) + tmp = oargs.good_nodes +'.tmp' + bak = oargs.good_nodes +'.bak' + with open(tmp, 'wt') as oFYaml: + yaml.dump(oGOOD_NODES, oFYaml) + LOG.info(f"Wrote {ilen} good relays to {oargs.good_nodes}") + oFYaml.close() + if os.path.exists(oargs.good_nodes): + os.rename(oargs.good_nodes, bak) + os.rename(tmp, oargs.good_nodes) def lget_onionoo_relays(oargs): import requests @@ -780,18 +852,19 @@ def vsetup_logging(log_level, logfile='', stream=sys.stdout): LOG.addHandler(oHandler) LOG.info(f"SSetting log_level to {log_level!s}") -def vwritefinale(oargs, lNotInaRELAYS_DB): - if len(lNotInaRELAYS_DB): - LOG.warn(f"{len(lNotInaRELAYS_DB)} relays from stem were not in onionoo.torproject.org") +def vwritefinale(oargs): + global lNOT_IN_RELAYS_DB + + if len(lNOT_IN_RELAYS_DB): + LOG.warn(f"{len(lNOT_IN_RELAYS_DB)} relays from stem were not in onionoo.torproject.org") LOG.info(f"For info on a FP, use: https://nusenu.github.io/OrNetStats/w/relay/.html") - LOG.info(f"For info on relays, use: https://onionoo.torproject.org/details") + LOG.info(f"For info on relays, try: 
https://onionoo.torproject.org/details") # https://onionoo.torproject.org/details - LOG.info(f"although it's often broken") def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0): - global aTRUST_DB - global aTRUST_DB_INDEX + global aGOOD_CONTACTS_DB + global aGOOD_CONTACTS_FPS sofar = '' fp = b['fp'] # need to skip urllib3.exceptions.MaxRetryError @@ -813,15 +886,16 @@ def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0): LOG.info(f"{fp} GOOD {b['url']} {sofar}") # add our contact info to the trustdb - aTRUST_DB[fp] = b + aGOOD_CONTACTS_DB[fp] = b for elt in b['fps']: - aTRUST_DB_INDEX[elt] = b + aGOOD_CONTACTS_FPS[elt] = b return True -def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB): - global aTRUST_DB - global aTRUST_DB_INDEX +def bCheckFp(relay, sofar, lConds, texclude_set): + global aGOOD_CONTACTS_DB + global aGOOD_CONTACTS_FPS + global lNOT_IN_RELAYS_DB if not is_valid_fingerprint(relay.fingerprint): LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) @@ -830,17 +904,17 @@ def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB): fp = relay.fingerprint if aRELAYS_DB and fp not in aRELAYS_DB.keys(): LOG.warn(f"{fp} not in aRELAYS_DB") - lNotInaRELAYS_DB += [fp] + lNOT_IN_RELAYS_DB += [fp] if not relay.exit_policy.is_exiting_allowed(): - if sEXCLUDE_EXIT_KEY == sEXCLUDE_EXIT_KEY: + if sEXCLUDE_EXIT_GROUP == sEXCLUDE_EXIT_GROUP: pass # LOG.debug(f"{fp} not an exit {sofar}") else: pass # LOG.warn(f"{fp} not an exit {sofar}") # return None # great contact had good fps and we are in them - if fp in aTRUST_DB_INDEX.keys(): + if fp in aGOOD_CONTACTS_FPS.keys(): # a cached entry return None @@ -856,8 +930,8 @@ def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB): contact = sCleanEmail(relay.contact) # fail if the contact has no email - unreliable - if ('NoEmail' in lConds and relay.contact and - ('@' not in contact and 'email:' not in contact)): + if 'NoEmail' in lConds and relay.contact and \ + ('@' not in contact): LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}") LOG.debug(f"{fp} {relay.contact} {sofar}") texclude_set.add(fp) @@ -881,8 +955,8 @@ def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB): return True def oMainPreamble(lArgs): - global aTRUST_DB - global aTRUST_DB_INDEX + global aGOOD_CONTACTS_DB + global aGOOD_CONTACTS_FPS parser = oMainArgparser() oargs = parser.parse_args(lArgs) @@ -899,20 +973,20 @@ def oMainPreamble(lArgs): if sFile and os.path.exists(sFile): try: with open(sFile, 'rt') as oFd: - aTRUST_DB = safe_load(oFd) - LOG.info(f"{len(aTRUST_DB.keys())} trusted contacts from {sFile}") + aGOOD_CONTACTS_DB = safe_load(oFd) + LOG.info(f"{len(aGOOD_CONTACTS_DB.keys())} trusted contacts from {sFile}") # reverse lookup of fps to contacts # but... 
- for (k, v,) in aTRUST_DB.items(): + for (k, v,) in aGOOD_CONTACTS_DB.items(): if 'modified' not in v.keys(): v['modified'] = int(time.time()) - aTRUST_DB_INDEX[k] = v - if 'fps' in aTRUST_DB[k].keys(): - for fp in aTRUST_DB[k]['fps']: - if fp in aTRUST_DB_INDEX: + aGOOD_CONTACTS_FPS[k] = v + if 'fps' in aGOOD_CONTACTS_DB[k].keys(): + for fp in aGOOD_CONTACTS_DB[k]['fps']: + if fp in aGOOD_CONTACTS_FPS: continue - aTRUST_DB_INDEX[fp] = v - LOG.info(f"{len(aTRUST_DB_INDEX.keys())} good relays from {sFile}") + aGOOD_CONTACTS_FPS[fp] = v + LOG.info(f"{len(aGOOD_CONTACTS_FPS.keys())} good relays from {sFile}") except Exception as e: LOG.exception(f"Error reading YAML TrustDB {sFile} {e}") @@ -935,9 +1009,9 @@ def oStemController(oargs): # does it work dynamically? return 2 - elt = controller.get_conf(sEXCLUDE_EXIT_KEY) + elt = controller.get_conf(sEXCLUDE_EXIT_GROUP) if elt and elt != '{??}': - LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already") + LOG.warn(f"{sEXCLUDE_EXIT_GROUP} is in use already") return controller @@ -951,14 +1025,34 @@ def tWhitelistSet(oargs, controller): if sGOOD_ROOT in oGOOD_NODES and 'Relays' in oGOOD_NODES[sGOOD_ROOT] and \ 'IntroductionPoints' in oGOOD_NODES[sGOOD_ROOT]['Relays'].keys(): t = set(oGOOD_NODES[sGOOD_ROOT]['Relays']['IntroductionPoints']) - + + if oargs.notice_log and os.path.exists(oargs.notice_log): + tmp = tempfile.mktemp() + i = os.system(f"grep 'Every introduction point for service' {oargs.notice_log} |sed -e 's/.* service //' -e 's/ is .*//'|sort -u |sed -e '/ /d' > {tmp}") + if i: + with open(tmp, 'rt') as oFd: + lnew = oFd.readlines() + t.update(set(lnew)) + LOG.info(f"Whitelist {len(lnew)} services from {oargs.notice_log}") + os.remove(tmp) + w = set() if sGOOD_ROOT in oGOOD_NODES and 'Services' in oGOOD_NODES[sGOOD_ROOT].keys(): w = set(oGOOD_NODES[sGOOD_ROOT]['Services']) - twhitelist_set.update(w) if len(w) > 0: - LOG.info(f"Whitelist {len(t)} relays from Services") + LOG.info(f"Whitelist {len(w)} relays from {sGOOD_ROOT}/Services") + if oargs.notice_log and os.path.exists(oargs.notice_log): + tmp = tempfile.mktemp() + i = os.system(f"grep 'Wanted to contact directory mirror \$' /var/lib/tor/.SelekTOR/3xx/cache/9050/notice.log|sed -e 's/.* \$//' -e 's/[~ ].*//'|sort -u > {tmp}") + if i: + with open(tmp, 'rt') as oFd: + lnew = oFd.readlines() + w.update(set(lnew)) + LOG.info(f"Whitelist {len(lnew)} relays from {oargs.notice_log}") + os.remove(tmp) + twhitelist_set.update(w) + w = set() if 'Onions' in oGOOD_NODES[sGOOD_ROOT].keys(): # Provides the descriptor for a hidden service. 
The **address** is the @@ -977,63 +1071,68 @@ def tWhitelistSet(oargs, controller): def tExcludeSet(oargs): texclude_set = set() + sections = {'BadExit'} if oargs.bad_nodes and os.path.exists(oargs.bad_nodes): - if False and oargs.bad_sections: - # BROKEN - sections = oargs.bad_sections.split(',') - texclude_set = set(lYamlBadNodes(oargs.bad_nodes, - lWanted=sections, - section=sEXCLUDE_EXIT_KEY)) - LOG.info(f"Preloaded {len(texclude_set)} bad fps") + if oargs.bad_sections: + sections.update(oargs.bad_sections.split(',')) + texclude_set = set(lYamlBadNodes(oargs.bad_nodes, + tWanted=sections, + section=sEXCLUDE_EXIT_GROUP)) + LOG.info(f"Preloaded {len(texclude_set)} bad fps") return texclude_set # async def iMain(lArgs): - global aTRUST_DB - global aTRUST_DB_INDEX - global oBAD_NODES + global aGOOD_CONTACTS_DB + global aGOOD_CONTACTS_FPS + global aBAD_CONTACTS_DB + global aBAD_NODES global oGOOD_NODES global lKNOWN_NODNS global aRELAYS_DB global aRELAYS_DB_INDEX global tBAD_URLS - + global lNOT_IN_RELAYS_DB + oargs = oMainPreamble(lArgs) controller = oStemController(oargs) twhitelist_set = tWhitelistSet(oargs, controller) texclude_set = tExcludeSet(oargs) - ttrust_db_index = aTRUST_DB_INDEX.keys() - tdns_urls = set() + ttrust_db_index = aGOOD_CONTACTS_FPS.keys() iFakeContact = 0 iTotalContacts = 0 aBadContacts = {} - lNotInaRELAYS_DB = [] + lNOT_IN_RELAYS_DB = [] iR = 0 relays = controller.get_server_descriptors() lqueue = [] socksu = f"socks5://{oargs.proxy_host}:{oargs.proxy_port}" + if oargs.saved_only: + relays = [] for relay in relays: iR += 1 fp = relay.fingerprint = relay.fingerprint.upper() - sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}" + sofar = f"G:{len(aGOOD_CONTACTS_DB.keys())} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}" lConds = oargs.bad_on.split(',') - r = bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB) + r = bCheckFp(relay, sofar, lConds, texclude_set) if r is not True: continue # if it has a ciissversion in contact we count it in total iTotalContacts += 1 # only proceed if 'NotGood' not in lConds: - if 'NotGood' not in lConds: continue + if 'NotGood' not in lConds: + continue # fail if the contact does not have url: to pass a = aParseContact(relay.contact, fp) if not a: - LOG.warn(f"{fp} contact did not parse {sofar}") + LOG.warn(f"{fp} BC contact did not parse {sofar}") texclude_set.add(fp) + aBAD_CONTACTS_DB[fp] = a continue if 'url' in a and a['url']: @@ -1048,23 +1147,17 @@ def iMain(lArgs): # fail if the contact uses a domain we already know does not resolve if domain in lKNOWN_NODNS: # The fp is using a contact with a URL we know is bogus - LOG.info(f"{fp} skipping in lKNOWN_NODNS {a} {sofar}") + LOG.info(f"{fp} BC skipping in lKNOWN_NODNS {a} {sofar}") LOG.debug(f"{fp} {relay} {sofar}") texclude_set.add(fp) + aBAD_CONTACTS_DB[fp] = a continue # drop through - if 'dns-rsa' in relay.contact.lower(): - # skip if the contact uses a dns-rsa url we dont handle - target = f"{fp}.{domain}" - LOG.info(f"skipping 'dns-rsa' {target} {sofar}") - tdns_urls.add(target) - continue - - if 'proof:uri-rsa' in relay.contact.lower(): + if 'proof' in a and a['proof'] in ['uri-rsa', 'dns-rsa']: if domain in aDOMAIN_FPS.keys(): continue - a['fp'] = fp if httpx: + a['fp'] = fp lqueue.append(asyncio.create_task( aVerifyContact(a=a, fp=fp, @@ -1099,14 +1192,12 @@ def iMain(lArgs): LOG.info(f"Filtered {len(twhitelist_set)} 
whitelisted relays") texclude_set = texclude_set.difference(twhitelist_set) - # accept the dns-rsa urls for now until we test them - texclude_set = texclude_set.difference(tdns_urls) - LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}") + LOG.info(f"{len(list(aGOOD_CONTACTS_DB.keys()))} good contacts out of {iTotalContacts}") if oargs.torrc_output and texclude_set: with open(oargs.torrc_output, 'wt') as oFTorrc: - oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n") - oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n") + oFTorrc.write(f"{sEXCLUDE_EXIT_GROUP} {','.join(texclude_set)}\n") + oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aGOOD_CONTACTS_FPS.keys())}\n") oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n") LOG.info(f"Wrote tor configuration to {oargs.torrc_output}") oFTorrc.close() @@ -1117,35 +1208,37 @@ def iMain(lArgs): yaml.dump(aBadContacts, oFYaml) oFYaml.close() - if oargs.good_contacts != '' and aTRUST_DB: + if oargs.good_contacts != '' and aGOOD_CONTACTS_DB: vwrite_good_contacts(oargs) - oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] = list(texclude_set) - oBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS - vwrite_badnodes(oargs, oBAD_NODES, str(len(texclude_set))) + aBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_GROUP]['BadExit'] = list(texclude_set) + aBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS + if oargs.bad_nodes: + vwrite_badnodes(oargs, aBAD_NODES, str(len(texclude_set))) - oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys()) + oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aGOOD_CONTACTS_FPS.keys()) # EntryNodes are readony - vwrite_goodnodes(oargs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys())) + if oargs.good_nodes: + vwrite_goodnodes(oargs, oGOOD_NODES, len(aGOOD_CONTACTS_FPS.keys())) - vwritefinale(oargs, lNotInaRELAYS_DB) + vwritefinale(oargs) retval = 0 try: logging.getLogger('stem').setLevel(30) if texclude_set: try: - LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(texclude_set)} net bad exit relays") - controller.set_conf(sEXCLUDE_EXIT_KEY, list(texclude_set)) + LOG.info(f"controller {sEXCLUDE_EXIT_GROUP} {len(texclude_set)} net bad relays") + controller.set_conf(sEXCLUDE_EXIT_GROUP, list(texclude_set)) except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa - LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor {e}") + LOG.error(f"Failed setting {sEXCLUDE_EXIT_GROUP} bad exit relays in Tor {e}") LOG.debug(repr(texclude_set)) retval += 1 - if aTRUST_DB_INDEX.keys(): - l = [elt for elt in aTRUST_DB_INDEX.keys() if len (elt) == 40] + if aGOOD_CONTACTS_FPS.keys(): + l = [elt for elt in aGOOD_CONTACTS_FPS.keys() if len (elt) == 40] try: - LOG.info(f"{sINCLUDE_EXIT_KEY} {len(l)} good relays") + LOG.info(f"controller {sINCLUDE_EXIT_KEY} {len(l)} good relays") controller.set_conf(sINCLUDE_EXIT_KEY, l) except (Exception, stem.InvalidRequest, stem.SocketClosed) as e: # noqa LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor {e}") @@ -1165,8 +1258,13 @@ def iMain(lArgs): cur = controller.get_conf('StrictNodes') if oargs.strict_nodes and int(cur) != oargs.strict_nodes: - LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}") controller.set_conf('StrictNodes', oargs.strict_nodes) + cur = controller.get_conf('StrictNodes') + if int(cur) != oargs.strict_nodes: + LOG.warn(f"OVERRIDING StrictNodes NOT {oargs.strict_nodes}") + else: + LOG.info(f"OVERRODE StrictNodes to 
{oargs.strict_nodes}") + else: LOG.info(f"StrictNodes is set to {cur}") @@ -1188,7 +1286,6 @@ def iMain(lArgs): except Exception as e: LOG.warn(str(e)) - sys.stdout.write("dns-rsa domains:\n" +'\n'.join(tdns_urls) +'\n') return retval if __name__ == '__main__': diff --git a/support_onions.py b/support_onions.py index 426c1fd..e68edcc 100644 --- a/support_onions.py +++ b/support_onions.py @@ -33,6 +33,32 @@ bHAVE_TORR = shutil.which('tor-resolve') # in the wild we'll keep a copy here so we can avoid restesting yKNOWN_NODNS = """ --- + - for-privacy.net + - backup.spekadyon.org + - verification-for-nusenu.net + - prsv.ch + - ezyn.de + - dfri.se + - dtf.contact + - galtland.network + - dotsrc.org + - nicdex.com + - unzane.com + - a9.wtf + - tor.skankhunt42.pw + - tor-exit-3.aa78i2efsewr0neeknk.xyz + - privacysvcs.net + - apt96.com + - mkg20001.io + - kryptonit.org + - sebastian-elisa-pfeifer.eu + - nx42.de + - www.defcon.org + - 0x0.is + - transliberation.today + - tor-exit-2.aa78i2efsewr0neeknk.xyz + - interfesse.net + - axims.net - a9.wtf - heraldonion.org - linkspartei.org
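As a companion to the --notice_log handling added in tWhitelistSet above, which shells out to grep/sed through os.system and a temp file, here is a hedged pure-Python sketch of the same extraction. The two log-message patterns are assumptions taken from the grep expressions in the patch, and note that os.system reports success with a zero exit status:

```python
import re

# Sketch: pull whitelisted onion services and directory-mirror fingerprints
# out of a Tor notice.log, mirroring the two grep|sed pipelines above.
def parse_notice_log(notice_log):
    services, relays = set(), set()
    with open(notice_log, 'rt') as oFd:
        for line in oFd:
            m = re.search(r'Every introduction point for service (\S+) is', line)
            if m:
                services.add(m.group(1))
                continue
            m = re.search(r'Wanted to contact directory mirror \$([0-9A-Fa-f]{40})', line)
            if m:
                relays.add(m.group(1).upper())
    return sorted(services), sorted(relays)
```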