# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*- # https://github.com/nusenu/noContactInfo_Exit_Excluder # https://github.com/TheSmashy/TorExitRelayExclude """ This extends nusenu's basic idea of using the stem library to dynamically exclude nodes that are likely to be bad by putting them on the ExcludeNodes or ExcludeExitNodes setting of a running Tor. * https://github.com/nusenu/noContactInfo_Exit_Excluder * https://github.com/TheSmashy/TorExitRelayExclude The basic idea is to exclude Exit nodes that do not have ContactInfo: * https://github.com/nusenu/ContactInfo-Information-Sharing-Specification That can be extended to relays that do not have an email in the contact, or to relays that do not have ContactInfo that is verified to include them. """ __prolog__ = __doc__ __doc__ +="""But there's a problem, and your Tor notice.log will tell you about it: you could exclude the relays needed to access hidden services or mirror directories. So we need to add to the process the concept of a whitelist. In addition, we may have our own blacklist of nodes we want to exclude, or use these lists for other applications like selektor. So we make two files that are structured in YAML: ``` /etc/tor/yaml/torrc-goodnodes.yaml GoodNodes: Relays: IntroductionPoints: - NODEFINGERPRINT ... By default all sections of the goodnodes.yaml are used as a whitelist. /etc/tor/yaml/torrc-badnodes.yaml BadNodes: ExcludeExitNodes: BadExit: # $0000000000000000000000000000000000000007 ``` That part requires [PyYAML](https://pyyaml.org/wiki/PyYAML) https://github.com/yaml/pyyaml/ or ```ruamel```: do ```pip3 install ruamel``` or ```pip3 install PyYAML```; the advantage of the former is that it preserves comments. (You may have to run this as the Tor user to get RW access to /run/tor/control, in which case the directory for the YAML files must be group Tor writeable, and its parents group Tor RX.) 
Because you don't want to exclude the introduction points to any onion you want to connect to, ```--white_onions``` should whitelist the introduction points to a comma sep list of onions; we fixed stem to do this: * https://github.com/torproject/stem/issues/96 * https://gitlab.torproject.org/legacy/trac/-/issues/25417 ```--torrc_output``` will write the torrc ExcludeNodes configuration to a file. ```--good_contacts``` will write the contact info as a ciiss dictionary to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints is downloaded and the fingerprints are added on a 'fps' field we create of that fingerprint's entry of the YAML dictionary. This file is read at the beginning of the program to start with a trust database, and only new contact info from new relays are added to the dictionary. Now for the final part: we lookup the Contact info of every relay that is currently in our Tor, and check it the existence of the well-known file that lists the fingerprints of the relays it runs. If it fails to provide the well-know url, we assume its a bad relay and add it to a list of nodes that goes on ```ExcludeNodes``` (not just ExcludeExitNodes```). If the Contact info is good, we add the list of fingerprints to ```ExitNodes```, a whitelist of relays to use as exits. ```--bad_on``` We offer the users 3 levels of cleaning: 1. clean relays that have no contact ```=Empty``` 2. clean relays that don't have an email in the contact (implies 1) ```=Empty,NoEmail``` 3. clean relays that don't have "good' contactinfo. (implies 1) ```=Empty,NoEmail,NotGood``` The default is ```=Empty,NotGood``` ; ```NoEmail``` is inherently imperfect in that many of the contact-as-an-email are obfuscated, but we try anyway. To be "good" the ContactInfo must: 1. have a url for the well-defined-file to be gotten 2. must have a file that can be gotten at the URL 3. must support getting the file with a valid SSL cert from a recognized authority 4. 
(not in the spec but added by Python) must use a TLS SSL > v1 5. must have a fingerprint list in the file 6. must have the FP that got us the contactinfo in the fingerprint list in the file, For usage, do ```python3 exclude_badExits.py --help` """ # https://github.com/nusenu/trustor-example-trust-config/blob/main/trust_config # https://github.com/nusenu/tor-relay-operator-ids-trust-information import argparse import os import json import sys import time from io import StringIO import stem from stem import InvalidRequest from stem.connection import IncorrectPassword from stem.util.tor_tools import is_valid_fingerprint import urllib3 from urllib3.util.ssl_match_hostname import CertificateError # list(ipaddress._find_address_range(ipaddress.IPv4Network('172.16.0.0/12')) try: from ruamel.yaml import YAML yaml = YAML(typ='rt') yaml.indent(mapping=2, sequence=2) safe_load = yaml.load except: yaml = None if yaml is None: try: import yaml safe_load = yaml.safe_load except: yaml = None try: from unbound import RR_CLASS_IN, RR_TYPE_TXT, ub_ctx except: ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None from support_onions import (bAreWeConnected, icheck_torrc, lIntroductionPoints, oGetStemController, vwait_for_controller, yKNOWN_NODNS, zResolveDomain) from trustor_poc import TrustorError, idns_validate try: import xxxhttpx import asyncio from trustor_poc import oDownloadUrlHttpx except: httpx = None from trustor_poc import oDownloadUrlUrllib3Socks as oDownloadUrl global LOG import logging import warnings warnings.filterwarnings('ignore') LOG = logging.getLogger() try: from torcontactinfo import TorContactInfoParser oPARSER = TorContactInfoParser() except ImportError: oPARSER = None ETC_DIR = '/usr/local/etc/tor/yaml' aTRUST_DB = {} aTRUST_DB_INDEX = {} aRELAYS_DB = {} aRELAYS_DB_INDEX = {} aFP_EMAIL = {} aDOMAIN_FPS = {} sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/" # You can call this while bootstrapping sEXCLUDE_EXIT_KEY = 'ExcludeNodes' sINCLUDE_EXIT_KEY = 
'ExitNodes'
oBAD_ROOT = 'BadNodes'
# Default (empty) skeleton for the bad-nodes YAML document; overwritten by
# lYamlBadNodes() when a badnodes.yaml file exists.
oBAD_NODES = safe_load("""
BadNodes:
  ExcludeDomains: []
  ExcludeNodes:
    BadExit: []
""")
sGOOD_ROOT = 'GoodNodes'
sINCLUDE_GUARD_KEY = 'EntryNodes'
sEXCLUDE_DOMAINS = 'ExcludeDomains'
# Default (empty) skeleton for the good-nodes YAML document; overwritten by
# lYamlGoodNodes() when a goodnodes.yaml file exists.
# NOTE(review): nesting reconstructed from how tWhitelistSet() reads it:
# Onions and Services are accessed at the GoodNodes level, ExitNodes and
# IntroductionPoints under Relays - confirm against a real goodnodes.yaml.
oGOOD_NODES = safe_load("""
GoodNodes:
  EntryNodes: []
  Relays:
    ExitNodes: []
    IntroductionPoints: []
  Onions: []
  Services: []
""")
# domains known to not resolve (grows at runtime)
lKNOWN_NODNS = []
# domains suspected to not resolve; checked lazily by bdomain_is_bad()
tMAYBE_NODNS = set()

def lYamlBadNodes(sFile,
                  section=sEXCLUDE_EXIT_KEY,
                  lWanted=None):
    """Load the bad-nodes YAML file and return the list of BadExit FPs.

    Also (re)seeds the module-global oBAD_NODES document and the
    tMAYBE_NODNS set of domains suspected not to resolve.
    """
    global oBAD_NODES
    global lKNOWN_NODNS
    global tMAYBE_NODNS
    # avoid mutable default argument
    if lWanted is None:
        lWanted = ['BadExit']
    if not yaml:
        return []
    if os.path.exists(sFile):
        with open(sFile, 'rt') as oFd:
            oBAD_NODES = safe_load(oFd)
    # BROKEN
    # root = sEXCLUDE_EXIT_KEY
    # for elt in o[oBAD_ROOT][root][section].keys():
    #     if lWanted and elt not in lWanted: continue
    #     l += o[oBAD_ROOT][root][section][elt]
    l = oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit']
    tMAYBE_NODNS = set(safe_load(StringIO(yKNOWN_NODNS)))
    root = sEXCLUDE_DOMAINS
    if root in oBAD_NODES[oBAD_ROOT] and oBAD_NODES[oBAD_ROOT][root]:
        # FIX: set objects have no .extend(); use .update()
        tMAYBE_NODNS.update(oBAD_NODES[oBAD_ROOT][root])
    return l

def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
    """Load the good-nodes YAML file and return its EntryNodes list.

    Also (re)seeds the module-global oGOOD_NODES document.
    """
    global oGOOD_NODES
    l = []
    if not yaml:
        return l
    if os.path.exists(sFile):
        with open(sFile, 'rt') as oFd:
            o = safe_load(oFd)
            oGOOD_NODES = o
            if 'EntryNodes' in o[sGOOD_ROOT].keys():
                l = o[sGOOD_ROOT]['EntryNodes']
    # yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
    return l

def bdomain_is_bad(domain, fp):
    """Return True when a contact domain is known/likely bogus.

    A domain is bad when it is already in lKNOWN_NODNS, when it is in
    tMAYBE_NODNS and fails to resolve now, or when it contains characters
    that cannot appear in a hostname.
    """
    global lKNOWN_NODNS
    if domain in lKNOWN_NODNS:
        return True
    if domain in tMAYBE_NODNS:
        ip = zResolveDomain(domain)
        if ip == '':
            LOG.debug(f"{fp} {domain} does not resolve")
            lKNOWN_NODNS.append(domain)
            tMAYBE_NODNS.remove(domain)
            return True
    # shell/format metacharacters never belong in a hostname
    for elt in '@(){}$!':
        if elt in domain:
            LOG.warn(f"{elt} in domain {domain}")
            return True
    return False

tBAD_URLS = set()
# obfuscated spellings of '@' seen in the wild
# NOTE(review): the '' and 't' entries look like artifacts of a mangled
# source; sCleanEmail() skips tokens shorter than 2 chars to stay safe.
lAT_REPS = ['[]', ' at ', '(at)', '[at]', '', '(att)', '_at_',
            '~at~', '.at.', '!at!', 't', '<(a)>', '|__at-|', '<:at:>',
            '[__at ]', '"a t"', 'removeme at ']
# obfuscated spellings of '.'
lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>',
             '<:dot:>', '|dot--|',
             ]
# placeholder/join-nobody contact strings that carry no real email
lNO_EMAIL = [
    '', '', '',
    '@snowden',
    'ano ano@fu.dk',
    'anonymous',
    'anonymous@buzzzz.com',
    'check http://highwaytohoell.de',
    'no-spam@tor.org',
    'no@no.no',
    'noreply@bytor.com',
    'not a person ',
    'not@needed.com',
    'not@needed.com',
    'not@re.al',
    'nothanks',
    'nottellingyou@mail.info',
    'ur@mom.com',
    'your@e-mail',
    'your@email.com',
    r'',
    ]

def sCleanEmail(s):
    """Normalize an obfuscated contact email: lowercase, de-obfuscate
    '@' and '.', and strip known placeholder strings."""
    s = s.lower()
    for elt in lAT_REPS:
        # FIX: guard degenerate tokens - str.replace('', '@') would insert
        # '@' between every character, and a 1-char token like 't' would
        # destroy the address.
        if len(elt) < 2:
            continue
        s = s.replace(' ' + elt + ' ', '@').replace(elt, '@')
    for elt in lDOT_REPS:
        s = s.replace(elt, '.')
    s = s.replace('(dash)', '-')
    for elt in lNO_EMAIL:
        # FIX: skip empty tokens (replace with '' is a no-op anyway)
        if not elt:
            continue
        s = s.replace(elt, '')
    return s

# ciiss fields holding email-like values
lATS = ['abuse', 'email']
# ciiss fields coerced to int
lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory']
# ciiss fields coerced to bool
lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone',
          'sandbox', 'offlinemasterkey']

def aCleanContact(a):
    """Coerce parsed ContactInfo fields to typed values, normalize the
    url to https://, and reset the 'fps' list."""
    # cleanups
    for elt in lINTS:
        if elt in a:
            a[elt] = int(a[elt])
    for elt in lBOOLS:
        if elt not in a:
            continue
        if a[elt] in ['y', 'yes', 'true', 'True']:
            a[elt] = True
        else:
            a[elt] = False
    for elt in lATS:
        if elt not in a:
            continue
        a[elt] = sCleanEmail(a[elt])
    if 'url' in a.keys():
        a['url'] = a['url'].rstrip('/')
        if a['url'].startswith('http://'):
            domain = a['url'].replace('http://', '')
        elif a['url'].startswith('https://'):
            domain = a['url'].replace('https://', '')
        else:
            domain = a['url']
        # force https scheme
        a['url'] = 'https://' + domain
    a.update({'fps': []})
    return a

def bVerifyContact(a=None, fp=None, https_cafile=None):
    """Pre-flight checks of a parsed contact for relay fp.

    Returns True when the contact looks fetchable via uri-rsa and the
    caller should download the well-known fingerprint file; returns the
    (possibly cached) contact dict when verification ends here.
    """
    global aFP_EMAIL
    global tBAD_URLS
    global lKNOWN_NODNS
    global aTRUST_DB
    global aTRUST_DB_INDEX
    assert a
    assert fp
    assert https_cafile
    keys = list(a.keys())
    a = aCleanContact(a)
    a['fp'] = fp
    if 'email' not in keys:
        a['email'] = ''
    if 'ciissversion' not in keys:
        aFP_EMAIL[fp] = a['email']
        LOG.warn(f"{fp} 'ciissversion' not in {keys}")
        return a
    # test the url for fps and add it to the array
    if 'proof' not in keys:
        aFP_EMAIL[fp] = a['email']
        LOG.warn(f"{fp} 'proof' not in {keys}")
        return a
    # cached entry with the same email: trust it, skip the download
    if aTRUST_DB_INDEX and fp in aTRUST_DB_INDEX.keys():
        aCachedContact = aTRUST_DB_INDEX[fp]
        if aCachedContact['email'] == a['email']:
            LOG.info(f"{fp} in aTRUST_DB_INDEX")
            return aCachedContact
    if 'url' not in keys:
        if 'uri' not in keys:
            a['url'] = ''
            aFP_EMAIL[fp] = a['email']
            LOG.warn(f"{fp} url and uri not in {keys}")
            return a
        a['url'] = a['uri']
        aFP_EMAIL[fp] = a['email']
        LOG.debug(f"{fp} 'uri' but not 'url' in {keys}")
        # drop through
    domain = a['url'].replace('https://', '').replace('http://', '')
    # domain should be a unique key for contacts?
    if bdomain_is_bad(domain, fp):
        LOG.warn(f"{domain} is bad - {a['url']}")
        LOG.debug(f"{fp} is bad from {a}")
        return a
    ip = zResolveDomain(domain)
    if ip == '':
        aFP_EMAIL[fp] = a['email']
        LOG.debug(f"{fp} {domain} does not resolve")
        lKNOWN_NODNS.append(domain)
        return a
    if a['proof'] in ['dns-rsa']:
        # only support uri for now
        if False and ub_ctx:
            fp_domain = fp + '.' + domain
            if idns_validate(fp_domain,
                             libunbound_resolv_file='resolv.conf',
                             dnssec_DS_file='dnssec-root-trust',
                             ) == 0:
                pass
        LOG.warn(f"{fp} proof={a['proof']} - assumed good")
        a['fps'] = [fp]
        aTRUST_DB_INDEX[fp] = a
        return a
    return True

# async
# If we keep a cache of FPs that we have gotten by downloading a URL
# we can avoid re-downloading the URL of other FP in the list of relays.
# If we parallelize the gathering of the URLs, we may have simultaneous
# gathers of the same URL from different relays, defeating the advantage
# of going parallel. The cache is global aDOMAIN_FPS.
def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20,
                   host='127.0.0.1', port=9050, oargs=None):
    """Verify a relay's contact by downloading its well-known
    rsa-fingerprint.txt over the Tor SOCKS proxy.

    Returns the contact dict with 'fps' filled on success (or on a cache
    hit in aDOMAIN_FPS); download failures leave 'fps' empty.
    """
    global aFP_EMAIL
    global tBAD_URLS
    global lKNOWN_NODNS
    global aDOMAIN_FPS
    assert a
    assert fp
    assert https_cafile
    r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile)
    if r is not True:
        return r
    domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/')
    # cache hit by domain: reuse FPs gathered for another relay
    if domain in aDOMAIN_FPS.keys():
        a['fps'] = aDOMAIN_FPS[domain]
        return a
    # LOG.debug(f"{len(keys)} contact fields for {fp}")
    url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt"
    if url in aDOMAIN_FPS.keys():
        a['fps'] = aDOMAIN_FPS[url]
        return a
    if bAreWeConnected() is False:
        raise SystemExit("we are not connected")
    try:
        if httpx:
            LOG.debug(f"Downloading from {domain} for {fp}")
            # await
            o = oDownloadUrl(url, https_cafile,
                             timeout=timeout, host=host, port=port,
                             content_type='text/plain')
        else:
            LOG.debug(f"Downloading from {domain} for {fp}")
            o = oDownloadUrl(url, https_cafile,
                             timeout=timeout, host=host, port=port,
                             content_type='text/plain')
        # requests response: text "reason", "status_code"
    except AttributeError as e:
        LOG.exception(f"AttributeError downloading from {domain} {e}")
    except CertificateError as e:
        LOG.warn(f"CertificateError downloading from {domain} {e}")
        tBAD_URLS.add(a['url'])
    except TrustorError as e:
        # FIX: e.args is a tuple - comparing it to a str never matched,
        # so the 404 branch was dead and 404s were wrongly blacklisted.
        if e.args and e.args[0] == "HTTP Errorcode 404":
            aFP_EMAIL[fp] = a['email']
            LOG.warn(f"TrustorError 404 from {domain} {e.args}")
        else:
            LOG.warn(f"TrustorError downloading from {domain} {e.args}")
            tBAD_URLS.add(a['url'])
    except (urllib3.exceptions.MaxRetryError,
            urllib3.exceptions.ProtocolError,) as e:  # noqa
        # maybe offline - not bad
        LOG.warn(f"MaxRetryError downloading from {domain} {e}")
    except Exception as e:
        # FIX: was BaseException, which also swallowed KeyboardInterrupt
        # and SystemExit
        LOG.error(f"Exception {type(e)} downloading from {domain} {e}")
    else:
        a = aContactFps(oargs, a, o, domain)
        LOG.debug(f"Downloaded from {domain} {len(a['fps'])} FPs for {fp}")
        aDOMAIN_FPS[domain] = a['fps']
        # also cache under the bare contact url
        url = a['url']
        aDOMAIN_FPS[url] = a['fps']
    return a

def aContactFps(oargs, a, o, domain):
    """Parse a downloaded rsa-fingerprint.txt response o into a['fps'],
    optionally mirroring the file under oargs.wellknown_output."""
    global aFP_EMAIL
    global tBAD_URLS
    global lKNOWN_NODNS
    global aDOMAIN_FPS
    # FIX: fp was referenced here without being defined (NameError on the
    # error/empty paths); bVerifyContact stores it on the contact dict.
    fp = a.get('fp', '')
    if hasattr(o, 'status'):
        status_code = o.status
    else:
        status_code = o.status_code
    if status_code >= 300:
        aFP_EMAIL[fp] = a['email']
        LOG.warn(f"Error from {domain} {status_code} {o.reason}")
        # any reason retry?
        tBAD_URLS.add(a['url'])
        return a
    if hasattr(o, 'text'):
        data = o.text
    else:
        data = str(o.data, 'UTF-8')
    l = data.upper().strip().split('\n')
    LOG.debug(f"Downloaded from {domain} {len(l)} lines {len(data)} bytes")
    if oargs.wellknown_output:
        sdir = os.path.join(oargs.wellknown_output, domain,
                            '.well-known', 'tor-relay')
        # FIX: compute sfile before the try so the except clause cannot
        # reference an unbound name when makedirs fails.
        sfile = os.path.join(sdir, "rsa-fingerprint.txt")
        try:
            if not os.path.isdir(sdir):
                os.makedirs(sdir)
            with open(sfile, 'wt') as oFd:
                oFd.write(data)
        except Exception as e:
            # FIX: typo 'wirting'
            LOG.warn(f"Error writing {sfile} {e}")
    a['modified'] = int(time.time())
    if not l:
        LOG.warn(f"Downloaded from {domain} empty for {fp}")
    else:
        # keep only plausible 40-hex-char fingerprint lines
        a['fps'] = [elt.strip() for elt in l if elt
                    and len(elt) == 40
                    and not elt.startswith('#')]
        LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs")
    aDOMAIN_FPS[domain] = a['fps']
    return a

def aParseContact(contact, fp):
    """
    See the Tor ContactInfo Information Sharing Specification v2
    https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
    """
    a = {}
    if not contact:
        LOG.warn(f"null contact for {fp}")
        LOG.debug(f"{fp} {contact}")
        return {}
    # shlex?
lelts = contact.split(' ') if not lelts: LOG.warn(f"empty contact for {fp}") LOG.debug(f"{fp} {contact}") return {} for elt in lelts: if ':' not in elt: if elt == 'DFRI': # oddball continue # hoster:Quintex Alliance Consulting LOG.warn(f"no : in {elt} for {contact} in {fp}") return {} (key , val,) = elt.split(':', 1) if key == '': continue key = key.rstrip(':') a[key] = val a = aCleanContact(a) return a def aParseContactYaml(contact, fp): """ See the Tor ContactInfo Information Sharing Specification v2 https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ """ l = [line for line in contact.strip().replace('"', '').split(' ') if ':' in line] LOG.debug(f"{fp} {len(l)} fields") s = f'"{fp}":\n' s += '\n'.join([f" {line}\"".replace(':', ': \"', 1) for line in l]) oFd = StringIO(s) a = safe_load(oFd) return a def oMainArgparser(_=None): try: from OpenSSL import SSL lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS except: lCAfs = [] CAfs = [] for elt in lCAfs: if os.path.exists(elt): CAfs.append(elt) if not CAfs: CAfs = [''] parser = argparse.ArgumentParser(add_help=True, epilog=__prolog__) parser.add_argument('--https_cafile', type=str, help="Certificate Authority file (in PEM)", default=CAfs[0]) parser.add_argument('--proxy_host', '--proxy-host', type=str, default='127.0.0.1', help='proxy host') parser.add_argument('--proxy_port', '--proxy-port', default=9050, type=int, help='proxy control port') parser.add_argument('--proxy_ctl', '--proxy-ctl', default='/run/tor/control' if os.path.exists('/run/tor/control') else 9051, type=str, help='control socket - or port') parser.add_argument('--torrc', default='/etc/tor/torrc-defaults', type=str, help='torrc to check for suggestions') parser.add_argument('--timeout', default=60, type=int, help='proxy download connect timeout') parser.add_argument('--good_nodes', type=str, default=os.path.join(ETC_DIR, 'goodnodes.yaml'), help="Yaml file of good info that should not be excluded") parser.add_argument('--bad_nodes', 
type=str, default=os.path.join(ETC_DIR, 'badnodes.yaml'), help="Yaml file of bad nodes that should also be excluded") parser.add_argument('--bad_on', type=str, default='Empty,NotGood', help="comma sep list of conditions - Empty,NoEmail,NotGood") parser.add_argument('--bad_contacts', type=str, default=os.path.join(ETC_DIR, 'badcontacts.yaml'), help="Yaml file of bad contacts that bad FPs are using") parser.add_argument('--strict_nodes', type=str, default=0, choices=['0', '1'], help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable") parser.add_argument('--wait_boot', type=int, default=120, help="Seconds to wait for Tor to booststrap") parser.add_argument('--points_timeout', type=int, default=0, help="Timeout for getting introduction points - must be long >120sec. 0 means disabled looking for IPs") parser.add_argument('--log_level', type=int, default=20, help="10=debug 20=info 30=warn 40=error") parser.add_argument('--bad_sections', type=str, default='MyBadExit', help="sections of the badnodes.yaml to use, comma separated, '' BROKEN") parser.add_argument('--white_onions', type=str, default='', help="comma sep. 
list of onions to whitelist their introduction points - BROKEN") parser.add_argument('--torrc_output', type=str, default=os.path.join(ETC_DIR, 'torrc.new'), help="Write the torrc configuration to a file") parser.add_argument('--relays_output', type=str, default=os.path.join(ETC_DIR, 'relays.json'), help="Write the download relays in json to a file") parser.add_argument('--wellknown_output', type=str, default=os.path.join(ETC_DIR, 'https'), help="Write the well-known files to a directory") parser.add_argument('--good_contacts', type=str, default=os.path.join(ETC_DIR, 'goodcontacts.yaml'), help="Write the proof data of the included nodes to a YAML file") return parser def vwrite_good_contacts(oargs): global aTRUST_DB good_contacts_tmp = oargs.good_contacts + '.tmp' with open(good_contacts_tmp, 'wt') as oFYaml: yaml.dump(aTRUST_DB, oFYaml) oFYaml.close() if os.path.exists(oargs.good_contacts): bak = oargs.good_contacts +'.bak' os.rename(oargs.good_contacts, bak) os.rename(good_contacts_tmp, oargs.good_contacts) LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oargs.good_contacts}") def vwrite_badnodes(oargs, oBAD_NODES, slen): if oargs.bad_nodes: tmp = oargs.bad_nodes +'.tmp' bak = oargs.bad_nodes +'.bak' with open(tmp, 'wt') as oFYaml: yaml.dump(oBAD_NODES, oFYaml) LOG.info(f"Wrote {slen} to {oargs.bad_nodes}") oFYaml.close() if os.path.exists(oargs.bad_nodes): os.rename(oargs.bad_nodes, bak) os.rename(tmp, oargs.bad_nodes) def vwrite_goodnodes(oargs, oGOOD_NODES, ilen): if oargs.good_nodes: tmp = oargs.good_nodes +'.tmp' bak = oargs.good_nodes +'.bak' with open(tmp, 'wt') as oFYaml: yaml.dump(oGOOD_NODES, oFYaml) LOG.info(f"Wrote {ilen} good relays to {oargs.good_nodes}") oFYaml.close() if os.path.exists(oargs.good_nodes): os.rename(oargs.good_nodes, bak) os.rename(tmp, oargs.good_nodes) def lget_onionoo_relays(oargs): import requests adata = {} if oargs.relays_output and os.path.exists(oargs.relays_output): # and less than a day old? 
LOG.info(f"Getting OO relays from {oargs.relays_output}") try: with open(oargs.relays_output, 'rt') as ofd: sdata = ofd.read() adata = json.loads(sdata) except Exception as e: LOG.error(f"Getting data relays from {oargs.relays_output}") adata = {} if not adata: surl = "https://onionoo.torproject.org/details" LOG.info(f"Getting OO relays from {surl}") sCAfile = oargs.https_cafile assert os.path.exists(sCAfile), sCAfile if True: try: o = oDownloadUrl(surl, sCAfile, timeout=oargs.timeout, host=oargs.proxy_host, port=oargs.proxy_port, content_type='') if hasattr(o, 'text'): sdata = o.text else: sdata = str(o.data, 'UTF-8') except Exception as e: # simplejson.errors.JSONDecodeError # urllib3.exceptions import ConnectTimeoutError, NewConnectionError # (urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError,) LOG.exception(f"JSON error {e}") return [] else: LOG.debug(f"Downloaded {surl} {len(sdata)} bytes") adata = json.loads(sdata) else: odata = requests.get(surl, verify=sCAfile) try: adata = odata.json() except Exception as e: # simplejson.errors.JSONDecodeError LOG.exception(f"JSON error {e}") return [] else: LOG.debug(f"Downloaded {surl} {len(adata)} relays") sdata = repr(adata) if oargs.relays_output: try: with open(oargs.relays_output, 'wt') as ofd: ofd.write(sdata) except Exception as e: LOG.warn(f"Error {oargs.relays_output} {e}") else: LOG.debug(f"Wrote {oargs.relays_output} {len(sdata)} bytes") lonionoo_relays = [r for r in adata["relays"] if 'fingerprint' in r.keys()] return lonionoo_relays def vsetup_logging(log_level, logfile='', stream=sys.stdout): global LOG add = True try: if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ: os.environ['COLOREDLOGS_LEVEL_STYLES'] = 'spam=22;debug=28;verbose=34;notice=220;warning=202;success=118,bold;error=124;critical=background=red' # https://pypi.org/project/coloredlogs/ import coloredlogs except ImportError: coloredlogs = False # stem fucks up logging # from stem.util import log 
logging.getLogger('stem').setLevel(30) logging._defaultFormatter = logging.Formatter(datefmt='%m-%d %H:%M:%S') logging._defaultFormatter.default_time_format = '%m-%d %H:%M:%S' logging._defaultFormatter.default_msec_format = '' kwargs = dict(level=log_level, force=True, format='%(levelname)s %(message)s') if logfile: add = logfile.startswith('+') sub = logfile.startswith('-') if add or sub: logfile = logfile[1:] kwargs['filename'] = logfile if coloredlogs: # https://pypi.org/project/coloredlogs/ aKw = dict(level=log_level, logger=LOG, stream=stream, fmt='%(levelname)s %(message)s' ) coloredlogs.install(**aKw) if logfile: oHandler = logging.FileHandler(logfile) LOG.addHandler(oHandler) LOG.info(f"CSetting log_level to {log_level} {stream}") else: logging.basicConfig(**kwargs) if add and logfile: oHandler = logging.StreamHandler(stream) LOG.addHandler(oHandler) LOG.info(f"SSetting log_level to {log_level!s}") def vwritefinale(oargs, lNotInaRELAYS_DB): if len(lNotInaRELAYS_DB): LOG.warn(f"{len(lNotInaRELAYS_DB)} relays from stem were not in onionoo.torproject.org") LOG.info(f"For info on a FP, use: https://nusenu.github.io/OrNetStats/w/relay/.html") LOG.info(f"For info on relays, use: https://onionoo.torproject.org/details") # https://onionoo.torproject.org/details LOG.info(f"although it's often broken") def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0): global aTRUST_DB global aTRUST_DB_INDEX sofar = '' fp = b['fp'] # need to skip urllib3.exceptions.MaxRetryError if not b or 'fps' not in b or not b['fps'] or not b['url']: LOG.warn(f"{fp} did NOT VERIFY {sofar}") LOG.debug(f"{fp} {b} {sofar}") # If it's giving contact info that doesnt check out # it could be a bad exit with fake contact info texclude_set.add(fp) aBadContacts[fp] = b return None if fp not in b['fps']: LOG.warn(f"{fp} the FP IS NOT in the list of fps {sofar}") # assume a fp is using a bogus contact texclude_set.add(fp) aBadContacts[fp] = b return False LOG.info(f"{fp} GOOD {b['url']} 
{sofar}") # add our contact info to the trustdb aTRUST_DB[fp] = b for elt in b['fps']: aTRUST_DB_INDEX[elt] = b return True def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB): global aTRUST_DB global aTRUST_DB_INDEX if not is_valid_fingerprint(relay.fingerprint): LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) return None fp = relay.fingerprint if aRELAYS_DB and fp not in aRELAYS_DB.keys(): LOG.warn(f"{fp} not in aRELAYS_DB") lNotInaRELAYS_DB += [fp] if not relay.exit_policy.is_exiting_allowed(): if sEXCLUDE_EXIT_KEY == sEXCLUDE_EXIT_KEY: pass # LOG.debug(f"{fp} not an exit {sofar}") else: pass # LOG.warn(f"{fp} not an exit {sofar}") # return None # great contact had good fps and we are in them if fp in aTRUST_DB_INDEX.keys(): # a cached entry return None if type(relay.contact) == bytes: # dunno relay.contact = str(relay.contact, 'UTF-8') # fail if the contact is empty if ('Empty' in lConds and not relay.contact): LOG.info(f"{fp} skipping empty contact - Empty {sofar}") texclude_set.add(fp) return None contact = sCleanEmail(relay.contact) # fail if the contact has no email - unreliable if ('NoEmail' in lConds and relay.contact and ('@' not in contact and 'email:' not in contact)): LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}") LOG.debug(f"{fp} {relay.contact} {sofar}") texclude_set.add(fp) return None # fail if the contact does not pass if ('NotGood' in lConds and relay.contact and ('ciissversion:' not in relay.contact)): LOG.info(f"{fp} skipping no ciissversion in contact {sofar}") LOG.debug(f"{fp} {relay.contact} {sofar}") texclude_set.add(fp) return None # fail if the contact does not have url: to pass if relay.contact and 'url' not in relay.contact: LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}") LOG.debug(f"{fp} {relay.contact} {sofar}") if ('NotGood' in lConds): texclude_set.add(fp) return None return True def oMainPreamble(lArgs): global aTRUST_DB global aTRUST_DB_INDEX parser = oMainArgparser() oargs = 
parser.parse_args(lArgs)
    vsetup_logging(oargs.log_level)
    if bAreWeConnected() is False:
        raise SystemExit("we are not connected")
    # sanity-check the running torrc for suggestions
    sFile = oargs.torrc
    if sFile and os.path.exists(sFile):
        icheck_torrc(sFile, oargs)
    # load the persisted trust DB and build the fp -> contact index
    sFile = oargs.good_contacts
    if sFile and os.path.exists(sFile):
        try:
            with open(sFile, 'rt') as oFd:
                aTRUST_DB = safe_load(oFd)
            LOG.info(f"{len(aTRUST_DB.keys())} trusted contacts from {sFile}")
            # reverse lookup of fps to contacts
            # but...
            for (k, v,) in aTRUST_DB.items():
                if 'modified' not in v.keys():
                    v['modified'] = int(time.time())
                aTRUST_DB_INDEX[k] = v
                # index every fingerprint the contact published
                if 'fps' in aTRUST_DB[k].keys():
                    for fp in aTRUST_DB[k]['fps']:
                        if fp in aTRUST_DB_INDEX:
                            continue
                        aTRUST_DB_INDEX[fp] = v
            LOG.info(f"{len(aTRUST_DB_INDEX.keys())} good relays from {sFile}")
        except Exception as e:
            LOG.exception(f"Error reading YAML TrustDB {sFile} {e}")
    return oargs

def oStemController(oargs):
    # Connect to the Tor control socket (path) or control port (int) and
    # wait for bootstrap.
    if os.path.exists(oargs.proxy_ctl):
        controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl)
    else:
        port = int(oargs.proxy_ctl)
        controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port)
    vwait_for_controller(controller, oargs.wait_boot)
    elt = controller.get_conf('UseMicrodescriptors')
    if elt != '0':
        LOG.error('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.')
        controller.set_conf('UseMicrodescriptors', 0)
        # does it work dynamically?
        # NOTE(review): returns the int 2 instead of a controller; iMain
        # does not check for this sentinel and will fail downstream -
        # confirm whether a SystemExit was intended.
        return 2
    elt = controller.get_conf(sEXCLUDE_EXIT_KEY)
    if elt and elt != '{??}':
        LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already")
    return controller

def tWhitelistSet(oargs, controller):
    # Build the whitelist: EntryNodes from goodnodes.yaml, plus Services,
    # plus introduction points gathered for whitelisted onions.
    twhitelist_set = set()
    twhitelist_set.update(set(lYamlGoodNodes(oargs.good_nodes)))
    LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} EntryNodes from {oargs.good_nodes}")
    t = set()
    if sGOOD_ROOT in oGOOD_NODES and 'Relays' in oGOOD_NODES[sGOOD_ROOT] and \
       'IntroductionPoints' in oGOOD_NODES[sGOOD_ROOT]['Relays'].keys():
        t = set(oGOOD_NODES[sGOOD_ROOT]['Relays']['IntroductionPoints'])
    w = set()
    if sGOOD_ROOT in oGOOD_NODES and 'Services' in oGOOD_NODES[sGOOD_ROOT].keys():
        w = set(oGOOD_NODES[sGOOD_ROOT]['Services'])
    twhitelist_set.update(w)
    if len(w) > 0:
        LOG.info(f"Whitelist {len(t)} relays from Services")
    w = set()
    if 'Onions' in oGOOD_NODES[sGOOD_ROOT].keys():
        # Provides the descriptor for a hidden service. The **address** is the
        # '.onion' address of the hidden service
        w = set(oGOOD_NODES[sGOOD_ROOT]['Onions'])
    if oargs.white_onions:
        w.update(oargs.white_onions.split(','))
    if oargs.points_timeout > 0:
        LOG.info(f"{len(w)} services will be checked from IntroductionPoints")
        t.update(lIntroductionPoints(controller, w, itimeout=oargs.points_timeout))
    if len(t) > 0:
        LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} IPs for onions")
    twhitelist_set.update(t)
    return twhitelist_set

def tExcludeSet(oargs):
    # Preload the exclusion set from badnodes.yaml.
    # NOTE(review): the section filter is disabled ('if False' - marked
    # BROKEN), so nothing is actually preloaded.
    texclude_set = set()
    if oargs.bad_nodes and os.path.exists(oargs.bad_nodes):
        if False and oargs.bad_sections:
            # BROKEN
            sections = oargs.bad_sections.split(',')
            texclude_set = set(lYamlBadNodes(oargs.bad_nodes,
                                             lWanted=sections,
                                             section=sEXCLUDE_EXIT_KEY))
        LOG.info(f"Preloaded {len(texclude_set)} bad fps")
    return texclude_set

# async
def iMain(lArgs):
    # Main driver: screen every relay, verify contacts, then write the
    # YAML/torrc outputs and push the results into the running Tor.
    global aTRUST_DB
    global aTRUST_DB_INDEX
    global oBAD_NODES
    global oGOOD_NODES
    global lKNOWN_NODNS
    global aRELAYS_DB
    global aRELAYS_DB_INDEX
    global tBAD_URLS
    oargs = oMainPreamble(lArgs)
    controller = oStemController(oargs)
    twhitelist_set = tWhitelistSet(oargs, controller)
    texclude_set = tExcludeSet(oargs)
    # live view on the index keys; used only for progress counts
    ttrust_db_index = aTRUST_DB_INDEX.keys()
    tdns_urls = set()
    iFakeContact = 0
    iTotalContacts = 0
    aBadContacts = {}
    lNotInaRELAYS_DB = []
    iR = 0
    relays = controller.get_server_descriptors()
    lqueue = []
    # NOTE(review): socksu is never used below - confirm before removing
    socksu = f"socks5://{oargs.proxy_host}:{oargs.proxy_port}"
    for relay in relays:
        iR += 1
        fp = relay.fingerprint = relay.fingerprint.upper()
        # progress summary embedded in every log line
        sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
        lConds = oargs.bad_on.split(',')
        r = bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB)
        if r is not True:
            continue
        # if it has a ciissversion in contact we count it in total
        iTotalContacts += 1
        # only proceed if 'NotGood' not in lConds:
        if 'NotGood' not in lConds:
            continue
        # fail if the contact does not have url: to pass
        a = aParseContact(relay.contact, fp)
        if not a:
            LOG.warn(f"{fp} contact did not parse {sofar}")
            texclude_set.add(fp)
            continue
        if 'url' in a and a['url']:
            # fail if the contact uses a url we already know is bad
            if a['url'] in tBAD_URLS:
                LOG.info(f"{fp} skipping in tBAD_URLS {a['url']} {sofar}")
                LOG.debug(f"{fp} {a} {sofar}")
                texclude_set.add(fp)
                continue
            domain = a['url'].replace('https://', '').replace('http://', '')
            # fail if the contact uses a domain we already know does not resolve
            if domain in lKNOWN_NODNS:
                # The fp is using a contact with a URL we know is bogus
                LOG.info(f"{fp} skipping in lKNOWN_NODNS {a} {sofar}")
                LOG.debug(f"{fp} {relay} {sofar}")
                texclude_set.add(fp)
                continue
            # drop through
        if 'dns-rsa' in relay.contact.lower():
            # skip if the contact uses a dns-rsa url we dont handle
            target = f"{fp}.{domain}"
            LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
            tdns_urls.add(target)
            continue
        if 'proof:uri-rsa' in relay.contact.lower():
            # already verified this domain via another relay
            if domain in aDOMAIN_FPS.keys():
                continue
            a['fp'] = fp
            if httpx:
                # queue for (would-be) parallel verification
                lqueue.append(asyncio.create_task(
                    aVerifyContact(a=a,
                                   fp=fp,
                                   https_cafile=oargs.https_cafile,
                                   timeout=oargs.timeout,
                                   host=oargs.proxy_host,
                                   port=oargs.proxy_port,
                                   oargs=oargs)))
            else:
                # synchronous path (httpx disabled by the import stub)
                b = aVerifyContact(a=a,
                                   fp=fp,
                                   https_cafile=oargs.https_cafile,
                                   timeout=oargs.timeout,
                                   host=oargs.proxy_host,
                                   port=oargs.proxy_port,
                                   oargs=oargs)
                # NOTE(review): only a False return (fp missing from its
                # own fps list) counts as a fake contact; None does not.
                r = bProcessContact(b, texclude_set, aBadContacts, iFakeContact)
                if r is False:
                    iFakeContact += 1
    if httpx:
        # drain the queued verifications
        # for b in asyncio.as_completed(lqueue):
        for b in lqueue:
            # r = await b
            r = b
            r = bProcessContact(r, texclude_set, aBadContacts, iFakeContact)
            if r is False:
                iFakeContact += 1
            elif r is True:
                # iGoodContact += 1
                pass
    LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
    texclude_set = texclude_set.difference(twhitelist_set)
    # accept the dns-rsa urls for now until we test them
    texclude_set = texclude_set.difference(tdns_urls)
    LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}")
    # write the torrc fragment with the computed node lists
    if oargs.torrc_output and texclude_set:
        with open(oargs.torrc_output, 'wt') as oFTorrc:
            oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
            oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
            oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n")
            LOG.info(f"Wrote tor configuration to {oargs.torrc_output}")
        # redundant: the with-block already closed the file
        oFTorrc.close()
    if oargs.bad_contacts and aBadContacts:
        # for later analysis
        with open(oargs.bad_contacts, 'wt') as oFYaml:
            yaml.dump(aBadContacts, oFYaml)
        oFYaml.close()
    if oargs.good_contacts != '' and aTRUST_DB:
        vwrite_good_contacts(oargs)
    # persist the updated bad/good node documents
    oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] = list(texclude_set)
    oBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS
    vwrite_badnodes(oargs, oBAD_NODES, str(len(texclude_set)))
    # NOTE(review): literal 'GoodNodes' here - elsewhere sGOOD_ROOT is used
    oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
    # EntryNodes are readony
    vwrite_goodnodes(oargs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
    vwritefinale(oargs, lNotInaRELAYS_DB)
    retval = 0
    try:
        # quiet stem while we push config into the controller
        logging.getLogger('stem').setLevel(30)
        if texclude_set:
            try:
                LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(texclude_set)} net bad exit relays")
                controller.set_conf(sEXCLUDE_EXIT_KEY, list(texclude_set))
            except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e:  # noqa
                LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor {e}")
                LOG.debug(repr(texclude_set))
                retval += 1
        if aTRUST_DB_INDEX.keys():
            # only proper 40-char fingerprints, not domain keys
            l = [elt for elt in aTRUST_DB_INDEX.keys() if len(elt) == 40]
            try:
                LOG.info(f"{sINCLUDE_EXIT_KEY} {len(l)} good relays")
                controller.set_conf(sINCLUDE_EXIT_KEY, l)
            except (Exception, stem.InvalidRequest, stem.SocketClosed) as e:  # noqa
                LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor {e}")
                LOG.debug(repr(l))
                retval += 1
        if 'EntryNodes' in oGOOD_NODES[sGOOD_ROOT].keys():
            try:
                LOG.info(f"{sINCLUDE_GUARD_KEY} {len(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])} guard nodes")
                # FixMe for now override StrictNodes it may be unusable otherwise
                controller.set_conf(sINCLUDE_GUARD_KEY, oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])
            except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e:  # noqa
                LOG.error(f"Failed setting {sINCLUDE_GUARD_KEY} guard nodes in Tor {e}")
                LOG.debug(repr(list(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])))
                retval += 1
        cur = controller.get_conf('StrictNodes')
        # NOTE(review): with --strict_nodes parsed as str this int/str
        # compare is always unequal - see the argparser fix.
        if oargs.strict_nodes and int(cur) != oargs.strict_nodes:
            LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
            controller.set_conf('StrictNodes', oargs.strict_nodes)
        else:
            LOG.info(f"StrictNodes is set to {cur}")
    except KeyboardInterrupt:
        return 0
    except Exception as e:
        LOG.exception(str(e))
        retval = 2
    finally:
        # wierd we are getting stem errors during the final return
        # with a traceback that doesnt correspond to any real flow
        # File "/usr/lib/python3.9/site-packages/stem/control.py", line 2474, in set_conf
        #   self.set_options({param: value}, False)
        logging.getLogger('stem').setLevel(40)
        try:
            for elt in controller._event_listeners:
                controller.remove_event_listener(elt)
            controller.close()
        except Exception as e:
            LOG.warn(str(e))
    sys.stdout.write("dns-rsa domains:\n" + '\n'.join(tdns_urls) + '\n')
    return retval

if __name__ == '__main__':
    try:
        # i = asyncio.run(iMain(sys.argv[1:]))
        i = iMain(sys.argv[1:])
    except IncorrectPassword as e:
        LOG.error(e)
        i = 1
    except KeyboardInterrupt:
        i = 0
    except Exception as e:
        LOG.exception(e)
        i = 2
    sys.exit(i)