diff --git a/README.md b/README.md index 8be6c22..371459b 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,18 @@ + This extends nusenu's basic idea of using the stem library to dynamically exclude nodes that are likely to be bad by putting them on the ExcludeNodes or ExcludeExitNodes setting of a running Tor. * https://github.com/nusenu/noContactInfo_Exit_Excluder * https://github.com/TheSmashy/TorExitRelayExclude -The basic cut is to exclude Exit nodes that do not have a contact. -That can be extended to nodes that do not have an email in the contact etc. +The basic idea is to exclude Exit nodes that do not have ContactInfo: +* https://github.com/nusenu/ContactInfo-Information-Sharing-Specification +That can be extended to relays that do not have an email in the contact, +or to relays that do not have ContactInfo that is verified to include them. But there's a problem, and your Tor notice.log will tell you about it: -you could exclude the nodes needed to access hidden services or -directorues. So we need to add to the process the concept of a whitelist. +you could exclude the relays needed to access hidden services or mirror +directories. So we need to add to the process the concept of a whitelist. In addition, we may have our own blacklist of nodes we want to exclude, or use these lists for other applications like selektor. @@ -30,96 +33,96 @@ BadNodes: # $0000000000000000000000000000000000000007 ``` That part requires [PyYAML](https://pyyaml.org/wiki/PyYAML) -https://github.com/yaml/pyyaml/ +https://github.com/yaml/pyyaml/ or ```ruamel```: do +```pip3 install ruamel``` or ```pip3 install PyYAML```; +the advantage of the former is that it preserves comments. -Right now only the ExcludeExitNodes section is used by we may add ExcludeNodes -later, and by default all sub-sections of the badnodes.yaml are used as a -ExcludeExitNodes but it can be customized with the lWanted commandline arg. 
- -The original idea has also been extended to add different conditions for -exclusion: the ```--contact``` commandline arg is a comma sep list of conditions: -* Empty - no contact info -* NoEmail - no @ sign in the contact', -More may be added later. +(You may have to run this as the Tor user to get RW access to +/run/tor/control, in which case the directory for the YAML files must +be group Tor writeable, and its parents group Tor RX.) Because you don't want to exclude the introduction points to any onion you want to connect to, ```--white_onions``` should whitelist the -introduction points to a comma sep list of onions, but is -currently broken in stem 1.8.0: see: +introduction points to a comma sep list of onions; we fixed stem to do this: * https://github.com/torproject/stem/issues/96 * https://gitlab.torproject.org/legacy/trac/-/issues/25417 ```--torrc_output``` will write the torrc ExcludeNodes configuration to a file. -Now for the final part: we lookup the Contact info of every server -that is currently in our Tor, and check it for its existence. -If it fails to provide the well-know url, we assume its a bogus -relay and add it to a list of nodes that goes on ExcludeNodes - -not just exclude Exit. - -If the Contact info is good we add the list of fingerprints to add -to ExitNodes, a whitelist of relays to use as exits. - -```--proof_output``` will write the contact info as a ciiss dictionary +```--good_contacts``` will write the contact info as a ciiss dictionary to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints is downloaded and the fingerprints are added on a 'fps' field we create of that fingerprint's entry of the YAML dictionary. This file is read at the beginning of the program to start with a trust database, and only new contact info from new relays are added to the dictionary. -You can expect it to take an hour or two the first time this is run: ->700 domains. 
+Now for the final part: we lookup the Contact info of every relay +that is currently in our Tor, and check it for the existence of the +well-known file that lists the fingerprints of the relays it runs. +If it fails to provide the well-known url, we assume it's a bad +relay and add it to a list of nodes that goes on ```ExcludeNodes``` +(not just ```ExcludeExitNodes```). If the Contact info is good, we add the +list of fingerprints to ```ExitNodes```, a whitelist of relays to use as exits. + +```--bad_on``` We offer the users 3 levels of cleaning: +1. clean relays that have no contact ```=Empty``` +2. clean relays that don't have an email in the contact (implies 1) + ```=Empty,NoEmail``` +3. clean relays that don't have "good" contactinfo. (implies 1) + ```=Empty,NoEmail,NotGood``` + +The default is ```=Empty,NotGood``` ; ```NoEmail``` is inherently imperfect +in that many of the contact-as-an-email are obfuscated, but we try anyway. + +To be "good" the ContactInfo must: +1. have a url for the well-defined-file to be gotten +2. must have a file that can be gotten at the URL +3. must support getting the file with a valid SSL cert from a recognized authority +4. (not in the spec but added by Python) must use a TLS SSL > v1 +5. must have a fingerprint list in the file +6.
must have the FP that got us the contactinfo in the fingerprint list in the file, For usage, do ```python3 exclude_badExits.py --help` -## Usage + + +## Usage ``` + usage: exclude_badExits.py [-h] [--https_cafile HTTPS_CAFILE] [--proxy_host PROXY_HOST] [--proxy_port PROXY_PORT] [--proxy_ctl PROXY_CTL] [--torrc TORRC] [--timeout TIMEOUT] [--good_nodes GOOD_NODES] - [--bad_nodes BAD_NODES] [--contact CONTACT] + [--bad_nodes BAD_NODES] [--bad_on BAD_ON] [--bad_contacts BAD_CONTACTS] [--strict_nodes {0,1}] [--wait_boot WAIT_BOOT] [--points_timeout POINTS_TIMEOUT] [--log_level LOG_LEVEL] [--bad_sections BAD_SECTIONS] - [--white_services WHITE_SERVICES] + [--white_onions WHITE_ONIONS] [--torrc_output TORRC_OUTPUT] - [--proof_output PROOF_OUTPUT] -``` + [--relays_output RELAYS_OUTPUT] + [--good_contacts GOOD_CONTACTS] -### Optional arguments: - -``` +optional arguments: -h, --help show this help message and exit --https_cafile HTTPS_CAFILE Certificate Authority file (in PEM) -``` -``` --proxy_host PROXY_HOST, --proxy-host PROXY_HOST proxy host --proxy_port PROXY_PORT, --proxy-port PROXY_PORT proxy control port --proxy_ctl PROXY_CTL, --proxy-ctl PROXY_CTL control socket - or port -``` -``` --torrc TORRC torrc to check for suggestions --timeout TIMEOUT proxy download connect timeout -``` -``` --good_nodes GOOD_NODES Yaml file of good info that should not be excluded --bad_nodes BAD_NODES Yaml file of bad nodes that should also be excluded -``` -``` - --contact CONTACT comma sep list of conditions - Empty,NoEmail + --bad_on BAD_ON comma sep list of conditions - Empty,NoEmail,NotGood --bad_contacts BAD_CONTACTS Yaml file of bad contacts that bad FPs are using -``` -``` --strict_nodes {0,1} Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable --wait_boot WAIT_BOOT @@ -127,23 +130,31 @@ usage: exclude_badExits.py [-h] [--https_cafile HTTPS_CAFILE] --points_timeout POINTS_TIMEOUT Timeout for getting introduction points - must be long 
>120sec. 0 means disabled looking for IPs -``` -``` --log_level LOG_LEVEL 10=debug 20=info 30=warn 40=error --bad_sections BAD_SECTIONS sections of the badnodes.yaml to use, comma separated, '' BROKEN -``` -``` - --white_services WHITE_SERVICES + --white_onions WHITE_ONIONS comma sep. list of onions to whitelist their introduction points - BROKEN -``` -``` --torrc_output TORRC_OUTPUT Write the torrc configuration to a file - --proof_output PROOF_OUTPUT + --relays_output RELAYS_OUTPUT + Write the download relays in json to a file + --good_contacts GOOD_CONTACTS Write the proof data of the included nodes to a YAML file + +This extends nusenu's basic idea of using the stem library to dynamically +exclude nodes that are likely to be bad by putting them on the ExcludeNodes or +ExcludeExitNodes setting of a running Tor. * +https://github.com/nusenu/noContactInfo_Exit_Excluder * +https://github.com/TheSmashy/TorExitRelayExclude The basic idea is to exclude +Exit nodes that do not have ContactInfo: * +https://github.com/nusenu/ContactInfo-Information-Sharing-Specification That +can be extended to relays that do not have an email in the contact, or to +relays that do not have ContactInfo that is verified to include them. + ``` + diff --git a/exclude_badExits.py b/exclude_badExits.py index 3c5a247..4561c6b 100644 --- a/exclude_badExits.py +++ b/exclude_badExits.py @@ -9,12 +9,17 @@ on the ExcludeNodes or ExcludeExitNodes setting of a running Tor. * https://github.com/nusenu/noContactInfo_Exit_Excluder * https://github.com/TheSmashy/TorExitRelayExclude -The basic cut is to exclude Exit nodes that do not have a contact. -That can be extended to nodes that do not have an email in the contact etc. 
+The basic idea is to exclude Exit nodes that do not have ContactInfo: +* https://github.com/nusenu/ContactInfo-Information-Sharing-Specification + +That can be extended to relays that do not have an email in the contact, +or to relays that do not have ContactInfo that is verified to include them. """ -"""But there's a problem, and your Tor notice.log will tell you about it: -you could exclude the nodes needed to access hidden services or -directorues. So we need to add to the process the concept of a whitelist. +__prolog__ = __doc__ + +__doc__ +="""But there's a problem, and your Tor notice.log will tell you about it: +you could exclude the relays needed to access hidden services or mirror +directories. So we need to add to the process the concept of a whitelist. In addition, we may have our own blacklist of nodes we want to exclude, or use these lists for other applications like selektor. @@ -35,36 +40,22 @@ BadNodes: # $0000000000000000000000000000000000000007 ``` That part requires [PyYAML](https://pyyaml.org/wiki/PyYAML) -https://github.com/yaml/pyyaml/ +https://github.com/yaml/pyyaml/ or ```ruamel```: do +```pip3 install ruamel``` or ```pip3 install PyYAML```; +the advantage of the former is that it preserves comments. -Right now only the ExcludeExitNodes section is used by we may add ExcludeNodes -later, and by default all sub-sections of the badnodes.yaml are used as a -ExcludeExitNodes but it can be customized with the lWanted commandline arg. - -The original idea has also been extended to add different conditions for -exclusion: the ```--contact``` commandline arg is a comma sep list of conditions: -* Empty - no contact info -* NoEmail - no @ sign in the contact', -More may be added later. +(You may have to run this as the Tor user to get RW access to +/run/tor/control, in which case the directory for the YAML files must +be group Tor writeable, and its parents group Tor RX.) 
Because you don't want to exclude the introduction points to any onion you want to connect to, ```--white_onions``` should whitelist the -introduction points to a comma sep list of onions, but is -currently broken in stem 1.8.0: see: +introduction points to a comma sep list of onions; we fixed stem to do this: * https://github.com/torproject/stem/issues/96 * https://gitlab.torproject.org/legacy/trac/-/issues/25417 ```--torrc_output``` will write the torrc ExcludeNodes configuration to a file. -Now for the final part: we lookup the Contact info of every server -that is currently in our Tor, and check it for its existence. -If it fails to provide the well-know url, we assume its a bogus -relay and add it to a list of nodes that goes on ExcludeNodes - -not just exclude Exit. - -If the Contact info is good we add the list of fingerprints to add -to ExitNodes, a whitelist of relays to use as exits. - ```--good_contacts``` will write the contact info as a ciiss dictionary to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints is downloaded and the fingerprints are added on a 'fps' field we create @@ -72,24 +63,51 @@ of that fingerprint's entry of the YAML dictionary. This file is read at the beginning of the program to start with a trust database, and only new contact info from new relays are added to the dictionary. -You can expect it to take an hour or two the first time this is run: ->700 domains. +Now for the final part: we lookup the Contact info of every relay +that is currently in our Tor, and check it the existence of the +well-known file that lists the fingerprints of the relays it runs. +If it fails to provide the well-know url, we assume its a bad +relay and add it to a list of nodes that goes on ```ExcludeNodes``` +(not just ExcludeExitNodes```). If the Contact info is good, we add the +list of fingerprints to ```ExitNodes```, a whitelist of relays to use as exits. + +```--bad_on``` We offer the users 3 levels of cleaning: +1. 
clean relays that have no contact ```=Empty``` +2. clean relays that don't have an email in the contact (implies 1) + ```=Empty,NoEmail``` +3. clean relays that don't have "good' contactinfo. (implies 1) + ```=Empty,NoEmail,NotGood``` + +The default is ```=Empty,NotGood``` ; ```NoEmail``` is inherently imperfect +in that many of the contact-as-an-email are obfuscated, but we try anyway. + +To be "good" the ContactInfo must: +1. have a url for the well-defined-file to be gotten +2. must have a file that can be gotten at the URL +3. must support getting the file with a valid SSL cert from a recognized authority +4. (not in the spec but added by Python) must use a TLS SSL > v1 +5. must have a fingerprint list in the file +6. must have the FP that got us the contactinfo in the fingerprint list in the file, For usage, do ```python3 exclude_badExits.py --help` """ +# https://github.com/nusenu/trustor-example-trust-config/blob/main/trust_config +# https://github.com/nusenu/tor-relay-operator-ids-trust-information + import argparse import os +import json import sys import time from io import StringIO import stem -import urllib3 from stem import InvalidRequest from stem.connection import IncorrectPassword from stem.util.tor_tools import is_valid_fingerprint +import urllib3 from urllib3.util.ssl_match_hostname import CertificateError # list(ipaddress._find_address_range(ipaddress.IPv4Network('172.16.0.0/12')) @@ -113,6 +131,13 @@ try: except: ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None +from support_onions import (bAreWeConnected, icheck_torrc, lIntroductionPoints, + oGetStemController, vwait_for_controller, + yKNOWN_NODNS, zResolveDomain) + +from trustor_poc import TrustorError, idns_validate +from trustor_poc import oDownloadUrlUrllib3 as oDownloadUrl + global LOG import logging import warnings @@ -120,18 +145,17 @@ import warnings warnings.filterwarnings('ignore') LOG = logging.getLogger() -from support_onions import (bAreWeConnected, icheck_torrc, lIntroductionPoints, - 
oGetStemController, vwait_for_controller, - yKNOWN_NODNS, zResolveDomain) -from support_phantompy import vsetup_logging -from trustor_poc import TrustorError, idns_validate -from trustor_poc import oDownloadUrlUrllib3 as oDownloadUrl +try: + from torcontactinfo import TorContactInfoParser + oPARSER = TorContactInfoParser() +except ImportError: + oPARSER = None -LOG.info("imported HTTPSAdapter") - -ETC_DIR = '/etc/tor/yaml' +ETC_DIR = '/usr/local/etc/tor/yaml' aTRUST_DB = {} aTRUST_DB_INDEX = {} +aRELAYS_DB = {} +aRELAYS_DB_INDEX = {} aFP_EMAIL = {} sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/" # You can call this while bootstrapping @@ -145,13 +169,13 @@ oBAD_NODES[oBAD_ROOT] = {} oBAD_NODES[oBAD_ROOT]['ExcludeNodes'] = {} lKNOWN_NODNS = [] -lMAYBE_NODNS = [] +tMAYBE_NODNS = set() def lYamlBadNodes(sFile, section=sEXCLUDE_EXIT_KEY, lWanted=['BadExit']): global oBAD_NODES global lKNOWN_NODNS - global lMAYBE_NODNS + global tMAYBE_NODNS if not yaml: return [] @@ -167,11 +191,10 @@ def lYamlBadNodes(sFile, l = oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] + tMAYBE_NODNS = set(safe_load(StringIO(yKNOWN_NODNS))) root = 'ExcludeDomains' - if root not in oBAD_NODES[oBAD_ROOT] or not oBAD_NODES[oBAD_ROOT][root]: - lMAYBE_NODNS = safe_load(StringIO(yKNOWN_NODNS)) - else: - lMAYBE_NODNS = oBAD_NODES[oBAD_ROOT][root] + if root in oBAD_NODES[oBAD_ROOT] and oBAD_NODES[oBAD_ROOT][root]: + tMAYBE_NODNS.extend(oBAD_NODES[oBAD_ROOT][root]) return l oGOOD_NODES = {} @@ -192,12 +215,12 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'): def bdomain_is_bad(domain, fp): global lKNOWN_NODNS if domain in lKNOWN_NODNS: return True - if domain in lMAYBE_NODNS: + if domain in tMAYBE_NODNS: ip = zResolveDomain(domain) if ip == '': LOG.debug(f"{fp} {domain} does not resolve") lKNOWN_NODNS.append(domain) - lMAYBE_NODNS.remove(domain) + tMAYBE_NODNS.remove(domain) return True for elt in '@(){}$!': @@ -207,31 +230,79 @@ def bdomain_is_bad(domain, fp): return False 
tBAD_URLS = set() +lAT_REPS = ['[]', ' at ', '(at)', '[at]', '', '(att)', '_at_', + '~at~', '.at.', '!at!', 't', '<(a)>', '|__at-|', '<:at:>', + '[__at ]', '"a t"', 'removeme at '] +lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>', + '<:dot:>', '|dot--|', + ] +lNO_EMAIL = ['', + 'not@needed.com', + '', + '', + 'not a person ', + r'', + '@snowden', + 'ano ano@fu.dk', + 'anonymous', + 'anonymous@buzzzz.com', + 'check http://highwaytohoell.de', + 'no@no.no', + 'not@needed.com', + 'not@re.al', + 'nothanks', + 'nottellingyou@mail.info', + 'ur@mom.com', + 'your@e-mail', + 'your@email.com', + ] +def sCleanEmail(s): + s = s.lower() + for elt in lAT_REPS: + s = s.replace(' ' + elt + ' ', '@').replace(elt, '@') + for elt in lDOT_REPS: + s = s.replace(elt, '.') + s = s.replace('(dash)', '-') + for elt in lNO_EMAIL: + s = s.replace(elt, '') + return s + lATS = ['abuse', 'email'] lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory'] lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone', 'sandbox', 'offlinemasterkey'] -def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050): - global tBAD_URLS - global lKNOWN_NODNS +def aCleanContact(a): # cleanups for elt in lINTS: if elt in a: a[elt] = int(a[elt]) for elt in lBOOLS: - if elt in a: - if a[elt] in ['y', 'yes', 'true', 'True']: - a[elt] = True - else: - a[elt] = False + if elt not in a: continue + if a[elt] in ['y', 'yes', 'true', 'True']: + a[elt] = True + else: + a[elt] = False for elt in lATS: - if elt in a: - a[elt] = a[elt].replace('[]', '@') - + if elt not in a: continue + a[elt] = sCleanEmail(a[elt]) + if 'url' in a.keys(): + a['url'] = a['url'].rstrip('/') + if a['url'].startswith('http://'): + domain = a['url'].replace('http://', '') + elif a['url'].startswith('https://'): + domain = a['url'].replace('https://', '') + else: + domain = a['url'] + a['url'] = 'https://' + domain a.update({'fps': []}) + return a + +def aVerifyContact(a, fp, 
https_cafile, timeout=20, host='127.0.0.1', port=9050): + global tBAD_URLS + global lKNOWN_NODNS keys = list(a.keys()) + a = aCleanContact(a) if 'email' not in keys: - LOG.warn(f"{fp} 'email' not in {keys}") a['email'] = '' if 'ciissversion' not in keys: aFP_EMAIL[fp] = a['email'] @@ -260,13 +331,10 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) LOG.debug(f"{fp} 'uri' but not 'url' in {keys}") # drop through - c = a['url'].lstrip('https://').lstrip('http://').strip('/') - a['url'] = 'https://' +c - - # domain should be a unique key for contacts - domain = a['url'][8:] + domain = a['url'].replace('https://', '').replace('http://', '') + # domain should be a unique key for contacts? if bdomain_is_bad(domain, fp): - LOG.warn(f"{domain} is bad from {a['url']}") + LOG.warn(f"{domain} is bad - {a['url']}") LOG.debug(f"{fp} is bad from {a}") return a @@ -277,7 +345,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) lKNOWN_NODNS.append(domain) return {} - if a['proof'] not in ['uri-rsa']: + if a['proof'] in ['dns-rsa']: # only support uri for now if False and ub_ctx: fp_domain = fp + '.' 
+ domain @@ -289,12 +357,13 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) LOG.warn(f"{fp} proof={a['proof']} not supported yet") return a - LOG.debug(f"{len(keys)} contact fields for {fp}") - url = f"https://{domain}/.well-known/tor-relay/rsa-fingerprint.txt" +# LOG.debug(f"{len(keys)} contact fields for {fp}") + url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt" try: LOG.debug(f"Downloading from {domain} for {fp}") o = oDownloadUrl(url, https_cafile, - timeout=timeout, host=host, port=port) + timeout=timeout, host=host, port=port, + content_type='text/plain') # requests response: text "reason", "status_code" except AttributeError as e: LOG.exception(f"AttributeError downloading from {domain} {e}") @@ -308,7 +377,8 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) else: LOG.warn(f"TrustorError downloading from {domain} {e.args}") tBAD_URLS.add(a['url']) - except urllib3.exceptions.MaxRetryError as e: # noqa + except (urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError,) as e: # noqa + # # maybe offline - not bad LOG.warn(f"MaxRetryError downloading from {domain} {e}") except (BaseException) as e: @@ -336,33 +406,45 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) if not l: LOG.warn(f"Downloading from {domain} empty for {fp}") else: - a['fps'] = [elt for elt in l if elt and len(elt) == 40 \ + a['fps'] = [elt.strip() for elt in l if elt \ and not elt.startswith('#')] LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs") + for elt in a['fps']: + if len(elt) != 40: + LOG.warn(f"len !=40 from {domain} '{elt}'") return a -def aParseContactYaml(contact, fp): +def aParseContact(contact, fp): """ See the Tor ContactInfo Information Sharing Specification v2 https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ """ - lelts = contact.split() a = {} - if len(lelts) % 1 != 0: - LOG.warn(f"bad contact for {fp} odd number of 
components") - LOG.debug(f"{fp} {a}") - return a - key = '' + if not contact: + LOG.warn(f"null contact for {fp}") + LOG.debug(f"{fp} {contact}") + return {} + # shlex? + lelts = contact.split(' ') + if not lelts: + LOG.warn(f"empty contact for {fp}") + LOG.debug(f"{fp} {contact}") + return {} for elt in lelts: - if key == '': - key = elt + if ':' not in elt: + # hoster:Quintex Alliance Consulting + LOG.warn(f"no : in {elt} for {contact} in {fp}") continue - a[key] = elt - key = '' - LOG.debug(f"{fp} {len(a.keys())} fields") + (key , val,) = elt.split(':', 1) + if key == '': + continue + key = key.rstrip(':') + a[key] = val + a = aCleanContact(a) +# LOG.debug(f"{fp} {len(a.keys())} fields") return a -def aParseContact(contact, fp): +def aParseContactYaml(contact, fp): """ See the Tor ContactInfo Information Sharing Specification v2 https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ @@ -393,7 +475,7 @@ def oMainArgparser(_=None): CAfs = [''] parser = argparse.ArgumentParser(add_help=True, - epilog=__doc__) + epilog=__prolog__) parser.add_argument('--https_cafile', type=str, help="Certificate Authority file (in PEM)", default=CAfs[0]) @@ -420,8 +502,8 @@ def oMainArgparser(_=None): parser.add_argument('--bad_nodes', type=str, default=os.path.join(ETC_DIR, 'badnodes.yaml'), help="Yaml file of bad nodes that should also be excluded") - parser.add_argument('--contact', type=str, default='Empty,NoEmail', - help="comma sep list of conditions - Empty,NoEmail") + parser.add_argument('--bad_on', type=str, default='Empty,NotGood', + help="comma sep list of conditions - Empty,NoEmail,NotGood") parser.add_argument('--bad_contacts', type=str, default=os.path.join(ETC_DIR, 'badcontacts.yaml'), help="Yaml file of bad contacts that bad FPs are using") @@ -443,6 +525,9 @@ def oMainArgparser(_=None): parser.add_argument('--torrc_output', type=str, default=os.path.join(ETC_DIR, 'torrc.new'), help="Write the torrc configuration to a file") + 
parser.add_argument('--relays_output', type=str, + default=os.path.join(ETC_DIR, 'relays.json'), + help="Write the download relays in json to a file") parser.add_argument('--good_contacts', type=str, default=os.path.join(ETC_DIR, 'goodcontacts.yaml'), help="Write the proof data of the included nodes to a YAML file") return parser @@ -471,12 +556,134 @@ def vwrite_goodnodes(oargs, oGOOD_NODES, ilen): os.rename(oargs.good_nodes, bak) os.rename(tmp, oargs.good_nodes) +def lget_onionoo_relays(oargs): + import requests + adata = {} + if oargs.relays_output and os.path.exists(oargs.relays_output): + LOG.info(f"Getting OO relays from {oargs.relays_output}") + try: + with open(oargs.relays_output, 'rt') as ofd: + sdata = ofd.read() + adata = json.loads(sdata) + except Exception as e: + LOG.error(f"Getting data relays from {oargs.relays_output}") + adata = {} + if not adata: + surl = "https://onionoo.torproject.org/details" + LOG.info(f"Getting OO relays from {surl}") + sCAfile = oargs.https_cafile + assert os.path.exists(sCAfile), sCAfile + if True: + try: + o = oDownloadUrl(surl, sCAfile, + timeout=oargs.timeout, + host=oargs.proxy_host, + port=oargs.proxy_port, + content_type='') + if hasattr(o, 'text'): + data = o.text + else: + data = str(o.data, 'UTF-8') + except Exception as e: + # simplejson.errors.JSONDecodeError + # urllib3.exceptions import ConnectTimeoutError, NewConnectionError + # (urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError,) + LOG.exception(f"JSON error {e}") + return [] + else: + LOG.debug(f"Downloaded {surl} {len(sdata)} bytes") + adata = json.loads(data) + else: + odata = requests.get(surl, verify=sCAfile) + try: + adata = odata.json() + except Exception as e: + # simplejson.errors.JSONDecodeError + LOG.exception(f"JSON error {e}") + return [] + else: + LOG.debug(f"Downloaded {surl} {len(adata)} relays") + sdata = repr(adata) + + if oargs.relays_output: + try: + with open(oargs.relays_output, 'wt') as ofd: + ofd.write(sdata) + 
except Exception as e: + LOG.warn(f"Error {oargs.relays_output} {e}") + else: + LOG.debug(f"Wrote {oargs.relays_output} {len(sdata)} bytes") + lonionoo_relays = [r for r in adata["relays"] if 'fingerprint' in r.keys()] + return lonionoo_relays + +def vsetup_logging(log_level, logfile='', stream=sys.stdout): + global LOG + add = True + + try: + if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ: + os.environ['COLOREDLOGS_LEVEL_STYLES'] = 'spam=22;debug=28;verbose=34;notice=220;warning=202;success=118,bold;error=124;critical=background=red' + # https://pypi.org/project/coloredlogs/ + import coloredlogs + except ImportError: + coloredlogs = False + + # stem fucks up logging + # from stem.util import log + logging.getLogger('stem').setLevel(30) + + logging._defaultFormatter = logging.Formatter(datefmt='%m-%d %H:%M:%S') + logging._defaultFormatter.default_time_format = '%m-%d %H:%M:%S' + logging._defaultFormatter.default_msec_format = '' + + kwargs = dict(level=log_level, + force=True, + format='%(levelname)s %(message)s') + + if logfile: + add = logfile.startswith('+') + sub = logfile.startswith('-') + if add or sub: + logfile = logfile[1:] + kwargs['filename'] = logfile + + if coloredlogs: + # https://pypi.org/project/coloredlogs/ + aKw = dict(level=log_level, + logger=LOG, + stream=stream, + fmt='%(levelname)s %(message)s' + ) + coloredlogs.install(**aKw) + if logfile: + oHandler = logging.FileHandler(logfile) + LOG.addHandler(oHandler) + LOG.info(f"CSetting log_level to {log_level} {stream}") + else: + logging.basicConfig(**kwargs) + if add and logfile: + oHandler = logging.StreamHandler(stream) + LOG.addHandler(oHandler) + LOG.info(f"SSetting log_level to {log_level!s}") + +def vwritefinale(oargs, lNotInaRELAYS_DB): + if len(lNotInaRELAYS_DB): + LOG.warn(f"{len(lNotInaRELAYS_DB)} relays from stem were not in onionoo.torproject.org") + + LOG.info(f"For info on a FP, use: https://nusenu.github.io/OrNetStats/w/relay/.html") + LOG.info(f"For info on relays, use: 
https://onionoo.torproject.org/details") + # https://onionoo.torproject.org/details + LOG.info(f"although it's often broken") + def iMain(lArgs): global aTRUST_DB global aTRUST_DB_INDEX global oBAD_NODES global oGOOD_NODES global lKNOWN_NODNS + global aRELAYS_DB + global aRELAYS_DB_INDEX + parser = oMainArgparser() oargs = parser.parse_args(lArgs) @@ -484,13 +691,21 @@ def iMain(lArgs): if bAreWeConnected() is False: raise SystemExit("we are not connected") + if os.path.exists(oargs.proxy_ctl): + controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl) + else: + port =int(oargs.proxy_ctl) + controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port) + + vwait_for_controller(controller, oargs.wait_boot) + sFile = oargs.torrc if sFile and os.path.exists(sFile): icheck_torrc(sFile, oargs) twhitelist_set = set() sFile = oargs.good_contacts - if sFile and os.path.exists(sFile): + if False and sFile and os.path.exists(sFile): try: with open(sFile, 'rt') as oFd: aTRUST_DB = safe_load(oFd) @@ -511,14 +726,6 @@ def iMain(lArgs): except Exception as e: LOG.exception(f"Error reading YAML TrustDB {sFile} {e}") - if os.path.exists(oargs.proxy_ctl): - controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl) - else: - port =int(oargs.proxy_ctl) - controller = oGetStemController(port=port) - - vwait_for_controller(controller, oargs.wait_boot) - if oargs.good_contacts: good_contacts_tmp = oargs.good_contacts + '.tmp' @@ -542,9 +749,12 @@ def iMain(lArgs): t = set(oGOOD_NODES[oGOOD_ROOT]['Relays']['IntroductionPoints']) w = set() if 'Services' in oGOOD_NODES[oGOOD_ROOT].keys(): - # 'Onions' can I use the IntroductionPoints for Services too? 
- # w = set(oGOOD_NODES[oGOOD_ROOT]['Services']) - pass + w = set(oGOOD_NODES[oGOOD_ROOT]['Services']) + twhitelist_set.update(w) + if len(w) > 0: + LOG.info(f"Whitelist {len(t)} relays from Services") + + w = set() if 'Onions' in oGOOD_NODES[oGOOD_ROOT].keys(): # Provides the descriptor for a hidden service. The **address** is the # '.onion' address of the hidden service @@ -555,7 +765,7 @@ def iMain(lArgs): LOG.info(f"{len(w)} services will be checked from IntroductionPoints") t.update(lIntroductionPoints(controller, w, itimeout=oargs.points_timeout)) if len(t) > 0: - LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} services") + LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} IPs for onions") twhitelist_set.update(t) texclude_set = set() @@ -573,10 +783,12 @@ def iMain(lArgs): iFakeContact = 0 iTotalContacts = 0 aBadContacts = {} - - lConds = oargs.contact.split(',') + lNotInaRELAYS_DB = [] + aRELAYS_DB = {elt['fingerprint'].upper(): elt for + elt in lget_onionoo_relays(oargs) + if 'fingerprint' in elt} + lConds = oargs.bad_on.split(',') iR = 0 - relays = controller.get_server_descriptors() for relay in relays: iR += 1 @@ -586,6 +798,12 @@ def iMain(lArgs): relay.fingerprint = relay.fingerprint.upper() sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}" + + fp = relay.fingerprint + if aRELAYS_DB and fp not in aRELAYS_DB.keys(): + LOG.warn(f"{fp} not in aRELAYS_DB") + lNotInaRELAYS_DB += [fp] + if not relay.exit_policy.is_exiting_allowed(): if sEXCLUDE_EXIT_KEY == 'ExcludeNodes': pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}") @@ -602,78 +820,79 @@ def iMain(lArgs): # dunno relay.contact = str(relay.contact, 'UTF-8') - if ('Empty' in lConds and not relay.contact) or \ - ('NoEmail' in lConds and relay.contact and 'email:' not in relay.contact): + # fail if the contact is empty + if ('Empty' in lConds and not relay.contact): + 
LOG.info(f"{fp} skipping empty contact - Empty {sofar}") texclude_set.add(relay.fingerprint) continue - if not relay.contact or 'ciissversion:' not in relay.contact: - # should be unreached 'Empty' should always be in lConds + contact = sCleanEmail(relay.contact) + # fail if the contact has no email - unreliable + if ('NoEmail' in lConds and relay.contact and + ('@' not in contact and 'email:' not in contact)): + LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}") + LOG.debug(f"{fp} {relay.contact} {sofar}") + texclude_set.add(relay.fingerprint) continue + + # fail if the contact does not pass + if ('NotGood' in lConds and relay.contact and + ('ciissversion:' not in relay.contact)): + LOG.info(f"{fp} skipping no ciissversion in contact {sofar}") + LOG.debug(f"{fp} {relay.contact} {sofar}") + texclude_set.add(relay.fingerprint) + continue + + # if it has a ciissversion in contact we count it in total iTotalContacts += 1 - fp = relay.fingerprint - if relay.contact and 'url:' not in relay.contact: - LOG.info(f"{fp} skipping bad contact - no url: {sofar}") + # fail if the contact does not have url: to pass + if relay.contact and 'url' not in relay.contact: + LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}") LOG.debug(f"{fp} {relay.contact} {sofar}") + if ('NotGood' in lConds): texclude_set.add(fp) + continue + + # only proceed if 'NotGood' not in lConds: + if 'NotGood' not in lConds: continue + + # fail if the contact does not have url: to pass + a = aParseContact(relay.contact, relay.fingerprint) + if not a: + LOG.warn(f"{relay.fingerprint} contact did not parse {sofar}") texclude_set.add(fp) continue - c = relay.contact.lower() - # first rough cut - i = c.find('url:') - if i >=0: - c = c[i + 4:] - i = c.find(' ') - if i >=0: c = c[:i] - c = c.lstrip('https://').lstrip('http://').strip('/') - i = c.find('/') - if i >=0: c = c[:i] - domain = c - if domain and bdomain_is_bad(domain, fp): - LOG.info(f"{fp} skipping bad {domain} {sofar}") - 
LOG.debug(f"{fp} {relay.contact} {sofar}") - texclude_set.add(fp) - continue - - if domain: - ip = zResolveDomain(domain) - if not ip: - LOG.warn(f"{fp} {domain} did not resolve {sofar}") - texclude_set.add(fp) - lKNOWN_NODNS.append(domain) + if 'url' in a and a['url']: + # fail if the contact uses a url we already know is bad + if a['url'] in tBAD_URLS: + LOG.info(f"{relay.fingerprint} skipping in tBAD_URLS {a['url']} {sofar}") + LOG.debug(f"{relay.fingerprint} {a} {sofar}") + # The fp is using a contact with a URL we know is bad iFakeContact += 1 + texclude_set.add(relay.fingerprint) + continue + + domain = a['url'].replace('https://', '').replace('http://', '') + # fail if the contact uses a domain we already know does not resolve + if domain in lKNOWN_NODNS: + # The fp is using a contact with a URL we know is bogus + LOG.info(f"{relay.fingerprint} skipping in lKNOWN_NODNS {a} {sofar}") + LOG.debug(f"{relay.fingerprint} {relay} {sofar}") + iFakeContact += 1 + texclude_set.add(relay.fingerprint) continue if 'dns-rsa' in relay.contact.lower(): + # skip if the contact uses a dns-rsa url we dont handle target = f"{relay.fingerprint}.{domain}" LOG.info(f"skipping 'dns-rsa' {target} {sofar}") tdns_urls.add(target) + continue - elif 'proof:uri-rsa' in relay.contact.lower(): - a = aParseContact(relay.contact, relay.fingerprint) - if not a: - LOG.warn(f"{relay.fingerprint} did not parse {sofar}") - texclude_set.add(relay.fingerprint) - continue - if 'url' in a and a['url']: - if a['url'] in tBAD_URLS: - # The fp is using a contact with a URL we know is bad - LOG.info(f"{relay.fingerprint} skipping in tBAD_URLS {a['url']} {sofar}") - LOG.debug(f"{relay.fingerprint} {a} {sofar}") - iFakeContact += 1 - texclude_set.add(relay.fingerprint) - continue - domain = a['url'].replace('https://', '').replace('http://', '') - if domain in lKNOWN_NODNS: - # The fp is using a contact with a URL we know is bogus - LOG.info(f"{relay.fingerprint} skipping in lKNOWN_NODNS {a['url']} 
{sofar}") - LOG.debug(f"{relay.fingerprint} {a} {sofar}") - iFakeContact += 1 - texclude_set.add(relay.fingerprint) - continue - - b = aVerifyContact(list(a.values())[0], + if 'proof:uri-rsa' in relay.contact.lower(): + # list(a.values())[0] + b = aVerifyContact(a, relay.fingerprint, oargs.https_cafile, timeout=oargs.timeout, @@ -697,16 +916,11 @@ def iMain(lArgs): aBadContacts[relay.fingerprint] = b continue - LOG.info(f"{relay.fingerprint} verified {b['url']} {sofar}") + LOG.info(f"{relay.fingerprint} GOOD {b['url']} {sofar}") # add our contact info to the trustdb aTRUST_DB[relay.fingerprint] = b for elt in b['fps']: aTRUST_DB_INDEX[elt] = b - if oargs.good_contacts and oargs.log_level <= 20: - # as we go along then clobber - with open(good_contacts_tmp, 'wt') as oFYaml: - yaml.dump(aTRUST_DB, oFYaml) - oFYaml.close() LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays") texclude_set = texclude_set.difference(twhitelist_set) @@ -746,6 +960,8 @@ def iMain(lArgs): # GuardNodes are readonl vwrite_goodnodes(oargs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys())) + vwritefinale(oargs, lNotInaRELAYS_DB) + retval = 0 try: logging.getLogger('stem').setLevel(30) diff --git a/support_onions.py b/support_onions.py index 0c463a1..6da6529 100644 --- a/support_onions.py +++ b/support_onions.py @@ -33,41 +33,41 @@ bHAVE_TORR = shutil.which('tor-resolve') # in the wild we'll keep a copy here so we can avoid restesting yKNOWN_NODNS = """ --- - - 0x0.is - - a9.wtf - - aklad5.com - - artikel5ev.de - - arvanode.net - - dodo.pm - - dra-family.github.io - - eraldonion.org - - erjan.net - - galtland.network - - ineapple.cx - - lonet.sh - - moneneis.de - - olonet.sh - - or-exit-2.aa78i2efsewr0neeknk.xyz - - or.wowplanet.de - - ormycloud.org - - plied-privacy.net - - rivacysvcs.net - - redacted.org - - rification-for-nusenu.net - - rofl.cat - - rsv.ch - - sv.ch + - heraldonion.org + - linkspartei.org + - pineapple.cx - thingtohide.nl - - tikel10.org - - tor.wowplanet.de - 
tor-exit-2.aa78i2efsewr0neeknk.xyz - tor-exit-3.aa78i2efsewr0neeknk.xyz - - torix-relays.org - - tse.com + - tor.dlecan.com - tuxli.org - - w.digidow.eu - - w.cccs.de + - verification-for-nusenu.net """ +# - 0x0.is +# - a9.wtf +# - aklad5.com +# - artikel5ev.de +# - arvanode.net +# - dodo.pm +# - erjan.net +# - galtland.network +# - lonet.sh +# - moneneis.de +# - olonet.sh +# - or-exit-2.aa78i2efsewr0neeknk.xyz +# - or.wowplanet.de +# - ormycloud.org +# - plied-privacy.net +# - rivacysvcs.net +# - redacted.org +# - rofl.cat +# - sv.ch +# - tikel10.org +# - tor.wowplanet.de +# - torix-relays.org +# - tse.com +# - w.digidow.eu +# - w.cccs.de def oMakeController(sSock='', port=9051): import getpass @@ -86,13 +86,15 @@ def oGetStemController(log_level=10, sock_or_pair='/run/tor/control'): global oSTEM_CONTROLER if oSTEM_CONTROLER: return oSTEM_CONTROLER import stem.util.log - stem.util.log.Runlevel = log_level + # stem.util.log.Runlevel = 'DEBUG' = 20 # log_level if os.path.exists(sock_or_pair): LOG.info(f"controller from socket {sock_or_pair}") controller = Controller.from_socket_file(path=sock_or_pair) else: - if ':' in sock_or_pair: + if type(sock_or_pair) == int: + port = sock_or_pair + elif ':' in sock_or_pair: port = sock_or_pair.split(':')[1] else: port = sock_or_pair diff --git a/trustor_poc.py b/trustor_poc.py index 5445ee5..4527e0c 100644 --- a/trustor_poc.py +++ b/trustor_poc.py @@ -8,7 +8,6 @@ import os import re import sys -import requests from stem.control import Controller # from stem.util.tor_tools import * from urllib3.util import parse_url as urlparse @@ -213,6 +212,7 @@ def find_validation_candidates(controller, return result def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050): + import requests # socks proxy used for outbound web requests (for validation of proofs) proxy = {'https': "socks5h://{host}:{port}"} # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files @@ -372,7 +372,11 
@@ from urllib3.contrib.socks import SOCKSProxyManager # from urllib3 import Retry -def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050): +def oDownloadUrlUrllib3(uri, sCAfile, + timeout=30, + host='127.0.0.1', + port=9050, + content_type=''): """Theres no need to use requests here and it adds too many layers on the SSL to be able to get at things """ @@ -404,8 +408,8 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050): if head.status >= 300: raise TrustorError(f"HTTP Errorcode {head.status}") - if not head.headers['Content-Type'].startswith('text/plain'): - raise TrustorError(f"HTTP Content-Type != text/plain") + if content_type and not head.headers['Content-Type'].startswith(content_type): + raise TrustorError(f"HTTP Content-Type != {content_type}") if not os.path.exists(sCAfile): raise TrustorError(f"File not found CAfile {sCAfile}") @@ -419,8 +423,8 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050): raise if oReqResp.status != 200: raise TrustorError(f"HTTP Errorcode {head.status}") - if not oReqResp.headers['Content-Type'].startswith('text/plain'): - raise TrustorError(f"HTTP Content-Type != text/plain") + if content_type and not oReqResp.headers['Content-Type'].startswith(content_type): + raise TrustorError(f"HTTP Content-Type != {content_type}") # check for redirects (not allowed as per spec) if oReqResp.geturl() != uri: @@ -429,6 +433,7 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050): oReqResp.decode_content = True return oReqResp + import urllib3.connectionpool from urllib3.connection import HTTPSConnection