Async added and removed

emdee 2022-11-27 01:10:18 +00:00
parent 08626942d3
commit 204a6adc48
4 changed files with 483 additions and 258 deletions

View File

@@ -14,7 +14,7 @@ LARGS=(
 # you may have a special python for installed packages
 EXE=`which python3.bash`
 LARGS+=(
-    --strict_nodes 0
+    --strict_nodes 1
     --points_timeout 120
     --proxy-host 127.0.0.1
     --proxy-port $SOCKS_PORT
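
The only functional change in this wrapper is the --strict_nodes default, which the Python script below forwards to Tor's StrictNodes option. As a minimal sketch of that round-trip via stem (the control port 9051 and the authentication setup are assumptions, not something this commit configures):

    # sketch: reading and overriding StrictNodes through stem (assumed setup)
    from stem.control import Controller

    with Controller.from_port(port=9051) as controller:
        controller.authenticate()
        cur = controller.get_conf('StrictNodes')
        if cur != '1':
            # 1 = use only the listed nodes, even if that makes sites unreachable
            controller.set_conf('StrictNodes', '1')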

View File

@@ -136,8 +136,14 @@ from support_onions import (bAreWeConnected, icheck_torrc, lIntroductionPoints,
                             yKNOWN_NODNS, zResolveDomain)
 from trustor_poc import TrustorError, idns_validate
-from trustor_poc import oDownloadUrlUrllib3 as oDownloadUrl
+try:
+    import xxxhttpx
+    import asyncio
+    from trustor_poc import oDownloadUrlHttpx
+except:
+    httpx = None
+    from trustor_poc import oDownloadUrlUrllib3Socks as oDownloadUrl
 global LOG
 import logging
 import warnings
@@ -157,16 +163,32 @@ aTRUST_DB_INDEX = {}
 aRELAYS_DB = {}
 aRELAYS_DB_INDEX = {}
 aFP_EMAIL = {}
+aDOMAIN_FPS = {}
 sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/"
 # You can call this while bootstrapping
 sEXCLUDE_EXIT_KEY = 'ExcludeNodes'
 sINCLUDE_EXIT_KEY = 'ExitNodes'
-sINCLUDE_GUARD_KEY = 'EntryNodes'
-oBAD_NODES = {}
 oBAD_ROOT = 'BadNodes'
-oBAD_NODES[oBAD_ROOT] = {}
-oBAD_NODES[oBAD_ROOT]['ExcludeNodes'] = {}
+oBAD_NODES = safe_load("""
+BadNodes:
+  ExcludeDomains: []
+  ExcludeNodes:
+    BadExit: []
+""")
+sGOOD_ROOT = 'GoodNodes'
+sINCLUDE_GUARD_KEY = 'EntryNodes'
+sEXCLUDE_DOMAINS = 'ExcludeDomains'
+oGOOD_NODES = safe_load("""
+GoodNodes:
+  EntryNodes: []
+  Relays:
+    ExitNodes: []
+    IntroductionPoints: []
+  Onions: []
+  Services: []
+""")
 lKNOWN_NODNS = []
 tMAYBE_NODNS = set()
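
The indentation inside the two safe_load() string literals above is lost in this view; the nesting shown is reconstructed from how the keys are read later in the diff (oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'], oGOOD_NODES[sGOOD_ROOT]['Relays']['IntroductionPoints'], and the top-level 'Onions' and 'Services' lookups), so treat the exact layout as an assumption. A standalone check of that shape:

    # sketch: the dict shape the GoodNodes default should parse to (reconstructed)
    from yaml import safe_load

    oGOOD_NODES = safe_load("""
    GoodNodes:
      EntryNodes: []
      Relays:
        ExitNodes: []
        IntroductionPoints: []
      Onions: []
      Services: []
    """)
    assert oGOOD_NODES['GoodNodes']['Relays']['IntroductionPoints'] == []
    assert oGOOD_NODES['GoodNodes']['Onions'] == []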
@@ -184,21 +206,19 @@ def lYamlBadNodes(sFile,
             oBAD_NODES = safe_load(oFd)
     # BROKEN
-    # root = 'ExcludeNodes'
+    # root = sEXCLUDE_EXIT_KEY
     # for elt in o[oBAD_ROOT][root][section].keys():
     #     if lWanted and elt not in lWanted: continue
     #     # l += o[oBAD_ROOT][root][section][elt]
-    l = oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit']
+    l = oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit']
     tMAYBE_NODNS = set(safe_load(StringIO(yKNOWN_NODNS)))
-    root = 'ExcludeDomains'
+    root = sEXCLUDE_DOMAINS
     if root in oBAD_NODES[oBAD_ROOT] and oBAD_NODES[oBAD_ROOT][root]:
         tMAYBE_NODNS.extend(oBAD_NODES[oBAD_ROOT][root])
     return l

-oGOOD_NODES = {}
-oGOOD_ROOT = 'GoodNodes'
 def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
     global oGOOD_NODES
     l = []
@@ -207,8 +227,8 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
         with open(sFile, 'rt') as oFd:
             o = safe_load(oFd)
         oGOOD_NODES = o
-        if 'GuardNodes' in o[oGOOD_ROOT].keys():
-            l = o[oGOOD_ROOT]['GuardNodes']
+        if 'EntryNodes' in o[sGOOD_ROOT].keys():
+            l = o[sGOOD_ROOT]['EntryNodes']
     # yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
     return l
@@ -236,18 +256,20 @@ lAT_REPS = ['[]', ' at ', '(at)', '[at]', '<at>', '(att)', '_at_',
 lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>',
              '<:dot:>', '|dot--|',
              ]
-lNO_EMAIL = ['<nobody at example dot com>',
-    'not@needed.com',
+lNO_EMAIL = [
+    '<nobody at example dot com>',
     '<nobody at none of your business xyz>',
     '<not-set@example.com>',
-    'not a person <nomail at yet dot com>',
-    r'<nothing/at\\mail.de>',
     '@snowden',
     'ano ano@fu.dk',
     'anonymous',
     'anonymous@buzzzz.com',
     'check http://highwaytohoell.de',
+    'no-spam@tor.org',
     'no@no.no',
+    'noreply@bytor.com',
+    'not a person <nomail at yet dot com>',
+    'not@needed.com',
     'not@needed.com',
     'not@re.al',
     'nothanks',
@@ -255,6 +277,7 @@ lNO_EMAIL = ['<nobody at example dot com>',
     'ur@mom.com',
     'your@e-mail',
     'your@email.com',
+    r'<nothing/at\\mail.de>',
     ]

 def sCleanEmail(s):
     s = s.lower()
@@ -297,17 +320,26 @@ def aCleanContact(a):
     a.update({'fps': []})
     return a

-def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050):
+def bVerifyContact(a=None, fp=None, https_cafile=None):
+    global aFP_EMAIL
     global tBAD_URLS
     global lKNOWN_NODNS
+    global aTRUST_DB
+    global aTRUST_DB_INDEX
+    assert a
+    assert fp
+    assert https_cafile
     keys = list(a.keys())
     a = aCleanContact(a)
+    a['fp'] = fp
     if 'email' not in keys:
         a['email'] = ''
     if 'ciissversion' not in keys:
         aFP_EMAIL[fp] = a['email']
         LOG.warn(f"{fp} 'ciissversion' not in {keys}")
-        a['ciissversion'] = 2
+        return a
     # test the url for fps and add it to the array
     if 'proof' not in keys:
         aFP_EMAIL[fp] = a['email']
@@ -343,7 +375,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
             aFP_EMAIL[fp] = a['email']
             LOG.debug(f"{fp} {domain} does not resolve")
             lKNOWN_NODNS.append(domain)
-            return {}
+            return a

     if a['proof'] in ['dns-rsa']:
         # only support uri for now
@@ -354,16 +386,56 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
                          dnssec_DS_file='dnssec-root-trust',
                          ) == 0:
             pass
-        LOG.warn(f"{fp} proof={a['proof']} not supported yet")
+        LOG.warn(f"{fp} proof={a['proof']} - assumed good")
+        a['fps'] = [fp]
+        aTRUST_DB_INDEX[fp] = a
         return a
+    return True

+# async
+# If we keep a cache of FPs that we have gotten by downloading a URL
+# we can avoid re-downloading the URL of other FPs in the list of relays.
+# If we parallelize the gathering of the URLs, we may have simultaneous
+# gathers of the same URL from different relays, defeating the advantage
+# of going parallel. The cache is global aDOMAIN_FPS.
+def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None):
+    global aFP_EMAIL
+    global tBAD_URLS
+    global lKNOWN_NODNS
+    global aDOMAIN_FPS
+    assert a
+    assert fp
+    assert https_cafile
+    r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile)
+    if r is not True:
+        return r
+    domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/')
+    if domain in aDOMAIN_FPS.keys():
+        a['fps'] = aDOMAIN_FPS[domain]
+        return a
     # LOG.debug(f"{len(keys)} contact fields for {fp}")
     url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt"
+    if url in aDOMAIN_FPS.keys():
+        a['fps'] = aDOMAIN_FPS[url]
+        return a
+    if bAreWeConnected() is False:
+        raise SystemExit("we are not connected")
     try:
-        LOG.debug(f"Downloading from {domain} for {fp}")
-        o = oDownloadUrl(url, https_cafile,
-                         timeout=timeout, host=host, port=port,
-                         content_type='text/plain')
+        if httpx:
+            LOG.debug(f"Downloading from {domain} for {fp}")
+            # await
+            o = oDownloadUrl(url, https_cafile,
+                             timeout=timeout, host=host, port=port,
+                             content_type='text/plain')
+        else:
+            LOG.debug(f"Downloading from {domain} for {fp}")
+            o = oDownloadUrl(url, https_cafile,
+                             timeout=timeout, host=host, port=port,
+                             content_type='text/plain')
     # requests response: text "reason", "status_code"
     except AttributeError as e:
         LOG.exception(f"AttributeError downloading from {domain} {e}")
@@ -384,34 +456,57 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
     except (BaseException) as e:
         LOG.error(f"Exception {type(e)} downloading from {domain} {e}")
     else:
-        if hasattr(o, 'status'):
-            status_code = o.status
-        else:
-            status_code = o.status_code
-        if status_code >= 300:
-            aFP_EMAIL[fp] = a['email']
-            LOG.warn(f"Error from {domain} {status_code} {o.reason}")
-            # any reason retry?
-            tBAD_URLS.add(a['url'])
-            return a
-        if hasattr(o, 'text'):
-            data = o.text
-        else:
-            data = str(o.data, 'UTF-8')
-        l = data.upper().strip().split('\n')
-        LOG.debug(f"Downloaded from {domain} {len(l)} lines {len(data)} bytes")
-        a['modified'] = int(time.time())
-        if not l:
-            LOG.warn(f"Downloading from {domain} empty for {fp}")
-        else:
-            a['fps'] = [elt.strip() for elt in l if elt \
-                        and not elt.startswith('#')]
-            LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs")
-            for elt in a['fps']:
-                if len(elt) != 40:
-                    LOG.warn(f"len !=40 from {domain} '{elt}'")
+        a = aContactFps(oargs, a, o, domain)
+        LOG.debug(f"Downloaded from {domain} {len(a['fps'])} FPs for {fp}")
+        aDOMAIN_FPS[domain] = a['fps']
+        url = a['url']
+        aDOMAIN_FPS[url] = a['fps']
+        return a
+
+def aContactFps(oargs, a, o, domain):
+    global aFP_EMAIL
+    global tBAD_URLS
+    global lKNOWN_NODNS
+    global aDOMAIN_FPS
+    if hasattr(o, 'status'):
+        status_code = o.status
+    else:
+        status_code = o.status_code
+    if status_code >= 300:
+        aFP_EMAIL[fp] = a['email']
+        LOG.warn(f"Error from {domain} {status_code} {o.reason}")
+        # any reason retry?
+        tBAD_URLS.add(a['url'])
+        return a
+    if hasattr(o, 'text'):
+        data = o.text
+    else:
+        data = str(o.data, 'UTF-8')
+    l = data.upper().strip().split('\n')
+    LOG.debug(f"Downloaded from {domain} {len(l)} lines {len(data)} bytes")
+    if oargs.wellknown_output:
+        sdir = os.path.join(oargs.wellknown_output, domain,
+                            '.well-known', 'tor-relay')
+        try:
+            if not os.path.isdir(sdir):
+                os.makedirs(sdir)
+            sfile = os.path.join(sdir, "rsa-fingerprint.txt")
+            with open(sfile, 'wt') as oFd:
+                oFd.write(data)
+        except Exception as e:
+            LOG.warn(f"Error writing {sfile} {e}")
+    a['modified'] = int(time.time())
+    if not l:
+        LOG.warn(f"Downloaded from {domain} empty for {fp}")
+    else:
+        a['fps'] = [elt.strip() for elt in l if elt \
+                    and len(elt) == 40 \
+                    and not elt.startswith('#')]
+        LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs")
+    aDOMAIN_FPS[domain] = a['fps']
     return a

 def aParseContact(contact, fp):
@@ -432,16 +527,18 @@ def aParseContact(contact, fp):
         return {}
     for elt in lelts:
         if ':' not in elt:
+            if elt == 'DFRI':
+                # oddball
+                continue
             # hoster:Quintex Alliance Consulting
             LOG.warn(f"no : in {elt} for {contact} in {fp}")
-            continue
+            return {}
         (key , val,) = elt.split(':', 1)
         if key == '':
             continue
         key = key.rstrip(':')
         a[key] = val
     a = aCleanContact(a)
+    # LOG.debug(f"{fp} {len(a.keys())} fields")
     return a

 def aParseContactYaml(contact, fp):
@@ -508,8 +605,9 @@ def oMainArgparser(_=None):
                         default=os.path.join(ETC_DIR, 'badcontacts.yaml'),
                         help="Yaml file of bad contacts that bad FPs are using")
-    parser.add_argument('--strict_nodes', type=int, default=0, choices=[0, 1],
-                        help="Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable")
+    parser.add_argument('--strict_nodes', type=str, default=0,
+                        choices=['0', '1'],
+                        help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable")
     parser.add_argument('--wait_boot', type=int, default=120,
                         help="Seconds to wait for Tor to bootstrap")
     parser.add_argument('--points_timeout', type=int, default=0,
@@ -528,10 +626,25 @@ def oMainArgparser(_=None):
     parser.add_argument('--relays_output', type=str,
                         default=os.path.join(ETC_DIR, 'relays.json'),
                         help="Write the downloaded relays in json to a file")
+    parser.add_argument('--wellknown_output', type=str,
+                        default=os.path.join(ETC_DIR, 'https'),
+                        help="Write the well-known files to a directory")
     parser.add_argument('--good_contacts', type=str, default=os.path.join(ETC_DIR, 'goodcontacts.yaml'),
                         help="Write the proof data of the included nodes to a YAML file")
     return parser

+def vwrite_good_contacts(oargs):
+    global aTRUST_DB
+    good_contacts_tmp = oargs.good_contacts + '.tmp'
+    with open(good_contacts_tmp, 'wt') as oFYaml:
+        yaml.dump(aTRUST_DB, oFYaml)
+        oFYaml.close()
+    if os.path.exists(oargs.good_contacts):
+        bak = oargs.good_contacts +'.bak'
+        os.rename(oargs.good_contacts, bak)
+    os.rename(good_contacts_tmp, oargs.good_contacts)
+    LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oargs.good_contacts}")
+
 def vwrite_badnodes(oargs, oBAD_NODES, slen):
     if oargs.bad_nodes:
         tmp = oargs.bad_nodes +'.tmp'
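
The new vwrite_good_contacts() follows the same write-to-temp-then-rename pattern as vwrite_badnodes() and vwrite_goodnodes(): a crash mid-dump can never truncate the previous YAML, and one .bak generation is kept. Reduced to its core (the helper name here is illustrative, not from the commit; os.replace() would be the strictly atomic variant):

    # sketch: the tmp-plus-rename pattern shared by the vwrite_* helpers
    import os
    import yaml

    def vwrite_yaml(sfile, data):
        tmp = sfile + '.tmp'
        with open(tmp, 'wt') as ofd:
            yaml.dump(data, ofd)
        if os.path.exists(sfile):
            os.rename(sfile, sfile + '.bak')  # keep one backup generation
        os.rename(tmp, sfile)  # os.replace() overwrites atomically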
@@ -560,6 +673,7 @@ def lget_onionoo_relays(oargs):
     import requests
     adata = {}
     if oargs.relays_output and os.path.exists(oargs.relays_output):
+        # and less than a day old?
         LOG.info(f"Getting OO relays from {oargs.relays_output}")
         try:
             with open(oargs.relays_output, 'rt') as ofd:
@@ -581,9 +695,9 @@ def lget_onionoo_relays(oargs):
                              port=oargs.proxy_port,
                              content_type='')
             if hasattr(o, 'text'):
-                data = o.text
+                sdata = o.text
             else:
-                data = str(o.data, 'UTF-8')
+                sdata = str(o.data, 'UTF-8')
         except Exception as e:
             # simplejson.errors.JSONDecodeError
             # urllib3.exceptions import ConnectTimeoutError, NewConnectionError
@@ -592,7 +706,7 @@ def lget_onionoo_relays(oargs):
             return []
         else:
             LOG.debug(f"Downloaded {surl} {len(sdata)} bytes")
-            adata = json.loads(data)
+            adata = json.loads(sdata)
     else:
         odata = requests.get(surl, verify=sCAfile)
         try:
@@ -675,15 +789,101 @@ def vwritefinale(oargs, lNotInaRELAYS_DB):
     # https://onionoo.torproject.org/details
     LOG.info(f"although it's often broken")

-def iMain(lArgs):
+def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0):
     global aTRUST_DB
     global aTRUST_DB_INDEX
-    global oBAD_NODES
-    global oGOOD_NODES
-    global lKNOWN_NODNS
-    global aRELAYS_DB
-    global aRELAYS_DB_INDEX
+    sofar = ''
+    fp = b['fp']
+    # need to skip urllib3.exceptions.MaxRetryError
+    if not b or 'fps' not in b or not b['fps'] or not b['url']:
+        LOG.warn(f"{fp} did NOT VERIFY {sofar}")
+        LOG.debug(f"{fp} {b} {sofar}")
+        # If it's giving contact info that doesn't check out
+        # it could be a bad exit with fake contact info
+        texclude_set.add(fp)
+        aBadContacts[fp] = b
+        return None
+
+    if fp not in b['fps']:
+        LOG.warn(f"{fp} the FP IS NOT in the list of fps {sofar}")
+        # assume a fp is using a bogus contact
+        texclude_set.add(fp)
+        aBadContacts[fp] = b
+        return False
+
+    LOG.info(f"{fp} GOOD {b['url']} {sofar}")
+    # add our contact info to the trustdb
+    aTRUST_DB[fp] = b
+    for elt in b['fps']:
+        aTRUST_DB_INDEX[elt] = b
+    return True
+
+def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB):
+    global aTRUST_DB
+    global aTRUST_DB_INDEX
+    if not is_valid_fingerprint(relay.fingerprint):
+        LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
+        return None
+    fp = relay.fingerprint
+    if aRELAYS_DB and fp not in aRELAYS_DB.keys():
+        LOG.warn(f"{fp} not in aRELAYS_DB")
+        lNotInaRELAYS_DB += [fp]
+    if not relay.exit_policy.is_exiting_allowed():
+        if sEXCLUDE_EXIT_KEY == sEXCLUDE_EXIT_KEY:
+            pass # LOG.debug(f"{fp} not an exit {sofar}")
+        else:
+            pass # LOG.warn(f"{fp} not an exit {sofar}")
+        # return None
+    # great contact had good fps and we are in them
+    if fp in aTRUST_DB_INDEX.keys():
+        # a cached entry
+        return None
+    if type(relay.contact) == bytes:
+        # dunno
+        relay.contact = str(relay.contact, 'UTF-8')
+    # fail if the contact is empty
+    if ('Empty' in lConds and not relay.contact):
+        LOG.info(f"{fp} skipping empty contact - Empty {sofar}")
+        texclude_set.add(fp)
+        return None
+    contact = sCleanEmail(relay.contact)
+    # fail if the contact has no email - unreliable
+    if ('NoEmail' in lConds and relay.contact and
+        ('@' not in contact and 'email:' not in contact)):
+        LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}")
+        LOG.debug(f"{fp} {relay.contact} {sofar}")
+        texclude_set.add(fp)
+        return None
+    # fail if the contact does not pass
+    if ('NotGood' in lConds and relay.contact and
+        ('ciissversion:' not in relay.contact)):
+        LOG.info(f"{fp} skipping no ciissversion in contact {sofar}")
+        LOG.debug(f"{fp} {relay.contact} {sofar}")
+        texclude_set.add(fp)
+        return None
+    # fail if the contact does not have url: to pass
+    if relay.contact and 'url' not in relay.contact:
+        LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}")
+        LOG.debug(f"{fp} {relay.contact} {sofar}")
+        if ('NotGood' in lConds): texclude_set.add(fp)
+        return None
+    return True
+
+def oMainPreamble(lArgs):
+    global aTRUST_DB
+    global aTRUST_DB_INDEX

     parser = oMainArgparser()
     oargs = parser.parse_args(lArgs)
@@ -691,21 +891,12 @@ def iMain(lArgs):
     if bAreWeConnected() is False:
         raise SystemExit("we are not connected")

-    if os.path.exists(oargs.proxy_ctl):
-        controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl)
-    else:
-        port =int(oargs.proxy_ctl)
-        controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port)
-    vwait_for_controller(controller, oargs.wait_boot)
-
     sFile = oargs.torrc
     if sFile and os.path.exists(sFile):
         icheck_torrc(sFile, oargs)

-    twhitelist_set = set()
     sFile = oargs.good_contacts
-    if False and sFile and os.path.exists(sFile):
+    if sFile and os.path.exists(sFile):
         try:
             with open(sFile, 'rt') as oFd:
                 aTRUST_DB = safe_load(oFd)
@@ -726,8 +917,16 @@ def iMain(lArgs):
         except Exception as e:
             LOG.exception(f"Error reading YAML TrustDB {sFile} {e}")

-    if oargs.good_contacts:
-        good_contacts_tmp = oargs.good_contacts + '.tmp'
+    return oargs

+def oStemController(oargs):
+    if os.path.exists(oargs.proxy_ctl):
+        controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl)
+    else:
+        port = int(oargs.proxy_ctl)
+        controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port)
+    vwait_for_controller(controller, oargs.wait_boot)

     elt = controller.get_conf('UseMicrodescriptors')
     if elt != '0':
@@ -740,25 +939,31 @@ def iMain(lArgs):
     if elt and elt != '{??}':
         LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already")

+    return controller
+
+def tWhitelistSet(oargs, controller):
+    twhitelist_set = set()
     twhitelist_set.update(set(lYamlGoodNodes(oargs.good_nodes)))
-    LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} GuardNodes from {oargs.good_nodes}")
-    global oGOOD_NODES
+    LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} EntryNodes from {oargs.good_nodes}")
     t = set()
-    if 'IntroductionPoints' in oGOOD_NODES[oGOOD_ROOT]['Relays'].keys():
-        t = set(oGOOD_NODES[oGOOD_ROOT]['Relays']['IntroductionPoints'])
+    if sGOOD_ROOT in oGOOD_NODES and 'Relays' in oGOOD_NODES[sGOOD_ROOT] and \
+       'IntroductionPoints' in oGOOD_NODES[sGOOD_ROOT]['Relays'].keys():
+        t = set(oGOOD_NODES[sGOOD_ROOT]['Relays']['IntroductionPoints'])
     w = set()
-    if 'Services' in oGOOD_NODES[oGOOD_ROOT].keys():
-        w = set(oGOOD_NODES[oGOOD_ROOT]['Services'])
+    if sGOOD_ROOT in oGOOD_NODES and 'Services' in oGOOD_NODES[sGOOD_ROOT].keys():
+        w = set(oGOOD_NODES[sGOOD_ROOT]['Services'])
     twhitelist_set.update(w)
     if len(w) > 0:
         LOG.info(f"Whitelist {len(t)} relays from Services")
     w = set()
-    if 'Onions' in oGOOD_NODES[oGOOD_ROOT].keys():
+    if 'Onions' in oGOOD_NODES[sGOOD_ROOT].keys():
         # Provides the descriptor for a hidden service. The **address** is the
         # '.onion' address of the hidden service
-        w = set(oGOOD_NODES[oGOOD_ROOT]['Onions'])
+        w = set(oGOOD_NODES[sGOOD_ROOT]['Onions'])
     if oargs.white_onions:
         w.update(oargs.white_onions.split(','))
     if oargs.points_timeout > 0:
@@ -768,6 +973,9 @@ def iMain(lArgs):
         LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} IPs for onions")
     twhitelist_set.update(t)

+    return twhitelist_set
+
+def tExcludeSet(oargs):
     texclude_set = set()
     if oargs.bad_nodes and os.path.exists(oargs.bad_nodes):
         if False and oargs.bad_sections:
@@ -778,150 +986,117 @@ def iMain(lArgs):
                                          section=sEXCLUDE_EXIT_KEY))
         LOG.info(f"Preloaded {len(texclude_set)} bad fps")
+    return texclude_set

+# async
+def iMain(lArgs):
+    global aTRUST_DB
+    global aTRUST_DB_INDEX
+    global oBAD_NODES
+    global oGOOD_NODES
+    global lKNOWN_NODNS
+    global aRELAYS_DB
+    global aRELAYS_DB_INDEX
+    global tBAD_URLS
+    oargs = oMainPreamble(lArgs)
+    controller = oStemController(oargs)
+    twhitelist_set = tWhitelistSet(oargs, controller)
+    texclude_set = tExcludeSet(oargs)
     ttrust_db_index = aTRUST_DB_INDEX.keys()
     tdns_urls = set()
     iFakeContact = 0
     iTotalContacts = 0
     aBadContacts = {}
     lNotInaRELAYS_DB = []
-    aRELAYS_DB = {elt['fingerprint'].upper(): elt for
-                  elt in lget_onionoo_relays(oargs)
-                  if 'fingerprint' in elt}
-    lConds = oargs.bad_on.split(',')
     iR = 0
     relays = controller.get_server_descriptors()
+    lqueue = []
+    socksu = f"socks5://{oargs.proxy_host}:{oargs.proxy_port}"
     for relay in relays:
         iR += 1
-        if not is_valid_fingerprint(relay.fingerprint):
-            LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
-            continue
-        relay.fingerprint = relay.fingerprint.upper()
+        fp = relay.fingerprint = relay.fingerprint.upper()

         sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
-        fp = relay.fingerprint
-        if aRELAYS_DB and fp not in aRELAYS_DB.keys():
-            LOG.warn(f"{fp} not in aRELAYS_DB")
-            lNotInaRELAYS_DB += [fp]
-        if not relay.exit_policy.is_exiting_allowed():
-            if sEXCLUDE_EXIT_KEY == 'ExcludeNodes':
-                pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}")
-            else:
-                pass # LOG.warn(f"{relay.fingerprint} not an exit {sofar}")
-            # continue
-        # great contact had good fps and we are in them
-        if relay.fingerprint in aTRUST_DB_INDEX.keys():
-            # a cached entry
-            continue
-        if type(relay.contact) == bytes:
-            # dunno
-            relay.contact = str(relay.contact, 'UTF-8')
-        # fail if the contact is empty
-        if ('Empty' in lConds and not relay.contact):
-            LOG.info(f"{fp} skipping empty contact - Empty {sofar}")
-            texclude_set.add(relay.fingerprint)
-            continue
-        contact = sCleanEmail(relay.contact)
-        # fail if the contact has no email - unreliable
-        if ('NoEmail' in lConds and relay.contact and
-            ('@' not in contact and 'email:' not in contact)):
-            LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}")
-            LOG.debug(f"{fp} {relay.contact} {sofar}")
-            texclude_set.add(relay.fingerprint)
-            continue
-        # fail if the contact does not pass
-        if ('NotGood' in lConds and relay.contact and
-            ('ciissversion:' not in relay.contact)):
-            LOG.info(f"{fp} skipping no ciissversion in contact {sofar}")
-            LOG.debug(f"{fp} {relay.contact} {sofar}")
-            texclude_set.add(relay.fingerprint)
-            continue
+        lConds = oargs.bad_on.split(',')
+        r = bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB)
+        if r is not True: continue
         # if it has a ciissversion in contact we count it in total
         iTotalContacts += 1
-        # fail if the contact does not have url: to pass
-        if relay.contact and 'url' not in relay.contact:
-            LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}")
-            LOG.debug(f"{fp} {relay.contact} {sofar}")
-            if ('NotGood' in lConds): texclude_set.add(fp)
-            continue
         # only proceed if 'NotGood' not in lConds:
         if 'NotGood' not in lConds: continue
         # fail if the contact does not have url: to pass
-        a = aParseContact(relay.contact, relay.fingerprint)
+        a = aParseContact(relay.contact, fp)
         if not a:
-            LOG.warn(f"{relay.fingerprint} contact did not parse {sofar}")
+            LOG.warn(f"{fp} contact did not parse {sofar}")
             texclude_set.add(fp)
             continue
         if 'url' in a and a['url']:
             # fail if the contact uses a url we already know is bad
             if a['url'] in tBAD_URLS:
-                LOG.info(f"{relay.fingerprint} skipping in tBAD_URLS {a['url']} {sofar}")
-                LOG.debug(f"{relay.fingerprint} {a} {sofar}")
-                # The fp is using a contact with a URL we know is bad
-                iFakeContact += 1
-                texclude_set.add(relay.fingerprint)
+                LOG.info(f"{fp} skipping in tBAD_URLS {a['url']} {sofar}")
+                LOG.debug(f"{fp} {a} {sofar}")
+                texclude_set.add(fp)
                 continue
             domain = a['url'].replace('https://', '').replace('http://', '')
             # fail if the contact uses a domain we already know does not resolve
             if domain in lKNOWN_NODNS:
                 # The fp is using a contact with a URL we know is bogus
-                LOG.info(f"{relay.fingerprint} skipping in lKNOWN_NODNS {a} {sofar}")
-                LOG.debug(f"{relay.fingerprint} {relay} {sofar}")
-                iFakeContact += 1
-                texclude_set.add(relay.fingerprint)
+                LOG.info(f"{fp} skipping in lKNOWN_NODNS {a} {sofar}")
+                LOG.debug(f"{fp} {relay} {sofar}")
+                texclude_set.add(fp)
                 continue
+            # drop through

         if 'dns-rsa' in relay.contact.lower():
             # skip if the contact uses a dns-rsa url we don't handle
-            target = f"{relay.fingerprint}.{domain}"
+            target = f"{fp}.{domain}"
             LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
             tdns_urls.add(target)
             continue

         if 'proof:uri-rsa' in relay.contact.lower():
-            # list(a.values())[0]
-            b = aVerifyContact(a,
-                               relay.fingerprint,
-                               oargs.https_cafile,
-                               timeout=oargs.timeout,
-                               host=oargs.proxy_host,
-                               port=oargs.proxy_port)
-            # need to skip urllib3.exceptions.MaxRetryError
-            if not b or 'fps' not in b or not b['fps'] or not b['url']:
-                LOG.warn(f"{relay.fingerprint} did NOT VERIFY {sofar}")
-                LOG.debug(f"{relay.fingerprint} {b} {sofar}")
-                # If it's giving contact info that doesnt check out
-                # it could be a bad exit with fake contact info
-                texclude_set.add(relay.fingerprint)
-                aBadContacts[relay.fingerprint] = b
-                continue
-
-            if relay.fingerprint not in b['fps']:
-                LOG.warn(f"{relay.fingerprint} the FP IS NOT in the list of fps {sofar}")
-                # assume a fp is using a bogus contact
-                texclude_set.add(relay.fingerprint)
-                iFakeContact += 1
-                aBadContacts[relay.fingerprint] = b
-                continue
-
-            LOG.info(f"{relay.fingerprint} GOOD {b['url']} {sofar}")
-            # add our contact info to the trustdb
-            aTRUST_DB[relay.fingerprint] = b
-            for elt in b['fps']:
-                aTRUST_DB_INDEX[elt] = b
+            if domain in aDOMAIN_FPS.keys(): continue
+            a['fp'] = fp
+            if httpx:
+                lqueue.append(asyncio.create_task(
+                    aVerifyContact(a=a,
+                                   fp=fp,
+                                   https_cafile=oargs.https_cafile,
+                                   timeout=oargs.timeout,
+                                   host=oargs.proxy_host,
+                                   port=oargs.proxy_port,
+                                   oargs=oargs)))
+            else:
+                b = aVerifyContact(a=a,
+                                   fp=fp,
+                                   https_cafile=oargs.https_cafile,
+                                   timeout=oargs.timeout,
+                                   host=oargs.proxy_host,
+                                   port=oargs.proxy_port,
+                                   oargs=oargs)
+                r = bProcessContact(b, texclude_set, aBadContacts, iFakeContact)
+                if r is False:
+                    iFakeContact += 1
+
+    if httpx:
+        # for b in asyncio.as_completed(lqueue):
+        for b in lqueue:
+            # r = await b
+            r = b
+            r = bProcessContact(r, texclude_set, aBadContacts, iFakeContact)
+            if r is False:
+                iFakeContact += 1
+            elif r is True:
+                # iGoodContact += 1
+                pass

     LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
     texclude_set = texclude_set.difference(twhitelist_set)
     # accept the dns-rsa urls for now until we test them
@@ -932,7 +1107,7 @@ def iMain(lArgs):
     with open(oargs.torrc_output, 'wt') as oFTorrc:
         oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
         oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
-        oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])}\n")
+        oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n")
         LOG.info(f"Wrote tor configuration to {oargs.torrc_output}")
         oFTorrc.close()
@@ -943,21 +1118,14 @@ def iMain(lArgs):
         oFYaml.close()

     if oargs.good_contacts != '' and aTRUST_DB:
-        with open(good_contacts_tmp, 'wt') as oFYaml:
-            yaml.dump(aTRUST_DB, oFYaml)
-            oFYaml.close()
-        if os.path.exists(oargs.good_contacts):
-            bak = oargs.good_contacts +'.bak'
-            os.rename(oargs.good_contacts, bak)
-        os.rename(good_contacts_tmp, oargs.good_contacts)
-        LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oargs.good_contacts}")
+        vwrite_good_contacts(oargs)

-    oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] = list(texclude_set)
-    oBAD_NODES[oBAD_ROOT]['ExcludeDomains'] = lKNOWN_NODNS
+    oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] = list(texclude_set)
+    oBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS
     vwrite_badnodes(oargs, oBAD_NODES, str(len(texclude_set)))

     oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
-    # GuardNodes are readonl
+    # EntryNodes are readonly
     vwrite_goodnodes(oargs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))

     vwritefinale(oargs, lNotInaRELAYS_DB)
@@ -965,50 +1133,48 @@ def iMain(lArgs):
     retval = 0
     try:
         logging.getLogger('stem').setLevel(30)
-        try:
-            if texclude_set:
+        if texclude_set:
+            try:
                 LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(texclude_set)} net bad exit relays")
-                controller.set_conf(sEXCLUDE_EXIT_KEY, texclude_set)
-        except stem.SocketClosed as e: # noqa
-            LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor")
-            retval += 1
+                controller.set_conf(sEXCLUDE_EXIT_KEY, list(texclude_set))
+            except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa
+                LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor {e}")
+                LOG.debug(repr(texclude_set))
+                retval += 1

-        try:
-            if aTRUST_DB_INDEX.keys():
-                LOG.info(f"{sINCLUDE_EXIT_KEY} {len(aTRUST_DB_INDEX.keys())} good relays")
-                controller.set_conf(sINCLUDE_EXIT_KEY, aTRUST_DB_INDEX.keys())
-        except stem.SocketClosed as e: # noqa
-            LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
-            retval += 1
+        if aTRUST_DB_INDEX.keys():
+            l = [elt for elt in aTRUST_DB_INDEX.keys() if len(elt) == 40]
+            try:
+                LOG.info(f"{sINCLUDE_EXIT_KEY} {len(l)} good relays")
+                controller.set_conf(sINCLUDE_EXIT_KEY, l)
+            except (Exception, stem.InvalidRequest, stem.SocketClosed) as e: # noqa
+                LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor {e}")
+                LOG.debug(repr(l))
+                retval += 1

-        try:
-            if 'GuardNodes' in oGOOD_NODES[oGOOD_ROOT].keys():
-                LOG.info(f"{sINCLUDE_GUARD_KEY} {len(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])} guard nodes")
+        if 'EntryNodes' in oGOOD_NODES[sGOOD_ROOT].keys():
+            try:
+                LOG.info(f"{sINCLUDE_GUARD_KEY} {len(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])} guard nodes")
                 # FixMe for now override StrictNodes it may be unusable otherwise
                 controller.set_conf(sINCLUDE_GUARD_KEY,
-                                    oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])
-                cur = controller.get_conf('StrictNodes')
-                if oargs.strict_nodes and int(cur) != oargs.strict_nodes:
-                    LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
-                    controller.set_conf('StrictNodes', oargs.strict_nodes)
-                else:
-                    LOG.info(f"StrictNodes is set to {cur}")
-        except stem.SocketClosed as e: # noqa
-            LOG.errro(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
-            retval += 1
+                                    oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])
+            except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa
+                LOG.error(f"Failed setting {sINCLUDE_GUARD_KEY} guard nodes in Tor {e}")
+                LOG.debug(repr(list(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])))
+                retval += 1
+
+        cur = controller.get_conf('StrictNodes')
+        if oargs.strict_nodes and int(cur) != oargs.strict_nodes:
+            LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
+            controller.set_conf('StrictNodes', oargs.strict_nodes)
+        else:
+            LOG.info(f"StrictNodes is set to {cur}")
+
+    except InvalidRequest as e:
+        # Unacceptable option value: Invalid router list.
+        LOG.error(str(e))
+        retval = 1
+        return retval
     except KeyboardInterrupt:
         return 0
     except Exception as e:
         LOG.exception(str(e))
         retval = 2
-        return retval
     finally:
         # weird we are getting stem errors during the final return
         # with a traceback that doesn't correspond to any real flow
@@ -1027,6 +1193,7 @@ def iMain(lArgs):
 if __name__ == '__main__':
     try:
+        # i = asyncio.run(iMain(sys.argv[1:]))
         i = iMain(sys.argv[1:])
     except IncorrectPassword as e:
         LOG.error(e)
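
As committed, the async path is dead code, which matches the commit title: "import xxxhttpx" always fails, so httpx stays None and the synchronous oDownloadUrlUrllib3Socks path runs. Even with the import restored, iMain() is not declared async, asyncio.create_task() requires a running event loop, and the drain loop binds r to the Task object itself ("r = b") rather than awaiting it. Two smaller landmines also remain: aContactFps() logs fp without receiving it as a parameter (the callers stash it in a['fp']), and lYamlBadNodes() calls .extend() on the set tMAYBE_NODNS, which sets do not have (set.update() is the likely intent). A sketch of the wiring the commented-out asyncio.run() line seems to anticipate; the function name and gather() usage are assumptions, not part of the commit:

    # sketch: awaiting the queued aVerifyContact() tasks (assumed wiring)
    import asyncio

    async def iMain_async(lqueue, texclude_set, aBadContacts):
        iFakeContact = 0
        # return_exceptions=True keeps one failed fetch from aborting the sweep
        for b in await asyncio.gather(*lqueue, return_exceptions=True):
            if isinstance(b, BaseException):
                continue  # failures are already logged inside aVerifyContact
            if bProcessContact(b, texclude_set, aBadContacts) is False:
                iFakeContact += 1
        return iFakeContact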

View File

@@ -33,9 +33,12 @@ bHAVE_TORR = shutil.which('tor-resolve')
 # in the wild we'll keep a copy here so we can avoid retesting
 yKNOWN_NODNS = """
 ---
+- a9.wtf
 - heraldonion.org
 - linkspartei.org
 - pineapple.cx
+- privacylayer.xyz
+- prsv.ch
 - thingtohide.nl
 - tor-exit-2.aa78i2efsewr0neeknk.xyz
 - tor-exit-3.aa78i2efsewr0neeknk.xyz
@@ -44,7 +47,6 @@ yKNOWN_NODNS = """
 - verification-for-nusenu.net
 """
 # - 0x0.is
-# - a9.wtf
 # - aklad5.com
 # - artikel5ev.de
 # - arvanode.net

View File

@@ -7,10 +7,15 @@ import datetime
 import os
 import re
 import sys
+import ipaddress
+import warnings
+
+import urllib3.util
+from urllib3.util import parse_url as urlparse

 from stem.control import Controller
 # from stem.util.tor_tools import *
-from urllib3.util import parse_url as urlparse

 try:
     # unbound is not on pypi
@@ -20,11 +25,13 @@ except:

 global LOG
 import logging
-import warnings
 warnings.filterwarnings('ignore')
 LOG = logging.getLogger()
+logging.getLogger("urllib3").setLevel(logging.INFO)
+
+# import urllib3.contrib.pyopenssl
+# urllib3.contrib.pyopenssl.inject_into_urllib3()

 # download this python library from
 # https://github.com/erans/torcontactinfoparser
 # sys.path.append('/home/....')
@@ -211,7 +218,7 @@ def find_validation_candidates(controller,
                 result[domain] = {prooftype: [fingerprint]}
     return result

-def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
+def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain', session=None):
     import requests
     # socks proxy used for outbound web requests (for validation of proofs)
     proxy = {'https': f"socks5h://{host}:{port}"}
@@ -225,6 +232,7 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         # urllib3.connection WARNING Certificate did not match expected hostname:
         head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
     except Exception as e:
+        LOG.exception(f"{e}")
         raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")

     if head.status_code >= 300:
@@ -234,15 +242,15 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
     if not os.path.exists(sCAfile):
         raise TrustorError(f"File not found CAfile {sCAfile}")

+    if session is None: session = requests.sessions.Session()
     try:
-        with requests.sessions.Session() as session:
-            oReqResp = session.request(method="get", url=uri,
-                                       proxies=proxy,
-                                       timeout=timeout,
-                                       headers=headers,
-                                       allow_redirects=False,
-                                       verify=True
-                                       )
+        oReqResp = session.request(method="get", url=uri,
+                                   proxies=proxy,
+                                   timeout=timeout,
+                                   headers=headers,
+                                   allow_redirects=False,
+                                   verify=True
+                                   )
     except:
         LOG.warn("HTTP GET request failed for %s" % uri)
         raise
@@ -257,13 +265,61 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         raise TrustorError(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
     return oReqResp

-logging.getLogger("urllib3").setLevel(logging.INFO)
-# import urllib3.contrib.pyopenssl
-# urllib3.contrib.pyopenssl.inject_into_urllib3()
-
-import ipaddress
-
-import urllib3.util
-
+# There's no point in using asyncio because of duplicate urls in the tasks
+async def oDownloadUrlHttpx(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain'):
+    import httpcore
+    import asyncio
+    import httpx
+    # socks proxy used for outbound web requests (for validation of proofs)
+    if host and port:
+        proxy = f"socks5://{host}:{port}"
+    else:
+        proxy = ''
+    # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files
+    # https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#uri-rsa
+    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'}

+    LOG.debug("fetching %s...." % uri)
+    async with httpx.AsyncClient(proxies=proxy) as client:
+        try:
+            # https://www.python-httpx.org/advanced/
+            head = await client.head(uri, timeout=timeout, headers=headers)
+        except Exception as e:
+            LOG.exception(f"{e}")
+            raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")

+        if head.status_code >= 300:
+            raise TrustorError(f"HTTP Errorcode {head.status_code}")
+        if content_type and not head.headers['Content-Type'].startswith(content_type):
+            raise TrustorError(f"HTTP Content-Type != {content_type}")
+        if not os.path.exists(sCAfile):
+            raise TrustorError(f"File not found CAfile {sCAfile}")

+        try:
+            oReqResp = await client.get(url=uri,
+                                        timeout=timeout,
+                                        headers=headers,
+                                        max_redirects=0,
+                                        verify=sCAfile,
+                                        )
+        except (asyncio.exceptions.CancelledError,
+                httpcore.PoolTimeout,
+                Exception,) as e:
+            LOG.warn(f"HTTP GET request failed for %s {e}" % uri)
+            raise
+    if oReqResp.status_code != 200:
+        LOG.warn(f"HTTP Errorcode {head.status_code}")
+        raise TrustorError(f"HTTP Errorcode {head.status_code}")
+    if not oReqResp.headers['Content-Type'].startswith('text/plain'):
+        LOG.warn(f"HTTP Content-Type != text/plain")
+        raise TrustorError(f"HTTP Content-Type != text/plain")
+    # check for redirects (not allowed as per spec)
+    if oReqResp.url != uri:
+        LOG.error(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
+        raise TrustorError(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
+    return oReqResp

 def ballow_subdomain_matching(hostname, dnsnames):
def ballow_subdomain_matching(hostname, dnsnames): def ballow_subdomain_matching(hostname, dnsnames):
@ -276,7 +332,6 @@ def ballow_subdomain_matching(hostname, dnsnames):
from urllib3.util.ssl_match_hostname import (CertificateError, _dnsname_match, from urllib3.util.ssl_match_hostname import (CertificateError, _dnsname_match,
_ipaddress_match) _ipaddress_match)
def my_match_hostname(cert, hostname): def my_match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by """Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
@ -370,13 +425,14 @@ urllib3.connection._match_hostname = _my_match_hostname
from urllib3.contrib.socks import SOCKSProxyManager from urllib3.contrib.socks import SOCKSProxyManager
# from urllib3 import Retry # from urllib3 import Retry
def oDownloadUrlUrllib3(uri, sCAfile, def oDownloadUrlUrllib3Socks(uri,
timeout=30, sCAfile,
host='127.0.0.1', timeout=30,
port=9050, host='127.0.0.1',
content_type=''): port=9050,
session=None,
content_type='text/plain'):
"""Theres no need to use requests here and it """Theres no need to use requests here and it
adds too many layers on the SSL to be able to get at things adds too many layers on the SSL to be able to get at things
""" """