nicer error formats

This commit is contained in:
Nick Sweeting 2019-01-20 14:08:00 -05:00
parent de6eb649e0
commit 071b39b944

View file

@ -224,27 +224,27 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC
] ]
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')
try: try:
result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 5) # index.html result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout) # index.html
end() end()
output = wget_output_path(link, look_in=domain_dir) output = wget_output_path(link, look_in=domain_dir)
# Check for common failure cases # Check for common failure cases
if result.returncode > 0: if result.returncode > 0:
print(' got wget response code {}:'.format(result.returncode)) print(' Got wget response code {}:'.format(result.returncode))
if result.returncode != 8: print('\n'.join(' ' + line for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:] if line.strip()))
print('\n'.join(' ' + line for line in (result.stderr or result.stdout).decode().rsplit('\n', 10)[-10:] if line.strip()))
if b'403: Forbidden' in result.stderr: if b'403: Forbidden' in result.stderr:
raise Exception('403 Forbidden (try changing WGET_USER_AGENT)') raise Exception('403 Forbidden (try changing WGET_USER_AGENT)')
if b'404: Not Found' in result.stderr: if b'404: Not Found' in result.stderr:
raise Exception('404 Not Found') raise Exception('404 Not Found')
if b'ERROR 500: Internal Server Error' in result.stderr: if b'ERROR 500: Internal Server Error' in result.stderr:
raise Exception('500 Internal Server Error') raise Exception('500 Internal Server Error')
if result.returncode == 4: raise Exception('Got an error from the server')
raise Exception('Failed wget download')
except Exception as e: except Exception as e:
end() end()
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD))) print(' {}Some resources were skipped: {}{}'.format(ANSI['lightyellow'], e, ANSI['reset']))
print(' {}Warning: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' Run to see full output:')
print(' cd {};'.format(link_dir))
print(' {}'.format(' '.join(CMD)))
output = e output = e
return { return {
@ -267,13 +267,13 @@ def fetch_pdf(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
*chrome_headless(user_data_dir=user_data_dir), *chrome_headless(user_data_dir=user_data_dir),
'--print-to-pdf', '--print-to-pdf',
'--hide-scrollbars', '--hide-scrollbars',
'--timeout={timeout * 1000}', '--timeout={}'.format((timeout) * 1000),
*(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')), *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')),
link['url'] link['url']
] ]
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')
try: try:
result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 5) # output.pdf result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout) # output.pdf
end() end()
if result.returncode: if result.returncode:
print(' ', (result.stderr or result.stdout).decode()) print(' ', (result.stderr or result.stdout).decode())
@ -282,8 +282,10 @@ def fetch_pdf(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
output = 'output.pdf' output = 'output.pdf'
except Exception as e: except Exception as e:
end() end()
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD)))
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
print(' Run to see full output:')
print(' cd {};'.format(link_dir))
print(' {}'.format(' '.join(CMD)))
output = e output = e
return { return {
@ -306,14 +308,14 @@ def fetch_screenshot(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_
'--screenshot', '--screenshot',
'--window-size={}'.format(resolution), '--window-size={}'.format(resolution),
'--hide-scrollbars', '--hide-scrollbars',
'--timeout={timeout * 1000}', '--timeout={}'.format((timeout) * 1000),
*(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')), *(() if CHECK_SSL_VALIDITY else ('--disable-web-security', '--ignore-certificate-errors')),
# '--full-page', # TODO: make this actually work using ./bin/screenshot fullPage: true # '--full-page', # TODO: make this actually work using ./bin/screenshot fullPage: true
link['url'], link['url'],
] ]
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')
try: try:
result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout + 5) # sreenshot.png result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout) # sreenshot.png
end() end()
if result.returncode: if result.returncode:
print(' ', (result.stderr or result.stdout).decode()) print(' ', (result.stderr or result.stdout).decode())
@ -322,8 +324,10 @@ def fetch_screenshot(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_
output = 'screenshot.png' output = 'screenshot.png'
except Exception as e: except Exception as e:
end() end()
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD)))
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
print(' Run to see full output:')
print(' cd {};'.format(link_dir))
print(' {}'.format(' '.join(CMD)))
output = e output = e
return { return {
@ -346,13 +350,13 @@ def fetch_dom(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
CMD = [ CMD = [
*chrome_headless(user_data_dir=user_data_dir), *chrome_headless(user_data_dir=user_data_dir),
'--dump-dom', '--dump-dom',
'--timeout={timeout * 1000}', '--timeout={}'.format((timeout) * 1000),
link['url'] link['url']
] ]
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')
try: try:
with open(output_path, 'w+') as f: with open(output_path, 'w+') as f:
result = run(CMD, stdout=f, stderr=PIPE, cwd=link_dir, timeout=timeout + 5) # output.html result = run(CMD, stdout=f, stderr=PIPE, cwd=link_dir, timeout=timeout) # output.html
end() end()
if result.returncode: if result.returncode:
print(' ', (result.stderr).decode()) print(' ', (result.stderr).decode())
@ -361,8 +365,10 @@ def fetch_dom(link_dir, link, timeout=TIMEOUT, user_data_dir=CHROME_USER_DATA_DI
output = 'output.html' output = 'output.html'
except Exception as e: except Exception as e:
end() end()
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD)))
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
print(' Run to see full output:')
print(' cd {};'.format(link_dir))
print(' {}'.format(' '.join(CMD)))
output = e output = e
return { return {
@ -393,7 +399,7 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT):
] ]
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')
try: try:
result = run(CMD, stdout=PIPE, stderr=DEVNULL, cwd=link_dir, timeout=timeout + 1) # archive.org.txt result = run(CMD, stdout=PIPE, stderr=DEVNULL, cwd=link_dir, timeout=timeout) # archive.org.txt
end() end()
# Parse archive.org response headers # Parse archive.org response headers
@ -422,8 +428,9 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT):
raise Exception('Failed to find "content-location" URL header in Archive.org response.') raise Exception('Failed to find "content-location" URL header in Archive.org response.')
except Exception as e: except Exception as e:
end() end()
print(' Visit url to see output:', ' '.join(CMD))
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
print(' Run to see full output:')
print(' {}'.format(' '.join(CMD)))
output = e output = e
if success: if success:
@ -444,11 +451,15 @@ def fetch_favicon(link_dir, link, timeout=TIMEOUT):
if os.path.exists(os.path.join(link_dir, 'favicon.ico')): if os.path.exists(os.path.join(link_dir, 'favicon.ico')):
return {'output': 'favicon.ico', 'status': 'skipped'} return {'output': 'favicon.ico', 'status': 'skipped'}
CMD = ['curl', 'https://www.google.com/s2/favicons?domain={domain}'.format(**link)] CMD = [
'curl',
'--max-time', str(timeout),
'https://www.google.com/s2/favicons?domain={domain}'.format(**link),
]
fout = open('{}/favicon.ico'.format(link_dir), 'w') fout = open('{}/favicon.ico'.format(link_dir), 'w')
end = progress(timeout, prefix=' ') end = progress(timeout, prefix=' ')
try: try:
run(CMD, stdout=fout, stderr=DEVNULL, cwd=link_dir, timeout=timeout + 1) # favicon.ico run(CMD, stdout=fout, stderr=DEVNULL, cwd=link_dir, timeout=timeout) # favicon.ico
fout.close() fout.close()
end() end()
chmod_file('favicon.ico', cwd=link_dir) chmod_file('favicon.ico', cwd=link_dir)
@ -456,8 +467,9 @@ def fetch_favicon(link_dir, link, timeout=TIMEOUT):
except Exception as e: except Exception as e:
fout.close() fout.close()
end() end()
print(' Run to see full output:', ' '.join(CMD))
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
print(' Run to see full output:')
print(' {}'.format(' '.join(CMD)))
output = e output = e
return { return {
@ -510,8 +522,10 @@ def fetch_media(link_dir, link, timeout=MEDIA_TIMEOUT, overwrite=False):
raise Exception('Failed to download media') raise Exception('Failed to download media')
except Exception as e: except Exception as e:
end() end()
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD)))
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
print(' Run to see full output:')
print(' cd {};'.format(link_dir))
print(' {}'.format(' '.join(CMD)))
output = e output = e
return { return {
@ -545,8 +559,10 @@ def fetch_git(link_dir, link, timeout=TIMEOUT):
raise Exception('Failed git download') raise Exception('Failed git download')
except Exception as e: except Exception as e:
end() end()
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD)))
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset'])) print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
print(' Run to see full output:')
print(' cd {};'.format(link_dir))
print(' {}'.format(' '.join(CMD)))
output = e output = e
return { return {