Mirror of https://github.com/ArchiveBox/ArchiveBox (synced 2024-11-22 20:23:12 +00:00)
Fix media download by using a longer, dedicated timeout (MEDIA_TIMEOUT)
This commit is contained in:
parent
c33f7ba91c
commit
a15a331798
2 changed files with 18 additions and 12 deletions
|
@ -29,6 +29,7 @@ from config import (
|
||||||
CHROME_USER_DATA_DIR,
|
CHROME_USER_DATA_DIR,
|
||||||
CHROME_SANDBOX,
|
CHROME_SANDBOX,
|
||||||
TIMEOUT,
|
TIMEOUT,
|
||||||
|
MEDIA_TIMEOUT,
|
||||||
ANSI,
|
ANSI,
|
||||||
ARCHIVE_DIR,
|
ARCHIVE_DIR,
|
||||||
GIT_DOMAINS,
|
GIT_DOMAINS,
|
||||||
|
@ -441,16 +442,16 @@ def fetch_favicon(link_dir, link, timeout=TIMEOUT):
|
||||||
}
|
}
|
||||||
|
|
||||||
@attach_result_to_link('media')
|
@attach_result_to_link('media')
|
||||||
def fetch_media(link_dir, link, timeout=TIMEOUT, overwrite=False):
|
def fetch_media(link_dir, link, timeout=MEDIA_TIMEOUT, overwrite=False):
|
||||||
"""Download playlists or individual video, audio, and subtitles using youtube-dl"""
|
"""Download playlists or individual video, audio, and subtitles using youtube-dl"""
|
||||||
|
|
||||||
output = os.path.join(link_dir, 'media')
|
|
||||||
|
|
||||||
|
# import ipdb; ipdb.set_trace()
|
||||||
|
output = os.path.join(link_dir, 'media')
|
||||||
if os.path.exists(output) and not overwrite:
|
if os.path.exists(output) and not overwrite:
|
||||||
return {'output': 'media', 'status': 'skipped'}
|
return {'output': 'media', 'status': 'skipped'}
|
||||||
|
|
||||||
os.mkdir(output)
|
os.makedirs(output, exist_ok=True)
|
||||||
print(' - Downloading media')
|
|
||||||
CMD = [
|
CMD = [
|
||||||
'youtube-dl',
|
'youtube-dl',
|
||||||
'--write-description',
|
'--write-description',
|
||||||
|
@ -463,6 +464,7 @@ def fetch_media(link_dir, link, timeout=TIMEOUT, overwrite=False):
|
||||||
'--user-agent',
|
'--user-agent',
|
||||||
'--all-subs',
|
'--all-subs',
|
||||||
'-x',
|
'-x',
|
||||||
|
'-k',
|
||||||
'--audio-format', 'mp3',
|
'--audio-format', 'mp3',
|
||||||
'--audio-quality', '320K',
|
'--audio-quality', '320K',
|
||||||
'--embed-thumbnail',
|
'--embed-thumbnail',
|
||||||
|
@ -472,17 +474,20 @@ def fetch_media(link_dir, link, timeout=TIMEOUT, overwrite=False):
|
||||||
|
|
||||||
end = progress(timeout, prefix=' ')
|
end = progress(timeout, prefix=' ')
|
||||||
try:
|
try:
|
||||||
result = run(CMD, stdout=DEVNULL, stderr=DEVNULL, cwd=output, timeout=timeout + 1) # audio/audio.mp3
|
result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=output, timeout=timeout + 1) # audio/audio.mp3
|
||||||
end()
|
end()
|
||||||
if result.returncode:
|
if result.returncode:
|
||||||
|
if b'ERROR: Unsupported URL' in result.stderr:
|
||||||
|
# print(' none found')
|
||||||
|
pass
|
||||||
|
else:
|
||||||
print(' got youtubedl response code {}:'.format(result.returncode))
|
print(' got youtubedl response code {}:'.format(result.returncode))
|
||||||
raise Exception('Failed to download media')
|
raise Exception('Failed to download media')
|
||||||
chmod_file('media', cwd=link_dir)
|
|
||||||
return 'media'
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
end()
|
end()
|
||||||
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD)))
|
print(' Run to see full output:', 'cd {}; {}'.format(link_dir, ' '.join(CMD)))
|
||||||
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
|
print(' {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
|
||||||
|
output = e
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'cmd': CMD,
|
'cmd': CMD,
|
||||||
|
|
|
@ -22,7 +22,7 @@ FETCH_PDF = os.getenv('FETCH_PDF', 'True'
|
||||||
FETCH_SCREENSHOT = os.getenv('FETCH_SCREENSHOT', 'True' ).lower() == 'true'
|
FETCH_SCREENSHOT = os.getenv('FETCH_SCREENSHOT', 'True' ).lower() == 'true'
|
||||||
FETCH_DOM = os.getenv('FETCH_DOM', 'True' ).lower() == 'true'
|
FETCH_DOM = os.getenv('FETCH_DOM', 'True' ).lower() == 'true'
|
||||||
FETCH_GIT = os.getenv('FETCH_GIT', 'True' ).lower() == 'true'
|
FETCH_GIT = os.getenv('FETCH_GIT', 'True' ).lower() == 'true'
|
||||||
FETCH_MEDIA = os.getenv('FETCH_MEDIA', 'True' ).lower() == 'true'
|
FETCH_MEDIA = os.getenv('FETCH_MEDIA', 'False' ).lower() == 'true'
|
||||||
FETCH_FAVICON = os.getenv('FETCH_FAVICON', 'True' ).lower() == 'true'
|
FETCH_FAVICON = os.getenv('FETCH_FAVICON', 'True' ).lower() == 'true'
|
||||||
SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True' ).lower() == 'true'
|
SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True' ).lower() == 'true'
|
||||||
RESOLUTION = os.getenv('RESOLUTION', '1440,1200' )
|
RESOLUTION = os.getenv('RESOLUTION', '1440,1200' )
|
||||||
|
@ -33,6 +33,7 @@ WGET_BINARY = os.getenv('WGET_BINARY', 'wget'
|
||||||
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox')
|
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox')
|
||||||
CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None)
|
CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None)
|
||||||
TIMEOUT = int(os.getenv('TIMEOUT', '60'))
|
TIMEOUT = int(os.getenv('TIMEOUT', '60'))
|
||||||
|
MEDIA_TIMEOUT = int(os.getenv('MEDIA_TIMEOUT', '3600'))
|
||||||
FOOTER_INFO = os.getenv('FOOTER_INFO', 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.',)
|
FOOTER_INFO = os.getenv('FOOTER_INFO', 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.',)
|
||||||
GIT_DOMAINS = os.getenv('GIT_DOMAINS', 'github.com,bitbucket.org,gitlab.com').split(',')
|
GIT_DOMAINS = os.getenv('GIT_DOMAINS', 'github.com,bitbucket.org,gitlab.com').split(',')
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue