#!/usr/bin/env python3
__package__ = 'archivebox'
import os
import sys
import shutil
import unittest
from contextlib import contextmanager
TEST_CONFIG = {
'OUTPUT_DIR': 'data.tests',
'FETCH_MEDIA': 'False',
'USE_CHROME': 'False',
'SUBMIT_ARCHIVE_DOT_ORG': 'False',
'SHOW_PROGRESS': 'False',
'USE_COLOR': 'False',
'FETCH_TITLE': 'False',
'FETCH_FAVICON': 'False',
'FETCH_WGET': 'False',
}
OUTPUT_DIR = 'data.tests'
os.environ.update(TEST_CONFIG)
from .legacy.main import init
from .legacy.index import load_main_index
from .legacy.config import (
SQL_INDEX_FILENAME,
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,
)
from .cli import (
archivebox_init,
archivebox_add,
archivebox_remove,
)
HIDE_CLI_OUTPUT = True
test_urls = '''
https://example1.com/what/is/happening.html?what=1#how-about-this=1
https://example2.com/what/is/happening/?what=1#how-about-this=1
HTtpS://example3.com/what/is/happening/?what=1#how-about-this=1f
https://example4.com/what/is/happening.html
https://example5.com/
https://example6.com
http://example7.com
[https://example8.com/what/is/this.php?what=1]
[and http://example9.com?what=1&other=3#and-thing=2]
https://example10.com#and-thing=2 "
abcdef
sdflkf[what](https://subb.example12.com/who/what.php?whoami=1#whatami=2)?am=hi
example13.bada
and example14.badb
htt://example15.badc
'''
stdout = sys.stdout
stderr = sys.stderr
@contextmanager
def output_hidden(show_failing=True):
if not HIDE_CLI_OUTPUT:
yield
return
sys.stdout = open('stdout.txt', 'w+')
sys.stderr = open('stderr.txt', 'w+')
try:
yield
sys.stdout.close()
sys.stderr.close()
sys.stdout = stdout
sys.stderr = stderr
except:
sys.stdout.close()
sys.stderr.close()
sys.stdout = stdout
sys.stderr = stderr
if show_failing:
with open('stdout.txt', 'r') as f:
print(f.read())
with open('stderr.txt', 'r') as f:
print(f.read())
raise
finally:
os.remove('stdout.txt')
os.remove('stderr.txt')
class TestInit(unittest.TestCase):
def setUp(self):
os.makedirs(OUTPUT_DIR, exist_ok=True)
def tearDown(self):
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
def test_basic_init(self):
with output_hidden():
archivebox_init.main([])
assert os.path.exists(os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME))
assert os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
assert os.path.exists(os.path.join(OUTPUT_DIR, HTML_INDEX_FILENAME))
assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
def test_conflicting_init(self):
with open(os.path.join(OUTPUT_DIR, 'test_conflict.txt'), 'w+') as f:
f.write('test')
try:
with output_hidden(show_failing=False):
archivebox_init.main([])
assert False, 'Init should have exited with an exception'
except SystemExit:
pass
assert not os.path.exists(os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME))
assert not os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
assert not os.path.exists(os.path.join(OUTPUT_DIR, HTML_INDEX_FILENAME))
try:
load_main_index(out_dir=OUTPUT_DIR)
assert False, 'load_main_index should raise an exception when no index is present'
except:
pass
def test_no_dirty_state(self):
with output_hidden():
init()
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
with output_hidden():
init()
class TestAdd(unittest.TestCase):
def setUp(self):
os.makedirs(OUTPUT_DIR, exist_ok=True)
with output_hidden():
init()
def tearDown(self):
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
def test_add_arg_url(self):
with output_hidden():
archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
assert len(all_links) == 30
def test_add_arg_file(self):
test_file = os.path.join(OUTPUT_DIR, 'test.txt')
with open(test_file, 'w+') as f:
f.write(test_urls)
with output_hidden():
archivebox_add.main([test_file])
all_links = load_main_index(out_dir=OUTPUT_DIR)
assert len(all_links) == 12
os.remove(test_file)
def test_add_stdin_url(self):
with output_hidden():
archivebox_add.main([], stdin=test_urls)
all_links = load_main_index(out_dir=OUTPUT_DIR)
assert len(all_links) == 12
class TestRemove(unittest.TestCase):
def setUp(self):
os.makedirs(OUTPUT_DIR, exist_ok=True)
with output_hidden():
init()
archivebox_add.main([], stdin=test_urls)
# def tearDown(self):
# shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
def test_remove_exact(self):
with output_hidden():
archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
assert len(all_links) == 11
def test_remove_regex(self):
with output_hidden():
archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', 'http(s)?:\/\/(.+\.)?(example\d\.com)'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
assert len(all_links) == 4
def test_remove_domain(self):
with output_hidden():
archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
assert len(all_links) == 10
def test_remove_none(self):
try:
with output_hidden(show_failing=False):
archivebox_remove.main(['--yes', '--delete', 'https://doesntexist.com'])
assert False, 'Should raise if no URLs match'
except:
pass
if __name__ == '__main__':
if '--verbose' in sys.argv or '-v' in sys.argv:
HIDE_CLI_OUTPUT = False
unittest.main()