mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2025-02-16 21:38:33 +00:00
don't parse quotes as part of URLs
This commit is contained in:
parent
af8b9b5fdf
commit
3ac0efb619
1 changed file with 1 addition and 1 deletion
|
@ -43,7 +43,7 @@ base_url = lambda url: without_scheme(url) # uniq base url used to dedupe links
|
|||
|
||||
short_ts = lambda ts: ts.split('.')[0]
|
||||
|
||||
URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))[^<]+'
|
||||
URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))[^<\""]+'
|
||||
|
||||
|
||||
def check_dependencies():
|
||||
|
|
Loading…
Add table
Reference in a new issue