mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-10 15:14:57 +00:00
[utils] Support TTML without default namespace
Strictly speaking, TTML without a default namespace is invalid, but Yahoo uses it.
This commit is contained in:
parent
2aa64b89b3
commit
1b0427e6c4
2 changed files with 21 additions and 3 deletions
|
@ -621,6 +621,21 @@ Line
|
|||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data), srt_data)
|
||||
|
||||
dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
|
||||
<body>
|
||||
<div xml:lang="en">
|
||||
<p begin="0" end="1">The first line</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
The first line
|
||||
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -1848,9 +1848,9 @@ def dfxp2srt(dfxp_data):
|
|||
out = str_or_empty(node.text)
|
||||
|
||||
for child in node:
|
||||
if child.tag == _x('ttml:br'):
|
||||
if child.tag in (_x('ttml:br'), 'br'):
|
||||
out += '\n' + str_or_empty(child.tail)
|
||||
elif child.tag == _x('ttml:span'):
|
||||
elif child.tag in (_x('ttml:span'), 'span'):
|
||||
out += str_or_empty(parse_node(child))
|
||||
else:
|
||||
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
||||
|
@ -1859,7 +1859,10 @@ def dfxp2srt(dfxp_data):
|
|||
|
||||
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
|
||||
out = []
|
||||
paras = dfxp.findall(_x('.//ttml:p'))
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
||||
|
||||
if not paras:
|
||||
raise ValueError('Invalid dfxp/TTML subtitle')
|
||||
|
||||
for para, index in zip(paras, itertools.count(1)):
|
||||
begin_time = parse_dfxp_time_expr(para.attrib['begin'])
|
||||
|
|
Loading…
Reference in a new issue