Sanitize XML strings before parsing (#1452)

This commit is contained in:
JonnyWong16 2024-08-17 14:07:12 -07:00 committed by GitHub
parent bbe3e8e49f
commit e23cd8d158
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 46 additions and 3 deletions

View file

@ -197,7 +197,7 @@ class PlexClient(PlexObject):
raise NotFound(message)
else:
raise BadRequest(message)
data = response.text.encode('utf8')
data = utils.cleanXMLString(response.text).encode('utf8')
return ElementTree.fromstring(data) if data.strip() else None
def sendCommand(self, command, proxy=None, **params):

View file

@ -250,7 +250,7 @@ class MyPlexAccount(PlexObject):
return response.json()
elif 'text/plain' in response.headers.get('Content-Type', ''):
return response.text.strip()
data = response.text.encode('utf8')
data = utils.cleanXMLString(response.text).encode('utf8')
return ElementTree.fromstring(data) if data.strip() else None
def ping(self):

View file

@ -768,7 +768,7 @@ class PlexServer(PlexObject):
raise NotFound(message)
else:
raise BadRequest(message)
data = response.text.encode('utf8')
data = utils.cleanXMLString(response.text).encode('utf8')
return ElementTree.fromstring(data) if data.strip() else None
def search(self, query, mediatype=None, limit=None, sectionId=None):

View file

@ -6,6 +6,7 @@ import logging
import os
import re
import string
import sys
import time
import unicodedata
import warnings
@ -673,3 +674,45 @@ def openOrRead(file):
def sha1hash(guid):
""" Return the SHA1 hash of a guid. """
return sha1(guid.encode('utf-8')).hexdigest()
# https://stackoverflow.com/a/64570125
_illegal_XML_characters = [
(0x00, 0x08),
(0x0B, 0x0C),
(0x0E, 0x1F),
(0x7F, 0x84),
(0x86, 0x9F),
(0xFDD0, 0xFDDF),
(0xFFFE, 0xFFFF),
]
if sys.maxunicode >= 0x10000: # not narrow build
_illegal_XML_characters.extend(
[
(0x1FFFE, 0x1FFFF),
(0x2FFFE, 0x2FFFF),
(0x3FFFE, 0x3FFFF),
(0x4FFFE, 0x4FFFF),
(0x5FFFE, 0x5FFFF),
(0x6FFFE, 0x6FFFF),
(0x7FFFE, 0x7FFFF),
(0x8FFFE, 0x8FFFF),
(0x9FFFE, 0x9FFFF),
(0xAFFFE, 0xAFFFF),
(0xBFFFE, 0xBFFFF),
(0xCFFFE, 0xCFFFF),
(0xDFFFE, 0xDFFFF),
(0xEFFFE, 0xEFFFF),
(0xFFFFE, 0xFFFFF),
(0x10FFFE, 0x10FFFF),
]
)
_illegal_XML_ranges = [
fr'{chr(low)}-{chr(high)}'
for (low, high) in _illegal_XML_characters
]
_illegal_XML_re = re.compile(fr'[{"".join(_illegal_XML_ranges)}]')
def cleanXMLString(s):
return _illegal_XML_re.sub('', s)