From e23cd8d1583b75e0b1a9c625748d536e97839238 Mon Sep 17 00:00:00 2001 From: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> Date: Sat, 17 Aug 2024 14:07:12 -0700 Subject: [PATCH] Sanitize XML strings before parsing (#1452) --- plexapi/client.py | 2 +- plexapi/myplex.py | 2 +- plexapi/server.py | 2 +- plexapi/utils.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/plexapi/client.py b/plexapi/client.py index 76513e79..3d89e3dc 100644 --- a/plexapi/client.py +++ b/plexapi/client.py @@ -197,7 +197,7 @@ class PlexClient(PlexObject): raise NotFound(message) else: raise BadRequest(message) - data = response.text.encode('utf8') + data = utils.cleanXMLString(response.text).encode('utf8') return ElementTree.fromstring(data) if data.strip() else None def sendCommand(self, command, proxy=None, **params): diff --git a/plexapi/myplex.py b/plexapi/myplex.py index bc40583e..24e32e6b 100644 --- a/plexapi/myplex.py +++ b/plexapi/myplex.py @@ -250,7 +250,7 @@ class MyPlexAccount(PlexObject): return response.json() elif 'text/plain' in response.headers.get('Content-Type', ''): return response.text.strip() - data = response.text.encode('utf8') + data = utils.cleanXMLString(response.text).encode('utf8') return ElementTree.fromstring(data) if data.strip() else None def ping(self): diff --git a/plexapi/server.py b/plexapi/server.py index f39a423f..8cd110d8 100644 --- a/plexapi/server.py +++ b/plexapi/server.py @@ -768,7 +768,7 @@ class PlexServer(PlexObject): raise NotFound(message) else: raise BadRequest(message) - data = response.text.encode('utf8') + data = utils.cleanXMLString(response.text).encode('utf8') return ElementTree.fromstring(data) if data.strip() else None def search(self, query, mediatype=None, limit=None, sectionId=None): diff --git a/plexapi/utils.py b/plexapi/utils.py index bb128532..549afc5b 100644 --- a/plexapi/utils.py +++ b/plexapi/utils.py @@ -6,6 +6,7 @@ import logging import os import re import string +import sys import time import unicodedata import warnings @@ -673,3 +674,45 @@ def openOrRead(file): def sha1hash(guid): """ Return the SHA1 hash of a guid. """ return sha1(guid.encode('utf-8')).hexdigest() + + +# https://stackoverflow.com/a/64570125 +_illegal_XML_characters = [ + (0x00, 0x08), + (0x0B, 0x0C), + (0x0E, 0x1F), + (0x7F, 0x84), + (0x86, 0x9F), + (0xFDD0, 0xFDDF), + (0xFFFE, 0xFFFF), +] +if sys.maxunicode >= 0x10000: # not narrow build + _illegal_XML_characters.extend( + [ + (0x1FFFE, 0x1FFFF), + (0x2FFFE, 0x2FFFF), + (0x3FFFE, 0x3FFFF), + (0x4FFFE, 0x4FFFF), + (0x5FFFE, 0x5FFFF), + (0x6FFFE, 0x6FFFF), + (0x7FFFE, 0x7FFFF), + (0x8FFFE, 0x8FFFF), + (0x9FFFE, 0x9FFFF), + (0xAFFFE, 0xAFFFF), + (0xBFFFE, 0xBFFFF), + (0xCFFFE, 0xCFFFF), + (0xDFFFE, 0xDFFFF), + (0xEFFFE, 0xEFFFF), + (0xFFFFE, 0xFFFFF), + (0x10FFFE, 0x10FFFF), + ] + ) +_illegal_XML_ranges = [ + fr'{chr(low)}-{chr(high)}' + for (low, high) in _illegal_XML_characters +] +_illegal_XML_re = re.compile(fr'[{"".join(_illegal_XML_ranges)}]') + + +def cleanXMLString(s): + return _illegal_XML_re.sub('', s)