Debug: remove lxml dependency, pickle SVD tree for faster processing. (#909)

* Debug: remove lxml dependency, pickle SVD tree for faster processing.
* Debug: remove unused import in svd.py
あく 2021-12-17 04:28:51 +03:00 committed by GitHub
parent 4013da5b59
commit e109e2e3e8
6 changed files with 624 additions and 35 deletions
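As a quick orientation (not part of the diff itself): the commit swaps the lxml-based SVD parsing for a small vendored xmltodict-style parser (x2d) and caches the parsed tree in a .pickle file next to the SVD. Below is a minimal sketch of that caching pattern; load_svd_tree is a hypothetical helper name, while the real logic lives in SVDFile.__init__ in svd.py further down.

import os
import pickle

import x2d  # xmltodict-style parser vendored by this commit


def load_svd_tree(xml_file_name):
    """Hypothetical helper: return the parsed SVD tree, reusing a
    .pickle cache next to the XML file when one exists."""
    pickle_file_name = xml_file_name + ".pickle"
    if os.path.exists(pickle_file_name):
        # Cache hit: unpickling is much faster than re-parsing the XML
        with open(pickle_file_name, "rb") as f:
            return pickle.load(f)
    # Cache miss: parse the XML once, then store the resulting tree
    with open(xml_file_name, "rb") as f:
        root = x2d.parse(f)
    with open(pickle_file_name, "wb") as f:
        pickle.dump(root, f, pickle.HIGHEST_PROTOCOL)
    return root

On subsequent loads the pickle is reused, which is where the "faster processing" in the commit title comes from.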

.gitignore

@@ -19,6 +19,7 @@ venv/
__pycache__/
*.py[cod]
*$py.class
*.pickle
.obj/
bindings/
@@ -34,4 +35,4 @@ build
CMakeLists.txt
# bundle output
dist
dist

README

@@ -16,7 +16,7 @@ My implementation so far has only tested STM32 chips but should hold for others.
expect plenty of errors in the file. Like GPIOA having a register named GPIOB_OSPEEDR and lots of 16-bit registers
that are listed as 32!
The implementation consists of two components -- An lxml-based parser module (pysvd) and a GDB file (gdb_svd).
The implementation consists of two components -- An xml parser module (pysvd) and a GDB file (gdb_svd).
I haven't yet worked out a perfect workflow for this, though it's quite easy to use when
you already tend to have a GDB initialization file for starting up OpenOCD and the like.
However your workflow works, just make sure to, in GDB:

svd.py

@@ -16,7 +16,6 @@ You should have received a copy of the GNU General Public License
along with PyCortexMDebug. If not, see <http://www.gnu.org/licenses/>.
"""
import lxml.objectify as objectify
import sys
from collections import OrderedDict
import os
@@ -24,6 +23,7 @@ import pickle
import traceback
import re
import warnings
import x2d
class SmartDict:
@@ -126,26 +126,31 @@ class SVDFile:
def __init__(self, fname):
"""
Args:
fname: Filename for the SVD file
"""
f = objectify.parse(os.path.expanduser(fname))
root = f.getroot()
periph = root.peripherals.getchildren()
self.peripherals = SmartDict()
self.base_address = 0
xml_file_name = os.path.expanduser(fname)
pickle_file_name = xml_file_name + ".pickle"
root = None
if os.path.exists(pickle_file_name):
print("Loading pickled SVD")
root = pickle.load(open(pickle_file_name, "rb"))
else:
print("Loading XML SVD and pickling it")
root = x2d.parse(open(xml_file_name, "rb"))
pickle.dump(root, open(pickle_file_name, "wb"), pickle.HIGHEST_PROTOCOL)
print("Processing SVD tree")
# XML elements
for p in periph:
for p in root["device"]["peripherals"]["peripheral"]:
try:
if p.tag == "peripheral":
self.peripherals[str(p.name)] = SVDPeripheral(p, self)
else:
# This is some other tag
pass
self.peripherals[p["name"]] = SVDPeripheral(p, self)
except SVDNonFatalError as e:
print(e)
# print(e)
pass
print("SVD Ready")
def add_register(parent, node):
@@ -265,11 +270,11 @@ class SVDPeripheral:
self.parent_base_address = parent.base_address
# Look for a base address, as it is required
if not hasattr(svd_elem, "baseAddress"):
if "baseAddress" not in svd_elem:
raise SVDNonFatalError("Periph without base address")
self.base_address = int(str(svd_elem.baseAddress), 0)
if "derivedFrom" in svd_elem.attrib:
derived_from = svd_elem.attrib["derivedFrom"]
if "@derivedFrom" in svd_elem:
derived_from = svd_elem["@derivedFrom"]
try:
self.name = str(svd_elem.name)
except AttributeError:
@@ -295,16 +300,14 @@ class SVDPeripheral:
self.clusters = SmartDict()
if hasattr(svd_elem, "registers"):
registers = [
r
for r in svd_elem.registers.getchildren()
if r.tag in ["cluster", "register"]
]
for r in registers:
if r.tag == "cluster":
add_cluster(self, r)
elif r.tag == "register":
add_register(self, r)
if "register" in svd_elem.registers:
for r in svd_elem.registers.register:
if isinstance(r, x2d.ObjectDict):
add_register(self, r)
if "cluster" in svd_elem.registers:
for c in svd_elem.registers.cluster:
if isinstance(c, x2d.ObjectDict):
add_cluster(self, c)
def refactor_parent(self, parent):
self.parent_base_address = parent.base_address
@@ -338,11 +341,11 @@ class SVDPeripheralRegister:
else:
self.size = 0x20
self.fields = SmartDict()
if hasattr(svd_elem, "fields"):
if "fields" in svd_elem:
# Filter fields to only consider those of tag "field"
fields = [f for f in svd_elem.fields.getchildren() if f.tag == "field"]
for f in fields:
self.fields[str(f.name)] = SVDPeripheralRegisterField(f, self)
for f in svd_elem.fields.field:
if isinstance(f, x2d.ObjectDict):
self.fields[str(f.name)] = SVDPeripheralRegisterField(f, self)
def refactor_parent(self, parent):
self.parent_base_address = parent.base_address
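For reference (not part of the commit), here is a minimal sketch of how the x2d tree replaces the old lxml.objectify accessors used above; example.svd is a placeholder filename, and the element names follow the CMSIS-SVD layout already referenced in this file.

import x2d

with open("example.svd", "rb") as f:
    root = x2d.parse(f)

# Child elements are plain (attribute-accessible) dict entries...
peripherals = root["device"]["peripherals"]["peripheral"]

# ...and XML attributes are stored under an "@" prefix, so
# <peripheral derivedFrom="GPIOA"> shows up as p["@derivedFrom"].
for p in peripherals:  # a list when the SVD has several <peripheral> elements
    name = p["name"]
    derived_from = p.get("@derivedFrom")  # None if the attribute is absent

This is also why the code above switches from tag and attrib checks to plain dictionary membership tests.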

GDB command module (gdb_svd)

@@ -23,6 +23,7 @@ import sys
import struct
import pkg_resources
import fnmatch
import traceback
from .svd import SVDFile
@@ -99,6 +100,7 @@ class LoadSVD(gdb.Command):
try:
SVD(SVDFile(f))
except Exception as e:
traceback.print_exc()
raise gdb.GdbError("Could not load SVD file {} : {}...\n".format(f, e))

x2d.py (new file)

@@ -0,0 +1,586 @@
#!/usr/bin/env python
"Makes working with XML feel like you are working with JSON"
try:
from defusedexpat import pyexpat as expat
except ImportError:
from xml.parsers import expat
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
try: # pragma no cover
from cStringIO import StringIO
except ImportError: # pragma no cover
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
from inspect import isgenerator
class ObjectDict(dict):
def __getattr__(self, name):
if name in self:
return self[name]
else:
raise AttributeError("No such attribute: " + name)
try: # pragma no cover
_basestring = basestring
except NameError: # pragma no cover
_basestring = str
try: # pragma no cover
_unicode = unicode
except NameError: # pragma no cover
_unicode = str
__author__ = "Martin Blech"
__version__ = "0.12.0"
__license__ = "MIT"
class ParsingInterrupted(Exception):
pass
class _DictSAXHandler(object):
def __init__(
self,
item_depth=0,
item_callback=lambda *args: True,
xml_attribs=True,
attr_prefix="@",
cdata_key="#text",
force_cdata=False,
cdata_separator="",
postprocessor=None,
dict_constructor=ObjectDict,
strip_whitespace=True,
namespace_separator=":",
namespaces=None,
force_list=None,
comment_key="#comment",
):
self.path = []
self.stack = []
self.data = []
self.item = None
self.item_depth = item_depth
self.xml_attribs = xml_attribs
self.item_callback = item_callback
self.attr_prefix = attr_prefix
self.cdata_key = cdata_key
self.force_cdata = force_cdata
self.cdata_separator = cdata_separator
self.postprocessor = postprocessor
self.dict_constructor = dict_constructor
self.strip_whitespace = strip_whitespace
self.namespace_separator = namespace_separator
self.namespaces = namespaces
self.namespace_declarations = ObjectDict()
self.force_list = force_list
self.comment_key = comment_key
def _build_name(self, full_name):
if self.namespaces is None:
return full_name
i = full_name.rfind(self.namespace_separator)
if i == -1:
return full_name
namespace, name = full_name[:i], full_name[i + 1 :]
try:
short_namespace = self.namespaces[namespace]
except KeyError:
short_namespace = namespace
if not short_namespace:
return name
else:
return self.namespace_separator.join((short_namespace, name))
def _attrs_to_dict(self, attrs):
if isinstance(attrs, dict):
return attrs
return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))
def startNamespaceDecl(self, prefix, uri):
self.namespace_declarations[prefix or ""] = uri
def startElement(self, full_name, attrs):
name = self._build_name(full_name)
attrs = self._attrs_to_dict(attrs)
if attrs and self.namespace_declarations:
attrs["xmlns"] = self.namespace_declarations
self.namespace_declarations = ObjectDict()
self.path.append((name, attrs or None))
if len(self.path) > self.item_depth:
self.stack.append((self.item, self.data))
if self.xml_attribs:
attr_entries = []
for key, value in attrs.items():
key = self.attr_prefix + self._build_name(key)
if self.postprocessor:
entry = self.postprocessor(self.path, key, value)
else:
entry = (key, value)
if entry:
attr_entries.append(entry)
attrs = self.dict_constructor(attr_entries)
else:
attrs = None
self.item = attrs or None
self.data = []
def endElement(self, full_name):
name = self._build_name(full_name)
if len(self.path) == self.item_depth:
item = self.item
if item is None:
item = None if not self.data else self.cdata_separator.join(self.data)
should_continue = self.item_callback(self.path, item)
if not should_continue:
raise ParsingInterrupted()
if len(self.stack):
data = None if not self.data else self.cdata_separator.join(self.data)
item = self.item
self.item, self.data = self.stack.pop()
if self.strip_whitespace and data:
data = data.strip() or None
if data and self.force_cdata and item is None:
item = self.dict_constructor()
if item is not None:
if data:
self.push_data(item, self.cdata_key, data)
self.item = self.push_data(self.item, name, item)
else:
self.item = self.push_data(self.item, name, data)
else:
self.item = None
self.data = []
self.path.pop()
def characters(self, data):
if not self.data:
self.data = [data]
else:
self.data.append(data)
def comments(self, data):
if self.strip_whitespace:
data = data.strip()
self.item = self.push_data(self.item, self.comment_key, data)
def push_data(self, item, key, data):
if self.postprocessor is not None:
result = self.postprocessor(self.path, key, data)
if result is None:
return item
key, data = result
if item is None:
item = self.dict_constructor()
try:
value = item[key]
if isinstance(value, list):
value.append(data)
else:
item[key] = [value, data]
except KeyError:
if self._should_force_list(key, data):
item[key] = [data]
else:
item[key] = data
return item
def _should_force_list(self, key, value):
if not self.force_list:
return False
if isinstance(self.force_list, bool):
return self.force_list
try:
return key in self.force_list
except TypeError:
return self.force_list(self.path[:-1], key, value)
def parse(
xml_input,
encoding=None,
expat=expat,
process_namespaces=False,
namespace_separator=":",
disable_entities=True,
process_comments=False,
**kwargs
):
"""Parse the given XML input and convert it into a dictionary.
`xml_input` can either be a `string`, a file-like object, or a generator of strings.
If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
set to `False`, they are just ignored.
Simple example::
>>> import xmltodict
>>> doc = xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>
... \"\"\")
>>> doc['a']['@prop']
u'x'
>>> doc['a']['b']
[u'1', u'2']
If `item_depth` is `0`, the function returns a dictionary for the root
element (default behavior). Otherwise, it calls `item_callback` every time
an item at the specified depth is found and returns `None` in the end
(streaming mode).
The callback function receives two parameters: the `path` from the document
root to the item (name-attribs pairs), and the `item` (dict). If the
callback's return value is false-ish, parsing will be stopped with the
:class:`ParsingInterrupted` exception.
Streaming example::
>>> def handle(path, item):
... print('path:%s item:%s' % (path, item))
... return True
...
>>> xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>\"\"\", item_depth=2, item_callback=handle)
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
The optional argument `postprocessor` is a function that takes `path`,
`key` and `value` as positional arguments and returns a new `(key, value)`
pair where both `key` and `value` may have changed. Usage example::
>>> def postprocessor(path, key, value):
... try:
... return key + ':int', int(value)
... except (ValueError, TypeError):
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
ObjectDict([(u'a', ObjectDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:
>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
ObjectDict([(u'a', u'hello')])
You can use the force_list argument to force lists to be created even
when there is only a single child of a given level of hierarchy. The
force_list argument is a tuple of keys. If the key for a given level
of hierarchy is in the force_list argument, that level of hierarchy
will have a list as a child (even if there is only one sub-element).
The index_keys operation takes precedence over this. This is applied
after any user-supplied postprocessor has already run.
For example, given this input:
<servers>
<server>
<name>host1</name>
<os>Linux</os>
<interfaces>
<interface>
<name>em0</name>
<ip_address>10.0.0.1</ip_address>
</interface>
</interfaces>
</server>
</servers>
If called with force_list=('interface',), it will produce
this dictionary:
{'servers':
{'server':
{'name': 'host1',
'os': 'Linux'},
'interfaces':
{'interface':
[ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } }
`force_list` can also be a callable that receives `path`, `key` and
`value`. This is helpful in cases where the logic that decides whether
a list should be forced is more complex.
If `process_comments` is `True`, then comments will be added under `comment_key`
(default=`'#comment'`) to the tag that contains the comment.
For example, given this input:
<a>
<b>
<!-- b comment -->
<c>
<!-- c comment -->
1
</c>
<d>2</d>
</b>
</a>
If called with process_comments=True, it will produce
this dictionary:
'a': {
'b': {
'#comment': 'b comment',
'c': {
'#comment': 'c comment',
'#text': '1',
},
'd': '2',
},
}
"""
handler = _DictSAXHandler(namespace_separator=namespace_separator, **kwargs)
if isinstance(xml_input, _unicode):
if not encoding:
encoding = "utf-8"
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
parser = expat.ParserCreate(encoding, namespace_separator)
try:
parser.ordered_attributes = True
except AttributeError:
# Jython's expat does not support ordered_attributes
pass
parser.StartNamespaceDeclHandler = handler.startNamespaceDecl
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
if process_comments:
parser.CommentHandler = handler.comments
parser.buffer_text = True
if disable_entities:
try:
# Attempt to disable DTD in Jython's expat parser (Xerces-J).
feature = "http://apache.org/xml/features/disallow-doctype-decl"
parser._reader.setFeature(feature, True)
except AttributeError:
# For CPython / expat parser.
# Anything not handled ends up here and entities aren't expanded.
parser.DefaultHandler = lambda x: None
# Expects an integer return; zero means failure -> expat.ExpatError.
parser.ExternalEntityRefHandler = lambda *x: 1
if hasattr(xml_input, "read"):
parser.ParseFile(xml_input)
elif isgenerator(xml_input):
for chunk in xml_input:
parser.Parse(chunk, False)
parser.Parse(b"", True)
else:
parser.Parse(xml_input, True)
return handler.item
def _process_namespace(name, namespaces, ns_sep=":", attr_prefix="@"):
if not namespaces:
return name
try:
ns, name = name.rsplit(ns_sep, 1)
except ValueError:
pass
else:
ns_res = namespaces.get(ns.strip(attr_prefix))
name = (
"{}{}{}{}".format(
attr_prefix if ns.startswith(attr_prefix) else "", ns_res, ns_sep, name
)
if ns_res
else name
)
return name
def _emit(
key,
value,
content_handler,
attr_prefix="@",
cdata_key="#text",
depth=0,
preprocessor=None,
pretty=False,
newl="\n",
indent="\t",
namespace_separator=":",
namespaces=None,
full_document=True,
expand_iter=None,
):
key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
if preprocessor is not None:
result = preprocessor(key, value)
if result is None:
return
key, value = result
if (
not hasattr(value, "__iter__")
or isinstance(value, _basestring)
or isinstance(value, dict)
):
value = [value]
for index, v in enumerate(value):
if full_document and depth == 0 and index > 0:
raise ValueError("document with multiple roots")
if v is None:
v = ObjectDict()
elif isinstance(v, bool):
if v:
v = _unicode("true")
else:
v = _unicode("false")
elif not isinstance(v, dict):
if (
expand_iter
and hasattr(v, "__iter__")
and not isinstance(v, _basestring)
):
v = ObjectDict(((expand_iter, v),))
else:
v = _unicode(v)
if isinstance(v, _basestring):
v = ObjectDict(((cdata_key, v),))
cdata = None
attrs = ObjectDict()
children = []
for ik, iv in v.items():
if ik == cdata_key:
cdata = iv
continue
if ik.startswith(attr_prefix):
ik = _process_namespace(
ik, namespaces, namespace_separator, attr_prefix
)
if ik == "@xmlns" and isinstance(iv, dict):
for k, v in iv.items():
attr = "xmlns{}".format(":{}".format(k) if k else "")
attrs[attr] = _unicode(v)
continue
if not isinstance(iv, _unicode):
iv = _unicode(iv)
attrs[ik[len(attr_prefix) :]] = iv
continue
children.append((ik, iv))
if pretty:
content_handler.ignorableWhitespace(depth * indent)
content_handler.startElement(key, AttributesImpl(attrs))
if pretty and children:
content_handler.ignorableWhitespace(newl)
for child_key, child_value in children:
_emit(
child_key,
child_value,
content_handler,
attr_prefix,
cdata_key,
depth + 1,
preprocessor,
pretty,
newl,
indent,
namespaces=namespaces,
namespace_separator=namespace_separator,
expand_iter=expand_iter,
)
if cdata is not None:
content_handler.characters(cdata)
if pretty and children:
content_handler.ignorableWhitespace(depth * indent)
content_handler.endElement(key)
if pretty and depth:
content_handler.ignorableWhitespace(newl)
def unparse(
input_dict,
output=None,
encoding="utf-8",
full_document=True,
short_empty_elements=False,
**kwargs
):
"""Emit an XML document for the given `input_dict` (reverse of `parse`).
The resulting XML document is returned as a string, but if `output` (a
file-like object) is specified, it is written there instead.
Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
as XML node attributes, whereas keys equal to `cdata_key`
(default=`'#text'`) are treated as character data.
The `pretty` parameter (default=`False`) enables pretty-printing. In this
mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
can be customized with the `newl` and `indent` parameters.
"""
if full_document and len(input_dict) != 1:
raise ValueError("Document must have exactly one root.")
must_return = False
if output is None:
output = StringIO()
must_return = True
if short_empty_elements:
content_handler = XMLGenerator(output, encoding, True)
else:
content_handler = XMLGenerator(output, encoding)
if full_document:
content_handler.startDocument()
for key, value in input_dict.items():
_emit(key, value, content_handler, full_document=full_document, **kwargs)
if full_document:
content_handler.endDocument()
if must_return:
value = output.getvalue()
try: # pragma no cover
value = value.decode(encoding)
except AttributeError: # pragma no cover
pass
return value
if __name__ == "__main__": # pragma: no cover
import sys
import marshal
try:
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
except AttributeError:
stdin = sys.stdin
stdout = sys.stdout
(item_depth,) = sys.argv[1:]
item_depth = int(item_depth)
def handle_item(path, item):
marshal.dump((path, item), stdout)
return True
try:
root = parse(
stdin,
item_depth=item_depth,
item_callback=handle_item,
dict_constructor=dict,
)
if item_depth == 0:
handle_item([], root)
except KeyboardInterrupt:
pass
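A short usage sketch (not part of the committed file) showing the attribute-style access that ObjectDict adds on top of the usual xmltodict behaviour; the XML snippet is made up for illustration.

import x2d

doc = x2d.parse('<device><name>STM32</name><cpu revision="r1"/></device>')

# Mapping-style and attribute-style access are interchangeable
assert doc["device"]["name"] == "STM32"
assert doc.device.name == "STM32"

# XML attributes keep their "@" prefix, so they need mapping-style access
assert doc.device.cpu["@revision"] == "r1"

ObjectDict instances are plain dict subclasses, so the parsed tree pickles cleanly, which is what svd.py relies on for its cache.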

Dockerfile

@@ -25,9 +25,6 @@ RUN wget --progress=dot:giga "https://developer.arm.com/-/media/Files/downloads/
for file in * ; do ln -s "${PWD}/${file}" "/usr/bin/${file}" ; done && \
cd / && arm-none-eabi-gcc -v && arm-none-eabi-gdb -v
RUN wget --progress=dot:giga -O - https://bootstrap.pypa.io/pip/2.7/get-pip.py | python2 && \
pip install --no-cache-dir lxml==4.6.3
RUN git clone --depth 1 --branch v0.4.1 https://github.com/atomicobject/heatshrink.git && \
cd heatshrink && make && mv ./heatshrink /usr/local/bin/heatshrink
@@ -35,4 +32,4 @@ COPY entrypoint.sh syntax_check.sh /
RUN chmod +x /syntax_check.sh
ENTRYPOINT ["/entrypoint.sh"]
ENTRYPOINT ["/entrypoint.sh"]