Source code for mappet.mappet

# -*- coding: utf-8 -*-

u"""Module for dynamic mapping of XML trees to Python objects.

.. :module: mappet
   :synopsis: Module for dynamic mapping of XML trees to Python objects.
"""

import re

from copy import deepcopy

from lxml import etree

import helpers

__all__ = [
    'Literal',
    'Mappet',
    'Node',
]


class Node(object):
    u"""Thin wrapper around a single parsed XML element.

    The wrapped element is kept in ``_xml``; its attributes and text are
    reachable through :meth:`getattr` / :meth:`setattr` or through item
    access with ``@``- or ``#``-prefixed keys.
    """

    #: The wrapped lxml element (``None`` until an instance assigns one).
    _xml = None

    def __init__(self, xml):
        self._xml = xml

    def __repr__(self):
        u"""Render the tag, its attributes and the number of children.

        >>> xml = etree.Element('root')
        >>> xml.set('attr1', 'val1')
        >>> _ = etree.SubElement(xml, 'child')
        >>> repr(Node(xml))
        '<root attr1="val1"> (1)'
        """
        element = self._xml
        child_count = len(element)
        rendered_attrs = ''.join(
            ' {}="{}"'.format(name, element.attrib[name])
            for name in element.attrib
        )
        # An element without children is shown in self-closing form.
        if child_count:
            closing = ''
        else:
            closing = '/'
        return '<{tagname}{attributes}{closing_paren}> ({children})'.format(
            tagname=element.tag,
            attributes=rendered_attrs,
            closing_paren=closing,
            children=child_count
        )

    def __getitem__(self, key):
        u"""Item access restricted to attributes and text.

        Only keys starting with ``@`` (attribute) or ``#`` (text node) are
        accepted; anything else raises ``KeyError``.

        >>> xml = etree.Element('root')
        >>> xml.set('attr1', 'val1')
        >>> _ = etree.SubElement(xml, 'child')
        >>> Node(xml)[0]
        Traceback (most recent call last):
        ...
        KeyError: 0
        >>> Node(xml)['@attr1']
        'val1'
        """
        if not self.is_key_attr_or_text(key):
            raise KeyError(key)
        # Drop the one-character prefix before the lookup.
        return self.getattr(key[1:])

    def getattr(self, key, default=None, callback=None):
        u"""Read the element's text (``key == 'text'``) or one attribute.

        :param key: ``'text'`` or an attribute name.
        :param default: returned for a missing attribute (not used for text).
        :param callback: optional callable applied to the value before
            it is returned.

        >>> xml = etree.Element('root')
        >>> xml.text = 'text'
        >>> Node(xml).getattr('text')
        'text'
        >>> Node(xml).getattr('text', callback=str.upper)
        'TEXT'
        >>> Node(xml).getattr('wrong_attr', default='default')
        'default'
        """
        if key == 'text':
            value = self._xml.text
        else:
            value = self._xml.get(key, default)
        if callback:
            return callback(value)
        return value

    def setattr(self, key, value):
        u"""Write the element's text (``key == 'text'``) or one attribute.

        The value is converted with ``str()`` before it is stored.

        >>> xml = etree.Element('root')
        >>> Node(xml).setattr('text', 'text2')
        >>> Node(xml).getattr('text')
        'text2'
        >>> Node(xml).setattr('attr', 'val')
        >>> Node(xml).getattr('attr')
        'val'
        """
        as_text = str(value)
        if key == 'text':
            self._xml.text = as_text
        else:
            self._xml.set(key, as_text)

    @property
    def tag(self):
        u"""Tag name of the wrapped element."""
        return self._xml.tag

    @staticmethod
    def is_key_attr_or_text(key):
        u"""Tell whether ``key`` is a string starting with ``@`` or ``#``."""
        return isinstance(key, basestring) and key.startswith(('@', '#'))
class Literal(Node):
    u"""Leaf of an XML tree: an element that is treated as a plain value.

    The element's text is exposed through string/number conversions, and
    every ``to_*`` function found in ``helpers`` is available as a
    zero-argument method (e.g. ``leaf.to_int()``).
    """

    def __str__(self):
        u"""Return ``str()`` of the leaf's value as given by :meth:`get`."""
        return str(self.get())

    #: Represents the leaf as unicode.
    __unicode__ = __str__

    def __repr__(self):
        u"""Represent the leaf by its textual value."""
        return str(self)

    def __int__(self):
        u"""Convert the leaf with the ``to_int`` helper."""
        return self.to_int()

    def __float__(self):
        u"""Convert the leaf with the ``to_float`` helper."""
        return self.to_float()

    def __nonzero__(self):
        u"""A leaf is truthy only when its element has non-empty text."""
        return bool(self._xml.text)

    def __eq__(self, other):
        u"""Compare two leaves.

        They are equal when all of the following match:

        * tag name,
        * parent element,
        * text,
        * attributes,
        * position among the parent's children.

        Comparing with an object that is not a :class:`Node` yields
        ``NotImplemented`` (so ``leaf == None`` is simply ``False``) instead
        of failing with ``AttributeError``.
        """
        if not isinstance(other, Node):
            return NotImplemented

        self_parent = self._xml.getparent()
        other_parent = other._xml.getparent()
        if self_parent is None or other_parent is None:
            # A parentless element has no position to compare; fall back to
            # element identity, which also keeps ``__eq__`` consistent with
            # ``__hash__`` (hash of the element itself).
            return self._xml is other._xml

        return all((
            self._xml.tag == other._xml.tag,
            self_parent == other_parent,
            str(self) == str(other),
            self._xml.attrib == other._xml.attrib,
            self_parent.index(self._xml) == other_parent.index(other._xml),
        ))

    def __ne__(self, other):
        u"""Inverse of :meth:`__eq__`.

        Python 2 does not derive ``!=`` from ``__eq__``, so it is defined
        explicitly.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self._xml)

    def __len__(self):
        u"""Return the length of the node's text (0 when there is no text)."""
        return len(self._xml.text or '')

    @staticmethod
    def __dir__():
        u"""List the available casting methods (``to_*`` names exported by helpers)."""
        return sorted({fnc for fnc in helpers.__all__ if fnc.startswith('to_')})

    def __getattr__(self, name):
        u"""Return a zero-argument converter for the node's text.

        A leaf has no children, so attribute access is only meaningful for
        ``to_*`` helper names; anything else raises ``AttributeError``.
        """
        if name.startswith('to_') and name in dir(helpers):
            fn = getattr(helpers, name)
            return lambda: fn(self._xml.text)
        raise AttributeError(name)

    def __setitem__(self, key, value):
        u"""Assign an attribute (``@name``) or the text (``#text``) by item access.

        Keys without the ``@``/``#`` prefix are silently ignored: a leaf
        cannot be extended with children this way.
        """
        if self.is_key_attr_or_text(key):
            self.setattr(key[1:], value)

    def __add__(self, other):
        u"""String concatenation: ``leaf + other``."""
        return self.to_str() + str(other)

    def __radd__(self, other):
        u"""Reverse string concatenation: ``other + leaf``."""
        return str(other) + self.to_str()

    def get(self, default=None, callback=None):
        u"""Return the leaf's text, or ``default`` when the text is empty.

        :param default: value used when the element has no (or empty) text.
        :param callback: optional callable applied to the value before
            it is returned.
        """
        value = self._xml.text if self._xml.text else default
        return callback(value) if callback else value
class _NoneNode(object):
    u"""Falsy, None-like singleton whose converting methods all return None."""

    # Names answered by ``__getattr__``: every ``to_*`` name visible in
    # ``helpers`` plus ``to_dict``.
    _mocked_functions = [f for f in dir(helpers) if f.startswith('to_')] + ['to_dict']

    def __new__(cls, *args, **kwargs):
        u"""Singleton.

        Recipe 6.15 by Jurgen Hermann: the single instance is stored on the
        class itself and handed out on every call.
        """
        try:
            return vars(cls)['_inst']
        except KeyError:  # pragma: no cover
            cls._inst = super(type, cls).__new__(cls, *args, **kwargs)
            return cls._inst

    def __repr__(self):
        return 'NONE_NODE'

    def __nonzero__(self):
        return False

    def __dir__(self):
        u"""Lists available casting methods."""
        unique_names = set(self._mocked_functions)
        return sorted(unique_names)

    def __getattr__(self, name):
        u"""Returns a mocked converting function that yields ``None``."""
        if name not in self._mocked_functions:
            raise AttributeError(name)
        return lambda: None


NONE_NODE = _NoneNode()
class Mappet(Node):
    u"""A node that may have children.

    Children are reachable as attributes (``m.child``), by item access
    (``m['child']``) or through dotted paths (``m.sget('a.b.@attr')``).
    Attribute names are resolved through an alias table that maps
    normalized tag names to the original ones.
    """

    #: Cache of node aliases; ``None`` means "needs rebuilding".
    #: Keys are normalized tag names, values are the original tag names, e.g.
    #:
    #:     _aliases = {
    #:         'car_model_desc': 'car-model-desc',
    #:         'car': 'Car',
    #:     }
    _aliases = None

    def __init__(self, xml):
        u"""Creates the mappet object from either lxml object, a string or a dict.

        If you pass a dict without root element, one will be created for you with
        'root' as tag name. A dict whose only key is ``#text`` or an ``@attr``
        is likewise treated as the body of a ``root`` element.

        >>> Mappet({'a': {'#text': 'list_elem_1', '@attr1': 'val1'}}).to_str()
        '<a attr1="val1">list_elem_1</a>'

        >>> Mappet({'#text': 'list_elem_1', '@attr1': 'val1'}).to_str()
        '<root attr1="val1">list_elem_1</root>'

        :raises AttributeError: when ``xml`` is none of the supported types.
        """
        if etree.iselement(xml):
            self._xml = xml
        elif isinstance(xml, basestring):
            self._xml = etree.fromstring(xml)
        elif isinstance(xml, dict):
            root_name, body = 'root', xml
            if len(xml) == 1:
                only_key = next(iter(xml))
                # A lone '#text' / '@attr' key describes the root's own
                # content, it is not a tag name.
                if not self.is_key_attr_or_text(only_key):
                    root_name, body = only_key, xml[only_key]
            self._xml = helpers.dict_to_etree(body, etree.Element(root_name))
        else:
            raise AttributeError('Specified data cannot be used to construct a Mappet object.')

    def __nonzero__(self):
        u"""Checks if this node has children, otherwise returns False."""
        return self.has_children()

    def __len__(self):
        u"""Returns the children count."""
        return len(self._xml)

    def __dir__(self):
        u"""Returns a list of children and available helper methods."""
        return sorted(self.keys() | {m for m in dir(self.__class__) if m.startswith('to_')})

    def __deepcopy__(self, memodict):
        u"""Performs a deepcopy on the underlying XML tree."""
        return self.__class__(deepcopy(self._xml))

    def __getattr__(self, name):
        u"""Attribute access to children.

        Returns a list of children, if there is more than 1.
        Returns a child, if there is exactly 1.
        Returns ``None`` if the alias is known but no child matches.

        :raises KeyError: when ``name`` is not a known (normalized) child
            name; this is raised by :meth:`iter_children`.
        """
        children = self.children(name)

        if len(children) > 1:
            return children
        elif len(children) == 1:
            return children[0]
        return None

    def __setattr__(self, name, value):
        u"""Node attribute assignment.

        Names that are part of the class definition are set as regular
        Python attributes; everything else is delegated to :meth:`set`.
        """
        # Only elements that aren't a part of class definition are overwritten.
        if name not in dir(self.__class__):
            return self.set(name, value)

        return super(Mappet, self).__setattr__(name, value)

    def __delattr__(self, key):
        u"""Node removal."""
        # Searches among aliases, if none is found returns the original key.
        tag = self._get_aliases().get(key, key)
        self.__delitem__(tag)

    def __getitem__(self, key):
        u"""Dictionary access.

        String keys without an ``@``/``#`` prefix address children: a single
        leaf child yields its value, a single non-leaf child yields the
        child, several children yield a list. Every other key is handled by
        :meth:`Node.__getitem__` (attributes and ``#text``).
        """
        # Checks if the call isn't to an attribute or the text node.
        if isinstance(key, basestring) and not self.is_key_attr_or_text(key):
            children = self.children(key)

            if len(children) == 1:
                children = children[0]

            # Return the value if it's a leaf.
            if isinstance(children, Literal):
                return children.get()

            return children

        return super(Mappet, self).__getitem__(key)

    def __delitem__(self, key):
        u"""Removes all children with a given key."""
        # Checks if name is not a part of class definition.
        if key not in dir(self.__class__):
            # Snapshot the matches first so the tree is not mutated while
            # it is being iterated.
            for child in list(self._xml.iterchildren(tag=key)):
                self._xml.remove(child)
            # The set of child tags may have changed: drop the alias cache.
            self._aliases = None

    def __eq__(self, other):
        u"""Compares mappet objects.

        Two mappet objects are deemed equal if the lxml objects they represent
        serialize to the same string. Comparison with a non-node returns
        ``NotImplemented``.
        """
        if not isinstance(other, Node):
            return NotImplemented
        return etree.tostring(self._xml) == etree.tostring(other._xml)

    def __ne__(self, other):
        u"""Inverse of :meth:`__eq__` (Python 2 does not derive it)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __contains__(self, path):
        u"""Check if object contains given path."""
        elem = self.sget(path)
        return not (elem is None or elem is NONE_NODE)

    def __getstate__(self):
        u"""Converts the lxml to string for Pickling."""
        return {
            '_xml': etree.tostring(self._xml, pretty_print=False)
        }

    def __setstate__(self, dict_):
        u"""Restores a Pickled mappet object."""
        self._xml = etree.fromstring(dict_['_xml'])

    def __iter__(self):
        u"""Returns children as an iterator."""
        return self.iter_children()

    def to_str(self, pretty_print=False, encoding=None, **kw):
        u"""Converts a node with all of its children to a string.

        Remaining keyword arguments are passed to ``etree.tostring`` as is.

        If ``without_comments`` is truthy and no ``method`` is given, the
        call is switched to C14N serialization with comments stripped
        (``method='c14n'``, ``with_comments=False``).
        NOTE(review): the original documentation states that ``pretty_print``
        and ``encoding`` have no effect in that mode -- confirm against lxml.

        :param bool pretty_print: whether to format the output
        :param str encoding: which encoding to use (ASCII by default)
        :rtype: str
        :returns: node's representation as a string
        """
        if kw.get('without_comments') and not kw.get('method'):
            kw.pop('without_comments')
            kw['method'] = 'c14n'
            kw['with_comments'] = False

        return etree.tostring(
            self._xml,
            pretty_print=pretty_print,
            encoding=encoding,
            **kw
        )

    def has_children(self):
        u"""Returns true if a node has children."""
        return bool(len(self))

    def iter_children(self, key=None):
        u"""Iterates over children.

        Children that have children of their own are wrapped in this class,
        childless ones in :class:`Literal`.

        :param key: A key for filtering children by tagname.
        :raises KeyError: when ``key`` is given but is not a known alias
            (raised once iteration starts, since this is a generator).
        """
        tag = None

        if key:
            tag = self._get_aliases().get(key)

            if not tag:
                raise KeyError(key)

        for child in self._xml.iterchildren(tag=tag):
            if len(child):
                yield self.__class__(child)
            else:
                yield Literal(child)

    def children(self, key=None):
        u"""Returns node's children as a list.

        :param key: A key for filtering children by tagname.
        """
        return list(self.iter_children(key))

    def update(self, **kwargs):
        u"""Updating or creation of new simple nodes.

        Each keyword is used as a tagname and its value as text. Every
        existing child with that tag gets the new text; if there is none, a
        new child is appended.
        """
        for key, value in kwargs.items():
            # Conversion function specific to the value's type (str by default).
            helper = helpers.CAST_DICT.get(type(value), str)
            tag = self._get_aliases().get(key, key)

            elements = list(self._xml.iterchildren(tag=tag))
            if elements:
                for element in elements:
                    element.text = helper(value)
            else:
                element = etree.Element(key)
                element.text = helper(value)
                self._xml.append(element)
                # A new tag appeared: the alias cache must be rebuilt.
                self._aliases = None

    def sget(self, path, default=NONE_NODE):
        u"""Enables access to nodes if one or more of them don't exist.

        ``path`` is a dot-separated chain of child names; numeric segments
        index into lists of same-named children. A trailing ``#text`` or
        ``@attr`` segment returns the text or attribute of the final node.

        Example:
        >>> m = Mappet('<root><tag attr1="attr text">text value</tag></root>')
        >>> m.sget('tag')
        text value
        >>> m.sget('tag.@attr1')
        'attr text'
        >>> m.sget('tag.#text')
        'text value'
        >>> m.sget('reply.vms_model_cars.car.0.params.doors')
        NONE_NODE

        Accessing nonexistent path returns None-like object with mocked
        converting functions which returns None:
        >>> m.sget('reply.fake_node').to_dict() is None
        True

        :param path: dotted path to resolve.
        :param default: returned when the path cannot be resolved. For
            ``#text``/``@attr`` paths the implicit default is ``None``.
        """
        attrs = str(path).split(".")
        text_or_attr = None
        last_attr = attrs[-1]
        # Case of getting text or attribute
        if last_attr == '#text' or last_attr.startswith('@'):
            # #text => text, @attr => attr
            text_or_attr = last_attr[1:]
            attrs = attrs[:-1]
            # When getting #text and @attr we want default value to be None.
            if default is NONE_NODE:
                default = None

        my_object = self
        for attr in attrs:
            try:
                if isinstance(my_object, (list, tuple)) and re.match(r'^-?\d+$', attr):
                    my_object = my_object[int(attr)]
                else:
                    my_object = getattr(my_object, attr)
            except (AttributeError, KeyError, IndexError):
                return default

        # Return #text or @attr
        if text_or_attr:
            try:
                return my_object.getattr(text_or_attr)
            except AttributeError:
                # my_object can be a list (or None), which has no text or
                # attributes; hand back the caller's default.
                return default
        else:
            return my_object

    def create(self, tag, value):
        u"""Creates a node, if it doesn't exist yet.

        Unlike attribute access, this allows to pass a node's name with hyphens.
        NOTE(review): the original documentation says hyphens are normalized
        automatically; that normalization is not visible in this method.

        In case the required element already exists, raises an exception.
        Updating/overwriting should be done using ``update``.

        :raises KeyError: when a child with this exact tag already exists.
        """
        child_tags = {child.tag for child in self._xml}

        if tag in child_tags:
            raise KeyError('Node {} already exists in XML tree.'.format(tag))

        self.set(tag, value)

    def set(self, name, value):
        u"""Assigns a new XML structure to the node.

        A literal value, dict or list can be passed in. Works for all nested levels.

        Dictionary:
        >>> m = Mappet('<root/>')
        >>> m.head = {'a': 'A', 'b': {'#text': 'B', '@attr': 'val'}}
        >>> m.head.to_str()
        '<head><a>A</a><b attr="val">B</b></head>'

        List:
        >>> m.head = [{'a': i} for i in 'ABC']
        >>> m.head.to_str()
        '<head><a>A</a><a>B</a><a>C</a></head>'

        Literals:
        >>> m.head.leaf = 'A'
        >>> m.head.leaf.get()
        'A'
        """
        try:
            # Searches for a node to assign to.
            element = next(self._xml.iterchildren(tag=name))
        except StopIteration:
            # There is no such node in the XML tree. We create a new one
            # with current root as parent (self._xml).
            element = etree.SubElement(self._xml, name)

        if isinstance(value, dict):
            self.assign_dict(element, value)
        elif isinstance(value, (list, tuple, set)):
            self.assign_sequence_or_set(element, value)
        else:
            # Literal value.
            self.assign_literal(element, value)

        # Clear the aliases.
        self._aliases = None

    def assign_dict(self, node, xml_dict):
        u"""Assigns a Python dict to a ``lxml`` node.

        The node is replaced, inside this mappet's element, by a fresh
        element with the same tag which is then filled from ``xml_dict``.

        :param node: A node to assign the dict to.
        :param xml_dict: The dict with attributes/children to use.
        """
        new_node = etree.Element(node.tag)

        # Replaces the previous node with the new one
        self._xml.replace(node, new_node)

        # Copies #text and @attrs from the xml_dict
        helpers.dict_to_etree(xml_dict, new_node)

    @staticmethod
    def assign_sequence_or_set(element, value):
        u"""Replaces the content of ``element`` with nodes built from ``value``.

        Each item is converted with ``helpers.dict_to_etree`` into a
        temporary element whose children are then moved under ``element``.

        :param element: element to fill; it is cleared first.
        :param value: iterable of items accepted by ``helpers.dict_to_etree``.
        """
        element.clear()

        for item in value:
            temp_element = etree.Element('temp')
            helpers.dict_to_etree(item, temp_element)
            # Snapshot the children: appending moves them out of the
            # temporary element while we walk over it.
            for child in list(temp_element.iterchildren()):
                element.append(child)

    @staticmethod
    def assign_literal(element, value):
        u"""Assigns a literal.

        :param etree.Element element: element to which we assign.
        :param value: the value to assign
        """
        # Searches for a conversion method specific to the type of value.
        helper = helpers.CAST_DICT.get(type(value), str)

        # Removes all children and attributes.
        element.clear()
        element.text = helper(value)

    def to_dict(self, **kw):
        u"""Converts the lxml object to a dict.

        Keyword arguments are forwarded to ``helpers.etree_to_dict``; the
        value of the single item popped from its result is returned
        (presumably the content stored under the root tag -- verify against
        ``helpers``).

        possible kwargs:
            without_comments: bool
        """
        _, value = helpers.etree_to_dict(self._xml, **kw).popitem()
        return value

    def _get_aliases(self):
        u"""Creates (and caches) a dict with aliases.

        The key is a normalized tagname, value the original tagname.
        """
        if self._aliases is None:
            self._aliases = {}

            if self._xml is not None:
                for child in self._xml.iterchildren():
                    self._aliases[helpers.normalize_tag(child.tag)] = child.tag

        return self._aliases

    def xpath(
            self,
            path,
            namespaces=None,
            regexp=False,
            smart_strings=True,
            single_use=False,
    ):
        u"""Executes XPath query on the ``lxml`` object and returns a correct object.

        A result consisting of exactly one element is wrapped: in this class
        when the element has children, in :class:`Literal` otherwise. Any
        other result (several nodes, strings, numbers, booleans) is returned
        as produced by lxml.

        :param str path: XPath string e.g., 'cars'/'car'
        :param str/dict namespaces: e.g., 'exslt', 're' or
            ``{'re': "http://exslt.org/regular-expressions"}``
        :param bool regexp: if ``True`` and no namespaces is provided, it will use
            ``exslt`` namespace
        :param bool smart_strings: passed through to lxml.
        :param bool single_use: faster method for using only once. Does not
            create ``XPathEvaluator`` instance.

        >>> root = mappet.Mappet("<root><a>aB</a><b>aBc</b></root>")
        >>> root.xpath(
                "//*[re:test(., '^abc$', 'i')]",
                namespaces='exslt',
                regexp=True,
            )
        """
        if (
            namespaces in ['exslt', 're'] or
            (regexp and not namespaces)
        ):
            namespaces = {'re': "http://exslt.org/regular-expressions"}

        if single_use:
            # Forward the namespace mapping too, otherwise prefixed queries
            # (e.g. ``re:test``) cannot be resolved on this code path.
            node = self._xml.xpath(
                path,
                namespaces=namespaces,
                smart_strings=smart_strings
            )
        else:
            xpe = self.xpath_evaluator(
                namespaces=namespaces,
                regexp=regexp,
                smart_strings=smart_strings
            )
            node = xpe(path)

        # XPath may also yield a bare number/boolean/string, which has no
        # meaningful "single element" form.
        if isinstance(node, list) and len(node) == 1:
            node = node[0]

            # Only real elements can be wrapped; text/attribute results are
            # returned untouched.
            if etree.iselement(node):
                if len(node):
                    return self.__class__(node)
                return Literal(node)

        return node

    def xpath_evaluator(self, namespaces=None, regexp=False, smart_strings=True):
        u"""Creates an XPathEvaluator instance for an ElementTree or an Element.

        :returns: ``XPathEvaluator`` instance
        """
        return etree.XPathEvaluator(
            self._xml,
            namespaces=namespaces,
            regexp=regexp,
            smart_strings=smart_strings
        )

    def keys(self):
        u"""Returns a set of node's keys (normalized child tag names)."""
        return set(self._get_aliases().keys())