1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import gc
16 import operator
17 import tempfile
18 import textwrap
19 import zlib
20 import gzip
21
# Put the directory that holds this module on the import path (once), so
# that modules living next to it can be imported by the statements below.
this_dir = os.path.dirname(__file__)
if sys.path.count(this_dir) == 0:
    sys.path[:0] = [this_dir]
25
26 from common_imports import etree, StringIO, BytesIO, HelperTestCase
27 from common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
28 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
29 from common_imports import canonicalize, sorted, _str, _bytes
30
31 print("")
32 print("TESTED VERSION: %s" % etree.__version__)
33 print(" Python: " + repr(sys.version_info))
34 print(" lxml.etree: " + repr(etree.LXML_VERSION))
35 print(" libxml used: " + repr(etree.LIBXML_VERSION))
36 print(" libxml compiled: " + repr(etree.LIBXML_COMPILED_VERSION))
37 print(" libxslt used: " + repr(etree.LIBXSLT_VERSION))
38 print(" libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
39 print("")
40
# Text type alias: use the ``unicode`` builtin where it exists, and fall
# back to ``str`` on interpreters that do not define it.
try:
    unicode
except NameError:
    _unicode = str
else:
    _unicode = unicode
46
48 """Tests only for etree, not ElementTree"""
49 etree = etree
50
61
70
77
# A tag consisting only of a (possibly empty) namespace part has no local
# name; it must be rejected both at creation and when assigned to .tag.
make_element = self.etree.Element
existing = make_element('name')
for bad_tag in ('{}', '{test}'):
    self.assertRaises(ValueError, make_element, bad_tag)
    self.assertRaises(ValueError, setattr, existing, 'tag', bad_tag)
86
94
# Single and double quote characters are rejected in tag names, with or
# without a namespace, on creation and on assignment to .tag.
make_element = self.etree.Element
for bad_tag in ("p'name", 'p"name', "{test}p'name", '{test}p"name'):
    self.assertRaises(ValueError, make_element, bad_tag)

existing = make_element('name')
for bad_tag in ("p'name", 'p"name'):
    self.assertRaises(ValueError, setattr, existing, 'tag', bad_tag)
106
# Whitespace anywhere in a tag name is rejected, on creation and on
# assignment to .tag.
make_element = self.etree.Element
for bad_tag in (' name ', 'na me', '{test} name'):
    self.assertRaises(ValueError, make_element, bad_tag)

existing = make_element('name')
self.assertRaises(ValueError, setattr, existing, 'tag', ' name ')
115
123
131
# SubElement() must reject quote characters in the child tag name just
# like Element() does, with and without a namespace.
parent = self.etree.Element('name')
make_child = self.etree.SubElement
for bad_tag in ("p'name", "{test}p'name", 'p"name', '{test}p"name'):
    self.assertRaises(ValueError, make_child, parent, bad_tag)
142
151
160
# An empty local name is invalid for QName, with or without a namespace.
make_qname = self.etree.QName
for bad_args in (('',), ('test', '')):
    self.assertRaises(ValueError, make_qname, *bad_args)
165
# A colon inside the local name is invalid for QName.
make_qname = self.etree.QName
for bad_args in (('p:name',), ('test', 'p:name')):
    self.assertRaises(ValueError, make_qname, *bad_args)
170
# Whitespace inside the local name is invalid for QName.
make_qname = self.etree.QName
for bad_args in ((' name ',), ('na me',), ('test', ' name')):
    self.assertRaises(ValueError, make_qname, *bad_args)
176
184
186
# A QName built from an element must equal the QName that was used to
# create that element.
make_qname = self.etree.QName
from_parts = make_qname('http://myns', 'a')
element = self.etree.Element(from_parts, nsmap={'p' : 'http://myns'})
from_element = make_qname(element)

self.assertEqual(element.tag, from_parts.text)
self.assertEqual(from_parts.text, from_element.text)
self.assertEqual(from_parts, from_element)
195
197
198 etree = self.etree
199 qname = etree.QName('http://myns', 'a')
200 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
201 a.text = qname
202
203 self.assertEqual("p:a", a.text)
204
213
228
234
244
256
# Attributes set one by one are listed in the order they were set.  When a
# second element is created from that mapping plus keyword attributes, the
# expected listing puts the keyword attributes ahead of the copied ones,
# and the source element's attributes are left unchanged.
make_element = self.etree.Element

keys = []
values = []
for index in range(10):
    keys.append("attr%d" % index)
    values.append("TEST-%d" % index)

root = make_element("root")
for name, value in zip(keys, values):
    root.set(name, value)
self.assertEqual(keys, root.attrib.keys())
self.assertEqual(values, root.attrib.values())

root2 = make_element("root2", root.attrib,
                     attr_99='TOAST-1', attr_98='TOAST-2')
expected_keys = ['attr_98', 'attr_99'] + keys
expected_values = ['TOAST-2', 'TOAST-1'] + values
self.assertEqual(expected_keys, root2.attrib.keys())
self.assertEqual(expected_values, root2.attrib.values())

# Building root2 must not have touched the original element.
self.assertEqual(keys, root.attrib.keys())
self.assertEqual(values, root.attrib.values())
279
287
301
323
325 XML = self.etree.XML
326 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
327
328 root = XML(xml)
329 self.etree.strip_elements(root, 'a')
330 self.assertEqual(_bytes('<test><x></x></test>'),
331 self._writeElement(root))
332
333 root = XML(xml)
334 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
335 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
336 self._writeElement(root))
337
338 root = XML(xml)
339 self.etree.strip_elements(root, 'c')
340 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
341 self._writeElement(root))
342
344 XML = self.etree.XML
345 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
346
347 root = XML(xml)
348 self.etree.strip_elements(root, 'a')
349 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
350 self._writeElement(root))
351
352 root = XML(xml)
353 self.etree.strip_elements(root, '{urn:a}b', 'c')
354 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
355 self._writeElement(root))
356
357 root = XML(xml)
358 self.etree.strip_elements(root, '{urn:a}*', 'c')
359 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
360 self._writeElement(root))
361
362 root = XML(xml)
363 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
364 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
365 self._writeElement(root))
366
385
411
438
465
484
497
508
514
516 XML = self.etree.XML
517 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
518 self.assertEqual(root[0].target, "mypi")
519 self.assertEqual(root[0].get('my'), "1")
520 self.assertEqual(root[0].get('test'), " abc ")
521 self.assertEqual(root[0].get('quotes'), "' '")
522 self.assertEqual(root[0].get('only'), None)
523 self.assertEqual(root[0].get('names'), None)
524 self.assertEqual(root[0].get('nope'), None)
525
527 XML = self.etree.XML
528 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
529 self.assertEqual(root[0].target, "mypi")
530 self.assertEqual(root[0].attrib['my'], "1")
531 self.assertEqual(root[0].attrib['test'], " abc ")
532 self.assertEqual(root[0].attrib['quotes'], "' '")
533 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
534 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
535 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
536
538
539 ProcessingInstruction = self.etree.ProcessingInstruction
540
541 a = ProcessingInstruction("PI", "ONE")
542 b = copy.deepcopy(a)
543 b.text = "ANOTHER"
544
545 self.assertEqual('ONE', a.text)
546 self.assertEqual('ANOTHER', b.text)
547
563
578
590
609
614
627
638
639 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
640 events = list(iterparse(f, events=('end', 'comment')))
641 root = events[-1][1]
642 self.assertEqual(6, len(events))
643 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
644 [ name(*item) for item in events ])
645 self.assertEqual(
646 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
647 tostring(root))
648
660
661 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
662 events = list(iterparse(f, events=('end', 'pi')))
663 root = events[-2][1]
664 self.assertEqual(8, len(events))
665 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
666 ('pid','d'), 'a', ('pie','e')],
667 [ name(*item) for item in events ])
668 self.assertEqual(
669 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
670 tostring(ElementTree(root)))
671
686
692
# With recover=True, iterparse() must get through a document with an
# unclosed <b> and still report exactly one start and one end event for
# each of the three elements.
source = BytesIO('<a><b><c/></a>')
parsing = self.etree.iterparse(source, events=('start', 'end'), recover=True)
seen = []
for event, element in parsing:
    seen.append((event, element.tag))

self.assertTrue(parsing.root is not None)

for tag in ('a', 'b', 'c'):
    for event in ('start', 'end'):
        self.assertEqual(1, seen.count((event, tag)))
709
# With recover=True, iterparse() must get through a document containing
# several mismatched end tags and report the expected number of start/end
# events per tag.
source = BytesIO('<a><b><c/></d><b><c/></a></b>')
parsing = self.etree.iterparse(source, events=('start', 'end'), recover=True)
seen = []
for event, element in parsing:
    seen.append((event, element.tag))

self.assertTrue(parsing.root is not None)

for tag, expected_count in (('a', 1), ('b', 2), ('c', 2)):
    for event in ('start', 'end'):
        self.assertEqual(expected_count, seen.count((event, tag)))
726
728 iterparse = self.etree.iterparse
729 f = BytesIO("""
730 <a> \n \n <b> b test </b> \n
731
732 \n\t <c> \n </c> </a> \n """)
733 iterator = iterparse(f, remove_blank_text=True)
734 text = [ (element.text, element.tail)
735 for event, element in iterator ]
736 self.assertEqual(
737 [(" b test ", None), (" \n ", None), (None, None)],
738 text)
739
741 iterparse = self.etree.iterparse
742 f = BytesIO('<a><b><d/></b><c/></a>')
743
744 iterator = iterparse(f, tag="b", events=('start', 'end'))
745 events = list(iterator)
746 root = iterator.root
747 self.assertEqual(
748 [('start', root[0]), ('end', root[0])],
749 events)
750
752 iterparse = self.etree.iterparse
753 f = BytesIO('<a><b><d/></b><c/></a>')
754
755 iterator = iterparse(f, tag="*", events=('start', 'end'))
756 events = list(iterator)
757 self.assertEqual(
758 8,
759 len(events))
760
762 iterparse = self.etree.iterparse
763 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
764
765 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
766 events = list(iterator)
767 root = iterator.root
768 self.assertEqual(
769 [('start', root[0]), ('end', root[0])],
770 events)
771
# tag="{}b" selects <b> only when it is in no namespace: it matches in the
# plain document and yields nothing once a default namespace is declared.
parse_events = self.etree.iterparse

plain = parse_events(BytesIO('<a><b><d/></b><c/></a>'),
                     tag="{}b", events=('start', 'end'))
plain_events = list(plain)
plain_root = plain.root
self.assertEqual(
    [('start', plain_root[0]), ('end', plain_root[0])],
    plain_events)

namespaced = parse_events(
    BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>'),
    tag="{}b", events=('start', 'end'))
namespaced_events = list(namespaced)
namespaced_root = namespaced.root
self.assertEqual([], namespaced_events)
787
789 iterparse = self.etree.iterparse
790 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
791 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
792 events = list(iterator)
793 self.assertEqual(8, len(events))
794
796 iterparse = self.etree.iterparse
797 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
798 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
799 events = list(iterator)
800 self.assertEqual([], events)
801
802 f = BytesIO('<a><b><d/></b><c/></a>')
803 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
804 events = list(iterator)
805 self.assertEqual(8, len(events))
806
808 text = _str('Søk på nettet')
809 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
810 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
811 ).encode('iso-8859-1')
812
813 self.assertRaises(self.etree.ParseError,
814 list, self.etree.iterparse(BytesIO(xml_latin1)))
815
817 text = _str('Søk på nettet', encoding="UTF-8")
818 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
819 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
820 ).encode('iso-8859-1')
821
822 iterator = self.etree.iterparse(BytesIO(xml_latin1),
823 encoding="iso-8859-1")
824 self.assertEqual(1, len(list(iterator)))
825
826 a = iterator.root
827 self.assertEqual(a.text, text)
828
830 tostring = self.etree.tostring
831 f = BytesIO('<root><![CDATA[test]]></root>')
832 context = self.etree.iterparse(f, strip_cdata=False)
833 content = [ el.text for event,el in context ]
834
835 self.assertEqual(['test'], content)
836 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
837 tostring(context.root))
838
842
847
866
867
868
891
892
893
904 def end(self, tag):
905 events.append("end")
906 assertEqual("TAG", tag)
907 def close(self):
908 return "DONE"
909
910 parser = self.etree.XMLParser(target=Target())
911 tree = self.etree.ElementTree()
912
913 self.assertRaises(TypeError,
914 tree.parse, BytesIO("<TAG/>"), parser=parser)
915 self.assertEqual(["start", "end"], events)
916
918
# An exception raised inside a parser target callback must propagate out
# of the feed()/close() sequence.  The expected event list shows that
# nothing after the failing end() call is delivered except the final
# close() on the target.
seen = []

class Target(object):
    def start(self, tag, attrib):
        seen.append("start-" + tag)

    def end(self, tag):
        seen.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        seen.append("data-" + data)

    def close(self):
        seen.append("close")
        return "DONE"

parser = self.etree.XMLParser(target=Target())

try:
    parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
    parser.close()
except ValueError:
    pass  # the error from Target.end() arrived as expected
else:
    self.fail("error expected, but parsing succeeded")

self.assertEqual(
    ["start-root", "data-A", "start-a", "data-ca", "end-a", "close"],
    seen)
945
947
948 events = []
949 class Target(object):
950 def start(self, tag, attrib):
951 events.append("start-" + tag)
952 def end(self, tag):
953 events.append("end-" + tag)
954 if tag == 'a':
955 raise ValueError("dead and gone")
956 def data(self, data):
957 events.append("data-" + data)
958 def close(self):
959 events.append("close")
960 return "DONE"
961
962 parser = self.etree.XMLParser(target=Target())
963
964 try:
965 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
966 parser=parser)
967 self.fail("error expected, but parsing succeeded")
968 except ValueError:
969 done = 'value error received as expected'
970
971 self.assertEqual(["start-root", "data-A", "start-a",
972 "data-ca", "end-a", "close"],
973 events)
974
976
977 events = []
978 class Target(object):
979 def start(self, tag, attrib):
980 events.append("start-" + tag)
981 def end(self, tag):
982 events.append("end-" + tag)
983 def data(self, data):
984 events.append("data-" + data)
985 def comment(self, text):
986 events.append("comment-" + text)
987 def close(self):
988 return "DONE"
989
990 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
991
992 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
993 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
994 done = parser.close()
995
996 self.assertEqual("DONE", done)
997 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
998 "start-sub", "end-sub", "data-B", "end-root"],
999 events)
1000
1006 def end(self, tag):
1007 events.append("end-" + tag)
1008 def data(self, data):
1009 events.append("data-" + data)
1010 def comment(self, text):
1011 events.append("comment-" + text)
1012 def close(self):
1013 return "DONE"
1014
1015 parser = self.etree.XMLParser(target=Target())
1016
1017 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1018 done = parser.close()
1019
1020 self.assertEqual("DONE", done)
1021 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1022 "start-sub", "end-sub", "comment-c", "data-B",
1023 "end-root", "comment-d"],
1024 events)
1025
1027 events = []
1028 class Target(object):
1029 def start(self, tag, attrib):
1030 events.append("start-" + tag)
1031 def end(self, tag):
1032 events.append("end-" + tag)
1033 def data(self, data):
1034 events.append("data-" + data)
1035 def pi(self, target, data):
1036 events.append("pi-" + target + "-" + data)
1037 def close(self):
1038 return "DONE"
1039
1040 parser = self.etree.XMLParser(target=Target())
1041
1042 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1043 done = parser.close()
1044
1045 self.assertEqual("DONE", done)
1046 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1047 "data-B", "end-root", "pi-test-c"],
1048 events)
1049
1051 events = []
1052 class Target(object):
1053 def start(self, tag, attrib):
1054 events.append("start-" + tag)
1055 def end(self, tag):
1056 events.append("end-" + tag)
1057 def data(self, data):
1058 events.append("data-" + data)
1059 def close(self):
1060 return "DONE"
1061
1062 parser = self.etree.XMLParser(target=Target(),
1063 strip_cdata=False)
1064
1065 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1066 done = parser.close()
1067
1068 self.assertEqual("DONE", done)
1069 self.assertEqual(["start-root", "data-A", "start-a",
1070 "data-ca", "end-a", "data-B", "end-root"],
1071 events)
1072
1074 events = []
1075 class Target(object):
1076 def start(self, tag, attrib):
1077 events.append("start-" + tag)
1078 def end(self, tag):
1079 events.append("end-" + tag)
1080 def data(self, data):
1081 events.append("data-" + data)
1082 def close(self):
1083 events.append("close")
1084 return "DONE"
1085
1086 parser = self.etree.XMLParser(target=Target(),
1087 recover=True)
1088
1089 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1090 done = parser.close()
1091
1092 self.assertEqual("DONE", done)
1093 self.assertEqual(["start-root", "data-A", "start-a",
1094 "data-ca", "end-a", "data-B",
1095 "end-root", "close"],
1096 events)
1097
1107
1117
1126
1136
# iterwalk() with start/end events visits the tree in document order,
# opening each element before its children and closing it after them.
root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
walked = list(self.etree.iterwalk(root, events=('start', 'end')))

expected = [('start', root)]
for child in (root[0], root[1]):
    expected.append(('start', child))
    expected.append(('end', child))
expected.append(('end', root))

self.assertEqual(expected, walked)
1147
1158
1160 iterwalk = self.etree.iterwalk
1161 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1162
1163 attr_name = '{testns}bla'
1164 events = []
1165 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1166 for event, elem in iterator:
1167 events.append(event)
1168 if event == 'start':
1169 if elem.tag != '{ns1}a':
1170 elem.set(attr_name, 'value')
1171
1172 self.assertEqual(
1173 ['start-ns', 'start', 'start', 'start-ns', 'start',
1174 'end', 'end-ns', 'end', 'end', 'end-ns'],
1175 events)
1176
1177 self.assertEqual(
1178 None,
1179 root.get(attr_name))
1180 self.assertEqual(
1181 'value',
1182 root[0].get(attr_name))
1183
1194
1196 parse = self.etree.parse
1197 parser = self.etree.XMLParser(dtd_validation=True)
1198 assertEqual = self.assertEqual
1199 test_url = _str("__nosuch.dtd")
1200
1201 class MyResolver(self.etree.Resolver):
1202 def resolve(self, url, id, context):
1203 assertEqual(url, test_url)
1204 return self.resolve_string(
1205 _str('''<!ENTITY myentity "%s">
1206 <!ELEMENT doc ANY>''') % url, context)
1207
1208 parser.resolvers.add(MyResolver())
1209
1210 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1211 tree = parse(StringIO(xml), parser)
1212 root = tree.getroot()
1213 self.assertEqual(root.text, test_url)
1214
1216 parse = self.etree.parse
1217 parser = self.etree.XMLParser(dtd_validation=True)
1218 assertEqual = self.assertEqual
1219 test_url = _str("__nosuch.dtd")
1220
1221 class MyResolver(self.etree.Resolver):
1222 def resolve(self, url, id, context):
1223 assertEqual(url, test_url)
1224 return self.resolve_string(
1225 (_str('''<!ENTITY myentity "%s">
1226 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1227 context)
1228
1229 parser.resolvers.add(MyResolver())
1230
1231 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1232 tree = parse(StringIO(xml), parser)
1233 root = tree.getroot()
1234 self.assertEqual(root.text, test_url)
1235
1237 parse = self.etree.parse
1238 parser = self.etree.XMLParser(dtd_validation=True)
1239 assertEqual = self.assertEqual
1240 test_url = _str("__nosuch.dtd")
1241
1242 class MyResolver(self.etree.Resolver):
1243 def resolve(self, url, id, context):
1244 assertEqual(url, test_url)
1245 return self.resolve_file(
1246 SillyFileLike(
1247 _str('''<!ENTITY myentity "%s">
1248 <!ELEMENT doc ANY>''') % url), context)
1249
1250 parser.resolvers.add(MyResolver())
1251
1252 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1253 tree = parse(StringIO(xml), parser)
1254 root = tree.getroot()
1255 self.assertEqual(root.text, test_url)
1256
1258 parse = self.etree.parse
1259 parser = self.etree.XMLParser(attribute_defaults=True)
1260 assertEqual = self.assertEqual
1261 test_url = _str("__nosuch.dtd")
1262
1263 class MyResolver(self.etree.Resolver):
1264 def resolve(self, url, id, context):
1265 assertEqual(url, test_url)
1266 return self.resolve_filename(
1267 fileInTestDir('test.dtd'), context)
1268
1269 parser.resolvers.add(MyResolver())
1270
1271 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1272 tree = parse(StringIO(xml), parser)
1273 root = tree.getroot()
1274 self.assertEqual(
1275 root.attrib, {'default': 'valueA'})
1276 self.assertEqual(
1277 root[0].attrib, {'default': 'valueB'})
1278
1293
1294 parser.resolvers.add(MyResolver())
1295
1296 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1297 tree = parse(StringIO(xml), parser,
1298 base_url=fileUrlInTestDir('__test.xml'))
1299 root = tree.getroot()
1300 self.assertEqual(
1301 root.attrib, {'default': 'valueA'})
1302 self.assertEqual(
1303 root[0].attrib, {'default': 'valueB'})
1304
1306 parse = self.etree.parse
1307 parser = self.etree.XMLParser(attribute_defaults=True)
1308 assertEqual = self.assertEqual
1309 test_url = _str("__nosuch.dtd")
1310
1311 class MyResolver(self.etree.Resolver):
1312 def resolve(self, url, id, context):
1313 assertEqual(url, test_url)
1314 return self.resolve_file(
1315 open(fileInTestDir('test.dtd'), 'rb'), context)
1316
1317 parser.resolvers.add(MyResolver())
1318
1319 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1320 tree = parse(StringIO(xml), parser)
1321 root = tree.getroot()
1322 self.assertEqual(
1323 root.attrib, {'default': 'valueA'})
1324 self.assertEqual(
1325 root[0].attrib, {'default': 'valueB'})
1326
1328 parse = self.etree.parse
1329 parser = self.etree.XMLParser(load_dtd=True)
1330 assertEqual = self.assertEqual
1331 test_url = _str("__nosuch.dtd")
1332
1333 class check(object):
1334 resolved = False
1335
1336 class MyResolver(self.etree.Resolver):
1337 def resolve(self, url, id, context):
1338 assertEqual(url, test_url)
1339 check.resolved = True
1340 return self.resolve_empty(context)
1341
1342 parser.resolvers.add(MyResolver())
1343
1344 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1345 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1346 self.assertTrue(check.resolved)
1347
1354
1355 class MyResolver(self.etree.Resolver):
1356 def resolve(self, url, id, context):
1357 raise _LocalException
1358
1359 parser.resolvers.add(MyResolver())
1360
1361 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1362 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1363
1364 if etree.LIBXML_VERSION > (2,6,20):
1381
# Moving an element that contains an unresolved entity reference must keep
# the entity node intact.
#
# The document has to declare the entity via a character reference and
# actually *reference* it inside <child3>: with resolve_entities=False the
# reference is what produces the child node that the final two assertions
# inspect.  A literal character in its place leaves <child3> without any
# child, so root[0][0] could never be an entity named "nbsp".
xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
<root>
  <child1/>
  <child2/>
  <child3>&nbsp;</child3>
</root>''')

parser = self.etree.XMLParser(resolve_entities=False)
root = self.etree.fromstring(xml, parser)
self.assertEqual([el.tag for el in root],
                 ['child1', 'child2', 'child3'])

# Assigning the last child to slot 0 moves it there, replacing <child1>.
root[0] = root[-1]
self.assertEqual([el.tag for el in root],
                 ['child3', 'child2'])

# The moved element still carries its entity reference node.
self.assertEqual(root[0][0].text, '&nbsp;')
self.assertEqual(root[0][0].name, 'nbsp')
1400
1416
1423
# Entity() rejects names containing spaces, commas or NUL bytes, and
# malformed numeric character references.
make_entity = self.etree.Entity
for bad_name in ('a b c', 'a,b', 'a\0b', '#abc', '#xxyz'):
    self.assertRaises(ValueError, make_entity, bad_name)
1431
1444
1457
# A CDATA wrapper is not accepted as an element's tail or as an attribute
# value, whichever way the attribute is set.
root = self.etree.Element("root")
wrapped = self.etree.CDATA('test')

self.assertRaises(TypeError, setattr, root, 'tail', wrapped)
self.assertRaises(TypeError, root.set, 'attr', wrapped)
self.assertRaises(TypeError,
                  operator.setitem, root.attrib, 'attr', wrapped)
1471
1480
1489
1490
1500
1509
# Appending an element to itself or to one of its own descendants would
# create a cycle and must raise ValueError, leaving the tree unchanged.
add_child = self.etree.SubElement
root = self.etree.Element('root')
self.assertRaises(ValueError, root.append, root)

child = add_child(root, 'child')
self.assertRaises(ValueError, child.append, root)

grandchild = add_child(child, 'child2')
for ancestor in (root, child):
    self.assertRaises(ValueError, grandchild.append, ancestor)

self.assertEqual('child2', root[0][0].tag)
1521
1534
1547
1558
1569
1579
1589
1605
1621
1627
1642
1655
1670
1683
1698
1711
1726
1739
1740
1748
1749
1759
1760
1775
1776
1786
1787
1798
1825
1826
1828 self.assertRaises(TypeError, self.etree.dump, None)
1829
1842
1855
1876
1885
1894
1903
1912
1921
# iterchildren(tag=[...]) yields the children matching any of the listed
# tags, in document order.
root = self.etree.XML(
    _bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
texts = [child.text for child in root.iterchildren(tag=['two', 'three'])]
self.assertEqual(['Two', 'Bla', None], texts)
1930
# Several tags passed positionally to iterchildren() select the children
# matching any of them, in document order.
root = self.etree.XML(
    _bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
texts = [child.text for child in root.iterchildren('two', 'three')]
self.assertEqual(['Two', 'Bla', None], texts)
1939
1948
1969
1991
# iterancestors() with several tags yields the matching ancestors from the
# nearest one outwards.  Each case is checked in both call forms: tags as
# a ``tag=`` tuple and tags as positional arguments.
new_element = self.etree.Element
new_child = self.etree.SubElement

a = new_element('a')
b = new_child(a, 'b')
c = new_child(a, 'c')
d = new_child(b, 'd')

cases = [
    (('a', 'b'), [b, a]),
    (('w', 'x', 'y', 'z'), []),
    # the starting element itself is never reported
    (('d', 'x'), []),
    # a wildcard next to a concrete tag matches every ancestor
    (('b', '*'), [b, a]),
    # <c> is a sibling of <b>, not an ancestor of <d>
    (('b', 'c'), [b]),
]
for tags, expected in cases:
    self.assertEqual(expected, list(d.iterancestors(tag=tags)))
    self.assertEqual(expected, list(d.iterancestors(*tags)))
2034
2051
2053 Element = self.etree.Element
2054 SubElement = self.etree.SubElement
2055
2056 a = Element('a')
2057 b = SubElement(a, 'b')
2058 c = SubElement(a, 'c')
2059 d = SubElement(b, 'd')
2060 e = SubElement(c, 'e')
2061
2062 self.assertEqual(
2063 [],
2064 list(a.iterdescendants('a')))
2065 self.assertEqual(
2066 [],
2067 list(a.iterdescendants(tag='a')))
2068
2069 a2 = SubElement(e, 'a')
2070 self.assertEqual(
2071 [a2],
2072 list(a.iterdescendants('a')))
2073
2074 self.assertEqual(
2075 [a2],
2076 list(c.iterdescendants('a')))
2077 self.assertEqual(
2078 [a2],
2079 list(c.iterdescendants(tag='a')))
2080
2082 Element = self.etree.Element
2083 SubElement = self.etree.SubElement
2084
2085 a = Element('a')
2086 b = SubElement(a, 'b')
2087 c = SubElement(a, 'c')
2088 d = SubElement(b, 'd')
2089 e = SubElement(c, 'e')
2090
2091 self.assertEqual(
2092 [b, e],
2093 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2094 self.assertEqual(
2095 [b, e],
2096 list(a.iterdescendants('a', 'b', 'e')))
2097
2098 a2 = SubElement(e, 'a')
2099 self.assertEqual(
2100 [b, a2],
2101 list(a.iterdescendants(tag=('a', 'b'))))
2102 self.assertEqual(
2103 [b, a2],
2104 list(a.iterdescendants('a', 'b')))
2105
2106 self.assertEqual(
2107 [],
2108 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2109 self.assertEqual(
2110 [],
2111 list(c.iterdescendants('x', 'y', 'z')))
2112
2113 self.assertEqual(
2114 [b, d, c, e, a2],
2115 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2116 self.assertEqual(
2117 [b, d, c, e, a2],
2118 list(a.iterdescendants('x', 'y', 'z', '*')))
2119
2137
2154
2172
2196
# itersiblings(tag=...) filters following siblings by tag; with
# preceding=True it filters the preceding siblings instead.
new_element = self.etree.Element
new_child = self.etree.SubElement

a = new_element('a')
b = new_child(a, 'b')
c = new_child(a, 'c')
d = new_child(b, 'd')

# The root element has no siblings at all.
self.assertEqual([], list(a.itersiblings(tag='XXX')))

for tag in ('c', '*'):
    self.assertEqual([c], list(b.itersiblings(tag=tag)))

self.assertEqual([b], list(c.itersiblings(preceding=True, tag='b')))
self.assertEqual([], list(c.itersiblings(preceding=True, tag='c')))
2220
2222 Element = self.etree.Element
2223 SubElement = self.etree.SubElement
2224
2225 a = Element('a')
2226 b = SubElement(a, 'b')
2227 c = SubElement(a, 'c')
2228 d = SubElement(b, 'd')
2229 e = SubElement(a, 'e')
2230 self.assertEqual(
2231 [],
2232 list(a.itersiblings(tag=('XXX', 'YYY'))))
2233 self.assertEqual(
2234 [c, e],
2235 list(b.itersiblings(tag=('c', 'd', 'e'))))
2236 self.assertEqual(
2237 [b],
2238 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2239 self.assertEqual(
2240 [c, b],
2241 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2242
2244 parseid = self.etree.parseid
2245 XML = self.etree.XML
2246 xml_text = _bytes('''
2247 <!DOCTYPE document [
2248 <!ELEMENT document (h1,p)*>
2249 <!ELEMENT h1 (#PCDATA)>
2250 <!ATTLIST h1 myid ID #REQUIRED>
2251 <!ELEMENT p (#PCDATA)>
2252 <!ATTLIST p someid ID #REQUIRED>
2253 ]>
2254 <document>
2255 <h1 myid="chapter1">...</h1>
2256 <p id="note1" class="note">...</p>
2257 <p>Regular paragraph.</p>
2258 <p xml:id="xmlid">XML:ID paragraph.</p>
2259 <p someid="warn1" class="warning">...</p>
2260 </document>
2261 ''')
2262
2263 tree, dic = parseid(BytesIO(xml_text))
2264 root = tree.getroot()
2265 root2 = XML(xml_text)
2266 self.assertEqual(self._writeElement(root),
2267 self._writeElement(root2))
2268 expected = {
2269 "chapter1" : root[0],
2270 "xmlid" : root[3],
2271 "warn1" : root[4]
2272 }
2273 self.assertTrue("chapter1" in dic)
2274 self.assertTrue("warn1" in dic)
2275 self.assertTrue("xmlid" in dic)
2276 self._checkIDDict(dic, expected)
2277
2279 XMLDTDID = self.etree.XMLDTDID
2280 XML = self.etree.XML
2281 xml_text = _bytes('''
2282 <!DOCTYPE document [
2283 <!ELEMENT document (h1,p)*>
2284 <!ELEMENT h1 (#PCDATA)>
2285 <!ATTLIST h1 myid ID #REQUIRED>
2286 <!ELEMENT p (#PCDATA)>
2287 <!ATTLIST p someid ID #REQUIRED>
2288 ]>
2289 <document>
2290 <h1 myid="chapter1">...</h1>
2291 <p id="note1" class="note">...</p>
2292 <p>Regular paragraph.</p>
2293 <p xml:id="xmlid">XML:ID paragraph.</p>
2294 <p someid="warn1" class="warning">...</p>
2295 </document>
2296 ''')
2297
2298 root, dic = XMLDTDID(xml_text)
2299 root2 = XML(xml_text)
2300 self.assertEqual(self._writeElement(root),
2301 self._writeElement(root2))
2302 expected = {
2303 "chapter1" : root[0],
2304 "xmlid" : root[3],
2305 "warn1" : root[4]
2306 }
2307 self.assertTrue("chapter1" in dic)
2308 self.assertTrue("warn1" in dic)
2309 self.assertTrue("xmlid" in dic)
2310 self._checkIDDict(dic, expected)
2311
# The document carries no DTD; the test expects XMLDTDID to return an
# empty ID mapping while yielding the same tree as XML().
parse_with_ids = self.etree.XMLDTDID
parse_plain = self.etree.XML
xml_text = _bytes('''
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

root, id_map = parse_with_ids(xml_text)
plain_root = parse_plain(xml_text)
self.assertEqual(self._writeElement(root), self._writeElement(plain_root))
self._checkIDDict(id_map, {})
2330
# Same DTD-backed document, parsed with a parser created with
# collect_ids=False: the returned ID mapping is expected to be empty.
parse_with_ids = self.etree.XMLDTDID
parse_plain = self.etree.XML
xml_text = _bytes('''
<!DOCTYPE document [
<!ELEMENT document (h1,p)*>
<!ELEMENT h1 (#PCDATA)>
<!ATTLIST h1 myid ID #REQUIRED>
<!ELEMENT p (#PCDATA)>
<!ATTLIST p someid ID #REQUIRED>
]>
<document>
<h1 myid="chapter1">...</h1>
<p id="note1" class="note">...</p>
<p>Regular paragraph.</p>
<p xml:id="xmlid">XML:ID paragraph.</p>
<p someid="warn1" class="warning">...</p>
</document>
''')

no_id_parser = etree.XMLParser(collect_ids=False)
root, id_map = parse_with_ids(xml_text, parser=no_id_parser)
plain_root = parse_plain(xml_text)
self.assertEqual(self._writeElement(root), self._writeElement(plain_root))
self.assertFalse(id_map)
self._checkIDDict(id_map, {})
2358
# Compare the ID dictionary `dic` with `expected` through every dict view.
# The iter*() views, and the sorted values comparison, are only exercised
# on Python 2.
on_py2 = sys.version_info < (3,)

self.assertEqual(len(dic), len(expected))
self.assertEqual(sorted(dic.items()), sorted(expected.items()))
if on_py2:
    self.assertEqual(sorted(dic.iteritems()), sorted(expected.iteritems()))
self.assertEqual(sorted(dic.keys()), sorted(expected.keys()))
if on_py2:
    self.assertEqual(sorted(dic.iterkeys()), sorted(expected.iterkeys()))
    self.assertEqual(sorted(dic.values()), sorted(expected.values()))
    self.assertEqual(sorted(dic.itervalues()), sorted(expected.itervalues()))
2377
# A prefix supplied through nsmap must be used for the element's tag.
lib = self.etree
prefix_map = {'foo': 'http://ns.infrae.com/foo'}
elem = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=prefix_map)

self.assertEqual('foo', elem.prefix)
self.assertEqual(
    _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
    self._writeElement(elem))
2389
# A None key in nsmap declares the default namespace: no prefix on the
# element, but the tag still carries the namespace URI.
lib = self.etree
default_map = {None: 'http://ns.infrae.com/foo'}
elem = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=default_map)

self.assertEqual(None, elem.prefix)
self.assertEqual('{http://ns.infrae.com/foo}bar', elem.tag)
self.assertEqual(
    _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
    self._writeElement(elem))
2404
# Default namespace for the element plus a prefixed namespace that is
# picked up by an attribute set afterwards.
lib = self.etree
ns_map = {None: 'http://ns.infrae.com/foo',
          'hoi': 'http://ns.infrae.com/hoi'}
elem = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=ns_map)
elem.set('{http://ns.infrae.com/hoi}test', 'value')

expected = _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>')
self.assertEqual(expected, self._writeElement(elem))
2415
# Namespaced attribute on a prefixed element that is appended to a parent
# declaring the same URI as its default namespace.
lib = self.etree
attr_name = '{http://test/ns}attr'

root = lib.Element('{http://test/ns}root', nsmap={None: 'http://test/ns'})
sub = lib.Element('{http://test/ns}sub', nsmap={'test': 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
self.assertEqual(
    _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
    lib.tostring(sub))

root.append(sub)
expected_root = _bytes('<root xmlns="http://test/ns">'
                       '<sub xmlns:test="http://test/ns" test:attr="value"/>'
                       '</root>')
self.assertEqual(expected_root, lib.tostring(root))
2436
# Namespaced attribute on a prefixed element that is appended to a parent
# without any namespace: the serialization of the child stays unchanged.
lib = self.etree
attr_name = '{http://test/ns}attr'

root = lib.Element('root')
sub = lib.Element('{http://test/ns}sub', nsmap={'test': 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
self.assertEqual(
    _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
    lib.tostring(sub))

root.append(sub)
expected_root = _bytes('<root>'
                       '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
                       '</root>')
self.assertEqual(expected_root, lib.tostring(root))
2456
# Namespaced attribute on an element that only declares the URI as its
# default namespace: the expected output shows a generated ns0 prefix for
# the attribute, both before and after appending to a plain parent.
lib = self.etree
attr_name = '{http://test/ns}attr'

root = lib.Element('root')
sub = lib.Element('{http://test/ns}sub', nsmap={None: 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
expected_sub = _bytes('<sub xmlns="http://test/ns" '
                      'xmlns:ns0="http://test/ns" ns0:attr="value"/>')
self.assertEqual(expected_sub, lib.tostring(sub))

root.append(sub)
expected_root = _bytes('<root>'
                       '<sub xmlns="http://test/ns"'
                       ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
                       '</root>')
self.assertEqual(expected_root, lib.tostring(root))
2478
# The parent declares the URI both as default namespace and under the
# 'test' prefix; after appending, the child and its attribute are expected
# to serialize with the parent's 'test' prefix.
lib = self.etree
attr_name = '{http://test/ns}attr'

root = lib.Element('{http://test/ns}root',
                   nsmap={'test': 'http://test/ns',
                          None: 'http://test/ns'})
sub = lib.Element('{http://test/ns}sub', nsmap={None: 'http://test/ns'})

sub.attrib[attr_name] = 'value'
self.assertEqual(sub.attrib[attr_name], 'value')
expected_sub = _bytes('<sub xmlns="http://test/ns" '
                      'xmlns:ns0="http://test/ns" ns0:attr="value"/>')
self.assertEqual(expected_sub, lib.tostring(sub))

root.append(sub)
expected_root = _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
                       '<test:sub test:attr="value"/>'
                       '</test:root>')
self.assertEqual(expected_root, lib.tostring(root))
2501
# A SubElement in a namespace already declared on the parent reuses the
# parent's prefix when the parent is wrapped in an ElementTree.
lib = self.etree
ns_map = {None: 'http://ns.infrae.com/foo',
          'hoi': 'http://ns.infrae.com/hoi'}
top = lib.Element('{http://ns.infrae.com/foo}z', nsmap=ns_map)
tree = lib.ElementTree(element=top)  # kept referenced for the whole test
lib.SubElement(top, '{http://ns.infrae.com/hoi}x')

expected = _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>')
self.assertEqual(expected, self._writeElement(top))
2512
# Two elements sharing one default-namespace map: after appending, neither
# the parent nor the child gets a prefix and both keep the namespaced tag.
lib = self.etree
shared_map = {None: 'http://ns.infrae.com/foo'}
outer = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=shared_map)
inner = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=shared_map)

outer.append(inner)

self.assertEqual(None, outer.prefix)
self.assertEqual(None, outer[0].prefix)
self.assertEqual('{http://ns.infrae.com/foo}bar', outer.tag)
self.assertEqual('{http://ns.infrae.com/foo}bar', outer[0].tag)
2534
# The child's tag lives in a namespace other than the default one given
# in nsmap, so the child is expected to carry a (non-None) prefix.
lib = self.etree
default_map = {None: 'http://ns.infrae.com/BAR'}
outer = lib.Element('{http://ns.infrae.com/BAR}bar', nsmap=default_map)
inner = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=default_map)

outer.append(inner)

self.assertEqual(None, outer.prefix)
self.assertNotEqual(None, inner.prefix)
self.assertEqual('{http://ns.infrae.com/BAR}bar', outer.tag)
self.assertEqual('{http://ns.infrae.com/foo}bar', inner.tag)
2556
# Move a namespaced element into another document that declares the same
# namespace, drop the source document, and check tag and serialization.
ns_href = "http://a.b.c"
parse = self.etree.fromstring

source_root = parse(
    _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
baz = source_root[0][0]

target_root = parse(_bytes('<root xmlns:ns="%s"/>' % ns_href))
target_root.append(baz)
del source_root  # the moved element must not depend on its old document

self.assertEqual('{%s}baz' % ns_href, baz.tag)
self.assertEqual(
    _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
    self.etree.tostring(target_root))
2572
2582
# The nsmap property must report exactly the mapping passed at creation.
lib = self.etree
ns_map = {None: 'http://ns.infrae.com/foo',
          'hoi': 'http://ns.infrae.com/hoi'}
elem = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=ns_map)
self.assertEqual(ns_map, elem.nsmap)
2592
# A child's nsmap is the parent's mapping overlaid with its own entries;
# the parent's nsmap stays as given.
lib = self.etree

parent_ns = {None: 'http://ns.infrae.com/foo',
             'hoi': 'http://ns.infrae.com/hoi'}
parent = lib.Element('{http://ns.infrae.com/foo}bar', nsmap=parent_ns)

child_ns = {None: 'http://ns.infrae.com/honk',
            'top': 'http://ns.infrae.com/top'}
child = lib.SubElement(parent, '{http://ns.infrae.com/honk}bar',
                       nsmap=child_ns)

merged = dict(parent_ns)
merged.update(child_ns)
self.assertEqual(parent_ns, parent.nsmap)
self.assertEqual(merged, child.nsmap)
2608
# An undeclared prefix in HTML input shows up in nsmap mapped to None.
html_root = self.etree.HTML('<hha:page-description>aa</hha:page-description>')
described = html_root.find('.//page-description')
self.assertEqual({'hha': None}, described.nsmap)
2613
# getiterator() called with several tag arguments at once.
make = self.etree.Element
add = self.etree.SubElement

a = make('a')
b = add(a, 'b')
c = add(a, 'c')
d = add(b, 'd')
e = add(c, 'e')
f = add(c, 'f')

# (expected elements, start element, tag arguments)
cases = [
    ([a, b], a, ('a', 'b')),
    ([], a, ('x', 'y')),
    ([a, f], a, ('f', 'a')),
    ([c, e, f], c, ('c', '*', 'a')),
    ([], a, ((), ())),
]
for expected, start, tags in cases:
    self.assertEqual(expected, list(start.getiterator(*tags)))
2640
# getiterator() called with one tuple of tags as its single argument.
make = self.etree.Element
add = self.etree.SubElement

a = make('a')
b = add(a, 'b')
c = add(a, 'c')
d = add(b, 'd')
e = add(c, 'e')
f = add(c, 'f')

# (expected elements, start element, tag tuple passed as one argument)
cases = [
    ([a, b], a, ('a', 'b')),
    ([], a, ('x', 'y')),
    ([a, f], a, ('f', 'a')),
    ([c, e, f], c, ('c', '*', 'a')),
    ([], a, ()),
]
for expected, start, tag_tuple in cases:
    self.assertEqual(expected, list(start.getiterator(tag_tuple)))
2667
# getiterator() with namespaced tags, namespace wildcards and the empty
# namespace on a tree mixing namespaces {a}, {b} and none.
make = self.etree.Element
add = self.etree.SubElement

a = make('{a}a')
b = add(a, '{a}b')
c = add(a, '{a}c')
d = add(b, '{b}d')
e = add(c, '{a}e')
f = add(c, '{b}f')
g = add(c, 'g')

# (expected elements, start element, tag filter)
cases = [
    ([a], a, '{a}a'),
    ([], a, '{b}a'),
    ([], a, 'a'),
    ([a, b, d, c, e, f, g], a, '*'),
    ([f], c, '{b}*'),
    ([d, f], a, '{b}*'),
    ([g], a, 'g'),
    ([g], a, '{}g'),
    ([g], a, '{}*'),
]
for expected, start, tag in cases:
    self.assertEqual(expected, list(start.getiterator(tag)))
2707
# getiterator() with the '{*}' namespace wildcard: match on local name
# regardless of namespace.
make = self.etree.Element
add = self.etree.SubElement

a = make('{a}a')
b = add(a, '{nsA}b')
c = add(b, '{nsB}b')
d = add(a, 'b')
e = add(a, '{nsA}e')
f = add(e, '{nsB}e')
g = add(e, 'e')

cases = [
    ([b, c, d], '{*}b'),
    ([e, f, g], '{*}e'),
    ([a, b, c, d, e, f, g], '{*}*'),
]
for expected, tag in cases:
    self.assertEqual(expected, list(a.getiterator(tag)))
2729
2754
2770
2787
# ElementTree.getelementpath() for plain tags: paths are relative to the
# tree's root, round-trip through find(), and raise ValueError for
# elements that are not below that root.
a = etree.Element("a")
b = etree.SubElement(a, "b")
c = etree.SubElement(a, "c")
d1 = etree.SubElement(c, "d")
d2 = etree.SubElement(c, "d")
c.text = d1.text = 'TEXT'

tree = etree.ElementTree(a)
for expected_path, element in (('.', a), ('c/d[1]', d1), ('c/d[2]', d2)):
    self.assertEqual(expected_path, tree.getelementpath(element))
for element in (d1, d2):
    self.assertEqual(element, tree.find(tree.getelementpath(element)))

tree = etree.ElementTree(c)
self.assertEqual('.', tree.getelementpath(c))
self.assertEqual('d[2]', tree.getelementpath(d2))
self.assertEqual(d2, tree.find(tree.getelementpath(d2)))

tree = etree.ElementTree(b)
self.assertEqual('.', tree.getelementpath(b))
for outsider in (a, c, d2):
    self.assertRaises(ValueError, tree.getelementpath, outsider)
2814
# ElementTree.getelementpath() for namespaced tags: positional indexes
# are counted per qualified tag name.
a = etree.Element("{http://ns1/}a")
b = etree.SubElement(a, "{http://ns1/}b")
c = etree.SubElement(a, "{http://ns1/}c")
d1 = etree.SubElement(c, "{http://ns1/}d")
d2 = etree.SubElement(c, "{http://ns2/}d")
d3 = etree.SubElement(c, "{http://ns1/}d")

tree = etree.ElementTree(a)
self.assertEqual('.', tree.getelementpath(a))
self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]', tree.getelementpath(d1))
self.assertEqual('{http://ns1/}c/{http://ns2/}d', tree.getelementpath(d2))
self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]', tree.getelementpath(d3))

for element in (a, b, c, d1, d2, d3):
    self.assertEqual(element, tree.find(tree.getelementpath(element)))

tree = etree.ElementTree(c)
self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
for element in (d1, d2, d3):
    self.assertEqual(element, tree.find(tree.getelementpath(element)))

tree = etree.ElementTree(b)
for outsider in (d1, d2):
    self.assertRaises(ValueError, tree.getelementpath, outsider)
2850
2857
2864
2873
# findall() with Clark-notation namespaces, a namespace wildcard on the
# local name, and a plain (un-namespaced) tag.
root = self.etree.XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
for path, expected_count in ((".//{X}b", 2), (".//{X}*", 2), (".//b", 3)):
    self.assertEqual(len(root.findall(path)), expected_count)
2880
# findall() with a prefix that is bound to different URIs through the
# `namespaces` argument.
root = self.etree.XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))

prefix_to_x = {'xx': 'X'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=prefix_to_x)), 2)
self.assertEqual(len(root.findall(".//xx:*", namespaces=prefix_to_x)), 2)
self.assertEqual(len(root.findall(".//b", namespaces=prefix_to_x)), 2)

prefix_to_y = {'xx': 'Y'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=prefix_to_y)), 1)
self.assertEqual(len(root.findall(".//xx:*", namespaces=prefix_to_y)), 1)
self.assertEqual(len(root.findall(".//b", namespaces=prefix_to_y)), 2)
2892
2899
# Element.index() with and without start/stop bounds, including negative
# bounds, on a parent with ten children.
lib = self.etree
parent = lib.Element('foo')
for n in range(10):
    lib.SubElement(parent, 'a%s' % n)

for n in range(10):
    self.assertEqual(n, parent.index(parent[n]))
self.assertEqual(3, parent.index(parent[3], 3))

# (child position, bounds) combinations that exclude the child
out_of_range = [(3, (4,)), (3, (0, 2)), (8, (0, -3)), (8, (-5, -3))]
for position, bounds in out_of_range:
    self.assertRaises(ValueError, parent.index, parent[position], *bounds)

# (child position, bounds) combinations that include the child
in_range = [(8, (0, -1)), (8, (-12, -1)), (0, (-12, -1))]
for position, bounds in in_range:
    self.assertEqual(position, parent.index(parent[position], *bounds))
2925
# Element.replace() with a replacement that is already a sibling: the
# replacement moves (length shrinks by one) and text/tail stay attached to
# their elements.
lib = self.etree
parent = lib.Element('foo')
for n in range(10):
    child = lib.SubElement(parent, 'a%s' % n)
    child.text = "text%d" % n
    child.tail = "tail%d" % n

child0, child1, child2 = parent[0], parent[1], parent[2]

parent.replace(parent[0], parent[1])
self.assertEqual(9, len(parent))
self.assertEqual(child1, parent[0])
self.assertEqual(child1.text, "text1")
self.assertEqual(child1.tail, "tail1")
self.assertEqual(child0.tail, "tail0")
self.assertEqual(child2, parent[1])

parent.replace(parent[-1], parent[0])
self.assertEqual(child1, parent[-1])
self.assertEqual(child1.text, "text1")
self.assertEqual(child1.tail, "tail1")
self.assertEqual(child2, parent[0])
2961
# Element.replace() with a fresh element: it takes the first slot with
# its own text and tail, and the following sibling is unaffected.
lib = self.etree
parent = lib.Element('foo')
for n in range(10):
    lib.SubElement(parent, 'a%s' % n)

replacement = lib.Element("test")
replacement.text = "TESTTEXT"
replacement.tail = "TESTTAIL"
second_child = parent[1]

parent.replace(parent[0], replacement)
self.assertEqual(replacement, parent[0])
self.assertEqual("TESTTEXT", parent[0].text)
self.assertEqual("TESTTAIL", parent[0].tail)
self.assertEqual(second_child, parent[1])
2983
2999
3017
3035
3053
3055 Element = self.etree.Element
3056 SubElement = self.etree.SubElement
3057 try:
3058 slice
3059 except NameError:
3060 print("slice() not found")
3061 return
3062
3063 a = Element('a')
3064 b = SubElement(a, 'b')
3065 c = SubElement(a, 'c')
3066 d = SubElement(a, 'd')
3067 e = SubElement(a, 'e')
3068
3069 x = Element('x')
3070 y = Element('y')
3071 z = Element('z')
3072
3073 self.assertRaises(
3074 ValueError,
3075 operator.setitem, a, slice(1,None,2), [x, y, z])
3076
3077 self.assertEqual(
3078 [b, c, d, e],
3079 list(a))
3080
3093
# sourceline on a document with more than 65535 lines; the expected
# values depend on the libxml2 version in use.
newlines = '\n' * 65536
document = ''.join([
    '<?xml version="1.0"?>\n',
    '<root>', newlines,
    '<p>', newlines, '</p>\n',
    '<br/>\n',
    '</root>',
])
root = self.etree.XML(_bytes(document))

expected = ([2, 131074, 131076]
            if self.etree.LIBXML_VERSION >= (2, 9)
            else [2, 65535, 65535])

self.assertEqual(expected, [el.sourceline for el in root.iter()])
3109
3117
3126
3136
3146
3152
3160
3166
3173
3179
# docinfo of a parsed document with XML declaration and PUBLIC doctype.
lib = self.etree
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
xml_header = '<?xml version="1.0" encoding="ascii"?>'
doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)

xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')

info = lib.parse(BytesIO(xml)).docinfo
self.assertEqual(info.encoding, "ascii")
self.assertEqual(info.xml_version, "1.0")
self.assertEqual(info.public_id, pub_id)
self.assertEqual(info.system_url, sys_id)
self.assertEqual(info.root_name, 'html')
self.assertEqual(info.doctype, doctype_string)
3197
3213
3225
3237
3243
# tostring(doctype=...) replaces the document's own DOCTYPE in the output.
lib = self.etree
pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))

xml = _bytes('<!DOCTYPE root>\n<root/>')
tree = lib.parse(BytesIO(xml))
expected = xml.replace(_bytes('<!DOCTYPE root>'), doctype_string)
self.assertEqual(expected, lib.tostring(tree, doctype=doctype_string))
3254
# Assigning Element.base must also set the xml:base attribute.
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base_attr), None)

root.base = "https://secret/url"
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
3266
# Setting the xml:base attribute must be reflected by Element.base.
xml_base_attr = '{http://www.w3.org/XML/1998/namespace}base'
root = self.etree.XML(_bytes("<root/>"), base_url="http://no/such/url")

self.assertEqual(root.base, "http://no/such/url")
self.assertEqual(root.get(xml_base_attr), None)

root.set(xml_base_attr, "https://secret/url")
self.assertEqual(root.base, "https://secret/url")
self.assertEqual(root.get(xml_base_attr), "https://secret/url")
3279
3285
3290
3297
3311
# A NUL character is rejected in text, tail and tag names.
make = self.etree.Element
elem = make('a')
for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, elem, attribute, 'ha\0ho')

self.assertRaises(ValueError, make, 'ha\0ho')
3320
# Same NUL-character check with the value routed through _str().
make = self.etree.Element
elem = make('a')
for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, elem, attribute, _str('ha\0ho'))

self.assertRaises(ValueError, make, _str('ha\0ho'))
3332
# Control characters \x07 and \x02 are rejected in text, tail and tags.
make = self.etree.Element
elem = make('a')
bad_values = ('ha\x07ho', 'ha\x02ho')

for attribute in ("text", "tail"):
    for bad in bad_values:
        self.assertRaises(ValueError, setattr, elem, attribute, bad)

for bad in bad_values:
    self.assertRaises(ValueError, make, bad)
3345
# Same control-character check with the values routed through _str().
make = self.etree.Element
elem = make('a')

for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, elem, attribute, _str('ha\x07ho'))
    self.assertRaises(ValueError, setattr, elem, attribute, _str('ha\x02ho'))

self.assertRaises(ValueError, make, _str('ha\x07ho'))
self.assertRaises(ValueError, make, _str('ha\x02ho'))
3364
# Control characters are still rejected when they follow a non-ASCII
# character (\u1234) in the value.
make = self.etree.Element
elem = make('a')

for attribute in ("text", "tail"):
    self.assertRaises(ValueError, setattr, elem, attribute,
                      _str('ha\u1234\x07ho'))
    self.assertRaises(ValueError, setattr, elem, attribute,
                      _str('ha\u1234\x02ho'))

self.assertRaises(ValueError, make, _str('ha\u1234\x07ho'))
self.assertRaises(ValueError, make, _str('ha\u1234\x02ho'))
3383
3397
3402
3420
3440
# tostring(method='html') includes the element's tail by default and with
# with_tail=True, and omits it with with_tail=False.
serialize = self.etree.tostring
html = self.etree.fromstring(
    '<html><body>'
    '<div><p>Some text<i>\r\n</i></p></div>\r\n'
    '</body></html>',
    parser=self.etree.HTMLParser())
self.assertEqual(html.tag, 'html')

div = html.find('.//div')
self.assertEqual(div.tail, '\r\n')

self.assertEqual(
    serialize(div, method='html'),
    _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
self.assertEqual(
    serialize(div, method='html', with_tail=True),
    _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
self.assertEqual(
    serialize(div, method='html', with_tail=False),
    _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3463
3485
# The `standalone` option of tostring() controls the XML declaration, and
# the parsed result reports it back through docinfo.standalone.
serialize = self.etree.tostring
parse = self.etree.XML
wrap = self.etree.ElementTree

root = parse(_bytes("<root/>"))
self.assertEqual(None, wrap(root).docinfo.standalone)

result = serialize(root, xml_declaration=True, encoding="ASCII")
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))

result = serialize(root, xml_declaration=True, encoding="ASCII",
                   standalone=True)
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
self.assertEqual(True, wrap(parse(result)).docinfo.standalone)

result = serialize(root, xml_declaration=True, encoding="ASCII",
                   standalone=False)
self.assertEqual(result, _bytes(
    "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
self.assertEqual(False, wrap(parse(result)).docinfo.standalone)
3515
3535
3537 tostring = self.etree.tostring
3538 Element = self.etree.Element
3539 SubElement = self.etree.SubElement
3540
3541 a = Element('a')
3542 a.text = "A"
3543 a.tail = "tail"
3544 b = SubElement(a, 'b')
3545 b.text = "B"
3546 b.tail = _str("Søk på nettet")
3547 c = SubElement(a, 'c')
3548 c.text = "C"
3549
3550 result = tostring(a, method="text", encoding="UTF-16")
3551
3552 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3553 result)
3554
# method="text" with non-ASCII content: serializing without an encoding
# raises UnicodeEncodeError, while UTF-8 output works.
serialize = self.etree.tostring
make = self.etree.Element
add = self.etree.SubElement

a = make('a')
a.text = _str('Søk på nettetA')
a.tail = "tail"
b = add(a, 'b')
b.text = "B"
b.tail = _str('Søk på nettetB')
c = add(a, 'c')
c.text = "C"

self.assertRaises(UnicodeEncodeError, serialize, a, method="text")

self.assertEqual(
    _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
    serialize(a, encoding="UTF-8", method="text"))
3575
3588
3604
3608
3623
3641
3654
3656 tostring = self.etree.tostring
3657 Element = self.etree.Element
3658 SubElement = self.etree.SubElement
3659
3660 a = Element('a')
3661 b = SubElement(a, 'b')
3662 c = SubElement(a, 'c')
3663 d = SubElement(c, 'd')
3664 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3665 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3666 self.assertEqual(_bytes('<b></b>'),
3667 canonicalize(tostring(b, encoding=_unicode)))
3668 self.assertEqual(_bytes('<c><d></d></c>'),
3669 canonicalize(tostring(c, encoding=_unicode)))
3670
3675
3690
3692 tostring = self.etree.tostring
3693 Element = self.etree.Element
3694 SubElement = self.etree.SubElement
3695
3696 a = Element('a')
3697 b = SubElement(a, 'b')
3698 c = SubElement(a, 'c')
3699
3700 result = tostring(a, encoding=_unicode)
3701 self.assertEqual(result, "<a><b/><c/></a>")
3702
3703 result = tostring(a, encoding=_unicode, pretty_print=False)
3704 self.assertEqual(result, "<a><b/><c/></a>")
3705
3706 result = tostring(a, encoding=_unicode, pretty_print=True)
3707 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3708
3720
# Two ElementBase subclass instances referencing each other through a
# Python attribute are dropped and a garbage collection is forced.
class SubEl(etree.ElementBase):
    pass

first = SubEl()
second = SubEl()
# The tag of each instance equals the class name.
self.assertEqual('SubEl', first.tag)
self.assertEqual('SubEl', second.tag)

# Build the reference cycle, then release both names and collect.
first.other = second
second.other = first

del first, second
gc.collect()
3734
3735
3749
# After detaching two siblings and re-linking them with addnext(), the
# first one must still be reachable (tag and tail intact) through
# getprevious() once its Python reference has been deleted.
root = etree.Element('parent')
first = etree.SubElement(root, 'child1')
second = etree.SubElement(root, 'child2')

root.remove(first)
root.remove(second)
first.addnext(second)
first.tail = 'abc'
second.tail = 'xyz'
del first

# Result deliberately discarded; the call itself is part of the test.
second.getprevious()

self.assertEqual('child1', second.getprevious().tag)
self.assertEqual('abc', second.getprevious().tail)
3766
3767
3768
3769 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3780
3781
3825
3826 res_instance = res()
3827 parser = etree.XMLParser(load_dtd = True)
3828 parser.resolvers.add(res_instance)
3829
3830 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3831 parser = parser)
3832
3833 self.include(tree)
3834
3835 called = list(res_instance.called.items())
3836 called.sort()
3837 self.assertEqual(
3838 [("dtd", True), ("include", True), ("input", True)],
3839 called)
3840
# XInclude processing driven by a custom resolver that answers each
# include with another document containing a further include, until the
# URL ending in '/test5.xml' terminates the chain with <DONE/>.
data = textwrap.dedent('''
<doc xmlns:xi="http://www.w3.org/2001/XInclude">
<foo/>
<xi:include href="./test.xml" />
</doc>
''')

class Resolver(etree.Resolver):
    # Record of requested resources; the asserts below require that each
    # one is requested at most once.
    called = {}

    def resolve(self, url, id, context):
        seen = self.called
        if url.endswith("test_xinclude.xml"):
            assert not seen.get("input")
            seen["input"] = True
            return None
        if url.endswith('/test5.xml'):
            assert not seen.get("DONE")
            seen["DONE"] = True
            return self.resolve_string('<DONE/>', context)

        _, filename = url.rsplit('/', 1)
        assert not seen.get(filename)
        seen[filename] = True
        # The next document includes 'test<N>.xml', N being the number of
        # resources recorded so far.
        next_data = data.replace('test.xml', 'test%d.xml' % len(seen))
        return self.resolve_string(next_data, context)

res_instance = Resolver()
parser = etree.XMLParser(load_dtd=True)
parser.resolvers.add(res_instance)

tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
                   parser=parser)

self.include(tree)

requested = sorted(res_instance.called.items())
self.assertEqual(
    [("DONE", True), ("input", True), ("test.xml", True),
     ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
    requested)
3884
3885
3889
3890
3895
3896
# write_c14n() expands the empty element into a start/end tag pair.
tree = self.parse(_bytes('<a><b/></a>'))
out = BytesIO()
tree.write_c14n(out)
self.assertEqual(_bytes('<a><b></b></a>'), out.getvalue())
3905
# write_c14n() with compression=9 produces gzip data that decompresses
# to the canonical form.
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
out = BytesIO()
tree.write_c14n(out, compression=9)

gzfile = gzip.GzipFile(fileobj=BytesIO(out.getvalue()))
try:
    decompressed = gzfile.read()
finally:
    gzfile.close()

self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'), decompressed)
3917
3929
3945
3963
3975
3987
# write_c14n() in inclusive (default / exclusive=False) and exclusive
# mode, the latter also with an inclusive prefix list.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

# (keyword options for write_c14n, expected canonical output)
cases = [
    ({},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': False},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    ({'exclusive': True, 'inclusive_ns_prefixes': ['z']},
     '<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
]
for options, expected in cases:
    out = BytesIO()
    tree.write_c14n(out, **options)
    self.assertEqual(_bytes(expected), out.getvalue())
4012
# tostring(method='c14n') on a whole tree, in inclusive and exclusive
# mode, the latter also with an inclusive prefix list.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

# (extra keyword options for tostring, expected canonical output)
cases = [
    ({},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': False},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
]
for options, expected in cases:
    canonical = etree.tostring(tree, method='c14n', **options)
    self.assertEqual(_bytes(expected), canonical)
4029
# tostring(method='c14n') on individual elements (root and first child),
# in inclusive and exclusive mode.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

root_cases = [
    ({},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': False},
     '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    ({'exclusive': True},
     '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
]
for options, expected in root_cases:
    canonical = etree.tostring(tree.getroot(), method='c14n', **options)
    self.assertEqual(_bytes(expected), canonical)

child_cases = [
    ({'exclusive': False},
     '<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
    ({'exclusive': True},
     '<z:b xmlns:z="http://cde"></z:b>'),
    ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
     '<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
]
for options, expected in child_cases:
    canonical = etree.tostring(tree.getroot()[0], method='c14n', **options)
    self.assertEqual(_bytes(expected), canonical)
4053
"""Regression test for memory allocation issues (use 3+ inclusive NS prefixes)."""
tree = self.parse(_bytes(
    '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

canonical = etree.tostring(tree, method='c14n', exclusive=True,
                           inclusive_ns_prefixes=['x', 'y', 'z'])
expected = _bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>')
self.assertEqual(expected, canonical)
4062
4063
# ElementTree.write() to an in-memory file reproduces the input markup.
tree = self.parse(_bytes('<a><b/></a>'))
out = BytesIO()
tree.write(out)
self.assertEqual(_bytes('<a><b/></a>'), out.getvalue())
4072
# Write with maximum compression, then decompress the result with the
# stdlib gzip module and compare it with the original document.
doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
compressed = BytesIO()
doc.write(compressed, compression=9)

reader = gzip.GzipFile(fileobj=BytesIO(compressed.getvalue()))
try:
    restored = reader.read()
finally:
    # Always release the gzip reader, even if decompression fails.
    reader.close()

self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), restored)
4084
# Compare the output of tree.write() across compression levels:
# 0 and the default must be identical, 1 and 9 must not be larger than
# the uncompressed output and must decompress to the same document.
doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))

out = BytesIO()
doc.write(out, compression=0)
uncompressed = out.getvalue()

out = BytesIO()
doc.write(out)
self.assertEqual(out.getvalue(), uncompressed)

restored = {}
for level in (1, 9):
    out = BytesIO()
    doc.write(out, compression=level)
    packed = out.getvalue()
    self.assertTrue(len(packed) <= len(uncompressed))
    reader = gzip.GzipFile(fileobj=BytesIO(packed))
    try:
        restored[level] = reader.read()
    finally:
        # Always release the gzip reader, even if decompression fails.
        reader.close()

# Content checks are done last, after all size checks have passed.
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                 uncompressed)
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                 restored[1])
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                 restored[9])
4121
4133
4149
4161
4174
4176 etree = etree
4177
4199
"""This can't really be tested as long as there isn't a way to
reset the logging setup ...
"""
parse = self.etree.parse

messages = []

class Logger(self.etree.PyErrorLog):
    # Collect the formatted messages in a list so they can be inspected
    # below.
    def log(self, entry, message, *args):
        messages.append(message)

# NOTE: this installs the logger globally; nothing visible in this block
# restores the previous setup afterwards (see the docstring above).
self.etree.use_global_python_log(Logger())

# The document is deliberately malformed (</c> closes <b>).  It must be
# passed as bytes: io.BytesIO rejects text strings on Python 3.
f = BytesIO(_bytes('<a><b></c></b></a>'))
try:
    parse(f)
except SyntaxError:
    # Expected: only the logged messages are of interest here.
    pass
finally:
    # Close the buffer even if parsing fails in an unexpected way.
    f.close()

# Each fragment must occur in at least one of the logged messages.
for fragment in ('mismatch', ':PARSER:', ':ERR_TAG_NAME_MISMATCH:', ':1:15:'):
    matching = [message for message in messages if fragment in message]
    self.assertTrue(
        matching, "no logged message contains %r: %r" % (fragment, messages))
4227
4228
4230 etree = etree
4231
4235
# Pull parser driven by a custom target and created without an explicit
# event list: the assertions below expect only 'end' events, each
# carrying the value returned by the target's end() method.
class EchoTarget(object):
    # Every callback returns a string naming the call that was made.
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'

parser = self.etree.XMLPullParser(target=EchoTarget())
events = parser.read_events()

# Only opening tags so far: nothing to report, checked twice.
parser.feed('<root><element>')
self.assertFalse(list(events))
self.assertFalse(list(events))

# Each further chunk closes exactly one element.
for chunk, closed_tag in (('</element><child>', 'element'),
                          ('</child>', 'child'),
                          ('</root>', 'root')):
    parser.feed(chunk)
    self.assertEqual([('end', 'end(%s)' % closed_tag)], list(events))

self.assertFalse(list(events))
# close() hands back whatever the target's close() returned.
self.assertEqual('close()', parser.close())
4259
# Pull parser driven by a custom target with both 'start' and 'end'
# events requested: each event is expected to carry the value returned by
# the matching target callback.
class EchoTarget(object):
    # Every callback returns a string naming the call that was made.
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'

parser = self.etree.XMLPullParser(
    ['start', 'end'], target=EchoTarget())
events = parser.read_events()

parser.feed('<root><element>')
self.assertEqual(
    [('start', 'start(root)'), ('start', 'start(element)')],
    list(events))
# The iterator has been drained; a second pass yields nothing.
self.assertFalse(list(events))

remaining_steps = (
    ('</element><child>',
     [('end', 'end(element)'), ('start', 'start(child)')]),
    ('</child>',
     [('end', 'end(child)')]),
    ('</root>',
     [('end', 'end(root)')]),
)
for chunk, expected in remaining_steps:
    parser.feed(chunk)
    self.assertEqual(expected, list(events))

self.assertFalse(list(events))
# close() hands back whatever the target's close() returned.
self.assertEqual('close()', parser.close())
4292
# Pull parser that builds its tree through an etree.TreeBuilder target;
# events are checked by tag name via the assert_event_tags helper.
builder = etree.TreeBuilder()
parser = self.etree.XMLPullParser(['start', 'end'], target=builder)
events = parser.read_events()

parser.feed('<root><element>')
self.assert_event_tags(
    events, [('start', 'root'), ('start', 'element')])
# Nothing new may show up until more data has been fed.
self.assertFalse(list(events))

remaining_steps = (
    ('</element><child>', [('end', 'element'), ('start', 'child')]),
    ('</child>', [('end', 'child')]),
    ('</root>', [('end', 'root')]),
)
for chunk, expected in remaining_steps:
    parser.feed(chunk)
    self.assert_event_tags(events, expected)

self.assertFalse(list(events))
# close() is expected to return the root element that was built.
result = parser.close()
self.assertEqual('root', result.tag)
4314
# Pull parser with a TreeBuilder subclass that renames every element when
# it is closed: 'end' events and the final root are expected to show the
# renamed tag, 'start' events the original one.
class SuffixingBuilder(etree.TreeBuilder):
    def end(self, tag):
        # Let the base class finish the element, then rename it in place.
        element = super(SuffixingBuilder, self).end(tag)
        element.tag += '-huhu'
        return element

parser = self.etree.XMLPullParser(
    ['start', 'end'], target=SuffixingBuilder())
events = parser.read_events()

parser.feed('<root><element>')
self.assert_event_tags(
    events, [('start', 'root'), ('start', 'element')])
# Nothing new may show up until more data has been fed.
self.assertFalse(list(events))

remaining_steps = (
    ('</element><child>', [('end', 'element-huhu'), ('start', 'child')]),
    ('</child>', [('end', 'child-huhu')]),
    ('</root>', [('end', 'root-huhu')]),
)
for chunk, expected in remaining_steps:
    parser.feed(chunk)
    self.assert_event_tags(events, expected)

self.assertFalse(list(events))
result = parser.close()
self.assertEqual('root-huhu', result.tag)
4342
4343
4367
if __name__ == '__main__':
    # Running this file directly only prints a hint on how to run it.
    hint = 'to test use test.py %s' % __file__
    print(hint)
4370