1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import gc
16 import operator
17 import tempfile
18 import gzip
19
# Put the directory that holds this test module on the import path (once),
# ahead of the helper imports that follow.
this_dir = os.path.dirname(__file__)
if sys.path.count(this_dir) == 0:
    sys.path.insert(0, this_dir)
23
24 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir, read_file
25 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
26 from common_imports import canonicalize, sorted, _str, _bytes
27
# Banner: report which interpreter and library versions are being tested.
# Each value is rendered with repr(); the one-element tuple keeps the
# tuple-valued version infos from being unpacked by the % operator.
print("")
print("TESTED VERSION: %s" % etree.__version__)
print(" Python: %r" % (sys.version_info,))
print(" lxml.etree: %r" % (etree.LXML_VERSION,))
print(" libxml used: %r" % (etree.LIBXML_VERSION,))
print(" libxml compiled: %r" % (etree.LIBXML_COMPILED_VERSION,))
print(" libxslt used: %r" % (etree.LIBXSLT_VERSION,))
print(" libxslt compiled: %r" % (etree.LIBXSLT_COMPILED_VERSION,))
print("")
37
38 try:
39 _unicode = unicode
40 except NameError:
41
42 _unicode = str
43
45 """Tests only for etree, not ElementTree"""
46 etree = etree
47
58
67
74
76 Element = self.etree.Element
77 el = Element('name')
78 self.assertRaises(ValueError, Element, '{}')
79 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
80
81 self.assertRaises(ValueError, Element, '{test}')
82 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
83
91
93 Element = self.etree.Element
94 self.assertRaises(ValueError, Element, "p'name")
95 self.assertRaises(ValueError, Element, 'p"name')
96
97 self.assertRaises(ValueError, Element, "{test}p'name")
98 self.assertRaises(ValueError, Element, '{test}p"name')
99
100 el = Element('name')
101 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
102 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
103
105 Element = self.etree.Element
106 self.assertRaises(ValueError, Element, ' name ')
107 self.assertRaises(ValueError, Element, 'na me')
108 self.assertRaises(ValueError, Element, '{test} name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
112
120
128
130 Element = self.etree.Element
131 SubElement = self.etree.SubElement
132
133 el = Element('name')
134 self.assertRaises(ValueError, SubElement, el, "p'name")
135 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
136
137 self.assertRaises(ValueError, SubElement, el, 'p"name')
138 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
139
148
157
159 QName = self.etree.QName
160 self.assertRaises(ValueError, QName, '')
161 self.assertRaises(ValueError, QName, 'test', '')
162
164 QName = self.etree.QName
165 self.assertRaises(ValueError, QName, 'p:name')
166 self.assertRaises(ValueError, QName, 'test', 'p:name')
167
169 QName = self.etree.QName
170 self.assertRaises(ValueError, QName, ' name ')
171 self.assertRaises(ValueError, QName, 'na me')
172 self.assertRaises(ValueError, QName, 'test', ' name')
173
181
183
184 QName = self.etree.QName
185 qname1 = QName('http://myns', 'a')
186 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
187
188 qname2 = QName(a)
189 self.assertEqual(a.tag, qname1.text)
190 self.assertEqual(qname1.text, qname2.text)
191 self.assertEqual(qname1, qname2)
192
194
195 etree = self.etree
196 qname = etree.QName('http://myns', 'a')
197 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
198 a.text = qname
199
200 self.assertEqual("p:a", a.text)
201
210
225
231
239
253
275
277 XML = self.etree.XML
278 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
279
280 root = XML(xml)
281 self.etree.strip_elements(root, 'a')
282 self.assertEqual(_bytes('<test><x></x></test>'),
283 self._writeElement(root))
284
285 root = XML(xml)
286 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
287 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
288 self._writeElement(root))
289
290 root = XML(xml)
291 self.etree.strip_elements(root, 'c')
292 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
293 self._writeElement(root))
294
296 XML = self.etree.XML
297 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
298
299 root = XML(xml)
300 self.etree.strip_elements(root, 'a')
301 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
302 self._writeElement(root))
303
304 root = XML(xml)
305 self.etree.strip_elements(root, '{urn:a}b', 'c')
306 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
307 self._writeElement(root))
308
309 root = XML(xml)
310 self.etree.strip_elements(root, '{urn:a}*', 'c')
311 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
312 self._writeElement(root))
313
314 root = XML(xml)
315 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
316 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
317 self._writeElement(root))
318
337
363
390
416
435
448
459
465
467 XML = self.etree.XML
468 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
469 self.assertEqual(root[0].target, "mypi")
470 self.assertEqual(root[0].get('my'), "1")
471 self.assertEqual(root[0].get('test'), " abc ")
472 self.assertEqual(root[0].get('quotes'), "' '")
473 self.assertEqual(root[0].get('only'), None)
474 self.assertEqual(root[0].get('names'), None)
475 self.assertEqual(root[0].get('nope'), None)
476
478 XML = self.etree.XML
479 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
480 self.assertEqual(root[0].target, "mypi")
481 self.assertEqual(root[0].attrib['my'], "1")
482 self.assertEqual(root[0].attrib['test'], " abc ")
483 self.assertEqual(root[0].attrib['quotes'], "' '")
484 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
485 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
486 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
487
489
490 ProcessingInstruction = self.etree.ProcessingInstruction
491
492 a = ProcessingInstruction("PI", "ONE")
493 b = copy.deepcopy(a)
494 b.text = "ANOTHER"
495
496 self.assertEqual('ONE', a.text)
497 self.assertEqual('ANOTHER', b.text)
498
514
529
539
551
570
575
588
599
600 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
601 events = list(iterparse(f, events=('end', 'comment')))
602 root = events[-1][1]
603 self.assertEqual(6, len(events))
604 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
605 [ name(*item) for item in events ])
606 self.assertEqual(
607 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
608 tostring(root))
609
621
622 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
623 events = list(iterparse(f, events=('end', 'pi')))
624 root = events[-2][1]
625 self.assertEqual(8, len(events))
626 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
627 ('pid','d'), 'a', ('pie','e')],
628 [ name(*item) for item in events ])
629 self.assertEqual(
630 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
631 tostring(ElementTree(root)))
632
647
653
655 iterparse = self.etree.iterparse
656 f = BytesIO("""
657 <a> \n \n <b> b test </b> \n
658
659 \n\t <c> \n </c> </a> \n """)
660 iterator = iterparse(f, remove_blank_text=True)
661 text = [ (element.text, element.tail)
662 for event, element in iterator ]
663 self.assertEqual(
664 [(" b test ", None), (" \n ", None), (None, None)],
665 text)
666
668 iterparse = self.etree.iterparse
669 f = BytesIO('<a><b><d/></b><c/></a>')
670
671 iterator = iterparse(f, tag="b", events=('start', 'end'))
672 events = list(iterator)
673 root = iterator.root
674 self.assertEqual(
675 [('start', root[0]), ('end', root[0])],
676 events)
677
679 iterparse = self.etree.iterparse
680 f = BytesIO('<a><b><d/></b><c/></a>')
681
682 iterator = iterparse(f, tag="*", events=('start', 'end'))
683 events = list(iterator)
684 self.assertEqual(
685 8,
686 len(events))
687
689 iterparse = self.etree.iterparse
690 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
691
692 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
693 events = list(iterator)
694 root = iterator.root
695 self.assertEqual(
696 [('start', root[0]), ('end', root[0])],
697 events)
698
700 iterparse = self.etree.iterparse
701 f = BytesIO('<a><b><d/></b><c/></a>')
702 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
703 events = list(iterator)
704 root = iterator.root
705 self.assertEqual(
706 [('start', root[0]), ('end', root[0])],
707 events)
708
709 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
710 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
711 events = list(iterator)
712 root = iterator.root
713 self.assertEqual([], events)
714
716 iterparse = self.etree.iterparse
717 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
718 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
719 events = list(iterator)
720 self.assertEqual(8, len(events))
721
723 iterparse = self.etree.iterparse
724 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
725 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
726 events = list(iterator)
727 self.assertEqual([], events)
728
729 f = BytesIO('<a><b><d/></b><c/></a>')
730 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
731 events = list(iterator)
732 self.assertEqual(8, len(events))
733
735 text = _str('Søk på nettet')
736 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
737 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
738 ).encode('iso-8859-1')
739
740 self.assertRaises(self.etree.ParseError,
741 list, self.etree.iterparse(BytesIO(xml_latin1)))
742
744 text = _str('Søk på nettet', encoding="UTF-8")
745 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
746 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
747 ).encode('iso-8859-1')
748
749 iterator = self.etree.iterparse(BytesIO(xml_latin1),
750 encoding="iso-8859-1")
751 self.assertEqual(1, len(list(iterator)))
752
753 a = iterator.root
754 self.assertEqual(a.text, text)
755
757 tostring = self.etree.tostring
758 f = BytesIO('<root><![CDATA[test]]></root>')
759 context = self.etree.iterparse(f, strip_cdata=False)
760 content = [ el.text for event,el in context ]
761
762 self.assertEqual(['test'], content)
763 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
764 tostring(context.root))
765
769
774
793
794
795
806 def end(self, tag):
807 events.append("end")
808 assertEqual("TAG", tag)
809 def close(self):
810 return "DONE"
811
812 parser = self.etree.XMLParser(target=Target())
813 tree = self.etree.ElementTree()
814
815 self.assertRaises(TypeError,
816 tree.parse, BytesIO("<TAG/>"), parser=parser)
817 self.assertEqual(["start", "end"], events)
818
820
821 events = []
class Target(object):
    """Parser target that logs each callback into ``events``.

    Closing an ``a`` element raises ``ValueError`` after the event has
    been logged.
    """

    def _log(self, kind, payload):
        # Entries have the form "<callback>-<payload>".
        events.append(kind + "-" + payload)

    def start(self, tag, attrib):
        self._log("start", tag)

    def end(self, tag):
        self._log("end", tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        self._log("data", data)

    def close(self):
        events.append("close")
        return "DONE"
834
835 parser = self.etree.XMLParser(target=Target())
836
837 try:
838 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
839 done = parser.close()
840 self.fail("error expected, but parsing succeeded")
841 except ValueError:
842 done = 'value error received as expected'
843
844 self.assertEqual(["start-root", "data-A", "start-a",
845 "data-ca", "end-a", "close"],
846 events)
847
849
850 events = []
class Target(object):
    """Event-recording parser target; ``end('a')`` raises ``ValueError``."""

    def _record(self, prefix, value):
        # Every entry is stored as "<prefix>-<value>".
        events.append(prefix + "-" + value)

    def start(self, tag, attrib):
        self._record("start", tag)

    def end(self, tag):
        # The event is logged first, then the failure is triggered.
        self._record("end", tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def data(self, data):
        self._record("data", data)

    def close(self):
        events.append("close")
        return "DONE"
863
864 parser = self.etree.XMLParser(target=Target())
865
866 try:
867 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
868 parser=parser)
869 self.fail("error expected, but parsing succeeded")
870 except ValueError:
871 done = 'value error received as expected'
872
873 self.assertEqual(["start-root", "data-A", "start-a",
874 "data-ca", "end-a", "close"],
875 events)
876
882 def end(self, tag):
883 events.append("end-" + tag)
884 def data(self, data):
885 events.append("data-" + data)
886 def comment(self, text):
887 events.append("comment-" + text)
888 def close(self):
889 return "DONE"
890
891 parser = self.etree.XMLParser(target=Target())
892
893 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
894 done = parser.close()
895
896 self.assertEqual("DONE", done)
897 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
898 "start-sub", "end-sub", "comment-c", "data-B",
899 "end-root", "comment-d"],
900 events)
901
903 events = []
class Target(object):
    """Parser target that records element, text and PI callbacks in ``events``."""

    def _record(self, *parts):
        # Parts are joined with "-", e.g. ("pi", "test", "a") -> "pi-test-a".
        events.append("-".join(parts))

    def start(self, tag, attrib):
        self._record("start", tag)

    def end(self, tag):
        self._record("end", tag)

    def data(self, data):
        self._record("data", data)

    def pi(self, target, data):
        self._record("pi", target, data)

    def close(self):
        return "DONE"
915
916 parser = self.etree.XMLParser(target=Target())
917
918 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
919 done = parser.close()
920
921 self.assertEqual("DONE", done)
922 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
923 "data-B", "end-root", "pi-test-c"],
924 events)
925
927 events = []
class Target(object):
    """Parser target that appends "<callback>-<value>" entries to ``events``."""

    def _note(self, callback, value):
        events.append(callback + "-" + value)

    def start(self, tag, attrib):
        self._note("start", tag)

    def end(self, tag):
        self._note("end", tag)

    def data(self, data):
        self._note("data", data)

    def close(self):
        # close() itself is not logged by this target.
        return "DONE"
937
938 parser = self.etree.XMLParser(target=Target(),
939 strip_cdata=False)
940
941 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
942 done = parser.close()
943
944 self.assertEqual("DONE", done)
945 self.assertEqual(["start-root", "data-A", "start-a",
946 "data-ca", "end-a", "data-B", "end-root"],
947 events)
948
950 events = []
class Target(object):
    """Parser target that logs start/end/data callbacks and ``close`` in ``events``."""

    def _note(self, callback, value):
        # Stored as "<callback>-<value>".
        events.append(callback + "-" + value)

    def start(self, tag, attrib):
        self._note("start", tag)

    def end(self, tag):
        self._note("end", tag)

    def data(self, data):
        self._note("data", data)

    def close(self):
        events.append("close")
        return "DONE"
961
962 parser = self.etree.XMLParser(target=Target(),
963 recover=True)
964
965 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
966 done = parser.close()
967
968 self.assertEqual("DONE", done)
969 self.assertEqual(["start-root", "data-A", "start-a",
970 "data-ca", "end-a", "data-B",
971 "end-root", "close"],
972 events)
973
983
993
1002
1012
1014 iterwalk = self.etree.iterwalk
1015 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1016
1017 iterator = iterwalk(root, events=('start','end'))
1018 events = list(iterator)
1019 self.assertEqual(
1020 [('start', root), ('start', root[0]), ('end', root[0]),
1021 ('start', root[1]), ('end', root[1]), ('end', root)],
1022 events)
1023
1034
1036 iterwalk = self.etree.iterwalk
1037 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1038
1039 attr_name = '{testns}bla'
1040 events = []
1041 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1042 for event, elem in iterator:
1043 events.append(event)
1044 if event == 'start':
1045 if elem.tag != '{ns1}a':
1046 elem.set(attr_name, 'value')
1047
1048 self.assertEqual(
1049 ['start-ns', 'start', 'start', 'start-ns', 'start',
1050 'end', 'end-ns', 'end', 'end', 'end-ns'],
1051 events)
1052
1053 self.assertEqual(
1054 None,
1055 root.get(attr_name))
1056 self.assertEqual(
1057 'value',
1058 root[0].get(attr_name))
1059
1070
1072 parse = self.etree.parse
1073 parser = self.etree.XMLParser(dtd_validation=True)
1074 assertEqual = self.assertEqual
1075 test_url = _str("__nosuch.dtd")
1076
class MyResolver(self.etree.Resolver):
    """Resolver that answers the lookup with a DTD built from a text string."""

    def resolve(self, url, id, context):
        # The parser must ask for exactly the URL named in the DOCTYPE.
        assertEqual(url, test_url)
        # The entity value is the requested URL itself.
        dtd_text = _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url
        return self.resolve_string(dtd_text, context)
1083
1084 parser.resolvers.add(MyResolver())
1085
1086 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1087 tree = parse(StringIO(xml), parser)
1088 root = tree.getroot()
1089 self.assertEqual(root.text, test_url)
1090
1092 parse = self.etree.parse
1093 parser = self.etree.XMLParser(dtd_validation=True)
1094 assertEqual = self.assertEqual
1095 test_url = _str("__nosuch.dtd")
1096
class MyResolver(self.etree.Resolver):
    """Resolver that answers the lookup with a UTF-8 encoded DTD string."""

    def resolve(self, url, id, context):
        # The parser must ask for exactly the URL named in the DOCTYPE.
        assertEqual(url, test_url)
        dtd_text = _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url
        # Hand the DTD over as bytes rather than as text.
        return self.resolve_string(dtd_text.encode('utf-8'), context)
1104
1105 parser.resolvers.add(MyResolver())
1106
1107 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1108 tree = parse(StringIO(xml), parser)
1109 root = tree.getroot()
1110 self.assertEqual(root.text, test_url)
1111
1113 parse = self.etree.parse
1114 parser = self.etree.XMLParser(dtd_validation=True)
1115 assertEqual = self.assertEqual
1116 test_url = _str("__nosuch.dtd")
1117
class MyResolver(self.etree.Resolver):
    """Resolver that serves the DTD through a ``SillyFileLike`` object."""

    def resolve(self, url, id, context):
        # The parser must ask for exactly the URL named in the DOCTYPE.
        assertEqual(url, test_url)
        dtd_text = _str('''<!ENTITY myentity "%s">
<!ELEMENT doc ANY>''') % url
        dtd_file = SillyFileLike(dtd_text)
        return self.resolve_file(dtd_file, context)
1125
1126 parser.resolvers.add(MyResolver())
1127
1128 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1129 tree = parse(StringIO(xml), parser)
1130 root = tree.getroot()
1131 self.assertEqual(root.text, test_url)
1132
1134 parse = self.etree.parse
1135 parser = self.etree.XMLParser(attribute_defaults=True)
1136 assertEqual = self.assertEqual
1137 test_url = _str("__nosuch.dtd")
1138
class MyResolver(self.etree.Resolver):
    """Resolver that redirects the lookup to the ``test.dtd`` file by name."""

    def resolve(self, url, id, context):
        # The parser must ask for exactly the URL named in the DOCTYPE.
        assertEqual(url, test_url)
        dtd_path = fileInTestDir('test.dtd')
        return self.resolve_filename(dtd_path, context)
1144
1145 parser.resolvers.add(MyResolver())
1146
1147 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1148 tree = parse(StringIO(xml), parser)
1149 root = tree.getroot()
1150 self.assertEqual(
1151 root.attrib, {'default': 'valueA'})
1152 self.assertEqual(
1153 root[0].attrib, {'default': 'valueB'})
1154
1166
1167 parser.resolvers.add(MyResolver())
1168
1169 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1170 tree = parse(StringIO(xml), parser,
1171 base_url=fileInTestDir('__test.xml'))
1172 root = tree.getroot()
1173 self.assertEqual(
1174 root.attrib, {'default': 'valueA'})
1175 self.assertEqual(
1176 root[0].attrib, {'default': 'valueB'})
1177
1179 parse = self.etree.parse
1180 parser = self.etree.XMLParser(attribute_defaults=True)
1181 assertEqual = self.assertEqual
1182 test_url = _str("__nosuch.dtd")
1183
class MyResolver(self.etree.Resolver):
    """Resolver that hands the parser an open binary handle on ``test.dtd``."""

    def resolve(self, url, id, context):
        # The parser must ask for exactly the URL named in the DOCTYPE.
        assertEqual(url, test_url)
        # NOTE(review): the handle is passed on to the parser and is never
        # closed explicitly in this block.
        dtd_file = open(fileInTestDir('test.dtd'), 'rb')
        return self.resolve_file(dtd_file, context)
1189
1190 parser.resolvers.add(MyResolver())
1191
1192 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1193 tree = parse(StringIO(xml), parser)
1194 root = tree.getroot()
1195 self.assertEqual(
1196 root.attrib, {'default': 'valueA'})
1197 self.assertEqual(
1198 root[0].attrib, {'default': 'valueB'})
1199
1201 parse = self.etree.parse
1202 parser = self.etree.XMLParser(load_dtd=True)
1203 assertEqual = self.assertEqual
1204 test_url = _str("__nosuch.dtd")
1205
class check(object):
    # Set to True by MyResolver.resolve() once it has been called.
    resolved = False

class MyResolver(self.etree.Resolver):
    """Resolver that records its invocation and returns an empty result."""

    def resolve(self, url, id, context):
        # The parser must ask for exactly the URL named in the DOCTYPE.
        assertEqual(url, test_url)
        check.resolved = True
        empty_result = self.resolve_empty(context)
        return empty_result
1214
1215 parser.resolvers.add(MyResolver())
1216
1217 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1218 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1219 self.assertTrue(check.resolved)
1220
1227
class MyResolver(self.etree.Resolver):
    """Resolver whose lookup always fails with ``_LocalException``."""

    def resolve(self, url, id, context):
        # Raising an instance is equivalent to raising the bare class.
        raise _LocalException()
1231
1232 parser.resolvers.add(MyResolver())
1233
1234 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1235 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1236
1237 if etree.LIBXML_VERSION > (2,6,20):
1254
1256 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1257 <root>
1258 <child1/>
1259 <child2/>
1260 <child3> </child3>
1261 </root>''')
1262
1263 parser = self.etree.XMLParser(resolve_entities=False)
1264 root = etree.fromstring(xml, parser)
1265 self.assertEqual([ el.tag for el in root ],
1266 ['child1', 'child2', 'child3'])
1267
1268 root[0] = root[-1]
1269 self.assertEqual([ el.tag for el in root ],
1270 ['child3', 'child2'])
1271 self.assertEqual(root[0][0].text, ' ')
1272 self.assertEqual(root[0][0].name, 'nbsp')
1273
1289
1296
1298 Entity = self.etree.Entity
1299 self.assertRaises(ValueError, Entity, 'a b c')
1300 self.assertRaises(ValueError, Entity, 'a,b')
1301 self.assertRaises(ValueError, Entity, 'a\0b')
1302 self.assertRaises(ValueError, Entity, '#abc')
1303 self.assertRaises(ValueError, Entity, '#xxyz')
1304
1317
1330
1332 CDATA = self.etree.CDATA
1333 Element = self.etree.Element
1334
1335 root = Element("root")
1336 cdata = CDATA('test')
1337
1338 self.assertRaises(TypeError,
1339 setattr, root, 'tail', cdata)
1340 self.assertRaises(TypeError,
1341 root.set, 'attr', cdata)
1342 self.assertRaises(TypeError,
1343 operator.setitem, root.attrib, 'attr', cdata)
1344
1353
1362
1363
1373
1382
1395
1408
1414
1420
1435
1448
1463
1476
1491
1504
1519
1532
1533
1541
1542
1552
1553
1568
1569
1579
1580
1591
1592
1594 self.assertRaises(TypeError, self.etree.dump, None)
1595
1608
1621
1642
1651
1660
1669
1678
1680 XML = self.etree.XML
1681
1682 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
1683 result = []
1684 for el in root.iterchildren(tag=['two', 'three']):
1685 result.append(el.text)
1686 self.assertEqual(['Two', 'Bla', None], result)
1687
1696
1717
1732
1734 Element = self.etree.Element
1735 SubElement = self.etree.SubElement
1736
1737 a = Element('a')
1738 b = SubElement(a, 'b')
1739 c = SubElement(a, 'c')
1740 d = SubElement(b, 'd')
1741 self.assertEqual(
1742 [b, a],
1743 list(d.iterancestors(tag=('a', 'b'))))
1744 self.assertEqual(
1745 [],
1746 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
1747 self.assertEqual(
1748 [],
1749 list(d.iterancestors(tag=('d', 'x'))))
1750 self.assertEqual(
1751 [b, a],
1752 list(d.iterancestors(tag=('b', '*'))))
1753 self.assertEqual(
1754 [b],
1755 list(d.iterancestors(tag=('b', 'c'))))
1756
1773
1794
1796 Element = self.etree.Element
1797 SubElement = self.etree.SubElement
1798
1799 a = Element('a')
1800 b = SubElement(a, 'b')
1801 c = SubElement(a, 'c')
1802 d = SubElement(b, 'd')
1803 e = SubElement(c, 'e')
1804
1805 self.assertEqual(
1806 [b, e],
1807 list(a.iterdescendants(tag=('a', 'b', 'e'))))
1808 a2 = SubElement(e, 'a')
1809 self.assertEqual(
1810 [b, a2],
1811 list(a.iterdescendants(tag=('a', 'b'))))
1812 self.assertEqual(
1813 [],
1814 list(c.iterdescendants(tag=('x', 'y', 'z'))))
1815 self.assertEqual(
1816 [b, d, c, e, a2],
1817 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
1818
1836
1853
1871
1895
1897 Element = self.etree.Element
1898 SubElement = self.etree.SubElement
1899
1900 a = Element('a')
1901 b = SubElement(a, 'b')
1902 c = SubElement(a, 'c')
1903 d = SubElement(b, 'd')
1904 self.assertEqual(
1905 [],
1906 list(a.itersiblings(tag='XXX')))
1907 self.assertEqual(
1908 [c],
1909 list(b.itersiblings(tag='c')))
1910 self.assertEqual(
1911 [c],
1912 list(b.itersiblings(tag='*')))
1913 self.assertEqual(
1914 [b],
1915 list(c.itersiblings(preceding=True, tag='b')))
1916 self.assertEqual(
1917 [],
1918 list(c.itersiblings(preceding=True, tag='c')))
1919
1921 Element = self.etree.Element
1922 SubElement = self.etree.SubElement
1923
1924 a = Element('a')
1925 b = SubElement(a, 'b')
1926 c = SubElement(a, 'c')
1927 d = SubElement(b, 'd')
1928 e = SubElement(a, 'e')
1929 self.assertEqual(
1930 [],
1931 list(a.itersiblings(tag=('XXX', 'YYY'))))
1932 self.assertEqual(
1933 [c, e],
1934 list(b.itersiblings(tag=('c', 'd', 'e'))))
1935 self.assertEqual(
1936 [b],
1937 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
1938 self.assertEqual(
1939 [c, b],
1940 list(e.itersiblings(preceding=True, tag=('c', '*'))))
1941
1943 parseid = self.etree.parseid
1944 XML = self.etree.XML
1945 xml_text = _bytes('''
1946 <!DOCTYPE document [
1947 <!ELEMENT document (h1,p)*>
1948 <!ELEMENT h1 (#PCDATA)>
1949 <!ATTLIST h1 myid ID #REQUIRED>
1950 <!ELEMENT p (#PCDATA)>
1951 <!ATTLIST p someid ID #REQUIRED>
1952 ]>
1953 <document>
1954 <h1 myid="chapter1">...</h1>
1955 <p id="note1" class="note">...</p>
1956 <p>Regular paragraph.</p>
1957 <p xml:id="xmlid">XML:ID paragraph.</p>
1958 <p someid="warn1" class="warning">...</p>
1959 </document>
1960 ''')
1961
1962 tree, dic = parseid(BytesIO(xml_text))
1963 root = tree.getroot()
1964 root2 = XML(xml_text)
1965 self.assertEqual(self._writeElement(root),
1966 self._writeElement(root2))
1967 expected = {
1968 "chapter1" : root[0],
1969 "xmlid" : root[3],
1970 "warn1" : root[4]
1971 }
1972 self.assertTrue("chapter1" in dic)
1973 self.assertTrue("warn1" in dic)
1974 self.assertTrue("xmlid" in dic)
1975 self._checkIDDict(dic, expected)
1976
1978 XMLDTDID = self.etree.XMLDTDID
1979 XML = self.etree.XML
1980 xml_text = _bytes('''
1981 <!DOCTYPE document [
1982 <!ELEMENT document (h1,p)*>
1983 <!ELEMENT h1 (#PCDATA)>
1984 <!ATTLIST h1 myid ID #REQUIRED>
1985 <!ELEMENT p (#PCDATA)>
1986 <!ATTLIST p someid ID #REQUIRED>
1987 ]>
1988 <document>
1989 <h1 myid="chapter1">...</h1>
1990 <p id="note1" class="note">...</p>
1991 <p>Regular paragraph.</p>
1992 <p xml:id="xmlid">XML:ID paragraph.</p>
1993 <p someid="warn1" class="warning">...</p>
1994 </document>
1995 ''')
1996
1997 root, dic = XMLDTDID(xml_text)
1998 root2 = XML(xml_text)
1999 self.assertEqual(self._writeElement(root),
2000 self._writeElement(root2))
2001 expected = {
2002 "chapter1" : root[0],
2003 "xmlid" : root[3],
2004 "warn1" : root[4]
2005 }
2006 self.assertTrue("chapter1" in dic)
2007 self.assertTrue("warn1" in dic)
2008 self.assertTrue("xmlid" in dic)
2009 self._checkIDDict(dic, expected)
2010
2012 XMLDTDID = self.etree.XMLDTDID
2013 XML = self.etree.XML
2014 xml_text = _bytes('''
2015 <document>
2016 <h1 myid="chapter1">...</h1>
2017 <p id="note1" class="note">...</p>
2018 <p>Regular paragraph.</p>
2019 <p someid="warn1" class="warning">...</p>
2020 </document>
2021 ''')
2022
2023 root, dic = XMLDTDID(xml_text)
2024 root2 = XML(xml_text)
2025 self.assertEqual(self._writeElement(root),
2026 self._writeElement(root2))
2027 expected = {}
2028 self._checkIDDict(dic, expected)
2029
2031 self.assertEqual(len(dic),
2032 len(expected))
2033 self.assertEqual(sorted(dic.items()),
2034 sorted(expected.items()))
2035 if sys.version_info < (3,):
2036 self.assertEqual(sorted(dic.iteritems()),
2037 sorted(expected.iteritems()))
2038 self.assertEqual(sorted(dic.keys()),
2039 sorted(expected.keys()))
2040 if sys.version_info < (3,):
2041 self.assertEqual(sorted(dic.iterkeys()),
2042 sorted(expected.iterkeys()))
2043 if sys.version_info < (3,):
2044 self.assertEqual(sorted(dic.values()),
2045 sorted(expected.values()))
2046 self.assertEqual(sorted(dic.itervalues()),
2047 sorted(expected.itervalues()))
2048
2050 etree = self.etree
2051
2052 r = {'foo': 'http://ns.infrae.com/foo'}
2053 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2054 self.assertEqual(
2055 'foo',
2056 e.prefix)
2057 self.assertEqual(
2058 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2059 self._writeElement(e))
2060
2062 etree = self.etree
2063
2064 r = {None: 'http://ns.infrae.com/foo'}
2065 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2066 self.assertEqual(
2067 None,
2068 e.prefix)
2069 self.assertEqual(
2070 '{http://ns.infrae.com/foo}bar',
2071 e.tag)
2072 self.assertEqual(
2073 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2074 self._writeElement(e))
2075
2077 etree = self.etree
2078
2079 r = {None: 'http://ns.infrae.com/foo',
2080 'hoi': 'http://ns.infrae.com/hoi'}
2081 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2082 e.set('{http://ns.infrae.com/hoi}test', 'value')
2083 self.assertEqual(
2084 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2085 self._writeElement(e))
2086
2088 etree = self.etree
2089
2090 root = etree.Element('{http://test/ns}root',
2091 nsmap={None: 'http://test/ns'})
2092 sub = etree.Element('{http://test/ns}sub',
2093 nsmap={'test': 'http://test/ns'})
2094
2095 sub.attrib['{http://test/ns}attr'] = 'value'
2096 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2097 self.assertEqual(
2098 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2099 etree.tostring(sub))
2100
2101 root.append(sub)
2102 self.assertEqual(
2103 _bytes('<root xmlns="http://test/ns">'
2104 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2105 '</root>'),
2106 etree.tostring(root))
2107
2109 etree = self.etree
2110
2111 root = etree.Element('root')
2112 sub = etree.Element('{http://test/ns}sub',
2113 nsmap={'test': 'http://test/ns'})
2114
2115 sub.attrib['{http://test/ns}attr'] = 'value'
2116 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2117 self.assertEqual(
2118 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2119 etree.tostring(sub))
2120
2121 root.append(sub)
2122 self.assertEqual(
2123 _bytes('<root>'
2124 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2125 '</root>'),
2126 etree.tostring(root))
2127
2129 etree = self.etree
2130
2131 root = etree.Element('root')
2132 sub = etree.Element('{http://test/ns}sub',
2133 nsmap={None: 'http://test/ns'})
2134
2135 sub.attrib['{http://test/ns}attr'] = 'value'
2136 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2137 self.assertEqual(
2138 _bytes('<sub xmlns="http://test/ns" '
2139 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2140 etree.tostring(sub))
2141
2142 root.append(sub)
2143 self.assertEqual(
2144 _bytes('<root>'
2145 '<sub xmlns="http://test/ns"'
2146 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2147 '</root>'),
2148 etree.tostring(root))
2149
2151 etree = self.etree
2152
2153 root = etree.Element('{http://test/ns}root',
2154 nsmap={'test': 'http://test/ns',
2155 None: 'http://test/ns'})
2156 sub = etree.Element('{http://test/ns}sub',
2157 nsmap={None: 'http://test/ns'})
2158
2159 sub.attrib['{http://test/ns}attr'] = 'value'
2160 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2161 self.assertEqual(
2162 _bytes('<sub xmlns="http://test/ns" '
2163 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2164 etree.tostring(sub))
2165
2166 root.append(sub)
2167 self.assertEqual(
2168 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2169 '<test:sub test:attr="value"/>'
2170 '</test:root>'),
2171 etree.tostring(root))
2172
2174 etree = self.etree
2175 r = {None: 'http://ns.infrae.com/foo',
2176 'hoi': 'http://ns.infrae.com/hoi'}
2177 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2178 tree = etree.ElementTree(element=e)
2179 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2180 self.assertEqual(
2181 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2182 self._writeElement(e))
2183
2185 etree = self.etree
2186
2187 r = {None: 'http://ns.infrae.com/foo'}
2188 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2189 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2190
2191 e1.append(e2)
2192
2193 self.assertEqual(
2194 None,
2195 e1.prefix)
2196 self.assertEqual(
2197 None,
2198 e1[0].prefix)
2199 self.assertEqual(
2200 '{http://ns.infrae.com/foo}bar',
2201 e1.tag)
2202 self.assertEqual(
2203 '{http://ns.infrae.com/foo}bar',
2204 e1[0].tag)
2205
2207 etree = self.etree
2208
2209 r = {None: 'http://ns.infrae.com/BAR'}
2210 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2211 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2212
2213 e1.append(e2)
2214
2215 self.assertEqual(
2216 None,
2217 e1.prefix)
2218 self.assertNotEqual(
2219 None,
2220 e2.prefix)
2221 self.assertEqual(
2222 '{http://ns.infrae.com/BAR}bar',
2223 e1.tag)
2224 self.assertEqual(
2225 '{http://ns.infrae.com/foo}bar',
2226 e2.tag)
2227
2229 ns_href = "http://a.b.c"
2230 one = self.etree.fromstring(
2231 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2232 baz = one[0][0]
2233
2234 two = self.etree.fromstring(
2235 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2236 two.append(baz)
2237 del one
2238
2239 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2240 self.assertEqual(
2241 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2242 self.etree.tostring(two))
2243
2253
2255 etree = self.etree
2256
2257 r = {None: 'http://ns.infrae.com/foo',
2258 'hoi': 'http://ns.infrae.com/hoi'}
2259 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2260 self.assertEqual(
2261 r,
2262 e.nsmap)
2263
2265 etree = self.etree
2266
2267 re = {None: 'http://ns.infrae.com/foo',
2268 'hoi': 'http://ns.infrae.com/hoi'}
2269 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2270
2271 rs = {None: 'http://ns.infrae.com/honk',
2272 'top': 'http://ns.infrae.com/top'}
2273 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2274
2275 r = re.copy()
2276 r.update(rs)
2277 self.assertEqual(re, e.nsmap)
2278 self.assertEqual(r, s.nsmap)
2279
2281 etree = self.etree
2282 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2283 self.assertEqual({'hha': None}, el.nsmap)
2284
2286 Element = self.etree.Element
2287 SubElement = self.etree.SubElement
2288
2289 a = Element('a')
2290 b = SubElement(a, 'b')
2291 c = SubElement(a, 'c')
2292 d = SubElement(b, 'd')
2293 e = SubElement(c, 'e')
2294 f = SubElement(c, 'f')
2295
2296 self.assertEqual(
2297 [a, b],
2298 list(a.getiterator('a', 'b')))
2299 self.assertEqual(
2300 [],
2301 list(a.getiterator('x', 'y')))
2302 self.assertEqual(
2303 [a, f],
2304 list(a.getiterator('f', 'a')))
2305 self.assertEqual(
2306 [c, e, f],
2307 list(c.getiterator('c', '*', 'a')))
2308 self.assertEqual(
2309 [],
2310 list(a.getiterator( (), () )))
2311
2313 Element = self.etree.Element
2314 SubElement = self.etree.SubElement
2315
2316 a = Element('a')
2317 b = SubElement(a, 'b')
2318 c = SubElement(a, 'c')
2319 d = SubElement(b, 'd')
2320 e = SubElement(c, 'e')
2321 f = SubElement(c, 'f')
2322
2323 self.assertEqual(
2324 [a, b],
2325 list(a.getiterator( ('a', 'b') )))
2326 self.assertEqual(
2327 [],
2328 list(a.getiterator( ('x', 'y') )))
2329 self.assertEqual(
2330 [a, f],
2331 list(a.getiterator( ('f', 'a') )))
2332 self.assertEqual(
2333 [c, e, f],
2334 list(c.getiterator( ('c', '*', 'a') )))
2335 self.assertEqual(
2336 [],
2337 list(a.getiterator( () )))
2338
2340 Element = self.etree.Element
2341 SubElement = self.etree.SubElement
2342
2343 a = Element('{a}a')
2344 b = SubElement(a, '{a}b')
2345 c = SubElement(a, '{a}c')
2346 d = SubElement(b, '{b}d')
2347 e = SubElement(c, '{a}e')
2348 f = SubElement(c, '{b}f')
2349 g = SubElement(c, 'g')
2350
2351 self.assertEqual(
2352 [a],
2353 list(a.getiterator('{a}a')))
2354 self.assertEqual(
2355 [],
2356 list(a.getiterator('{b}a')))
2357 self.assertEqual(
2358 [],
2359 list(a.getiterator('a')))
2360 self.assertEqual(
2361 [a,b,d,c,e,f,g],
2362 list(a.getiterator('*')))
2363 self.assertEqual(
2364 [f],
2365 list(c.getiterator('{b}*')))
2366 self.assertEqual(
2367 [d, f],
2368 list(a.getiterator('{b}*')))
2369 self.assertEqual(
2370 [g],
2371 list(a.getiterator('g')))
2372 self.assertEqual(
2373 [g],
2374 list(a.getiterator('{}g')))
2375 self.assertEqual(
2376 [g],
2377 list(a.getiterator('{}*')))
2378
2380 Element = self.etree.Element
2381 SubElement = self.etree.SubElement
2382
2383 a = Element('{a}a')
2384 b = SubElement(a, '{nsA}b')
2385 c = SubElement(b, '{nsB}b')
2386 d = SubElement(a, 'b')
2387 e = SubElement(a, '{nsA}e')
2388 f = SubElement(e, '{nsB}e')
2389 g = SubElement(e, 'e')
2390
2391 self.assertEqual(
2392 [b, c, d],
2393 list(a.getiterator('{*}b')))
2394 self.assertEqual(
2395 [e, f, g],
2396 list(a.getiterator('{*}e')))
2397 self.assertEqual(
2398 [a, b, c, d, e, f, g],
2399 list(a.getiterator('{*}*')))
2400
2425
2441
2458
2465
2472
2481
2483 XML = self.etree.XML
2484 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
2485 self.assertEqual(len(root.findall(".//{X}b")), 2)
2486 self.assertEqual(len(root.findall(".//{X}*")), 2)
2487 self.assertEqual(len(root.findall(".//b")), 3)
2488
2490 XML = self.etree.XML
2491 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
2492 nsmap = {'xx': 'X'}
2493 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
2494 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
2495 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2496 nsmap = {'xx': 'Y'}
2497 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
2498 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
2499 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2500
2502 XML = self.etree.XML
2503 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
2504 nsmap = {'xx': 'X'}
2505 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
2506 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
2507 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2508 nsmap = {'xx': 'Y'}
2509 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
2510 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
2511 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2512
2519
# Exercises Element.index() with and without the optional start/stop bounds.
2521 etree = self.etree
2522 e = etree.Element('foo')
# Build a parent with ten children tagged a0 .. a9.
2523 for i in range(10):
2524 etree.SubElement(e, 'a%s' % i)
# Without bounds, index() reports each child's own position.
2525 for i in range(10):
2526 self.assertEqual(
2527 i,
2528 e.index(e[i]))
# A start bound equal to the child's position still finds it ...
2529 self.assertEqual(
2530 3, e.index(e[3], 3))
# ... but a child lying outside the [start, stop) range raises ValueError.
2531 self.assertRaises(
2532 ValueError, e.index, e[3], 4)
2533 self.assertRaises(
2534 ValueError, e.index, e[3], 0, 2)
# Negative bounds: the expectations below are consistent with counting from
# the end as list.index() does (stop=-3 excludes a7..a9, stop=-1 excludes
# only a9, and start=-12 reaches back past the first child).
2535 self.assertRaises(
2536 ValueError, e.index, e[8], 0, -3)
2537 self.assertRaises(
2538 ValueError, e.index, e[8], -5, -3)
2539 self.assertEqual(
2540 8, e.index(e[8], 0, -1))
2541 self.assertEqual(
2542 8, e.index(e[8], -12, -1))
2543 self.assertEqual(
2544 0, e.index(e[0], -12, -1))
2545
2547 etree = self.etree
2548 e = etree.Element('foo')
2549 for i in range(10):
2550 el = etree.SubElement(e, 'a%s' % i)
2551 el.text = "text%d" % i
2552 el.tail = "tail%d" % i
2553
2554 child0 = e[0]
2555 child1 = e[1]
2556 child2 = e[2]
2557
2558 e.replace(e[0], e[1])
2559 self.assertEqual(
2560 9, len(e))
2561 self.assertEqual(
2562 child1, e[0])
2563 self.assertEqual(
2564 child1.text, "text1")
2565 self.assertEqual(
2566 child1.tail, "tail1")
2567 self.assertEqual(
2568 child0.tail, "tail0")
2569 self.assertEqual(
2570 child2, e[1])
2571
2572 e.replace(e[-1], e[0])
2573 self.assertEqual(
2574 child1, e[-1])
2575 self.assertEqual(
2576 child1.text, "text1")
2577 self.assertEqual(
2578 child1.tail, "tail1")
2579 self.assertEqual(
2580 child2, e[0])
2581
2583 etree = self.etree
2584 e = etree.Element('foo')
2585 for i in range(10):
2586 etree.SubElement(e, 'a%s' % i)
2587
2588 new_element = etree.Element("test")
2589 new_element.text = "TESTTEXT"
2590 new_element.tail = "TESTTAIL"
2591 child1 = e[1]
2592 e.replace(e[0], new_element)
2593 self.assertEqual(
2594 new_element, e[0])
2595 self.assertEqual(
2596 "TESTTEXT",
2597 e[0].text)
2598 self.assertEqual(
2599 "TESTTAIL",
2600 e[0].tail)
2601 self.assertEqual(
2602 child1, e[1])
2603
2619
2637
2655
2673
# Assigning too many elements to an extended (stepped) slice must fail
# with ValueError and leave the existing children unchanged.
2675 Element = self.etree.Element
2676 SubElement = self.etree.SubElement
# Bail out with a notice when the "slice" builtin name is not available.
2677 try:
2678 slice
2679 except NameError:
2680 print("slice() not found")
2681 return
2682 
2683 a = Element('a')
2684 b = SubElement(a, 'b')
2685 c = SubElement(a, 'c')
2686 d = SubElement(a, 'd')
2687 e = SubElement(a, 'e')
2688 
2689 x = Element('x')
2690 y = Element('y')
2691 z = Element('z')
2692 
# With four children, slice(1, None, 2) selects two positions (1 and 3),
# but three replacement elements are supplied.
2693 self.assertRaises(
2694 ValueError,
2695 operator.setitem, a, slice(1,None,2), [x, y, z])
2696 
# The failed assignment must not have modified the child list.
2697 self.assertEqual(
2698 [b, c, d, e],
2699 list(a))
2700
2713
2721
2730
2740
2750
2756
2764
2770
2777
2783
2785 etree = self.etree
2786 xml_header = '<?xml version="1.0" encoding="ascii"?>'
2787 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2788 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2789 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
2790
2791 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
2792
2793 tree = etree.parse(BytesIO(xml))
2794 docinfo = tree.docinfo
2795 self.assertEqual(docinfo.encoding, "ascii")
2796 self.assertEqual(docinfo.xml_version, "1.0")
2797 self.assertEqual(docinfo.public_id, pub_id)
2798 self.assertEqual(docinfo.system_url, sys_id)
2799 self.assertEqual(docinfo.root_name, 'html')
2800 self.assertEqual(docinfo.doctype, doctype_string)
2801
2817
2829
2841
2847
2849 etree = self.etree
2850 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2851 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2852 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
2853
2854 xml = _bytes('<!DOCTYPE root>\n<root/>')
2855 tree = etree.parse(BytesIO(xml))
2856 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
2857 etree.tostring(tree, doctype=doctype_string))
2858
2860 etree = self.etree
2861 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2862 self.assertEqual(root.base, "http://no/such/url")
2863 self.assertEqual(
2864 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2865 root.base = "https://secret/url"
2866 self.assertEqual(root.base, "https://secret/url")
2867 self.assertEqual(
2868 root.get('{http://www.w3.org/XML/1998/namespace}base'),
2869 "https://secret/url")
2870
2872 etree = self.etree
2873 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2874 self.assertEqual(root.base, "http://no/such/url")
2875 self.assertEqual(
2876 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2877 root.set('{http://www.w3.org/XML/1998/namespace}base',
2878 "https://secret/url")
2879 self.assertEqual(root.base, "https://secret/url")
2880 self.assertEqual(
2881 root.get('{http://www.w3.org/XML/1998/namespace}base'),
2882 "https://secret/url")
2883
2889
2894
2901
2915
2917 Element = self.etree.Element
2918
2919 a = Element('a')
2920 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
2921 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
2922
2923 self.assertRaises(ValueError, Element, 'ha\0ho')
2924
2926 Element = self.etree.Element
2927
2928 a = Element('a')
2929 self.assertRaises(ValueError, setattr, a, "text",
2930 _str('ha\0ho'))
2931 self.assertRaises(ValueError, setattr, a, "tail",
2932 _str('ha\0ho'))
2933
2934 self.assertRaises(ValueError, Element,
2935 _str('ha\0ho'))
2936
2938 Element = self.etree.Element
2939
2940 a = Element('a')
2941 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
2942 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
2943
2944 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
2945 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
2946
2947 self.assertRaises(ValueError, Element, 'ha\x07ho')
2948 self.assertRaises(ValueError, Element, 'ha\x02ho')
2949
2951 Element = self.etree.Element
2952
2953 a = Element('a')
2954 self.assertRaises(ValueError, setattr, a, "text",
2955 _str('ha\x07ho'))
2956 self.assertRaises(ValueError, setattr, a, "text",
2957 _str('ha\x02ho'))
2958
2959 self.assertRaises(ValueError, setattr, a, "tail",
2960 _str('ha\x07ho'))
2961 self.assertRaises(ValueError, setattr, a, "tail",
2962 _str('ha\x02ho'))
2963
2964 self.assertRaises(ValueError, Element,
2965 _str('ha\x07ho'))
2966 self.assertRaises(ValueError, Element,
2967 _str('ha\x02ho'))
2968
2970 Element = self.etree.Element
2971
2972 a = Element('a')
2973 self.assertRaises(ValueError, setattr, a, "text",
2974 _str('ha\u1234\x07ho'))
2975 self.assertRaises(ValueError, setattr, a, "text",
2976 _str('ha\u1234\x02ho'))
2977
2978 self.assertRaises(ValueError, setattr, a, "tail",
2979 _str('ha\u1234\x07ho'))
2980 self.assertRaises(ValueError, setattr, a, "tail",
2981 _str('ha\u1234\x02ho'))
2982
2983 self.assertRaises(ValueError, Element,
2984 _str('ha\u1234\x07ho'))
2985 self.assertRaises(ValueError, Element,
2986 _str('ha\u1234\x02ho'))
2987
3001
3006
3024
3044
3066
3068 tostring = self.etree.tostring
3069 XML = self.etree.XML
3070 ElementTree = self.etree.ElementTree
3071
3072 root = XML(_bytes("<root/>"))
3073
3074 tree = ElementTree(root)
3075 self.assertEqual(None, tree.docinfo.standalone)
3076
3077 result = tostring(root, xml_declaration=True, encoding="ASCII")
3078 self.assertEqual(result, _bytes(
3079 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3080
3081 result = tostring(root, xml_declaration=True, encoding="ASCII",
3082 standalone=True)
3083 self.assertEqual(result, _bytes(
3084 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3085
3086 tree = ElementTree(XML(result))
3087 self.assertEqual(True, tree.docinfo.standalone)
3088
3089 result = tostring(root, xml_declaration=True, encoding="ASCII",
3090 standalone=False)
3091 self.assertEqual(result, _bytes(
3092 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3093
3094 tree = ElementTree(XML(result))
3095 self.assertEqual(False, tree.docinfo.standalone)
3096
3116
3118 tostring = self.etree.tostring
3119 Element = self.etree.Element
3120 SubElement = self.etree.SubElement
3121
3122 a = Element('a')
3123 a.text = "A"
3124 a.tail = "tail"
3125 b = SubElement(a, 'b')
3126 b.text = "B"
3127 b.tail = _str("Søk på nettet")
3128 c = SubElement(a, 'c')
3129 c.text = "C"
3130
3131 result = tostring(a, method="text", encoding="UTF-16")
3132
3133 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3134 result)
3135
3137 tostring = self.etree.tostring
3138 Element = self.etree.Element
3139 SubElement = self.etree.SubElement
3140
3141 a = Element('a')
3142 a.text = _str('Søk på nettetA')
3143 a.tail = "tail"
3144 b = SubElement(a, 'b')
3145 b.text = "B"
3146 b.tail = _str('Søk på nettetB')
3147 c = SubElement(a, 'c')
3148 c.text = "C"
3149
3150 self.assertRaises(UnicodeEncodeError,
3151 tostring, a, method="text")
3152
3153 self.assertEqual(
3154 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3155 tostring(a, encoding="UTF-8", method="text"))
3156
3169
3185
3189
3204
3222
3235
3237 tostring = self.etree.tostring
3238 Element = self.etree.Element
3239 SubElement = self.etree.SubElement
3240
3241 a = Element('a')
3242 b = SubElement(a, 'b')
3243 c = SubElement(a, 'c')
3244 d = SubElement(c, 'd')
3245 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3246 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3247 self.assertEqual(_bytes('<b></b>'),
3248 canonicalize(tostring(b, encoding=_unicode)))
3249 self.assertEqual(_bytes('<c><d></d></c>'),
3250 canonicalize(tostring(c, encoding=_unicode)))
3251
3256
3271
3273 tostring = self.etree.tostring
3274 Element = self.etree.Element
3275 SubElement = self.etree.SubElement
3276
3277 a = Element('a')
3278 b = SubElement(a, 'b')
3279 c = SubElement(a, 'c')
3280
3281 result = tostring(a, encoding=_unicode)
3282 self.assertEqual(result, "<a><b/><c/></a>")
3283
3284 result = tostring(a, encoding=_unicode, pretty_print=False)
3285 self.assertEqual(result, "<a><b/><c/></a>")
3286
3287 result = tostring(a, encoding=_unicode, pretty_print=True)
3288 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3289
3301
3302
3303
3304 - def _writeElement(self, element, encoding='us-ascii', compression=0):
3315
3316
3319 filename = fileInTestDir('test_broken.xml')
3320 root = etree.XML(_bytes('''\
3321 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
3322 <xi:include href="%s" parse="text"/>
3323 </doc>
3324 ''' % filename))
3325 old_text = root.text
3326 content = read_file(filename)
3327 old_tail = root[0].tail
3328
3329 self.include( etree.ElementTree(root) )
3330 self.assertEqual(old_text + content + old_tail,
3331 root.text)
3332
3344
# Custom resolver that records which kinds of URL it was asked to resolve.
# NOTE: "called" is a class-level dict, so the entries written through
# self.called are stored on the class, not on the individual instance.
3346 class res(etree.Resolver):
3347 include_text = read_file(fileInTestDir('test.xml'))
3348 called = {}
3349 def resolve(self, url, id, context):
3350 if url.endswith(".dtd"):
# DTD requests are redirected to the local test.dtd file.
3351 self.called["dtd"] = True
3352 return self.resolve_filename(
3353 fileInTestDir('test.dtd'), context)
3354 elif url.endswith("test_xinclude.xml"):
# The input document itself is only recorded; returning None
# presumably leaves resolution to the default machinery (confirm
# against the lxml resolver documentation).
3355 self.called["input"] = True
3356 return None
3357 else:
# Anything else is answered with the preloaded text of test.xml.
3358 self.called["include"] = True
3359 return self.resolve_string(self.include_text, context)
3360 
3361 res_instance = res()
3362 parser = etree.XMLParser(load_dtd = True)
3363 parser.resolvers.add(res_instance)
3364 
3365 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3366 parser = parser)
3367 
# self.include is defined outside this fragment; presumably it performs
# the XInclude processing on the tree (verify in the enclosing class).
3368 self.include(tree)
3369 
# Sort the recorded (kind, flag) pairs so the comparison does not depend
# on dict ordering; all three kinds of request must have been seen.
3370 called = list(res_instance.called.items())
3371 called.sort()
3372 self.assertEqual(
3373 [("dtd", True), ("include", True), ("input", True)],
3374 called)
3375
3379
3380
3385
3386
3389 tree = self.parse(_bytes('<a><b/></a>'))
3390 f = BytesIO()
3391 tree.write_c14n(f)
3392 s = f.getvalue()
3393 self.assertEqual(_bytes('<a><b></b></a>'),
3394 s)
3395
3397 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3398 f = BytesIO()
3399 tree.write_c14n(f, compression=9)
3400 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
3401 try:
3402 s = gzfile.read()
3403 finally:
3404 gzfile.close()
3405 self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
3406 s)
3407
3419
3435
3453
3465
3477
3479 tree = self.parse(_bytes(
3480 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3481 f = BytesIO()
3482 tree.write_c14n(f)
3483 s = f.getvalue()
3484 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3485 s)
3486 f = BytesIO()
3487 tree.write_c14n(f, exclusive=False)
3488 s = f.getvalue()
3489 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3490 s)
3491 f = BytesIO()
3492 tree.write_c14n(f, exclusive=True)
3493 s = f.getvalue()
3494 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3495 s)
3496
3497 f = BytesIO()
3498 tree.write_c14n(f, exclusive=True, inclusive_ns_prefixes=['z'])
3499 s = f.getvalue()
3500 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
3501 s)
3502
3504 tree = self.parse(_bytes(
3505 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3506 s = etree.tostring(tree, method='c14n')
3507 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3508 s)
3509 s = etree.tostring(tree, method='c14n', exclusive=False)
3510 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3511 s)
3512 s = etree.tostring(tree, method='c14n', exclusive=True)
3513 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3514 s)
3515
3516 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
3517 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
3518 s)
3519
3521 tree = self.parse(_bytes(
3522 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3523 s = etree.tostring(tree.getroot(), method='c14n')
3524 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3525 s)
3526 s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
3527 self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3528 s)
3529 s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
3530 self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
3531 s)
3532
3533 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
3534 self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
3535 s)
3536 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
3537 self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
3538 s)
3539
3540 s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
3541 self.assertEqual(_bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
3542 s)
3543
3545 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
3546 tree = self.parse(_bytes(
3547 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
3548
3549 s = etree.tostring(tree, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
3550 self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
3551 s)
3552
3553
3556 tree = self.parse(_bytes('<a><b/></a>'))
3557 f = BytesIO()
3558 tree.write(f)
3559 s = f.getvalue()
3560 self.assertEqual(_bytes('<a><b/></a>'),
3561 s)
3562
3564 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3565 f = BytesIO()
3566 tree.write(f, compression=9)
3567 gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
3568 try:
3569 s = gzfile.read()
3570 finally:
3571 gzfile.close()
3572 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3573 s)
3574
# Writes the same tree at compression levels 0, default, 1 and 9 and checks
# that every variant yields the same XML once decompressed.
3576 tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
3577 f = BytesIO()
3578 tree.write(f, compression=0)
3579 s0 = f.getvalue()
3580 
# Omitting the compression argument must give the same bytes as level 0.
3581 f = BytesIO()
3582 tree.write(f)
3583 self.assertEqual(f.getvalue(), s0)
3584 
# Level 1: output must not be larger than the uncompressed form and must
# be readable as a gzip stream.
3585 f = BytesIO()
3586 tree.write(f, compression=1)
3587 s = f.getvalue()
3588 self.assertTrue(len(s) <= len(s0))
3589 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
3590 try:
3591 s1 = gzfile.read()
3592 finally:
3593 gzfile.close()
3594 
# Level 9: same checks as for level 1.
3595 f = BytesIO()
3596 tree.write(f, compression=9)
3597 s = f.getvalue()
3598 self.assertTrue(len(s) <= len(s0))
3599 gzfile = gzip.GzipFile(fileobj=BytesIO(s))
3600 try:
3601 s9 = gzfile.read()
3602 finally:
3603 gzfile.close()
3604 
# All three payloads must equal the original serialisation.
3605 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3606 s0)
3607 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3608 s1)
3609 self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
3610 s9)
3611
3623
3639
3651
3664
3666 etree = etree
3667
3689
3691 """This can't really be tested as long as there isn't a way to
3692 reset the logging setup ...
3693 """
3694 parse = self.etree.parse
3695
3696 messages = []
3697 class Logger(self.etree.PyErrorLog):
3698 def log(self, entry, message, *args):
3699 messages.append(message)
3700
3701 self.etree.use_global_python_log(Logger())
3702 f = BytesIO('<a><b></c></b></a>')
3703 try:
3704 parse(f)
3705 except SyntaxError:
3706 pass
3707 f.close()
3708
3709 self.assertTrue([ message for message in messages
3710 if 'mismatch' in message ])
3711 self.assertTrue([ message for message in messages
3712 if ':PARSER:' in message])
3713 self.assertTrue([ message for message in messages
3714 if ':ERR_TAG_NAME_MISMATCH:' in message ])
3715 self.assertTrue([ message for message in messages
3716 if ':1:15:' in message ])
3717
3739
# Executing this module directly does not run the tests; it only prints a
# hint that points at test.py.
3740 if __name__ == '__main__':
3741 print('to test use test.py %s' % __file__)
3742