X Tutup
Skip to content

Commit c16adce

Browse files
committed
Adjust PullDOM to use a DOMImplementation instance to create new Document objects; minidom is used if one is not provided to the constructor.
parse(): Pick up the default_bufsize default value dynamically, so that the value in the module may be (meaningfully) changed at runtime. This (partially) closes patch python#102477.
1 parent adf5410 commit c16adce

File tree

1 file changed

+37
-26
lines changed

1 file changed

+37
-26
lines changed

Lib/xml/dom/pulldom.py

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import minidom
2-
import xml.sax,xml.sax.handler
1+
import xml.sax
2+
import xml.sax.handler
33

44
START_ELEMENT = "START_ELEMENT"
55
END_ELEMENT = "END_ELEMENT"
@@ -11,23 +11,28 @@
1111
CHARACTERS = "CHARACTERS"
1212

1313
class PullDOM(xml.sax.ContentHandler):
14-
def __init__(self):
14+
_locator = None
15+
document = None
16+
17+
def __init__(self, documentFactory=None):
18+
self.documentFactory = documentFactory
1519
self.firstEvent = [None, None]
1620
self.lastEvent = self.firstEvent
1721
self._ns_contexts = [{}] # contains uri -> prefix dicts
1822
self._current_context = self._ns_contexts[-1]
1923

20-
def setDocumentLocator(self, locator): pass
24+
def setDocumentLocator(self, locator):
25+
self._locator = locator
2126

2227
def startPrefixMapping(self, prefix, uri):
2328
self._ns_contexts.append(self._current_context.copy())
24-
self._current_context[uri] = prefix
29+
self._current_context[uri] = prefix or ''
2530

2631
def endPrefixMapping(self, prefix):
27-
del self._ns_contexts[-1]
32+
self._current_context = self._ns_contexts.pop()
2833

2934
def startElementNS(self, name, tagName , attrs):
30-
uri,localname = name
35+
uri, localname = name
3136
if uri:
3237
# When using namespaces, the reader may or may not
3338
# provide us with the original name. If not, create
@@ -50,8 +55,7 @@ def startElementNS(self, name, tagName , attrs):
5055
attr.value = value
5156
node.setAttributeNode(attr)
5257

53-
parent = self.curNode
54-
node.parentNode = parent
58+
node.parentNode = self.curNode
5559
self.curNode = node
5660

5761
self.lastEvent[1] = [(START_ELEMENT, node), None]
@@ -63,7 +67,7 @@ def endElementNS(self, name, tagName):
6367
self.lastEvent[1] = [(END_ELEMENT, node), None]
6468
self.lastEvent = self.lastEvent[1]
6569
#self.events.append((END_ELEMENT, node))
66-
self.curNode = node.parentNode
70+
self.curNode = self.curNode.parentNode
6771

6872
def startElement(self, name, attrs):
6973
node = self.document.createElement(name)
@@ -73,8 +77,7 @@ def startElement(self, name, attrs):
7377
attr.value = value
7478
node.setAttributeNode(attr)
7579

76-
parent = self.curNode
77-
node.parentNode = parent
80+
node.parentNode = self.curNode
7881
self.curNode = node
7982

8083
self.lastEvent[1] = [(START_ELEMENT, node), None]
@@ -106,7 +109,7 @@ def processingInstruction(self, target, data):
106109
#self.events.append((PROCESSING_INSTRUCTION, node))
107110

108111
def ignorableWhitespace(self, chars):
109-
node = self.document.createTextNode(chars[start:start + length])
112+
node = self.document.createTextNode(chars)
110113
parent = self.curNode
111114
node.parentNode = parent
112115
self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
@@ -121,20 +124,25 @@ def characters(self, chars):
121124
self.lastEvent = self.lastEvent[1]
122125

123126
def startDocument(self):
124-
node = self.curNode = self.document = minidom.Document()
125-
node.parentNode = None
127+
publicId = systemId = None
128+
if self._locator:
129+
publicId = self._locator.getPublicId()
130+
systemId = self._locator.getSystemId()
131+
if self.documentFactory is None:
132+
import xml.dom.minidom
133+
self.documentFactory = xml.dom.minidom.Document.implementation
134+
node = self.documentFactory.createDocument(None, publicId, systemId)
135+
self.curNode = self.document = node
126136
self.lastEvent[1] = [(START_DOCUMENT, node), None]
127137
self.lastEvent = self.lastEvent[1]
128138
#self.events.append((START_DOCUMENT, node))
129139

130140
def endDocument(self):
131-
assert not self.curNode.parentNode
132-
for node in self.curNode.childNodes:
133-
if node.nodeType == node.ELEMENT_NODE:
134-
self.document.documentElement = node
135-
#if not self.document.documentElement:
136-
# raise Error, "No document element"
137-
141+
assert self.curNode.parentNode is None, \
142+
"not all elements have been properly closed"
143+
assert self.curNode.documentElement is not None, \
144+
"document does not contain a root element"
145+
node = self.curNode.documentElement
138146
self.lastEvent[1] = [(END_DOCUMENT, node), None]
139147
#self.events.append((END_DOCUMENT, self.curNode))
140148

@@ -156,7 +164,7 @@ def __init__(self, stream, parser, bufsize):
156164
def reset(self):
157165
self.pulldom = PullDOM()
158166
# This content handler relies on namespace support
159-
self.parser.setFeature(xml.sax.handler.feature_namespaces,1)
167+
self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
160168
self.parser.setContentHandler(self.pulldom)
161169

162170
def __getitem__(self, pos):
@@ -179,7 +187,7 @@ def getEvent(self):
179187
if not self.pulldom.firstEvent[1]:
180188
self.pulldom.lastEvent = self.pulldom.firstEvent
181189
while not self.pulldom.firstEvent[1]:
182-
buf=self.stream.read(self.bufsize)
190+
buf = self.stream.read(self.bufsize)
183191
if not buf:
184192
#FIXME: why doesn't Expat close work?
185193
#self.parser.close()
@@ -214,10 +222,13 @@ def characters(self, chars):
214222
node = self.lastEvent[0][1]
215223
node.parentNode.appendChild(node)
216224

225+
217226
default_bufsize = (2 ** 14) - 20
218227

219-
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
220-
if type(stream_or_string) is type(""):
228+
def parse(stream_or_string, parser=None, bufsize=None):
229+
if bufsize is None:
230+
bufsize = default_bufsize
231+
if type(stream_or_string) in [type(""), type(u"")]:
221232
stream = open(stream_or_string)
222233
else:
223234
stream = stream_or_string

0 commit comments

Comments
 (0)
X Tutup