X Tutup
Skip to content

Commit b35c850

Browse files
committed
python#11684: Complete parser bytes interface by adding BytesHeaderParser
Patch by Steffen Daode Nurpmeso.
1 parent f400ab4 commit b35c850

File tree

5 files changed

+46
-8
lines changed

5 files changed

+46
-8
lines changed

Doc/library/email.parser.rst

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,14 @@ Parser class API
9494
The :class:`Parser` class, imported from the :mod:`email.parser` module,
9595
provides an API that can be used to parse a message when the complete contents
9696
of the message are available in a string or file. The :mod:`email.parser`
97-
module also provides a second class, called :class:`HeaderParser` which can be
98-
used if you're only interested in the headers of the message.
99-
:class:`HeaderParser` can be much faster in these situations, since it does not
100-
attempt to parse the message body, instead setting the payload to the raw body
101-
as a string. :class:`HeaderParser` has the same API as the :class:`Parser`
102-
class.
97+
module also provides header-only parsers, called :class:`HeaderParser` and
98+
:class:`BytesHeaderParser`, which can be used if you're only interested in the
99+
headers of the message. :class:`HeaderParser` and :class:`BytesHeaderParser`
100+
can be much faster in these situations, since they do not attempt to parse the
101+
message body, instead setting the payload to the raw body as a string. They
102+
have the same API as the :class:`Parser` and :class:`BytesParser` classes.
103+
104+
.. versionadded:: 3.3 BytesHeaderParser
103105

104106

105107
.. class:: Parser(_class=email.message.Message)

Lib/email/generator.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,10 +297,12 @@ def _handle_message(self, msg):
297297
# message/rfc822. Such messages are generated by, for example,
298298
# Groupwise when forwarding unadorned messages. (Issue 7970.) So
299299
# in that case we just emit the string body.
300-
payload = msg.get_payload()
300+
payload = msg._payload
301301
if isinstance(payload, list):
302302
g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
303303
payload = s.getvalue()
304+
else:
305+
payload = self._encode(payload)
304306
self._fp.write(payload)
305307

306308
# This used to be a module level function; we use a classmethod for this

Lib/email/parser.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
"""A parser of RFC 2822 and MIME email messages."""
66

7-
__all__ = ['Parser', 'HeaderParser']
7+
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
88

99
import warnings
1010
from io import StringIO, TextIOWrapper
@@ -114,3 +114,11 @@ def parsebytes(self, text, headersonly=False):
114114
"""
115115
text = text.decode('ASCII', errors='surrogateescape')
116116
return self.parser.parsestr(text, headersonly)
117+
118+
119+
class BytesHeaderParser(BytesParser):
    """Header-only variant of BytesParser.

    Both entry points always request header-only parsing from the base
    class. The *headersonly* argument is accepted but its value is
    ignored: ``True`` is passed through unconditionally.
    """

    def parse(self, fp, headersonly=True):
        """Parse the headers from the binary file object *fp*.

        Returns whatever ``BytesParser.parse`` returns when called with
        ``headersonly=True``.
        """
        return super().parse(fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse the headers from the bytes object *text*.

        Returns whatever ``BytesParser.parsebytes`` returns when called
        with ``headersonly=True``.
        """
        return super().parsebytes(text, headersonly=True)

Lib/test/test_email/test_email.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,17 @@ def test_message_rfc822_only(self):
177177
gen.flatten(msg, False)
178178
self.assertEqual(out.getvalue(), msgdata)
179179

180+
def test_byte_message_rfc822_only(self):
    # Round-trip check for the bytes header parser: parsing msg_46.txt
    # with BytesHeaderParser and flattening the result through
    # BytesGenerator must reproduce the input bytes exactly.
    with openfile('msg_46.txt', 'rb') as fp:
        original = fp.read()
    msg = email.parser.BytesHeaderParser().parsebytes(original)
    buf = BytesIO()
    email.generator.BytesGenerator(buf).flatten(msg)
    self.assertEqual(buf.getvalue(), original)
190+
180191
def test_get_decoded_payload(self):
181192
eq = self.assertEqual
182193
msg = self._msgobj('msg_10.txt')
@@ -2749,6 +2760,7 @@ def test_pushCR_LF(self):
27492760

27502761

27512762
class TestParsers(TestEmailBase):
2763+
27522764
def test_header_parser(self):
27532765
eq = self.assertEqual
27542766
# Parse only the headers of a complex multipart MIME document
@@ -2760,6 +2772,18 @@ def test_header_parser(self):
27602772
self.assertFalse(msg.is_multipart())
27612773
self.assertTrue(isinstance(msg.get_payload(), str))
27622774

2775+
def test_bytes_header_parser(self):
    # Parse only the headers of a complex multipart MIME document,
    # reading it as bytes.
    with openfile('msg_02.txt', 'rb') as fp:
        msg = email.parser.BytesHeaderParser().parse(fp)
    self.assertEqual(msg['from'], 'ppp-request@zzz.org')
    self.assertEqual(msg['to'], 'ppp@zzz.org')
    self.assertEqual(msg.get_content_type(), 'multipart/mixed')
    # The body is not parsed into sub-parts: the message is not reported
    # as multipart, its payload is a str, and the decoded payload is bytes.
    self.assertFalse(msg.is_multipart())
    self.assertIsInstance(msg.get_payload(), str)
    self.assertIsInstance(msg.get_payload(decode=True), bytes)
2786+
27632787
def test_whitespace_continuation(self):
27642788
eq = self.assertEqual
27652789
# This message contains a line after the Subject: header that has only

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ Core and Builtins
103103
Library
104104
-------
105105

106+
- Issue #11684: complete email.parser bytes API by adding BytesHeaderParser.
107+
106108
- The bz2 module now handles 4GiB+ input buffers correctly.
107109

108110
- Issue #9233: Fix json.loads('{}') to return a dict (instead of a list), when

0 commit comments

Comments
 (0)
X Tutup