X Tutup
Skip to content

Commit ee3074e

Browse files
committed
Issue python#22088: Clarify base-64 alphabets and which characters are discarded
* There are only two base-64 alphabets defined by the RFCs, not three
* Due to the internal translation, plus (+) and slash (/) are never discarded
* standard_b64decode() and urlsafe_b64decode() discard characters as well

Also update the doc strings to clarify data types, based on revision 92760d2edc9e, correct the exception raised by b16decode(), and correct the parameter name for the base-85 functions.
1 parent e1d4e58 commit ee3074e

File tree

3 files changed

+92
-79
lines changed

3 files changed

+92
-79
lines changed

Doc/library/base64.rst

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ POST request. The encoding algorithm is not the same as the
2424
There are two interfaces provided by this module. The modern interface
2525
supports encoding :term:`bytes-like objects <bytes-like object>` to ASCII
2626
:class:`bytes`, and decoding :term:`bytes-like objects <bytes-like object>` or
27-
strings containing ASCII to :class:`bytes`. All three :rfc:`3548` defined
28-
alphabets (normal, URL-safe, and filesystem-safe) are supported.
27+
strings containing ASCII to :class:`bytes`. Both base-64 alphabets
28+
defined in :rfc:`3548` (normal, and URL- and filesystem-safe) are supported.
2929

3030
The legacy interface does not support decoding from strings, but it does
3131
provide functions for encoding and decoding to and from :term:`file objects
@@ -69,9 +69,10 @@ The modern interface provides:
6969
A :exc:`binascii.Error` exception is raised
7070
if *s* is incorrectly padded.
7171

72-
If *validate* is ``False`` (the default), non-base64-alphabet characters are
72+
If *validate* is ``False`` (the default), characters that are neither
73+
in the normal base-64 alphabet nor the alternative alphabet are
7374
discarded prior to the padding check. If *validate* is ``True``,
74-
non-base64-alphabet characters in the input result in a
75+
these non-alphabet characters in the input result in a
7576
:exc:`binascii.Error`.
7677

7778

@@ -89,15 +90,17 @@ The modern interface provides:
8990

9091
.. function:: urlsafe_b64encode(s)
9192

92-
Encode :term:`bytes-like object` *s* using a URL-safe alphabet, which
93+
Encode :term:`bytes-like object` *s* using the
94+
URL- and filesystem-safe alphabet, which
9395
substitutes ``-`` for ``+`` and ``_`` for ``/`` in the
9496
standard Base64 alphabet, and return the encoded :class:`bytes`. The result
9597
can still contain ``=``.
9698

9799

98100
.. function:: urlsafe_b64decode(s)
99101

100-
Decode :term:`bytes-like object` or ASCII string *s* using a URL-safe
102+
Decode :term:`bytes-like object` or ASCII string *s*
103+
using the URL- and filesystem-safe
101104
alphabet, which substitutes ``-`` for ``+`` and ``_`` for
102105
``/`` in the standard Base64 alphabet, and return the decoded
103106
:class:`bytes`.
@@ -145,14 +148,14 @@ The modern interface provides:
145148
lowercase alphabet is acceptable as input. For security purposes, the default
146149
is ``False``.
147150

148-
A :exc:`TypeError` is raised if *s* is
151+
A :exc:`binascii.Error` is raised if *s* is
149152
incorrectly padded or if there are non-alphabet characters present in the
150153
input.
151154

152155

153-
.. function:: a85encode(s, *, foldspaces=False, wrapcol=0, pad=False, adobe=False)
156+
.. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False)
154157

155-
Encode the :term:`bytes-like object` *s* using Ascii85 and return the
158+
Encode the :term:`bytes-like object` *b* using Ascii85 and return the
156159
encoded :class:`bytes`.
157160

158161
*foldspaces* is an optional flag that uses the special short sequence 'y'
@@ -172,9 +175,9 @@ The modern interface provides:
172175
.. versionadded:: 3.4
173176

174177

175-
.. function:: a85decode(s, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v')
178+
.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v')
176179

177-
Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *s* and
180+
Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and
178181
return the decoded :class:`bytes`.
179182

180183
*foldspaces* is a flag that specifies whether the 'y' short sequence
@@ -192,9 +195,9 @@ The modern interface provides:
192195
.. versionadded:: 3.4
193196

194197

195-
.. function:: b85encode(s, pad=False)
198+
.. function:: b85encode(b, pad=False)
196199

197-
Encode the :term:`bytes-like object` *s* using base85 (as used in e.g.
200+
Encode the :term:`bytes-like object` *b* using base85 (as used in e.g.
198201
git-style binary diffs) and return the encoded :class:`bytes`.
199202

200203
If *pad* is true, the input is padded with ``b'\0'`` so its length is a

Lib/base64.py

Lines changed: 58 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313

1414
__all__ = [
15-
# Legacy interface exports traditional RFC 1521 Base64 encodings
15+
# Legacy interface exports traditional RFC 2045 Base64 encodings
1616
'encode', 'decode', 'encodebytes', 'decodebytes',
1717
# Generalized interface for other encodings
1818
'b64encode', 'b64decode', 'b32encode', 'b32decode',
@@ -49,14 +49,11 @@ def _bytes_from_decode_data(s):
4949
# Base64 encoding/decoding uses binascii
5050

5151
def b64encode(s, altchars=None):
52-
"""Encode a byte string using Base64.
52+
"""Encode the bytes-like object s using Base64 and return a bytes object.
5353
54-
s is the byte string to encode. Optional altchars must be a byte
55-
string of length 2 which specifies an alternative alphabet for the
56-
'+' and '/' characters. This allows an application to
57-
e.g. generate url or filesystem safe Base64 strings.
58-
59-
The encoded byte string is returned.
54+
Optional altchars should be a byte string of length 2 which specifies an
55+
alternative alphabet for the '+' and '/' characters. This allows an
56+
application to e.g. generate url or filesystem safe Base64 strings.
6057
"""
6158
# Strip off the trailing newline
6259
encoded = binascii.b2a_base64(s)[:-1]
@@ -67,18 +64,19 @@ def b64encode(s, altchars=None):
6764

6865

6966
def b64decode(s, altchars=None, validate=False):
70-
"""Decode a Base64 encoded byte string.
67+
"""Decode the Base64 encoded bytes-like object or ASCII string s.
7168
72-
s is the byte string to decode. Optional altchars must be a
73-
string of length 2 which specifies the alternative alphabet used
74-
instead of the '+' and '/' characters.
69+
Optional altchars must be a bytes-like object or ASCII string of length 2
70+
which specifies the alternative alphabet used instead of the '+' and '/'
71+
characters.
7572
76-
The decoded string is returned. A binascii.Error is raised if s is
77-
incorrectly padded.
73+
The result is returned as a bytes object. A binascii.Error is raised if
74+
s is incorrectly padded.
7875
79-
If validate is False (the default), non-base64-alphabet characters are
80-
discarded prior to the padding check. If validate is True,
81-
non-base64-alphabet characters in the input result in a binascii.Error.
76+
If validate is False (the default), characters that are neither in the
77+
normal base-64 alphabet nor the alternative alphabet are discarded prior
78+
to the padding check. If validate is True, these non-alphabet characters
79+
in the input result in a binascii.Error.
8280
"""
8381
s = _bytes_from_decode_data(s)
8482
if altchars is not None:
@@ -91,19 +89,19 @@ def b64decode(s, altchars=None, validate=False):
9189

9290

9391
def standard_b64encode(s):
94-
"""Encode a byte string using the standard Base64 alphabet.
92+
"""Encode bytes-like object s using the standard Base64 alphabet.
9593
96-
s is the byte string to encode. The encoded byte string is returned.
94+
The result is returned as a bytes object.
9795
"""
9896
return b64encode(s)
9997

10098
def standard_b64decode(s):
101-
"""Decode a byte string encoded with the standard Base64 alphabet.
99+
"""Decode bytes encoded with the standard Base64 alphabet.
102100
103-
s is the byte string to decode. The decoded byte string is
104-
returned. binascii.Error is raised if the input is incorrectly
105-
padded or if there are non-alphabet characters present in the
106-
input.
101+
Argument s is a bytes-like object or ASCII string to decode. The result
102+
is returned as a bytes object. A binascii.Error is raised if the input
103+
is incorrectly padded. Characters that are not in the standard alphabet
104+
are discarded prior to the padding check.
107105
"""
108106
return b64decode(s)
109107

@@ -112,21 +110,22 @@ def standard_b64decode(s):
112110
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
113111

114112
def urlsafe_b64encode(s):
115-
"""Encode a byte string using a url-safe Base64 alphabet.
113+
"""Encode bytes using the URL- and filesystem-safe Base64 alphabet.
116114
117-
s is the byte string to encode. The encoded byte string is
118-
returned. The alphabet uses '-' instead of '+' and '_' instead of
115+
Argument s is a bytes-like object to encode. The result is returned as a
116+
bytes object. The alphabet uses '-' instead of '+' and '_' instead of
119117
'/'.
120118
"""
121119
return b64encode(s).translate(_urlsafe_encode_translation)
122120

123121
def urlsafe_b64decode(s):
124-
"""Decode a byte string encoded with the standard Base64 alphabet.
122+
"""Decode bytes using the URL- and filesystem-safe Base64 alphabet.
125123
126-
s is the byte string to decode. The decoded byte string is
127-
returned. binascii.Error is raised if the input is incorrectly
128-
padded or if there are non-alphabet characters present in the
129-
input.
124+
Argument s is a bytes-like object or ASCII string to decode. The result
125+
is returned as a bytes object. A binascii.Error is raised if the input
126+
is incorrectly padded. Characters that are not in the URL-safe base-64
127+
alphabet, and are not a plus '+' or slash '/', are discarded prior to the
128+
padding check.
130129
131130
The alphabet uses '-' instead of '+' and '_' instead of '/'.
132131
"""
@@ -142,9 +141,7 @@ def urlsafe_b64decode(s):
142141
_b32rev = None
143142

144143
def b32encode(s):
145-
"""Encode a byte string using Base32.
146-
147-
s is the byte string to encode. The encoded byte string is returned.
144+
"""Encode the bytes-like object s using Base32 and return a bytes object.
148145
"""
149146
global _b32tab2
150147
# Delay the initialization of the table to not waste memory
@@ -182,11 +179,10 @@ def b32encode(s):
182179
return bytes(encoded)
183180

184181
def b32decode(s, casefold=False, map01=None):
185-
"""Decode a Base32 encoded byte string.
182+
"""Decode the Base32 encoded bytes-like object or ASCII string s.
186183
187-
s is the byte string to decode. Optional casefold is a flag
188-
specifying whether a lowercase alphabet is acceptable as input.
189-
For security purposes, the default is False.
184+
Optional casefold is a flag specifying whether a lowercase alphabet is
185+
acceptable as input. For security purposes, the default is False.
190186
191187
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
192188
letter O (oh), and for optional mapping of the digit 1 (one) to
@@ -196,7 +192,7 @@ def b32decode(s, casefold=False, map01=None):
196192
the letter O). For security purposes the default is None, so that
197193
0 and 1 are not allowed in the input.
198194
199-
The decoded byte string is returned. binascii.Error is raised if
195+
The result is returned as a bytes object. A binascii.Error is raised if
200196
the input is incorrectly padded or if there are non-alphabet
201197
characters present in the input.
202198
"""
@@ -257,23 +253,20 @@ def b32decode(s, casefold=False, map01=None):
257253
# lowercase. The RFC also recommends against accepting input case
258254
# insensitively.
259255
def b16encode(s):
260-
"""Encode a byte string using Base16.
261-
262-
s is the byte string to encode. The encoded byte string is returned.
256+
"""Encode the bytes-like object s using Base16 and return a bytes object.
263257
"""
264258
return binascii.hexlify(s).upper()
265259

266260

267261
def b16decode(s, casefold=False):
268-
"""Decode a Base16 encoded byte string.
262+
"""Decode the Base16 encoded bytes-like object or ASCII string s.
269263
270-
s is the byte string to decode. Optional casefold is a flag
271-
specifying whether a lowercase alphabet is acceptable as input.
272-
For security purposes, the default is False.
264+
Optional casefold is a flag specifying whether a lowercase alphabet is
265+
acceptable as input. For security purposes, the default is False.
273266
274-
The decoded byte string is returned. binascii.Error is raised if
275-
s were incorrectly padded or if there are non-alphabet characters
276-
present in the string.
267+
The result is returned as a bytes object. A binascii.Error is raised if
268+
s is incorrectly padded or if there are non-alphabet characters present
269+
in the input.
277270
"""
278271
s = _bytes_from_decode_data(s)
279272
if casefold:
@@ -316,19 +309,17 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
316309
return b''.join(chunks)
317310

318311
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
319-
"""Encode a byte string using Ascii85.
320-
321-
b is the byte string to encode. The encoded byte string is returned.
312+
"""Encode bytes-like object b using Ascii85 and return a bytes object.
322313
323314
foldspaces is an optional flag that uses the special short sequence 'y'
324315
instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
325316
feature is not supported by the "standard" Adobe encoding.
326317
327-
wrapcol controls whether the output should have newline ('\\n') characters
318+
wrapcol controls whether the output should have newline (b'\\n') characters
328319
added to it. If this is non-zero, each output line will be at most this
329320
many characters long.
330321
331-
pad controls whether the input string is padded to a multiple of 4 before
322+
pad controls whether the input is padded to a multiple of 4 before
332323
encoding. Note that the btoa implementation always pads.
333324
334325
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
@@ -359,9 +350,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
359350
return result
360351

361352
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
362-
"""Decode an Ascii85 encoded byte string.
363-
364-
s is the byte string to decode.
353+
"""Decode the Ascii85 encoded bytes-like object or ASCII string b.
365354
366355
foldspaces is a flag that specifies whether the 'y' short sequence should be
367356
accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
@@ -373,6 +362,8 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
373362
ignorechars should be a byte string containing characters to ignore from the
374363
input. This should only contain whitespace characters, and by default
375364
contains all whitespace characters in ASCII.
365+
366+
The result is returned as a bytes object.
376367
"""
377368
b = _bytes_from_decode_data(b)
378369
if adobe:
@@ -432,10 +423,10 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
432423
_b85dec = None
433424

434425
def b85encode(b, pad=False):
435-
"""Encode an ASCII-encoded byte array in base85 format.
426+
"""Encode bytes-like object b in base85 format and return a bytes object.
436427
437-
If pad is true, the input is padded with "\\0" so its length is a multiple of
438-
4 characters before encoding.
428+
If pad is true, the input is padded with b'\\0' so its length is a multiple of
429+
4 bytes before encoding.
439430
"""
440431
global _b85chars, _b85chars2
441432
# Delay the initialization of tables to not waste memory
@@ -446,7 +437,10 @@ def b85encode(b, pad=False):
446437
return _85encode(b, _b85chars, _b85chars2, pad)
447438

448439
def b85decode(b):
449-
"""Decode base85-encoded byte array"""
440+
"""Decode the base85-encoded bytes-like object or ASCII string b.
441+
442+
The result is returned as a bytes object.
443+
"""
450444
global _b85dec
451445
# Delay the initialization of tables to not waste memory
452446
# if the function is never called
@@ -531,7 +525,7 @@ def _input_type_check(s):
531525

532526

533527
def encodebytes(s):
534-
"""Encode a bytestring into a bytestring containing multiple lines
528+
"""Encode a bytestring into a bytes object containing multiple lines
535529
of base-64 data."""
536530
_input_type_check(s)
537531
pieces = []
@@ -549,7 +543,7 @@ def encodestring(s):
549543

550544

551545
def decodebytes(s):
552-
"""Decode a bytestring of base-64 data into a bytestring."""
546+
"""Decode a bytestring of base-64 data into a bytes object."""
553547
_input_type_check(s)
554548
return binascii.a2b_base64(s)
555549

Lib/test/test_base64.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,14 +243,26 @@ def test_b64decode_invalid_chars(self):
243243
(b'@@', b''),
244244
(b'!', b''),
245245
(b'YWJj\nYWI=', b'abcab'))
246+
funcs = (
247+
base64.b64decode,
248+
base64.standard_b64decode,
249+
base64.urlsafe_b64decode,
250+
)
246251
for bstr, res in tests:
247-
self.assertEqual(base64.b64decode(bstr), res)
248-
self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
252+
for func in funcs:
253+
with self.subTest(bstr=bstr, func=func):
254+
self.assertEqual(func(bstr), res)
255+
self.assertEqual(func(bstr.decode('ascii')), res)
249256
with self.assertRaises(binascii.Error):
250257
base64.b64decode(bstr, validate=True)
251258
with self.assertRaises(binascii.Error):
252259
base64.b64decode(bstr.decode('ascii'), validate=True)
253260

261+
# Normal alphabet characters not discarded when alternative given
262+
res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
263+
self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
264+
self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
265+
254266
def test_b32encode(self):
255267
eq = self.assertEqual
256268
eq(base64.b32encode(b''), b'')
@@ -360,6 +372,10 @@ def test_b16decode(self):
360372
b'\x01\x02\xab\xcd\xef')
361373
eq(base64.b16decode(array('B', b"0102abcdef"), True),
362374
b'\x01\x02\xab\xcd\xef')
375+
# Non-alphabet characters
376+
self.assertRaises(binascii.Error, base64.b16decode, '0102AG')
377+
# Incorrect "padding"
378+
self.assertRaises(binascii.Error, base64.b16decode, '010')
363379

364380
def test_a85encode(self):
365381
eq = self.assertEqual

0 commit comments

Comments
 (0)
X Tutup