-
-
Notifications
You must be signed in to change notification settings - Fork 34.3k
bpo-32147: Improved perfomance of binascii.unhexlify(). #4586
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4215f0a
4f5e429
bf57e77
6e35854
adc9d23
d42cb6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| :func:`binascii.unhexlify` is now up to 2 times faster. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add "Patch by your name." Add your name in |
||
| Patch by Sergey Fedoseev. | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1130,21 +1130,6 @@ binascii_hexlify_impl(PyObject *module, Py_buffer *data) | |
| return _Py_strhex_bytes((const char *)data->buf, data->len); | ||
| } | ||
|
|
||
| static int | ||
| to_int(int c) | ||
| { | ||
| if (Py_ISDIGIT(c)) | ||
| return c - '0'; | ||
| else { | ||
| if (Py_ISUPPER(c)) | ||
| c = Py_TOLOWER(c); | ||
| if (c >= 'a' && c <= 'f') | ||
| return c - 'a' + 10; | ||
| } | ||
| return -1; | ||
| } | ||
|
|
||
|
|
||
| /*[clinic input] | ||
| binascii.a2b_hex | ||
|
|
||
|
|
@@ -1187,9 +1172,9 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) | |
| retbuf = PyBytes_AS_STRING(retval); | ||
|
|
||
| for (i=j=0; i < arglen; i += 2) { | ||
| int top = to_int(Py_CHARMASK(argbuf[i])); | ||
| int bot = to_int(Py_CHARMASK(argbuf[i+1])); | ||
| if (top == -1 || bot == -1) { | ||
| unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])]; | ||
| unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])]; | ||
| if (top >= 16 || bot >= 16) { | ||
| PyErr_SetString(Error, | ||
| "Non-hexadecimal digit found"); | ||
| goto finally; | ||
|
|
@@ -1218,19 +1203,6 @@ binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr) | |
| return binascii_a2b_hex_impl(module, hexstr); | ||
| } | ||
|
|
||
| static const int table_hex[128] = { | ||
| -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, | ||
| -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, | ||
| -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, | ||
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, | ||
| -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, | ||
| -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, | ||
| -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, | ||
| -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 | ||
| }; | ||
|
|
||
| #define hexval(c) table_hex[(unsigned int)(c)] | ||
|
|
||
| #define MAXLINESIZE 76 | ||
|
|
||
|
|
||
|
|
@@ -1293,9 +1265,9 @@ binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header) | |
| (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') || | ||
| (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) { | ||
| /* hexval */ | ||
| ch = hexval(ascii_data[in]) << 4; | ||
| ch = _PyLong_DigitValue[ascii_data[in]] << 4; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if replace the above checks with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean like here? bf57e77#diff-37791d6fab7c332f1ef6b333d28b7aceR1261
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, exactly. What were the problems with this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It becomes slower than it was for some reason. After: |
||
| in++; | ||
| ch |= hexval(ascii_data[in]); | ||
| ch |= _PyLong_DigitValue[ascii_data[in]]; | ||
| in++; | ||
| odata[out++] = ch; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add tests for
b'0G',b'G0',b'0g'andb'g0'.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's on to you. Since your initial code shared the table with
binascii.a2b_qpI thought that you perhaps want to keep the relation betweenbinascii.a2b_hexandbinascii.a2b_qp.It is better to not squish intermediate commits. Squishing them makes reviewing harder.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@serhiy-storchaka, sorry I removed my previous comment. I'll modify PR to use
_PyLong_DigitValueinbinascii.a2b_qpa bit later.