X Tutup
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5147,6 +5147,95 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
}


/* UTF-8 encoder using the surrogateescape error handler .

On success, return a pointer to a newly allocated character string (use
PyMem_Free() to free the memory).

On encoding failure, return NULL and write the position of the invalid
surrogate character into *error_pos (if error_pos is set).

On memory allocation failure, return NULL and write (size_t)-1 into
*error_pos (if error_pos is set). */
char*
_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
{
const Py_ssize_t max_char_size = 4;
Py_ssize_t len = wcslen(text);

assert(len >= 0);

char *bytes;
if (len <= PY_SSIZE_T_MAX / max_char_size - 1) {
bytes = PyMem_Malloc((len + 1) * max_char_size);
}
else {
bytes = NULL;
}
if (bytes == NULL) {
if (error_pos != NULL) {
*error_pos = (size_t)-1;
}
return NULL;
}

char *p = bytes;
Py_ssize_t i;
for (i = 0; i < len;) {
Py_UCS4 ch = text[i++];

if (ch < 0x80) {
/* Encode ASCII */
*p++ = (char) ch;

}
else if (ch < 0x0800) {
/* Encode Latin-1 */
*p++ = (char)(0xc0 | (ch >> 6));
*p++ = (char)(0x80 | (ch & 0x3f));
}
else if (Py_UNICODE_IS_SURROGATE(ch)) {
/* surrogateescape error handler */
if (!(0xDC80 <= ch && ch <= 0xDCFF)) {
if (error_pos != NULL) {
*error_pos = (size_t)i - 1;
}
goto error;
}
*p++ = (char)(ch & 0xff);
}
else if (ch < 0x10000) {
*p++ = (char)(0xe0 | (ch >> 12));
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
}
else { /* ch >= 0x10000 */
assert(ch <= MAX_UNICODE);
/* Encode UCS4 Unicode ordinals */
*p++ = (char)(0xf0 | (ch >> 18));
*p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
}
}
*p++ = '\0';

size_t final_size = (p - bytes);
char *bytes2 = PyMem_Realloc(bytes, final_size);
if (bytes2 == NULL) {
if (error_pos != NULL) {
*error_pos = (size_t)-1;
}
goto error;
}
return bytes2;

error:
PyMem_Free(bytes);
return NULL;
}


/* Primary internal function which creates utf8 encoded bytes objects.

Allocation strategy: if the string is short, convert into a stack buffer
Expand Down
42 changes: 4 additions & 38 deletions Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ extern int winerror_to_errno(int);

/* UTF-8 <-> wide character helpers that use the surrogateescape error
   handler.  They are declared by hand here rather than through a header;
   the definitions live in Objects/unicodeobject.c. */
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size,
size_t *p_wlen);
extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text,
size_t *error_pos);

#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:
Expand Down Expand Up @@ -418,42 +420,6 @@ Py_DecodeLocale(const char* arg, size_t *size)
#endif /* __APPLE__ or __ANDROID__ */
}

/* Encode the NUL-terminated wide character string text to UTF-8 with the
   "surrogateescape" error handler, going through a temporary str object and
   a temporary bytes object.

   On success, return a NUL-terminated copy of the encoded bytes in a buffer
   allocated with PyMem_Malloc().

   On any failure (creating the str object, encoding it, or allocating the
   result), clear the pending Python exception, write (size_t)-1 into
   *error_pos (if error_pos is set) and return NULL.  The position of an
   unencodable character is not reported by this function. */
static char*
_Py_EncodeLocaleUTF8(const wchar_t *text, size_t *error_pos)
{
    Py_ssize_t len;
    PyObject *unicode, *bytes = NULL;
    char *cpath;

    unicode = PyUnicode_FromWideChar(text, wcslen(text));
    if (unicode == NULL) {
        /* Handle this failure like the ones below: do not leave an
           exception set and do not leave *error_pos unwritten. */
        goto error;
    }

    bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
    Py_DECREF(unicode);
    if (bytes == NULL) {
        goto error;
    }

    len = PyBytes_GET_SIZE(bytes);
    cpath = PyMem_Malloc(len + 1);
    if (cpath == NULL) {
        Py_DECREF(bytes);
        goto error;
    }
    /* len + 1: also copy the NUL byte that terminates the bytes object's
       internal buffer. */
    memcpy(cpath, PyBytes_AsString(bytes), len + 1);
    Py_DECREF(bytes);
    return cpath;

error:
    PyErr_Clear();
    if (error_pos != NULL) {
        *error_pos = (size_t)-1;
    }
    return NULL;
}

#if !defined(__APPLE__) && !defined(__ANDROID__)
static char*
Expand Down Expand Up @@ -537,10 +503,10 @@ char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeLocaleUTF8(text, error_pos);
return _Py_EncodeUTF8_surrogateescape(text, error_pos);
#else /* __APPLE__ */
if (Py_UTF8Mode == 1) {
return _Py_EncodeLocaleUTF8(text, error_pos);
return _Py_EncodeUTF8_surrogateescape(text, error_pos);
}

#ifndef MS_WINDOWS
Expand Down
X Tutup