diff -r b5530669ef70 Lib/idlelib/PyShell.py
--- a/Lib/idlelib/PyShell.py Wed Sep 04 20:52:14 2013 +0200
+++ b/Lib/idlelib/PyShell.py Thu Sep 05 13:52:14 2013 +0300
@@ -1271,16 +1271,6 @@
self.set_line_and_column()
def write(self, s, tags=()):
- if isinstance(s, str) and len(s) and max(s) > '\uffff':
- # Tk doesn't support outputting non-BMP characters
- # Let's assume what printed string is not very long,
- # find first non-BMP character and construct informative
- # UnicodeEncodeError exception.
- for start, char in enumerate(s):
- if char > '\uffff':
- break
- raise UnicodeEncodeError("UCS-2", char, start, start+1,
- 'Non-BMP character not supported in Tk')
try:
self.text.mark_gravity("iomark", "right")
count = OutputWindow.write(self, s, tags, "iomark")
diff -r b5530669ef70 Lib/test/test_tcl.py
--- a/Lib/test/test_tcl.py Wed Sep 04 20:52:14 2013 +0200
+++ b/Lib/test/test_tcl.py Thu Sep 05 13:52:14 2013 +0300
@@ -163,19 +163,50 @@
self.assertEqual(passValue(False), False)
self.assertEqual(passValue('string'), 'string')
self.assertEqual(passValue('string\u20ac'), 'string\u20ac')
+ self.assertEqual(passValue('string\ud801'), 'string\ud801')
+ self.assertEqual(passValue('string\ud801\udca2'), 'string\ud801\udca2')
+ self.assertEqual(passValue('string\U000104a2'), 'string\U000104a2')
+ self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
+ self.assertEqual(passValue(b'str\x00ing'), 'str\x00ing')
+ self.assertEqual(passValue(b'str\xc0\x80ing'), 'str\x00ing')
for i in (0, 1, -1, 2**31-1, -2**31):
self.assertEqual(passValue(i), i)
for f in (0.0, 1.0, -1.0, 1/3,
sys.float_info.min, sys.float_info.max,
-sys.float_info.min, -sys.float_info.max):
self.assertEqual(passValue(f), f)
- for f in float('nan'), float('inf'), -float('inf'):
- if f != f: # NaN
- self.assertNotEqual(passValue(f), f)
- else:
- self.assertEqual(passValue(f), f)
+ self.assertEqual(passValue(float('inf')), float('inf'))
+ self.assertEqual(passValue(-float('inf')), -float('inf'))
+ x = passValue(float('nan'))
+ self.assertNotEqual(x, x)
self.assertEqual(passValue((1, '2', (3.4,))), (1, '2', (3.4,)))
+    def test_user_command(self):
+        """Round-trip values through a Python-implemented Tcl command."""
+        self.interp.createcommand('testfunc', lambda arg: arg)
+        def testfunc(value):
+            return self.interp.call('testfunc', value)
+        self.assertEqual(testfunc(True), '1')
+        self.assertEqual(testfunc(False), '0')
+        self.assertEqual(testfunc('string'), 'string')
+        self.assertEqual(testfunc('string\u20ac'), 'string\u20ac')
+        self.assertEqual(testfunc('string\udca2'), 'string\ufffd\ufffd\ufffd')
+        self.assertEqual(testfunc('string\ud801\udca2'),
+                         'string\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd')
+        self.assertEqual(testfunc('string\U000104a2'), 'string\U000104a2')
+        # TODO: embedded NUL ('str\x00ing') is not asserted; result unconfirmed.
+        for i in (0, 1, -1, 2**31-1, -2**31):
+            self.assertEqual(testfunc(i), str(i))
+        for f in (0.0, 1.0, -1.0, 1/3,
+                  sys.float_info.min, sys.float_info.max,
+                  -sys.float_info.min, -sys.float_info.max):
+            self.assertEqual(testfunc(f), str(f))
+        self.assertEqual(testfunc(float('inf')), 'Inf')
+        self.assertEqual(testfunc(-float('inf')), '-Inf')
+        self.assertEqual(testfunc(float('nan')), 'NaN')
+        self.assertEqual(testfunc(()), '')
+        self.assertEqual(testfunc((1, '2', (3.4,))), '1 2 3.4')
+
def test_splitlist(self):
splitlist = self.interp.tk.splitlist
call = self.interp.tk.call
diff -r b5530669ef70 Modules/_tkinter.c
--- a/Modules/_tkinter.c Wed Sep 04 20:52:14 2013 +0200
+++ b/Modules/_tkinter.c Thu Sep 05 13:52:14 2013 +0300
@@ -343,6 +343,44 @@
static PyObject *
+fromTclStringAndSize(const char *s, Py_ssize_t size)
+{
+    /* Tcl UTF-8 -> new str reference; returns NULL with an exception set. */
+    PyObject *r = PyUnicode_DecodeUTF8(s, size, NULL);  /* strict attempt */
+    if (!r && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        char *buf = NULL;
+        PyErr_Clear();
+        /* Tcl encodes null character as \xc0\x80 */
+        if (memchr(s, '\xc0', size)) {
+            const char *e = s + size;
+            char *q = buf = (char *)PyMem_Malloc(size);
+            if (buf == NULL)
+                return PyErr_NoMemory();  /* decode error was cleared above */
+            while (s < e) {
+                if (s + 1 < e && s[0] == '\xc0' && s[1] == '\x80') {
+                    *q++ = '\0';
+                    s += 2;
+                }
+                else
+                    *q++ = *s++;
+            }
+            s = buf;
+            size = q - s;
+        }
+        r = PyUnicode_DecodeUTF8(s, size, "replace");  /* bad bytes -> U+FFFD */
+        PyMem_Free(buf);  /* no-op when buf is NULL */
+    }
+    return r;
+}
+
+static PyObject *
+fromTclString(const char *s)
+{
+    return fromTclStringAndSize(s, (Py_ssize_t)strlen(s));  /* s is NUL-terminated */
+}
+
+
+static PyObject *
Split(char *list)
{
int argc;
@@ -358,13 +396,13 @@
* Could be a quoted string containing funnies, e.g. {"}.
* Return the string itself.
*/
- return PyUnicode_FromString(list);
+ return fromTclString(list);
}
if (argc == 0)
v = PyUnicode_FromString("");
else if (argc == 1)
- v = PyUnicode_FromString(argv[0]);
+ v = fromTclString(argv[0]);
else if ((v = PyTuple_New(argc)) != NULL) {
int i;
PyObject *w;
@@ -712,7 +750,7 @@
int len;
if (!self->string) {
s = Tcl_GetStringFromObj(self->value, &len);
- self->string = PyUnicode_FromStringAndSize(s, len);
+ self->string = fromTclStringAndSize(s, len);
if (!self->string)
return NULL;
}
@@ -801,7 +839,7 @@
static PyObject*
get_typename(PyTclObject* obj, void* ignored)
{
- return PyUnicode_FromString(obj->value->typePtr->name);
+ return fromTclString(obj->value->typePtr->name); /* Tcl type name as str, decoded by the shared Tcl-string helper */
}
@@ -888,6 +926,27 @@
return NULL;
}
kind = PyUnicode_KIND(value);
+        if (kind == sizeof(Tcl_UniChar))
+            return Tcl_NewUnicodeObj(inbuf, size);  /* same width: no copy loop */
+        if (PyUnicode_IS_COMPACT_ASCII(value) || kind > sizeof(Tcl_UniChar)) {
+            PyObject *bytes = NULL;
+            const char *utf8 = PyUnicode_AsUTF8AndSize(value, &size);
+            if (utf8 == NULL) {  /* strict UTF-8 failed: retry leniently */
+                PyErr_Clear();  /* do not call the codec with an error pending */
+                bytes = PyUnicode_AsEncodedString(value, "utf-8", "surrogatepass");
+                if (bytes == NULL)
+                    return NULL;
+                utf8 = PyBytes_AS_STRING(bytes);
+                size = PyBytes_GET_SIZE(bytes);
+            }
+            result = NULL;  /* stays NULL on the error path below */
+            if (size > INT_MAX)
+                PyErr_Format(Tkinter_TclError, "string too long");
+            else
+                result = Tcl_NewStringObj(utf8, (int)size);
+            Py_XDECREF(bytes);  /* bytes is NULL when the strict encode worked */
+            return result;
+        }
allocsize = ((size_t)size) * sizeof(Tcl_UniChar);
outbuf = (Tcl_UniChar*)ckalloc(allocsize);
/* Else overflow occurred, and we take the next exit */
@@ -895,23 +954,8 @@
PyErr_NoMemory();
return NULL;
}
- for (i = 0; i < size; i++) {
- Py_UCS4 ch = PyUnicode_READ(kind, inbuf, i);
- /* We cannot test for sizeof(Tcl_UniChar) directly,
- so we test for UTF-8 size instead. */
-#if TCL_UTF_MAX == 3
- if (ch >= 0x10000) {
- /* Tcl doesn't do UTF-16, yet. */
- PyErr_Format(Tkinter_TclError,
- "character U+%x is above the range "
- "(U+0000-U+FFFF) allowed by Tcl",
- ch);
- ckfree(FREECAST outbuf);
- return NULL;
- }
-#endif
- outbuf[i] = ch;
- }
+ for (i = 0; i < size; i++)
+ outbuf[i] = (Tcl_UniChar)PyUnicode_READ(kind, inbuf, i);
result = Tcl_NewUnicodeObj(outbuf, size);
ckfree(FREECAST outbuf);
return result;
@@ -938,8 +982,7 @@
TkappObject *app = (TkappObject*)tkapp;
if (value->typePtr == NULL) {
- return PyUnicode_FromStringAndSize(value->bytes,
- value->length);
+ return fromTclStringAndSize(value->bytes, value->length);
}
if (value->typePtr == app->BooleanType) {
@@ -996,15 +1039,9 @@
}
if (value->typePtr == app->StringType) {
-#if TCL_UTF_MAX==3
return PyUnicode_FromKindAndData(
- PyUnicode_2BYTE_KIND, Tcl_GetUnicode(value),
+ sizeof(Tcl_UniChar), Tcl_GetUnicode(value),
Tcl_GetCharLength(value));
-#else
- return PyUnicode_FromKindAndData(
- PyUnicode_4BYTE_KIND, Tcl_GetUnicode(value),
- Tcl_GetCharLength(value));
-#endif
}
return newPyTclObject(value);
@@ -1110,7 +1147,7 @@
const char *s = Tcl_GetStringResult(self->interp);
const char *p = s;
- res = PyUnicode_FromStringAndSize(s, (int)(p-s));
+ res = fromTclStringAndSize(s, (int)(p-s));
}
return res;
}
@@ -1265,7 +1302,7 @@
if (err == TCL_ERROR)
res = Tkinter_Error(self);
else
- res = PyUnicode_FromString(Tkapp_Result(self));
+ res = fromTclString(Tkapp_Result(self));
LEAVE_OVERLAP_TCL
return res;
}
@@ -1289,7 +1326,7 @@
res = Tkinter_Error(self);
else
- res = PyUnicode_FromString(Tkapp_Result(self));
+ res = fromTclString(Tkapp_Result(self));
LEAVE_OVERLAP_TCL
return res;
}
@@ -1312,7 +1349,7 @@
if (err == TCL_ERROR)
res = Tkinter_Error(self);
else
- res = PyUnicode_FromString(Tkapp_Result(self));
+ res = fromTclString(Tkapp_Result(self));
LEAVE_OVERLAP_TCL
return res;
}
@@ -1535,7 +1572,7 @@
res = FromObj(self, tres);
}
else {
- res = PyUnicode_FromString(Tcl_GetString(tres));
+ res = fromTclString(Tcl_GetString(tres));
}
}
LEAVE_OVERLAP_TCL
@@ -1674,7 +1711,7 @@
if (retval == TCL_ERROR)
res = Tkinter_Error(self);
else
- res = Py_BuildValue("s", Tkapp_Result(self));
+ res = fromTclString(Tkapp_Result(self));
LEAVE_OVERLAP_TCL
return res;
}
@@ -1799,7 +1836,7 @@
goto finally;
for (i = 0; i < argc; i++) {
- PyObject *s = PyUnicode_FromString(argv[i]);
+ PyObject *s = fromTclString(argv[i]);
if (!s || PyTuple_SetItem(v, i, s)) {
Py_DECREF(v);
v = NULL;
@@ -1897,20 +1934,8 @@
return PythonCmd_Error(interp);
for (i = 0; i < (argc - 1); i++) {
- PyObject *s = PyUnicode_FromString(argv[i + 1]);
- if (!s) {
- /* Is Tk leaking 0xC080 in %A - a "modified" utf-8 null? */
- if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) &&
- !strcmp(argv[i + 1], "\xC0\x80")) {
- PyErr_Clear();
- /* Convert to "strict" utf-8 null */
- s = PyUnicode_FromString("\0");
- } else {
- Py_DECREF(arg);
- return PythonCmd_Error(interp);
- }
- }
- if (PyTuple_SetItem(arg, i, s)) {
+ PyObject *s = fromTclString(argv[i + 1]);
+ if (!s || PyTuple_SetItem(arg, i, s)) {
Py_DECREF(arg);
return PythonCmd_Error(interp);
}