X Tutup
Skip to content

Commit 2e89e21

Browse files
jsmnbom authored and jh0ker committed
Fix text_markdown and text_html (python-telegram-bot#623)
* Fix text_markdown and text_html
* Missed a few narrow build checks
* Added tests for emoji-first strings and emojis in url
1 parent 6479e15 commit 2e89e21

File tree

2 files changed

+57
-6
lines changed

2 files changed

+57
-6
lines changed

telegram/message.py

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -628,6 +628,9 @@ def text_html(self):
628628
"""
629629
entities = self.parse_entities()
630630
message_text = self.text
631+
if not sys.maxunicode == 0xffff:
632+
message_text = message_text.encode('utf-16-le')
633+
631634
markdown_text = ''
632635
last_offset = 0
633636

@@ -647,10 +650,18 @@ def text_html(self):
647650
else:
648651
insert = text
649652

650-
markdown_text += escape_html(message_text[last_offset:entity.offset]) + insert
653+
if sys.maxunicode == 0xffff:
654+
markdown_text += escape_html(message_text[last_offset:entity.offset]) + insert
655+
else:
656+
markdown_text += escape_html(message_text[last_offset * 2:entity.offset * 2]
657+
.decode('utf-16-le')) + insert
658+
651659
last_offset = entity.offset + entity.length
652660

653-
markdown_text += escape_html(message_text[last_offset:])
661+
if sys.maxunicode == 0xffff:
662+
markdown_text += escape_html(message_text[last_offset:])
663+
else:
664+
markdown_text += escape_html(message_text[last_offset * 2:].decode('utf-16-le'))
654665
return markdown_text
655666

656667
@property
@@ -667,6 +678,9 @@ def text_markdown(self):
667678
"""
668679
entities = self.parse_entities()
669680
message_text = self.text
681+
if not sys.maxunicode == 0xffff:
682+
message_text = message_text.encode('utf-16-le')
683+
670684
markdown_text = ''
671685
last_offset = 0
672686

@@ -685,9 +699,16 @@ def text_markdown(self):
685699
insert = '```' + text + '```'
686700
else:
687701
insert = text
702+
if sys.maxunicode == 0xffff:
703+
markdown_text += escape_markdown(message_text[last_offset:entity.offset]) + insert
704+
else:
705+
markdown_text += escape_markdown(message_text[last_offset * 2:entity.offset * 2]
706+
.decode('utf-16-le')) + insert
688707

689-
markdown_text += escape_markdown(message_text[last_offset:entity.offset]) + insert
690708
last_offset = entity.offset + entity.length
691709

692-
markdown_text += escape_markdown(message_text[last_offset:])
710+
if sys.maxunicode == 0xffff:
711+
markdown_text += escape_markdown(message_text[last_offset:])
712+
else:
713+
markdown_text += escape_markdown(message_text[last_offset * 2:].decode('utf-16-le'))
693714
return markdown_text

tests/test_message.py

Lines changed: 32 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,16 +98,46 @@ def test_parse_entities(self):
9898
{entity: 'http://google.com',
9999
entity_2: 'h'})
100100

101-
def test_text_html(self):
101+
def test_text_html_simple(self):
102102
test_html_string = 'Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, <a href="http://github.com/">links</a> and <pre>pre</pre>.'
103103
text_html = self.test_message.text_html
104104
self.assertEquals(test_html_string, text_html)
105105

106-
def test_text_markdown(self):
106+
def test_text_markdown_simple(self):
107107
test_md_string = 'Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and ```pre```.'
108108
text_markdown = self.test_message.text_markdown
109109
self.assertEquals(test_md_string, text_markdown)
110110

111+
def test_text_html_emoji(self):
112+
text = (b'\\U0001f469\\u200d\\U0001f469\\u200d ABC').decode('unicode-escape')
113+
expected = (b'\\U0001f469\\u200d\\U0001f469\\u200d <b>ABC</b>').decode('unicode-escape')
114+
bold_entity = telegram.MessageEntity(type=telegram.MessageEntity.BOLD, offset=7, length=3)
115+
message = telegram.Message(
116+
message_id=1, from_user=None, date=None, chat=None, text=text, entities=[bold_entity])
117+
self.assertEquals(expected, message.text_html)
118+
119+
def test_text_markdown_emoji(self):
120+
text = (b'\\U0001f469\\u200d\\U0001f469\\u200d ABC').decode('unicode-escape')
121+
expected = (b'\\U0001f469\\u200d\\U0001f469\\u200d *ABC*').decode('unicode-escape')
122+
bold_entity = telegram.MessageEntity(type=telegram.MessageEntity.BOLD, offset=7, length=3)
123+
message = telegram.Message(
124+
message_id=1, from_user=None, date=None, chat=None, text=text, entities=[bold_entity])
125+
self.assertEquals(expected, message.text_markdown)
126+
127+
def test_parse_entities_url_emoji(self):
128+
url = b'http://github.com/?unicode=\\u2713\\U0001f469'.decode('unicode-escape')
129+
text = 'some url'
130+
link_entity = telegram.MessageEntity(type=telegram.MessageEntity.URL, offset=0, length=8, url=url)
131+
message = telegram.Message(
132+
message_id=1,
133+
from_user=None,
134+
date=None,
135+
chat=None,
136+
text=text,
137+
entities=[link_entity])
138+
self.assertDictEqual(message.parse_entities(), {link_entity: text})
139+
self.assertEqual(next(iter(message.parse_entities())).url, url)
140+
111141
@flaky(3, 1)
112142
def test_reply_text(self):
113143
"""Test for Message.reply_text"""

0 commit comments

Comments
 (0)
X Tutup