X Tutup
Skip to content

Commit 9850e68

Browse files
committed
fix(HtmlLexer): handle CR in input stream per HTML spec
fixes #5618 Closes #5629
1 parent daaa8ee commit 9850e68

File tree

2 files changed

+59
-20
lines changed

2 files changed

+59
-20
lines changed

modules/angular2/src/compiler/html_lexer.ts

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ const $x = 120;
8383

8484
const $NBSP = 160;
8585

86+
var CRLF_REGEXP = /\r\n/g;
87+
var CR_REGEXP = /\r/g;
88+
8689
function unexpectedCharacterErrorMsg(charCode: number): string {
8790
var char = charCode === $EOF ? 'EOF' : StringWrapper.fromCharCode(charCode);
8891
return `Unexpected character "${char}"`;
@@ -119,6 +122,14 @@ class _HtmlTokenizer {
119122
this._advance();
120123
}
121124

125+
private _processCarriageReturns(content: string): string {
126+
// http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
127+
// In order to keep the original position in the source, we cannot pre-process the input.
128+
// Instead, CRs are processed right before instantiating the tokens.
129+
content = StringWrapper.replaceAll(content, CRLF_REGEXP, '\r');
130+
return StringWrapper.replaceAll(content, CR_REGEXP, '\n');
131+
}
132+
122133
tokenize(): HtmlTokenizeResult {
123134
while (this.peek !== $EOF) {
124135
var start = this._getLocation();
@@ -315,7 +326,7 @@ class _HtmlTokenizer {
315326
parts.push(this._readChar(decodeEntities));
316327
}
317328
}
318-
return this._endToken([parts.join('')], tagCloseStart);
329+
return this._endToken([this._processCarriageReturns(parts.join(''))], tagCloseStart);
319330
}
320331

321332
private _consumeComment(start: ParseLocation) {
@@ -428,7 +439,7 @@ class _HtmlTokenizer {
428439
this._requireUntilFn(isNameEnd, 1);
429440
value = this.input.substring(valueStart, this.index);
430441
}
431-
this._endToken([value]);
442+
this._endToken([this._processCarriageReturns(value)]);
432443
}
433444

434445
private _consumeTagOpenEnd() {
@@ -456,7 +467,7 @@ class _HtmlTokenizer {
456467
while (!isTextEnd(this.peek)) {
457468
parts.push(this._readChar(true));
458469
}
459-
this._endToken([parts.join('')]);
470+
this._endToken([this._processCarriageReturns(parts.join(''))]);
460471
}
461472

462473
private _savePosition(): number[] { return [this.peek, this.index, this.column, this.line]; }

modules/angular2/test/compiler/html_lexer_spec.ts

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,26 +53,38 @@ export function main() {
5353
[HtmlTokenType.EOF, '2:5']
5454
]);
5555
});
56+
57+
it('should work with CR and LF', () => {
58+
expect(tokenizeAndHumanizeLineColumn('<t\n>\r\na\r</t>'))
59+
.toEqual([
60+
[HtmlTokenType.TAG_OPEN_START, '0:0'],
61+
[HtmlTokenType.TAG_OPEN_END, '1:0'],
62+
[HtmlTokenType.TEXT, '1:1'],
63+
[HtmlTokenType.TAG_CLOSE, '2:1'],
64+
[HtmlTokenType.EOF, '2:5']
65+
]);
66+
});
5667
});
5768

5869
describe('comments', () => {
5970
it('should parse comments', () => {
60-
expect(tokenizeAndHumanizeParts('<!--test-->'))
71+
expect(tokenizeAndHumanizeParts('<!--t\ne\rs\r\nt-->'))
6172
.toEqual([
6273
[HtmlTokenType.COMMENT_START],
63-
[HtmlTokenType.RAW_TEXT, 'test'],
74+
[HtmlTokenType.RAW_TEXT, 't\ne\ns\nt'],
6475
[HtmlTokenType.COMMENT_END],
6576
[HtmlTokenType.EOF]
6677
]);
6778
});
6879

69-
it('should store the locations', () => {expect(tokenizeAndHumanizeSourceSpans('<!--test-->'))
70-
.toEqual([
71-
[HtmlTokenType.COMMENT_START, '<!--'],
72-
[HtmlTokenType.RAW_TEXT, 'test'],
73-
[HtmlTokenType.COMMENT_END, '-->'],
74-
[HtmlTokenType.EOF, '']
75-
])});
80+
it('should store the locations',
81+
() => {expect(tokenizeAndHumanizeSourceSpans('<!--t\ne\rs\r\nt-->'))
82+
.toEqual([
83+
[HtmlTokenType.COMMENT_START, '<!--'],
84+
[HtmlTokenType.RAW_TEXT, 't\ne\rs\r\nt'],
85+
[HtmlTokenType.COMMENT_END, '-->'],
86+
[HtmlTokenType.EOF, '']
87+
])});
7688

7789
it('should report <!- without -', () => {
7890
expect(tokenizeAndHumanizeErrors('<!-a'))
@@ -104,20 +116,20 @@ export function main() {
104116

105117
describe('cdata', () => {
106118
it('should parse cdata', () => {
107-
expect(tokenizeAndHumanizeParts('<![cdata[test]]>'))
119+
expect(tokenizeAndHumanizeParts('<![cdata[t\ne\rs\r\nt]]>'))
108120
.toEqual([
109121
[HtmlTokenType.CDATA_START],
110-
[HtmlTokenType.RAW_TEXT, 'test'],
122+
[HtmlTokenType.RAW_TEXT, 't\ne\ns\nt'],
111123
[HtmlTokenType.CDATA_END],
112124
[HtmlTokenType.EOF]
113125
]);
114126
});
115127

116128
it('should store the locations', () => {
117-
expect(tokenizeAndHumanizeSourceSpans('<![cdata[test]]>'))
129+
expect(tokenizeAndHumanizeSourceSpans('<![cdata[t\ne\rs\r\nt]]>'))
118130
.toEqual([
119131
[HtmlTokenType.CDATA_START, '<![cdata['],
120-
[HtmlTokenType.RAW_TEXT, 'test'],
132+
[HtmlTokenType.RAW_TEXT, 't\ne\rs\r\nt'],
121133
[HtmlTokenType.CDATA_END, ']]>'],
122134
[HtmlTokenType.EOF, '']
123135
]);
@@ -301,6 +313,17 @@ export function main() {
301313
]);
302314
});
303315

316+
it('should parse values with CR and LF', () => {
317+
expect(tokenizeAndHumanizeParts("<t a='t\ne\rs\r\nt'>"))
318+
.toEqual([
319+
[HtmlTokenType.TAG_OPEN_START, null, 't'],
320+
[HtmlTokenType.ATTR_NAME, null, 'a'],
321+
[HtmlTokenType.ATTR_VALUE, 't\ne\ns\nt'],
322+
[HtmlTokenType.TAG_OPEN_END],
323+
[HtmlTokenType.EOF]
324+
]);
325+
});
326+
304327
it('should store the locations', () => {
305328
expect(tokenizeAndHumanizeSourceSpans('<t a=b>'))
306329
.toEqual([
@@ -406,6 +429,11 @@ export function main() {
406429
.toEqual([[HtmlTokenType.TEXT, 'a'], [HtmlTokenType.EOF]]);
407430
});
408431

432+
it('should handle CR & LF', () => {
433+
expect(tokenizeAndHumanizeParts('t\ne\rs\r\nt'))
434+
.toEqual([[HtmlTokenType.TEXT, 't\ne\ns\nt'], [HtmlTokenType.EOF]]);
435+
});
436+
409437
it('should parse entities', () => {
410438
expect(tokenizeAndHumanizeParts('a&amp;b'))
411439
.toEqual([[HtmlTokenType.TEXT, 'a&b'], [HtmlTokenType.EOF]]);
@@ -424,11 +452,11 @@ export function main() {
424452

425453
describe('raw text', () => {
426454
it('should parse text', () => {
427-
expect(tokenizeAndHumanizeParts(`<script>a</script>`))
455+
expect(tokenizeAndHumanizeParts(`<script>t\ne\rs\r\nt</script>`))
428456
.toEqual([
429457
[HtmlTokenType.TAG_OPEN_START, null, 'script'],
430458
[HtmlTokenType.TAG_OPEN_END],
431-
[HtmlTokenType.RAW_TEXT, 'a'],
459+
[HtmlTokenType.RAW_TEXT, 't\ne\ns\nt'],
432460
[HtmlTokenType.TAG_CLOSE, null, 'script'],
433461
[HtmlTokenType.EOF]
434462
]);
@@ -482,11 +510,11 @@ export function main() {
482510

483511
describe('escapable raw text', () => {
484512
it('should parse text', () => {
485-
expect(tokenizeAndHumanizeParts(`<title>a</title>`))
513+
expect(tokenizeAndHumanizeParts(`<title>t\ne\rs\r\nt</title>`))
486514
.toEqual([
487515
[HtmlTokenType.TAG_OPEN_START, null, 'title'],
488516
[HtmlTokenType.TAG_OPEN_END],
489-
[HtmlTokenType.ESCAPABLE_RAW_TEXT, 'a'],
517+
[HtmlTokenType.ESCAPABLE_RAW_TEXT, 't\ne\ns\nt'],
490518
[HtmlTokenType.TAG_CLOSE, null, 'title'],
491519
[HtmlTokenType.EOF]
492520
]);

0 commit comments

Comments
 (0)
X Tutup