X Tutup
Skip to content

Commit 36a423f

Browse files
committed
feat(Compiler): case sensitive html parser
close #4417 Closes #5264
1 parent adb8756 commit 36a423f

File tree

13 files changed

+834
-381
lines changed

13 files changed

+834
-381
lines changed

modules/angular2/src/compiler/html_lexer.ts

Lines changed: 66 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import {
66
CONST_EXPR,
77
serializeEnum
88
} from 'angular2/src/facade/lang';
9-
import {BaseException} from 'angular2/src/facade/exceptions';
109
import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
1110
import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';
1211

@@ -50,12 +49,14 @@ export function tokenizeHtml(sourceContent: string, sourceUrl: string): HtmlToke
5049
const $EOF = 0;
5150
const $TAB = 9;
5251
const $LF = 10;
52+
const $FF = 12;
5353
const $CR = 13;
5454

5555
const $SPACE = 32;
5656

5757
const $BANG = 33;
5858
const $DQ = 34;
59+
const $HASH = 35;
5960
const $$ = 36;
6061
const $AMPERSAND = 38;
6162
const $SQ = 39;
@@ -76,7 +77,9 @@ const $Z = 90;
7677
const $LBRACKET = 91;
7778
const $RBRACKET = 93;
7879
const $a = 97;
80+
const $f = 102;
7981
const $z = 122;
82+
const $x = 120;
8083

8184
const $NBSP = 160;
8285

@@ -86,7 +89,7 @@ function unexpectedCharacterErrorMsg(charCode: number): string {
8689
}
8790

8891
function unknownEntityErrorMsg(entitySrc: string): string {
89-
return `Unknown entity "${entitySrc}"`;
92+
return `Unknown entity "${entitySrc}" - use the "&#<decimal>;" or "&#x<hex>;" syntax`;
9093
}
9194

9295
class ControlFlowError {
@@ -249,23 +252,50 @@ class _HtmlTokenizer {
249252

250253
private _readChar(decodeEntities: boolean): string {
251254
if (decodeEntities && this.peek === $AMPERSAND) {
252-
var start = this._getLocation();
253-
this._attemptUntilChar($SEMICOLON);
254-
this._advance();
255-
var entitySrc = this.input.substring(start.offset + 1, this.index - 1);
256-
var decodedEntity = decodeEntity(entitySrc);
257-
if (isPresent(decodedEntity)) {
258-
return decodedEntity;
259-
} else {
260-
throw this._createError(unknownEntityErrorMsg(entitySrc), start);
261-
}
255+
return this._decodeEntity();
262256
} else {
263257
var index = this.index;
264258
this._advance();
265259
return this.input[index];
266260
}
267261
}
268262

263+
/**
 * Decodes the character reference whose `&` is the current character.
 *
 * Two forms are handled:
 * - numeric: `&#<decimal>;`, `&#x<hex>;` and `&#X<hex>;`
 * - named: `&<name>;`, looked up in `NAMED_ENTITIES`
 *
 * When what follows `&` is not a `;`-terminated name, the position saved
 * before scanning the name is restored and a literal `&` is returned.
 *
 * Throws the error built by `_createError` when a numeric reference is not
 * terminated by `;`, when its number cannot be parsed, or when a
 * `;`-terminated name is missing from `NAMED_ENTITIES`.
 */
private _decodeEntity(): string {
  var start = this._getLocation();
  // Step over the '&' that made `_readChar` dispatch here.
  this._advance();
  if (this._attemptChar($HASH)) {
    // HTML accepts both "&#x" and "&#X" as the hexadecimal prefix; 88 is 'X'.
    let isHex = this._attemptChar($x) || this._attemptChar(88);
    let numberStart = this._getLocation().offset;
    this._attemptUntilFn(isDigitEntityEnd);
    if (this.peek != $SEMICOLON) {
      throw this._createError(unexpectedCharacterErrorMsg(this.peek), this._getLocation());
    }
    this._advance();
    // `index - 1` leaves the just-consumed ';' out of the number text.
    let strNum = this.input.substring(numberStart, this.index - 1);
    try {
      let charCode = NumberWrapper.parseInt(strNum, isHex ? 16 : 10);
      // NOTE(review): if StringWrapper.fromCharCode maps to String.fromCharCode,
      // code points above 0xFFFF are truncated - confirm.
      return StringWrapper.fromCharCode(charCode);
    } catch (e) {
      // Report the whole reference text between '&' and ';'.
      let entity = this.input.substring(start.offset + 1, this.index - 1);
      throw this._createError(unknownEntityErrorMsg(entity), start);
    }
  } else {
    let startPosition = this._savePosition();
    this._attemptUntilFn(isNamedEntityEnd);
    if (this.peek != $SEMICOLON) {
      // Not a terminated reference: rewind and emit the '&' as plain text.
      this._restorePosition(startPosition);
      return '&';
    }
    this._advance();
    let name = this.input.substring(start.offset + 1, this.index - 1);
    let char = NAMED_ENTITIES[name];
    if (isBlank(char)) {
      throw this._createError(unknownEntityErrorMsg(name), start);
    }
    return char;
  }
}
298+
269299
private _consumeRawText(decodeEntities: boolean, firstCharOfEnd: number,
270300
attemptEndRest: Function): HtmlToken {
271301
var tagCloseStart;
@@ -428,6 +458,15 @@ class _HtmlTokenizer {
428458
}
429459
this._endToken([parts.join('')]);
430460
}
461+
462+
/** Snapshots the cursor as `[peek, index, column, line]`, the layout `_restorePosition` reads. */
private _savePosition(): number[] {
  const {peek, index, column, line} = this;
  return [peek, index, column, line];
}
463+
464+
/** Resets the cursor from a snapshot laid out as `[peek, index, column, line]`. */
private _restorePosition(position: number[]): void {
  [this.peek, this.index, this.column, this.line] = position;
}
431470
}
432471

433472
function isNotWhitespace(code: number): boolean {
@@ -440,39 +479,29 @@ function isWhitespace(code: number): boolean {
440479

441480
function isNameEnd(code: number): boolean {
442481
return isWhitespace(code) || code === $GT || code === $SLASH || code === $SQ || code === $DQ ||
443-
code === $EQ
482+
code === $EQ;
444483
}
445484

446485
/** Reports whether `code` lies outside all three ranges `a`-`z`, `A`-`Z` and `0`-`9`. */
function isPrefixEnd(code: number): boolean {
  const outside = (low: number, high: number): boolean => code < low || code > high;
  return outside($a, $z) && outside($A, $Z) && outside($0, $9);
}
449488

489+
/**
 * Stop predicate for the digits of a numeric character reference: true at
 * `;`, at `$EOF`, or at any code that `isAsciiHexDigit` rejects.
 */
function isDigitEntityEnd(code: number): boolean {
  if (code == $SEMICOLON || code == $EOF) {
    return true;
  }
  return !isAsciiHexDigit(code);
}
492+
493+
/**
 * Stop predicate for the name of a named character reference: true at `;`,
 * at `$EOF`, or at any code that is neither an ASCII letter nor an ASCII digit.
 *
 * Digits count as name characters because HTML entity names may contain them
 * (e.g. `frac12`, `sup2`, `there4`); stopping at a digit would leave such
 * references undecoded.
 */
function isNamedEntityEnd(code: number): boolean {
  if (code == $SEMICOLON || code == $EOF) {
    return true;
  }
  const isDigit = code >= $0 && code <= $9;
  return !(isAsciiLetter(code) || isDigit);
}
496+
450497
/** Reports whether `code` is `<` or the end-of-input marker `$EOF`. */
function isTextEnd(code: number): boolean {
  switch (code) {
    case $LT:
    case $EOF:
      return true;
    default:
      return false;
  }
}
453500

454-
function decodeEntity(entity: string): string {
455-
var i = 0;
456-
var isNumber = entity.length > i && entity[i] == '#';
457-
if (isNumber) i++;
458-
var isHex = entity.length > i && entity[i] == 'x';
459-
if (isHex) i++;
460-
var value = entity.substring(i);
461-
var result = null;
462-
if (isNumber) {
463-
var charCode;
464-
try {
465-
charCode = NumberWrapper.parseInt(value, isHex ? 16 : 10);
466-
} catch (e) {
467-
return null;
468-
}
469-
result = StringWrapper.fromCharCode(charCode);
470-
} else {
471-
result = NAMED_ENTITIES[value];
472-
}
473-
if (isPresent(result)) {
474-
return result;
475-
} else {
476-
return null;
477-
}
501+
/**
 * Reports whether `code` is an ASCII letter, `a`-`z` or `A`-`Z`.
 *
 * Upper case must be included: entity names are case-sensitive and many
 * contain capitals (e.g. `Omega`, `Eacute`).
 */
function isAsciiLetter(code: number): boolean {
  return (code >= $a && code <= $z) || (code >= $A && code <= $Z);
}
504+
505+
/**
 * Reports whether `code` is an ASCII hexadecimal digit: `0`-`9`, `a`-`f` or
 * `A`-`F`. Hex digits in numeric character references are case-insensitive,
 * so `&#xAB;` must scan the same as `&#xab;`.
 */
function isAsciiHexDigit(code: number): boolean {
  // Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z', so one range check covers both cases.
  const lower = code | 0x20;
  return (lower >= $a && lower <= $f) || (code >= $0 && code <= $9);
}

modules/angular2/src/compiler/html_parser.ts

Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,22 @@ import {
99
serializeEnum,
1010
CONST_EXPR
1111
} from 'angular2/src/facade/lang';
12-
import {DOM} from 'angular2/src/core/dom/dom_adapter';
12+
1313
import {ListWrapper} from 'angular2/src/facade/collection';
1414

1515
import {HtmlAst, HtmlAttrAst, HtmlTextAst, HtmlElementAst} from './html_ast';
1616

17-
import {escapeDoubleQuoteString} from './util';
1817
import {Injectable} from 'angular2/src/core/di';
1918
import {HtmlToken, HtmlTokenType, tokenizeHtml} from './html_lexer';
2019
import {ParseError, ParseLocation, ParseSourceSpan} from './parse_util';
2120
import {HtmlTagDefinition, getHtmlTagDefinition} from './html_tags';
2221

23-
// TODO: remove this, just provide a plain error message!
24-
export enum HtmlTreeErrorType {
25-
UnexpectedClosingTag
26-
}
27-
28-
const HTML_ERROR_TYPE_MSGS = CONST_EXPR(['Unexpected closing tag']);
29-
30-
3122
export class HtmlTreeError extends ParseError {
32-
static create(type: HtmlTreeErrorType, elementName: string,
33-
location: ParseLocation): HtmlTreeError {
34-
return new HtmlTreeError(type, HTML_ERROR_TYPE_MSGS[serializeEnum(type)], elementName,
35-
location);
23+
static create(elementName: string, location: ParseLocation, msg: string): HtmlTreeError {
24+
return new HtmlTreeError(elementName, location, msg);
3625
}
3726

38-
constructor(public type: HtmlTreeErrorType, msg: string, public elementName: string,
39-
location: ParseLocation) {
27+
constructor(public elementName: string, location: ParseLocation, msg: string) {
4028
super(location, msg);
4129
}
4230
}
@@ -55,11 +43,8 @@ export class HtmlParser {
5543
}
5644
}
5745

58-
var NS_PREFIX_RE = /^@[^:]+/g;
59-
6046
class TreeBuilder {
6147
private index: number = -1;
62-
private length: number;
6348
private peek: HtmlToken;
6449

6550
private rootNodes: HtmlAst[] = [];
@@ -129,7 +114,7 @@ class TreeBuilder {
129114
while (this.peek.type === HtmlTokenType.ATTR_NAME) {
130115
attrs.push(this._consumeAttr(this._advance()));
131116
}
132-
var fullName = elementName(prefix, name, this._getParentElement());
117+
var fullName = getElementFullName(prefix, name, this._getParentElement());
133118
var voidElement = false;
134119
// Note: There could have been a tokenizer error
135120
// so that we don't get a token for the end tag...
@@ -150,15 +135,12 @@ class TreeBuilder {
150135
}
151136

152137
private _pushElement(el: HtmlElementAst) {
153-
var stackIndex = this.elementStack.length - 1;
154-
while (stackIndex >= 0) {
155-
var parentEl = this.elementStack[stackIndex];
156-
if (!getHtmlTagDefinition(parentEl.name).isClosedByChild(el.name)) {
157-
break;
138+
if (this.elementStack.length > 0) {
139+
var parentEl = ListWrapper.last(this.elementStack);
140+
if (getHtmlTagDefinition(parentEl.name).isClosedByChild(el.name)) {
141+
this.elementStack.pop();
158142
}
159-
stackIndex--;
160143
}
161-
this.elementStack.splice(stackIndex, this.elementStack.length - 1 - stackIndex);
162144

163145
var tagDef = getHtmlTagDefinition(el.name);
164146
var parentEl = this._getParentElement();
@@ -175,35 +157,29 @@ class TreeBuilder {
175157

176158
private _consumeEndTag(endTagToken: HtmlToken) {
177159
var fullName =
178-
elementName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
160+
getElementFullName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
179161
if (!this._popElement(fullName)) {
180-
this.errors.push(HtmlTreeError.create(HtmlTreeErrorType.UnexpectedClosingTag, fullName,
181-
endTagToken.sourceSpan.start));
162+
this.errors.push(HtmlTreeError.create(fullName, endTagToken.sourceSpan.start,
163+
`Unexpected closing tag "${endTagToken.parts[1]}"`));
182164
}
183165
}
184166

185167
private _popElement(fullName: string): boolean {
186-
var stackIndex = this.elementStack.length - 1;
187-
var hasError = false;
188-
while (stackIndex >= 0) {
168+
for (let stackIndex = this.elementStack.length - 1; stackIndex >= 0; stackIndex--) {
189169
var el = this.elementStack[stackIndex];
190-
if (el.name == fullName) {
191-
break;
170+
if (el.name.toLowerCase() == fullName.toLowerCase()) {
171+
ListWrapper.splice(this.elementStack, stackIndex, this.elementStack.length - stackIndex);
172+
return true;
192173
}
193174
if (!getHtmlTagDefinition(el.name).closedByParent) {
194-
hasError = true;
195-
break;
175+
return false;
196176
}
197-
stackIndex--;
198-
}
199-
if (!hasError) {
200-
this.elementStack.splice(stackIndex, this.elementStack.length - stackIndex);
201177
}
202-
return !hasError;
178+
return false;
203179
}
204180

205181
private _consumeAttr(attrName: HtmlToken): HtmlAttrAst {
206-
var fullName = elementName(attrName.parts[0], attrName.parts[1], null);
182+
var fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
207183
var end = attrName.sourceSpan.end;
208184
var value = '';
209185
if (this.peek.type === HtmlTokenType.ATTR_VALUE) {
@@ -228,20 +204,24 @@ class TreeBuilder {
228204
}
229205
}
230206

231-
function elementName(prefix: string, localName: string, parentElement: HtmlElementAst) {
207+
/**
 * Builds the full name for a (prefix, local name) pair: `@<prefix>:<localName>`
 * when `isPresent(prefix)` holds, otherwise `localName` unchanged.
 */
function mergeNsAndName(prefix: string, localName: string): string {
  if (!isPresent(prefix)) {
    return localName;
  }
  return `@${prefix}:${localName}`;
}
210+
211+
function getElementFullName(prefix: string, localName: string,
212+
parentElement: HtmlElementAst): string {
232213
if (isBlank(prefix)) {
233214
prefix = getHtmlTagDefinition(localName).implicitNamespacePrefix;
215+
if (isBlank(prefix) && isPresent(parentElement)) {
216+
prefix = namespacePrefix(parentElement.name);
217+
}
234218
}
235-
if (isBlank(prefix) && isPresent(parentElement)) {
236-
prefix = namespacePrefix(parentElement.name);
237-
}
238-
if (isPresent(prefix)) {
239-
return `@${prefix}:${localName}`;
240-
} else {
241-
return localName;
242-
}
219+
220+
return mergeNsAndName(prefix, localName);
243221
}
244222

223+
var NS_PREFIX_RE = /^@([^:]+)/g;
224+
245225
function namespacePrefix(elementName: string): string {
246226
var match = RegExpWrapper.firstMatch(NS_PREFIX_RE, elementName);
247227
return isBlank(match) ? null : match[1];

0 commit comments

Comments
 (0)
X Tutup