X Tutup
Skip to content

Commit a8edc1e

Browse files
committed
feat(Compiler): case sensitive html parser
close #4417 Closes #5264
1 parent 86aeb8b commit a8edc1e

File tree

13 files changed

+737
-344
lines changed

13 files changed

+737
-344
lines changed

modules/angular2/src/compiler/html_lexer.ts

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import {
66
CONST_EXPR,
77
serializeEnum
88
} from 'angular2/src/facade/lang';
9-
import {BaseException} from 'angular2/src/facade/exceptions';
109
import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
1110
import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';
1211

@@ -50,6 +49,7 @@ export function tokenizeHtml(sourceContent: string, sourceUrl: string): HtmlToke
5049
const $EOF = 0;
5150
const $TAB = 9;
5251
const $LF = 10;
52+
const $FF = 12;
5353
const $CR = 13;
5454

5555
const $SPACE = 32;
@@ -247,17 +247,22 @@ class _HtmlTokenizer {
247247
}
248248
}
249249

250-
private _readChar(decodeEntities: boolean): string {
250+
private _readChar(decodeEntities: boolean, extraNotCharRef: number = null): string {
251251
if (decodeEntities && this.peek === $AMPERSAND) {
252252
var start = this._getLocation();
253-
this._attemptUntilChar($SEMICOLON);
254253
this._advance();
255-
var entitySrc = this.input.substring(start.offset + 1, this.index - 1);
256-
var decodedEntity = decodeEntity(entitySrc);
257-
if (isPresent(decodedEntity)) {
258-
return decodedEntity;
254+
if (isCharRefStart(this.peek, extraNotCharRef)) {
255+
this._attemptUntilChar($SEMICOLON);
256+
this._advance();
257+
var entitySrc = this.input.substring(start.offset + 1, this.index - 1);
258+
var decodedEntity = decodeEntity(entitySrc);
259+
if (isPresent(decodedEntity)) {
260+
return decodedEntity;
261+
} else {
262+
throw this._createError(unknownEntityErrorMsg(entitySrc), start);
263+
}
259264
} else {
260-
throw this._createError(unknownEntityErrorMsg(entitySrc), start);
265+
return '&';
261266
}
262267
} else {
263268
var index = this.index;
@@ -389,7 +394,7 @@ class _HtmlTokenizer {
389394
this._advance();
390395
var parts = [];
391396
while (this.peek !== quoteChar) {
392-
parts.push(this._readChar(true));
397+
parts.push(this._readChar(true, quoteChar));
393398
}
394399
value = parts.join('');
395400
this._advance();
@@ -440,7 +445,13 @@ function isWhitespace(code: number): boolean {
440445

441446
function isNameEnd(code: number): boolean {
442447
return isWhitespace(code) || code === $GT || code === $SLASH || code === $SQ || code === $DQ ||
443-
code === $EQ
448+
code === $EQ;
449+
}
450+
451+
// http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
452+
function isCharRefStart(code: number, extraNotCharRef: number): boolean {
453+
return code != $TAB && code != $LF && code != $FF && code != $SPACE && code != $LT &&
454+
code != $AMPERSAND && code != $EOF && code !== extraNotCharRef;
444455
}
445456

446457
function isPrefixEnd(code: number): boolean {

modules/angular2/src/compiler/html_parser.ts

Lines changed: 31 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,22 @@ import {
99
serializeEnum,
1010
CONST_EXPR
1111
} from 'angular2/src/facade/lang';
12-
import {DOM} from 'angular2/src/core/dom/dom_adapter';
12+
1313
import {ListWrapper} from 'angular2/src/facade/collection';
1414

1515
import {HtmlAst, HtmlAttrAst, HtmlTextAst, HtmlElementAst} from './html_ast';
1616

17-
import {escapeDoubleQuoteString} from './util';
1817
import {Injectable} from 'angular2/src/core/di';
1918
import {HtmlToken, HtmlTokenType, tokenizeHtml} from './html_lexer';
2019
import {ParseError, ParseLocation, ParseSourceSpan} from './parse_util';
2120
import {HtmlTagDefinition, getHtmlTagDefinition} from './html_tags';
2221

23-
// TODO: remove this, just provide a plain error message!
24-
export enum HtmlTreeErrorType {
25-
UnexpectedClosingTag
26-
}
27-
28-
const HTML_ERROR_TYPE_MSGS = CONST_EXPR(['Unexpected closing tag']);
29-
30-
3122
export class HtmlTreeError extends ParseError {
32-
static create(type: HtmlTreeErrorType, elementName: string,
33-
location: ParseLocation): HtmlTreeError {
34-
return new HtmlTreeError(type, HTML_ERROR_TYPE_MSGS[serializeEnum(type)], elementName,
35-
location);
23+
static create(elementName: string, location: ParseLocation, msg: string): HtmlTreeError {
24+
return new HtmlTreeError(elementName, location, msg);
3625
}
3726

38-
constructor(public type: HtmlTreeErrorType, msg: string, public elementName: string,
39-
location: ParseLocation) {
27+
constructor(public elementName: string, location: ParseLocation, msg: string) {
4028
super(location, msg);
4129
}
4230
}
@@ -55,11 +43,8 @@ export class HtmlParser {
5543
}
5644
}
5745

58-
var NS_PREFIX_RE = /^@[^:]+/g;
59-
6046
class TreeBuilder {
6147
private index: number = -1;
62-
private length: number;
6348
private peek: HtmlToken;
6449

6550
private rootNodes: HtmlAst[] = [];
@@ -129,7 +114,7 @@ class TreeBuilder {
129114
while (this.peek.type === HtmlTokenType.ATTR_NAME) {
130115
attrs.push(this._consumeAttr(this._advance()));
131116
}
132-
var fullName = elementName(prefix, name, this._getParentElement());
117+
var fullName = getElementFullName(prefix, name, this._getParentElement());
133118
var voidElement = false;
134119
// Note: There could have been a tokenizer error
135120
// so that we don't get a token for the end tag...
@@ -150,15 +135,13 @@ class TreeBuilder {
150135
}
151136

152137
private _pushElement(el: HtmlElementAst) {
153-
var stackIndex = this.elementStack.length - 1;
154-
while (stackIndex >= 0) {
138+
for (var stackIndex = this.elementStack.length - 1; stackIndex >= 0; stackIndex--) {
155139
var parentEl = this.elementStack[stackIndex];
156-
if (!getHtmlTagDefinition(parentEl.name).isClosedByChild(el.name)) {
140+
if (getHtmlTagDefinition(parentEl.name).isClosedByChild(el.name)) {
141+
ListWrapper.splice(this.elementStack, stackIndex, this.elementStack.length - stackIndex);
157142
break;
158143
}
159-
stackIndex--;
160144
}
161-
this.elementStack.splice(stackIndex, this.elementStack.length - 1 - stackIndex);
162145

163146
var tagDef = getHtmlTagDefinition(el.name);
164147
var parentEl = this._getParentElement();
@@ -175,35 +158,29 @@ class TreeBuilder {
175158

176159
private _consumeEndTag(endTagToken: HtmlToken) {
177160
var fullName =
178-
elementName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
161+
getElementFullName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
179162
if (!this._popElement(fullName)) {
180-
this.errors.push(HtmlTreeError.create(HtmlTreeErrorType.UnexpectedClosingTag, fullName,
181-
endTagToken.sourceSpan.start));
163+
this.errors.push(HtmlTreeError.create(fullName, endTagToken.sourceSpan.start,
164+
`Unexpected closing tag "${endTagToken.parts[1]}"`));
182165
}
183166
}
184167

185168
private _popElement(fullName: string): boolean {
186-
var stackIndex = this.elementStack.length - 1;
187-
var hasError = false;
188-
while (stackIndex >= 0) {
169+
for (let stackIndex = this.elementStack.length - 1; stackIndex >= 0; stackIndex--) {
189170
var el = this.elementStack[stackIndex];
190-
if (el.name == fullName) {
191-
break;
171+
if (el.name.toLowerCase() == fullName.toLowerCase()) {
172+
ListWrapper.splice(this.elementStack, stackIndex, this.elementStack.length - stackIndex);
173+
return true;
192174
}
193175
if (!getHtmlTagDefinition(el.name).closedByParent) {
194-
hasError = true;
195-
break;
176+
return false;
196177
}
197-
stackIndex--;
198178
}
199-
if (!hasError) {
200-
this.elementStack.splice(stackIndex, this.elementStack.length - stackIndex);
201-
}
202-
return !hasError;
179+
return false;
203180
}
204181

205182
private _consumeAttr(attrName: HtmlToken): HtmlAttrAst {
206-
var fullName = elementName(attrName.parts[0], attrName.parts[1], null);
183+
var fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
207184
var end = attrName.sourceSpan.end;
208185
var value = '';
209186
if (this.peek.type === HtmlTokenType.ATTR_VALUE) {
@@ -228,20 +205,24 @@ class TreeBuilder {
228205
}
229206
}
230207

231-
function elementName(prefix: string, localName: string, parentElement: HtmlElementAst) {
208+
function mergeNsAndName(prefix: string, localName: string): string {
209+
return isPresent(prefix) ? `@${prefix}:${localName}` : localName;
210+
}
211+
212+
function getElementFullName(prefix: string, localName: string,
213+
parentElement: HtmlElementAst): string {
232214
if (isBlank(prefix)) {
233215
prefix = getHtmlTagDefinition(localName).implicitNamespacePrefix;
216+
if (isBlank(prefix) && isPresent(parentElement)) {
217+
prefix = namespacePrefix(parentElement.name);
218+
}
234219
}
235-
if (isBlank(prefix) && isPresent(parentElement)) {
236-
prefix = namespacePrefix(parentElement.name);
237-
}
238-
if (isPresent(prefix)) {
239-
return `@${prefix}:${localName}`;
240-
} else {
241-
return localName;
242-
}
220+
221+
return mergeNsAndName(prefix, localName);
243222
}
244223

224+
var NS_PREFIX_RE = /^@([^:]+)/g;
225+
245226
function namespacePrefix(elementName: string): string {
246227
var match = RegExpWrapper.firstMatch(NS_PREFIX_RE, elementName);
247228
return isBlank(match) ? null : match[1];

modules/angular2/src/compiler/html_tags.ts

Lines changed: 93 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,61 @@
11
import {isPresent, isBlank, normalizeBool, CONST_EXPR} from 'angular2/src/facade/lang';
22

3-
// TODO: fill this!
4-
export const NAMED_ENTITIES: {[key: string]: string} = <any>CONST_EXPR({'amp': '&'});
3+
// see http://www.w3.org/TR/html51/syntax.html#named-character-references
4+
// see https://html.spec.whatwg.org/multipage/entities.json
5+
// This list is not exhaustive to keep the compiler footprint low.
6+
// Use the numeric `&#123;` / `&#x1ab;` syntax for any character whose named reference is not in this list.
7+
export const NAMED_ENTITIES = CONST_EXPR({
8+
'lt': '<',
9+
'gt': '>',
10+
'nbsp': '\u00A0',
11+
'amp': '&',
12+
'Aacute': '\u00C1',
13+
'Acirc': '\u00C2',
14+
'Agrave': '\u00C0',
15+
'Atilde': '\u00C3',
16+
'Auml': '\u00C4',
17+
'Ccedil': '\u00C7',
18+
'Eacute': '\u00C9',
19+
'Ecirc': '\u00CA',
20+
'Egrave': '\u00C8',
21+
'Euml': '\u00CB',
22+
'Iacute': '\u00CD',
23+
'Icirc': '\u00CE',
24+
'Igrave': '\u00CC',
25+
'Iuml': '\u00CF',
26+
'Oacute': '\u00D3',
27+
'Ocirc': '\u00D4',
28+
'Ograve': '\u00D2',
29+
'Otilde': '\u00D5',
30+
'Ouml': '\u00D6',
31+
'Uacute': '\u00DA',
32+
'Ucirc': '\u00DB',
33+
'Ugrave': '\u00D9',
34+
'Uuml': '\u00DC',
35+
'aacute': '\u00E1',
36+
'acirc': '\u00E2',
37+
'agrave': '\u00E0',
38+
'atilde': '\u00E3',
39+
'auml': '\u00E4',
40+
'ccedil': '\u00E7',
41+
'eacute': '\u00E9',
42+
'ecirc': '\u00EA',
43+
'egrave': '\u00E8',
44+
'euml': '\u00EB',
45+
'iacute': '\u00ED',
46+
'icirc': '\u00EE',
47+
'igrave': '\u00EC',
48+
'iuml': '\u00EF',
49+
'oacute': '\u00F3',
50+
'ocirc': '\u00F4',
51+
'ograve': '\u00F2',
52+
'otilde': '\u00F5',
53+
'ouml': '\u00F6',
54+
'uacute': '\u00FA',
55+
'ucirc': '\u00FB',
56+
'ugrave': '\u00F9',
57+
'uuml': '\u00FC',
58+
});
559

660
export enum HtmlTagContentType {
761
RAW_TEXT,
@@ -11,54 +65,72 @@ export enum HtmlTagContentType {
1165

1266
export class HtmlTagDefinition {
1367
private closedByChildren: {[key: string]: boolean} = {};
14-
public closedByParent: boolean;
68+
public closedByParent: boolean = false;
1569
public requiredParent: string;
1670
public implicitNamespacePrefix: string;
1771
public contentType: HtmlTagContentType;
1872

1973
constructor({closedByChildren, requiredParent, implicitNamespacePrefix, contentType}: {
20-
closedByChildren?: string[],
74+
closedByChildren?: string,
2175
requiredParent?: string,
2276
implicitNamespacePrefix?: string,
2377
contentType?: HtmlTagContentType
2478
} = {}) {
25-
if (isPresent(closedByChildren)) {
26-
closedByChildren.forEach(tagName => this.closedByChildren[tagName] = true);
79+
if (isPresent(closedByChildren) && closedByChildren.length > 0) {
80+
closedByChildren.split(',').forEach(tagName => this.closedByChildren[tagName.trim()] = true);
81+
this.closedByParent = true;
2782
}
28-
this.closedByParent = isPresent(closedByChildren) && closedByChildren.length > 0;
2983
this.requiredParent = requiredParent;
3084
this.implicitNamespacePrefix = implicitNamespacePrefix;
3185
this.contentType = isPresent(contentType) ? contentType : HtmlTagContentType.PARSABLE_DATA;
3286
}
3387

34-
requireExtraParent(currentParent: string) {
88+
requireExtraParent(currentParent: string): boolean {
3589
return isPresent(this.requiredParent) &&
36-
(isBlank(currentParent) || this.requiredParent != currentParent.toLocaleLowerCase());
90+
(isBlank(currentParent) || this.requiredParent != currentParent.toLowerCase());
3791
}
3892

39-
isClosedByChild(name: string) {
93+
isClosedByChild(name: string): boolean {
4094
return normalizeBool(this.closedByChildren['*']) ||
4195
normalizeBool(this.closedByChildren[name.toLowerCase()]);
4296
}
4397
}
4498

45-
// TODO: Fill this table using
46-
// https://github.com/greim/html-tokenizer/blob/master/parser.js
47-
// and http://www.w3.org/TR/html51/syntax.html#optional-tags
99+
// see http://www.w3.org/TR/html51/syntax.html#optional-tags
100+
// This table only approximates the optional-tag rules; it does not fully conform to the HTML5 spec.
48101
var TAG_DEFINITIONS: {[key: string]: HtmlTagDefinition} = {
49-
'link': new HtmlTagDefinition({closedByChildren: ['*']}),
50-
'ng-content': new HtmlTagDefinition({closedByChildren: ['*']}),
51-
'img': new HtmlTagDefinition({closedByChildren: ['*']}),
52-
'input': new HtmlTagDefinition({closedByChildren: ['*']}),
53-
'p': new HtmlTagDefinition({closedByChildren: ['p']}),
54-
'tr': new HtmlTagDefinition({closedByChildren: ['tr'], requiredParent: 'tbody'}),
55-
'col': new HtmlTagDefinition({closedByChildren: ['col'], requiredParent: 'colgroup'}),
102+
'link': new HtmlTagDefinition({closedByChildren: '*'}),
103+
'ng-content': new HtmlTagDefinition({closedByChildren: '*'}),
104+
'img': new HtmlTagDefinition({closedByChildren: '*'}),
105+
'input': new HtmlTagDefinition({closedByChildren: '*'}),
106+
'hr': new HtmlTagDefinition({closedByChildren: '*'}),
107+
'br': new HtmlTagDefinition({closedByChildren: '*'}),
108+
'wbr': new HtmlTagDefinition({closedByChildren: '*'}),
109+
'p': new HtmlTagDefinition({
110+
closedByChildren:
111+
'address,article,aside,blockquote,div,dl,fieldset,footer,form,h1,h2,h3,h4,h5,h6,header,hgroup,hr,main,nav,ol,p,pre,section,table,ul'
112+
}),
113+
'thead': new HtmlTagDefinition({closedByChildren: 'tbody,tfoot'}),
114+
'tbody': new HtmlTagDefinition({closedByChildren: 'tbody,tfoot'}),
115+
'tfoot': new HtmlTagDefinition({closedByChildren: 'tbody'}),
116+
'tr': new HtmlTagDefinition({closedByChildren: 'tr', requiredParent: 'tbody'}),
117+
'td': new HtmlTagDefinition({closedByChildren: 'td,th'}),
118+
'th': new HtmlTagDefinition({closedByChildren: 'td,th'}),
119+
'col': new HtmlTagDefinition({closedByChildren: 'col', requiredParent: 'colgroup'}),
56120
'svg': new HtmlTagDefinition({implicitNamespacePrefix: 'svg'}),
57121
'math': new HtmlTagDefinition({implicitNamespacePrefix: 'math'}),
122+
'li': new HtmlTagDefinition({closedByChildren: 'li'}),
123+
'dt': new HtmlTagDefinition({closedByChildren: 'dt,dd'}),
124+
'dd': new HtmlTagDefinition({closedByChildren: 'dt,dd'}),
125+
'rb': new HtmlTagDefinition({closedByChildren: 'rb,rt,rtc,rp'}),
126+
'rt': new HtmlTagDefinition({closedByChildren: 'rb,rt,rtc,rp'}),
127+
'rtc': new HtmlTagDefinition({closedByChildren: 'rb,rtc,rp'}),
128+
'rp': new HtmlTagDefinition({closedByChildren: 'rb,rt,rtc,rp'}),
129+
'optgroup': new HtmlTagDefinition({closedByChildren: 'optgroup'}),
58130
'style': new HtmlTagDefinition({contentType: HtmlTagContentType.RAW_TEXT}),
59131
'script': new HtmlTagDefinition({contentType: HtmlTagContentType.RAW_TEXT}),
60132
'title': new HtmlTagDefinition({contentType: HtmlTagContentType.ESCAPABLE_RAW_TEXT}),
61-
'textarea': new HtmlTagDefinition({contentType: HtmlTagContentType.ESCAPABLE_RAW_TEXT})
133+
'textarea': new HtmlTagDefinition({contentType: HtmlTagContentType.ESCAPABLE_RAW_TEXT}),
62134
};
63135

64136
var DEFAULT_TAG_DEFINITION = new HtmlTagDefinition();

0 commit comments

Comments
 (0)
X Tutup