@@ -73,10 +73,13 @@ const $LT = 60;
7373const $EQ = 61 ;
7474const $GT = 62 ;
7575const $QUESTION = 63 ;
76- const $A = 65 ;
77- const $Z = 90 ;
7876const $LBRACKET = 91 ;
7977const $RBRACKET = 93 ;
78+ const $A = 65 ;
79+ const $F = 70 ;
80+ const $X = 88 ;
81+ const $Z = 90 ;
82+
8083const $a = 97 ;
8184const $f = 102 ;
8285const $z = 122 ;
@@ -102,7 +105,6 @@ class ControlFlowError {
102105// See http://www.w3.org/TR/html51/syntax.html#writing
103106class _HtmlTokenizer {
104107 private input : string ;
105- private inputLowercase : string ;
106108 private length : number ;
107109 // Note: char code read from `input` as-is (case is preserved), or $EOF once `index` reaches `length`.
108110 private peek : number = - 1 ;
@@ -117,7 +119,6 @@ class _HtmlTokenizer {
117119
118120 constructor ( private file : ParseSourceFile ) {
119121 this . input = file . content ;
120- this . inputLowercase = file . content . toLowerCase ( ) ;
121122 this . length = file . content . length ;
122123 this . _advance ( ) ;
123124 }
@@ -133,16 +134,16 @@ class _HtmlTokenizer {
133134 while ( this . peek !== $EOF ) {
134135 var start = this . _getLocation ( ) ;
135136 try {
136- if ( this . _attemptChar ( $LT ) ) {
137- if ( this . _attemptChar ( $BANG ) ) {
138- if ( this . _attemptChar ( $LBRACKET ) ) {
137+ if ( this . _attemptCharCode ( $LT ) ) {
138+ if ( this . _attemptCharCode ( $BANG ) ) {
139+ if ( this . _attemptCharCode ( $LBRACKET ) ) {
139140 this . _consumeCdata ( start ) ;
140- } else if ( this . _attemptChar ( $MINUS ) ) {
141+ } else if ( this . _attemptCharCode ( $MINUS ) ) {
141142 this . _consumeComment ( start ) ;
142143 } else {
143144 this . _consumeDocType ( start ) ;
144145 }
145- } else if ( this . _attemptChar ( $SLASH ) ) {
146+ } else if ( this . _attemptCharCode ( $SLASH ) ) {
146147 this . _consumeTagClose ( start ) ;
147148 } else {
148149 this . _consumeTagOpen ( start ) ;
@@ -205,50 +206,66 @@ class _HtmlTokenizer {
205206 this . column ++ ;
206207 }
207208 this . index ++ ;
208- this . peek = this . index >= this . length ? $EOF : StringWrapper . charCodeAt ( this . inputLowercase ,
209- this . index ) ;
209+ this . peek = this . index >= this . length ? $EOF : StringWrapper . charCodeAt ( this . input , this . index ) ;
210210 }
211211
212- private _attemptChar ( charCode : number ) : boolean {
212+ private _attemptCharCode ( charCode : number ) : boolean {
213213 if ( this . peek === charCode ) {
214214 this . _advance ( ) ;
215215 return true ;
216216 }
217217 return false ;
218218 }
219219
220- private _requireChar ( charCode : number ) {
220+ private _attemptCharCodeCaseInsensitive ( charCode : number ) : boolean {
221+ if ( compareCharCodeCaseInsensitive ( this . peek , charCode ) ) {
222+ this . _advance ( ) ;
223+ return true ;
224+ }
225+ return false ;
226+ }
227+
228+ private _requireCharCode ( charCode : number ) {
221229 var location = this . _getLocation ( ) ;
222- if ( ! this . _attemptChar ( charCode ) ) {
230+ if ( ! this . _attemptCharCode ( charCode ) ) {
223231 throw this . _createError ( unexpectedCharacterErrorMsg ( this . peek ) , location ) ;
224232 }
225233 }
226234
227- private _attemptChars ( chars : string ) : boolean {
235+ private _attemptStr ( chars : string ) : boolean {
228236 for ( var i = 0 ; i < chars . length ; i ++ ) {
229- if ( ! this . _attemptChar ( StringWrapper . charCodeAt ( chars , i ) ) ) {
237+ if ( ! this . _attemptCharCode ( StringWrapper . charCodeAt ( chars , i ) ) ) {
230238 return false ;
231239 }
232240 }
233241 return true ;
234242 }
235243
236- private _requireChars ( chars : string ) {
244+ private _attemptStrCaseInsensitive ( chars : string ) : boolean {
245+ for ( var i = 0 ; i < chars . length ; i ++ ) {
246+ if ( ! this . _attemptCharCodeCaseInsensitive ( StringWrapper . charCodeAt ( chars , i ) ) ) {
247+ return false ;
248+ }
249+ }
250+ return true ;
251+ }
252+
253+ private _requireStr ( chars : string ) {
237254 var location = this . _getLocation ( ) ;
238- if ( ! this . _attemptChars ( chars ) ) {
255+ if ( ! this . _attemptStr ( chars ) ) {
239256 throw this . _createError ( unexpectedCharacterErrorMsg ( this . peek ) , location ) ;
240257 }
241258 }
242259
243- private _attemptUntilFn ( predicate : Function ) {
260+ private _attemptCharCodeUntilFn ( predicate : Function ) {
244261 while ( ! predicate ( this . peek ) ) {
245262 this . _advance ( ) ;
246263 }
247264 }
248265
249- private _requireUntilFn ( predicate : Function , len : number ) {
266+ private _requireCharCodeUntilFn ( predicate : Function , len : number ) {
250267 var start = this . _getLocation ( ) ;
251- this . _attemptUntilFn ( predicate ) ;
268+ this . _attemptCharCodeUntilFn ( predicate ) ;
252269 if ( this . index - start . offset < len ) {
253270 throw this . _createError ( unexpectedCharacterErrorMsg ( this . peek ) , start ) ;
254271 }
@@ -273,10 +290,10 @@ class _HtmlTokenizer {
273290 private _decodeEntity ( ) : string {
274291 var start = this . _getLocation ( ) ;
275292 this . _advance ( ) ;
276- if ( this . _attemptChar ( $HASH ) ) {
277- let isHex = this . _attemptChar ( $x ) ;
293+ if ( this . _attemptCharCode ( $HASH ) ) {
294+ let isHex = this . _attemptCharCode ( $x ) || this . _attemptCharCode ( $X ) ;
278295 let numberStart = this . _getLocation ( ) . offset ;
279- this . _attemptUntilFn ( isDigitEntityEnd ) ;
296+ this . _attemptCharCodeUntilFn ( isDigitEntityEnd ) ;
280297 if ( this . peek != $SEMICOLON ) {
281298 throw this . _createError ( unexpectedCharacterErrorMsg ( this . peek ) , this . _getLocation ( ) ) ;
282299 }
@@ -291,7 +308,7 @@ class _HtmlTokenizer {
291308 }
292309 } else {
293310 let startPosition = this . _savePosition ( ) ;
294- this . _attemptUntilFn ( isNamedEntityEnd ) ;
311+ this . _attemptCharCodeUntilFn ( isNamedEntityEnd ) ;
295312 if ( this . peek != $SEMICOLON ) {
296313 this . _restorePosition ( startPosition ) ;
297314 return '&' ;
@@ -315,7 +332,7 @@ class _HtmlTokenizer {
315332 var parts = [ ] ;
316333 while ( true ) {
317334 tagCloseStart = this . _getLocation ( ) ;
318- if ( this . _attemptChar ( firstCharOfEnd ) && attemptEndRest ( ) ) {
335+ if ( this . _attemptCharCode ( firstCharOfEnd ) && attemptEndRest ( ) ) {
319336 break ;
320337 }
321338 if ( this . index > tagCloseStart . offset ) {
@@ -330,18 +347,18 @@ class _HtmlTokenizer {
330347
331348 private _consumeComment ( start : ParseLocation ) {
332349 this . _beginToken ( HtmlTokenType . COMMENT_START , start ) ;
333- this . _requireChar ( $MINUS ) ;
350+ this . _requireCharCode ( $MINUS ) ;
334351 this . _endToken ( [ ] ) ;
335- var textToken = this . _consumeRawText ( false , $MINUS , ( ) => this . _attemptChars ( '->' ) ) ;
352+ var textToken = this . _consumeRawText ( false , $MINUS , ( ) => this . _attemptStr ( '->' ) ) ;
336353 this . _beginToken ( HtmlTokenType . COMMENT_END , textToken . sourceSpan . end ) ;
337354 this . _endToken ( [ ] ) ;
338355 }
339356
340357 private _consumeCdata ( start : ParseLocation ) {
341358 this . _beginToken ( HtmlTokenType . CDATA_START , start ) ;
342- this . _requireChars ( 'cdata [') ;
359+ this . _requireStr ( 'CDATA [') ;
343360 this . _endToken ( [ ] ) ;
344- var textToken = this . _consumeRawText ( false , $RBRACKET , ( ) => this . _attemptChars ( ']>' ) ) ;
361+ var textToken = this . _consumeRawText ( false , $RBRACKET , ( ) => this . _attemptStr ( ']>' ) ) ;
345362 this . _beginToken ( HtmlTokenType . CDATA_END , textToken . sourceSpan . end ) ;
346363 this . _endToken ( [ ] ) ;
347364 }
@@ -367,7 +384,7 @@ class _HtmlTokenizer {
367384 } else {
368385 nameStart = nameOrPrefixStart ;
369386 }
370- this . _requireUntilFn ( isNameEnd , this . index === nameStart ? 1 : 0 ) ;
387+ this . _requireCharCodeUntilFn ( isNameEnd , this . index === nameStart ? 1 : 0 ) ;
371388 var name = this . input . substring ( nameStart , this . index ) ;
372389 return [ prefix , name ] ;
373390 }
@@ -381,16 +398,16 @@ class _HtmlTokenizer {
381398 }
382399 var nameStart = this . index ;
383400 this . _consumeTagOpenStart ( start ) ;
384- lowercaseTagName = this . inputLowercase . substring ( nameStart , this . index ) ;
385- this . _attemptUntilFn ( isNotWhitespace ) ;
401+ lowercaseTagName = this . input . substring ( nameStart , this . index ) . toLowerCase ( ) ;
402+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
386403 while ( this . peek !== $SLASH && this . peek !== $GT ) {
387404 this . _consumeAttributeName ( ) ;
388- this . _attemptUntilFn ( isNotWhitespace ) ;
389- if ( this . _attemptChar ( $EQ ) ) {
390- this . _attemptUntilFn ( isNotWhitespace ) ;
405+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
406+ if ( this . _attemptCharCode ( $EQ ) ) {
407+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
391408 this . _consumeAttributeValue ( ) ;
392409 }
393- this . _attemptUntilFn ( isNotWhitespace ) ;
410+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
394411 }
395412 this . _consumeTagOpenEnd ( ) ;
396413 } catch ( e ) {
@@ -416,11 +433,11 @@ class _HtmlTokenizer {
416433
417434 private _consumeRawTextWithTagClose ( lowercaseTagName : string , decodeEntities : boolean ) {
418435 var textToken = this . _consumeRawText ( decodeEntities , $LT , ( ) => {
419- if ( ! this . _attemptChar ( $SLASH ) ) return false ;
420- this . _attemptUntilFn ( isNotWhitespace ) ;
421- if ( ! this . _attemptChars ( lowercaseTagName ) ) return false ;
422- this . _attemptUntilFn ( isNotWhitespace ) ;
423- if ( ! this . _attemptChar ( $GT ) ) return false ;
436+ if ( ! this . _attemptCharCode ( $SLASH ) ) return false ;
437+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
438+ if ( ! this . _attemptStrCaseInsensitive ( lowercaseTagName ) ) return false ;
439+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
440+ if ( ! this . _attemptCharCode ( $GT ) ) return false ;
424441 return true ;
425442 } ) ;
426443 this . _beginToken ( HtmlTokenType . TAG_CLOSE , textToken . sourceSpan . end ) ;
@@ -453,27 +470,27 @@ class _HtmlTokenizer {
453470 this . _advance ( ) ;
454471 } else {
455472 var valueStart = this . index ;
456- this . _requireUntilFn ( isNameEnd , 1 ) ;
473+ this . _requireCharCodeUntilFn ( isNameEnd , 1 ) ;
457474 value = this . input . substring ( valueStart , this . index ) ;
458475 }
459476 this . _endToken ( [ this . _processCarriageReturns ( value ) ] ) ;
460477 }
461478
462479 private _consumeTagOpenEnd ( ) {
463- var tokenType =
464- this . _attemptChar ( $SLASH ) ? HtmlTokenType . TAG_OPEN_END_VOID : HtmlTokenType . TAG_OPEN_END ;
480+ var tokenType = this . _attemptCharCode ( $SLASH ) ? HtmlTokenType . TAG_OPEN_END_VOID :
481+ HtmlTokenType . TAG_OPEN_END ;
465482 this . _beginToken ( tokenType ) ;
466- this . _requireChar ( $GT ) ;
483+ this . _requireCharCode ( $GT ) ;
467484 this . _endToken ( [ ] ) ;
468485 }
469486
470487 private _consumeTagClose ( start : ParseLocation ) {
471488 this . _beginToken ( HtmlTokenType . TAG_CLOSE , start ) ;
472- this . _attemptUntilFn ( isNotWhitespace ) ;
489+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
473490 var prefixAndName ;
474491 prefixAndName = this . _consumePrefixAndName ( ) ;
475- this . _attemptUntilFn ( isNotWhitespace ) ;
476- this . _requireChar ( $GT ) ;
492+ this . _attemptCharCodeUntilFn ( isNotWhitespace ) ;
493+ this . _requireCharCode ( $GT ) ;
477494 this . _endToken ( prefixAndName ) ;
478495 }
479496
@@ -534,11 +551,19 @@ function isTextEnd(code: number): boolean {
534551}
535552
536553function isAsciiLetter ( code : number ) : boolean {
537- return code >= $a && code <= $z ;
554+ return code >= $a && code <= $z || code >= $A && code <= $Z ;
538555}
539556
540557function isAsciiHexDigit ( code : number ) : boolean {
541- return code >= $a && code <= $f || code >= $0 && code <= $9 ;
558+ return code >= $a && code <= $f || code >= $A && code <= $F || code >= $0 && code <= $9 ;
559+ }
560+
561+ function compareCharCodeCaseInsensitive ( code1 : number , code2 : number ) : boolean {
562+ return toUpperCaseCharCode ( code1 ) == toUpperCaseCharCode ( code2 ) ;
563+ }
564+
565+ function toUpperCaseCharCode ( code : number ) : number {
566+ return code >= $a && code <= $z ? code - $a + $A : code ;
542567}
543568
544569function mergeTextTokens ( srcTokens : HtmlToken [ ] ) : HtmlToken [ ] {
0 commit comments