feat(HtmlLexer): allow "<" in text tokens

vicb · vicb · commit aecf68117a7d · 2015-12-08T02:39:17.000Z
fixes #5550
diff --git a/modules/angular2/src/compiler/html_lexer.ts b/modules/angular2/src/compiler/html_lexer.ts
@@ -6,6 +6,7 @@ import {
   CONST_EXPR,
   serializeEnum
 } from 'angular2/src/facade/lang';
+import {ListWrapper} from 'angular2/src/facade/collection';
 import {ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan} from './parse_util';
 import {getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES} from './html_tags';
 
@@ -161,7 +162,7 @@ class _HtmlTokenizer {
     }
     this._beginToken(HtmlTokenType.EOF);
     this._endToken([]);
-    return new HtmlTokenizeResult(this.tokens, this.errors);
+    return new HtmlTokenizeResult(mergeTextTokens(this.tokens), this.errors);
   }
 
   private _getLocation(): ParseLocation {
@@ -374,21 +375,37 @@ class _HtmlTokenizer {
   }
 
   private _consumeTagOpen(start: ParseLocation) {
-    this._attemptUntilFn(isNotWhitespace);
-    var nameStart = this.index;
-    this._consumeTagOpenStart(start);
-    var lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
-    this._attemptUntilFn(isNotWhitespace);
-    while (this.peek !== $SLASH && this.peek !== $GT) {
-      this._consumeAttributeName();
+    let savedPos = this._savePosition();
+    let lowercaseTagName;
+    try {
+      this._attemptUntilFn(isNotWhitespace);
+      var nameStart = this.index;
+      this._consumeTagOpenStart(start);
+      lowercaseTagName = this.inputLowercase.substring(nameStart, this.index);
       this._attemptUntilFn(isNotWhitespace);
-      if (this._attemptChar($EQ)) {
+      while (this.peek !== $SLASH && this.peek !== $GT) {
+        this._consumeAttributeName();
+        this._attemptUntilFn(isNotWhitespace);
+        if (this._attemptChar($EQ)) {
+          this._attemptUntilFn(isNotWhitespace);
+          this._consumeAttributeValue();
+        }
         this._attemptUntilFn(isNotWhitespace);
-        this._consumeAttributeValue();
       }
-      this._attemptUntilFn(isNotWhitespace);
+      this._consumeTagOpenEnd();
+    } catch (e) {
+      if (e instanceof ControlFlowError) {
+        // When the start tag is invalid, treat the "<" as literal text instead of a tag opener
+        this._restorePosition(savedPos);
+        // Back-to-back text tokens are merged once tokenization finishes (see mergeTextTokens)
+        this._beginToken(HtmlTokenType.TEXT, start);
+        this._endToken(['<']);
+        return;
+      }
+
+      throw e;
     }
-    this._consumeTagOpenEnd();
+
     var contentTokenType = getHtmlTagDefinition(lowercaseTagName).contentType;
     if (contentTokenType === HtmlTagContentType.RAW_TEXT) {
       this._consumeRawTextWithTagClose(lowercaseTagName, false);
@@ -470,13 +487,20 @@ class _HtmlTokenizer {
     this._endToken([this._processCarriageReturns(parts.join(''))]);
   }
 
-  private _savePosition(): number[] { return [this.peek, this.index, this.column, this.line]; }
+  private _savePosition(): number[] {  // snapshot: [peek, index, column, line, token count]
+    return [this.peek, this.index, this.column, this.line, this.tokens.length];
+  }
 
   private _restorePosition(position: number[]): void {
     this.peek = position[0];
     this.index = position[1];
     this.column = position[2];
     this.line = position[3];
+    let nbTokens = position[4];  // expected to be the token count recorded by _savePosition
+    if (nbTokens < this.tokens.length) {
+      // truncate the token list back to that count, discarding anything added beyond it
+      this.tokens = ListWrapper.slice(this.tokens, 0, nbTokens);
+    }
   }
 }
 
@@ -516,3 +540,21 @@ function isAsciiLetter(code: number): boolean {
 function isAsciiHexDigit(code: number): boolean {
   return code >= $a && code <= $f || code >= $0 && code <= $9;
 }
+
+function mergeTextTokens(srcTokens: HtmlToken[]): HtmlToken[] {
+  let dstTokens = [];  // result: srcTokens with each run of adjacent TEXT tokens merged into one
+  let lastDstToken: HtmlToken;  // most recent token pushed to dstTokens; unset on the first pass
+  for (let i = 0; i < srcTokens.length; i++) {
+    let token = srcTokens[i];
+    if (isPresent(lastDstToken) && lastDstToken.type == HtmlTokenType.TEXT &&
+        token.type == HtmlTokenType.TEXT) {
+      lastDstToken.parts[0] += token.parts[0];  // append this text to the previous TEXT token
+      lastDstToken.sourceSpan.end = token.sourceSpan.end;  // and extend its span to this end
+    } else {
+      lastDstToken = token;  // same object as in srcTokens, so later merges mutate it in place
+      dstTokens.push(lastDstToken);
+    }
+  }
+
+  return dstTokens;
+}
diff --git a/modules/angular2/test/compiler/html_lexer_spec.ts b/modules/angular2/test/compiler/html_lexer_spec.ts
@@ -192,15 +192,6 @@ export function main() {
             ]);
       });
 
-      it('should report missing name after <', () => {
-        expect(tokenizeAndHumanizeErrors('<'))
-            .toEqual([[HtmlTokenType.TAG_OPEN_START, 'Unexpected character "EOF"', '0:1']]);
-      });
-
-      it('should report missing >', () => {
-        expect(tokenizeAndHumanizeErrors('<name'))
-            .toEqual([[HtmlTokenType.TAG_OPEN_START, 'Unexpected character "EOF"', '0:5']]);
-      });
     });
 
     describe('attributes', () => {
@@ -335,20 +326,6 @@ export function main() {
             ]);
       });
 
-      it('should report missing value after =', () => {
-        expect(tokenizeAndHumanizeErrors('<name a='))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:8']]);
-      });
-
-      it('should report missing end quote for \'', () => {
-        expect(tokenizeAndHumanizeErrors('<name a=\''))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:9']]);
-      });
-
-      it('should report missing end quote for "', () => {
-        expect(tokenizeAndHumanizeErrors('<name a="'))
-            .toEqual([[HtmlTokenType.ATTR_VALUE, 'Unexpected character "EOF"', '0:9']]);
-      });
     });
 
     describe('closing tags', () => {
@@ -448,6 +425,36 @@ export function main() {
         expect(tokenizeAndHumanizeSourceSpans('a'))
             .toEqual([[HtmlTokenType.TEXT, 'a'], [HtmlTokenType.EOF, '']]);
       });
+
+      it('should allow "<" in text nodes', () => {
+        expect(tokenizeAndHumanizeParts('{{ a < b ? c : d }}'))
+            .toEqual([[HtmlTokenType.TEXT, '{{ a < b ? c : d }}'], [HtmlTokenType.EOF]]);
+
+        expect(tokenizeAndHumanizeSourceSpans('<p>a<b</p>'))
+            .toEqual([
+              [HtmlTokenType.TAG_OPEN_START, '<p'],
+              [HtmlTokenType.TAG_OPEN_END, '>'],
+              [HtmlTokenType.TEXT, 'a<b'],
+              [HtmlTokenType.TAG_CLOSE, '</p>'],
+              [HtmlTokenType.EOF, ''],
+            ]);
+      });
+
+      // TODO(vicb): make the lexer aware of Angular expressions
+      // see https://github.com/angular/angular/issues/5679
+      it('should parse valid start tag in interpolation', () => {
+        expect(tokenizeAndHumanizeParts('{{ a <b && c > d }}'))
+            .toEqual([
+              [HtmlTokenType.TEXT, '{{ a '],
+              [HtmlTokenType.TAG_OPEN_START, null, 'b'],
+              [HtmlTokenType.ATTR_NAME, null, '&&'],
+              [HtmlTokenType.ATTR_NAME, null, 'c'],
+              [HtmlTokenType.TAG_OPEN_END],
+              [HtmlTokenType.TEXT, ' d }}'],
+              [HtmlTokenType.EOF]
+            ]);
+      });
+
     });
 
     describe('raw text', () => {