EVOLUTION-MANAGER

Edit File: lexer.js

/*
 * lexer.js
 *
 * Copyright (C) 2009-13 by RStudio, Inc.
 *
 * This program is licensed to you under the terms of version 3 of the
 * GNU Affero General Public License. This program is distributed WITHOUT
 * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
 * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
 *
 */
var assert = require('assert');
var util = require('util');
var _ = require('underscore');

/**
 * Token type constants
 */
var TT = exports.TT = {
  WORD: 'TT_WORD',
  OPENBRACE: 'TT_OPENBRACE',
  CLOSEBRACE: 'TT_CLOSEBRACE',
  TERM: 'TT_TERM',
  WS: 'TT_WS',
  COMMENT: 'TT_COMMENT',
  EOD: 'TT_EOD'
};

/**
 * Character classification constants
 */
var c_ = 1;
var C_ALPHA = c_++;
var C_DIGIT = c_++;
var C_OPENBRACE = c_++;    // {
var C_CLOSEBRACE = c_++;   // }
var C_SEMICOLON = c_++;    // ;
var C_HASH = c_++;         // #
var C_SQUOTE = c_++;       // '
var C_DQUOTE = c_++;       // "
var C_BACKSLASH = c_++;
var C_EOL = c_++;
var C_WS = c_++;           // whitespace
var C_CONTROL = c_++;
var C_OTHER = c_ + 100;
var C_EOD = c_ + 101;
delete c_;

function Token(type, content, position) {
  this.type = type;
  this.content = content;
  this.position = position;
}

(function() {
}).call(Token.prototype);

exports.Lexer = Lexer;
/**
 * @param {String} data The config data.
 * @param {String} [pathHint] The path to the config data's file (for error
 *   logging/debugging purposes).
 */
function Lexer(data, pathHint) {
  this.$data = data.replace(/\r/, ''); // Strip carriage return
  this.$pathHint = pathHint;

// Pointer into this.$data that indicates where the next character is.
  // This value must never be changed except by $nextChar(), which knows
  // how to keep $line and $col in sync.
  this.$pos = 0;
  // 1-based row and column
  this.$line = 1;
  this.$col = 1;
}

(function() {
  /**
   * Return the next token, or null if done.
   *
   * Will throw if a control character or unterminated quoted string is
   * encountered. The exception will have a `position` property that is an
   * object with line and col properties (1-based).
   */
  this.nextToken = function() {
    // This is the position of the next character, and thus the starting
    // position of the new token
    var tokenStart = new Position(
        this.$line, this.$col, this.$pos, this.$pathHint);

// String value indicating the logical token value--may differ from the
    // source characters (for example quoting/escaping are performed)
    var content;
    // One of the TT.XXX constants
    var type = 0;

// General approach: We can tell by peeking at the first character what
    // kind of token we're going to match. Then call $matchXXX methods that
    // know how to consume those tokens.
    try {
      var c = this.$peekChar();
      switch (this.$classify(c)) {
        case C_ALPHA:
        case C_DIGIT:
        case C_OTHER:
        case C_BACKSLASH:
          type = TT.WORD;
          content = this.$matchWord();
          break;
        case C_SEMICOLON:
          type = TT.TERM;
          content = this.$nextChar();
          break;
        case C_OPENBRACE:
          type = TT.OPENBRACE;
          content = this.$nextChar();
          break;
        case C_CLOSEBRACE:
          type = TT.CLOSEBRACE;
          content = this.$nextChar();
          break;
        case C_EOL:
          type = TT.WS;
          content = this.$nextChar();
          break;
        case C_WS:
          type = TT.WS;
          content = this.$matchWhitespace();
          break;
        case C_HASH:
          type = TT.COMMENT;
          content = this.$matchComment();
          break;
        case C_SQUOTE:
          type = TT.WORD;
          content = this.$matchQuoted();
          break;
        case C_DQUOTE:
          type = TT.WORD;
          content = this.$matchQuoted();
          break;
        case C_CONTROL:
          throw new Error('Invalid character detected');
        case C_EOD:
          type = TT.EOD;
          content = '';
          break;
      }

assert(type);

return new Token(type, content, tokenStart);

} catch (ex) {
      ex.position = tokenStart;
      throw ex;
    }
  };

this.$matchWord = function() {
    var result = "";
    while (true) {
      switch (this.$classify(this.$peekChar())) {
        case C_ALPHA:
        case C_DIGIT:
        case C_OTHER:
        case C_BACKSLASH:
          result += this.$nextChar();
          break;
        default:  // Note: Includes C_EOD
          return result;
      }
    }
  };

this.$matchWhitespace = function() {
    // Eat one or more whitespace characters
    return this.$consumeRegex(/[ \t\r]+/g);
  };

this.$matchComment = function() {
    // Discard the leading #
    var hash = this.$nextChar();
    assert.equal(hash, '#');
    // Eat # to end of line
    return this.$consumeRegex(/[^\n]*/g);
  };

this.$matchQuoted = function() {
    var value = '';
    var nextChar;
    var quot = this.$nextChar();
    assert(quot === '"' || quot === "'", '$matchQuoted called incorrectly');
    while (true) {
      // Eat all characters that aren't special with respect to quoted strings;
      // that includes everything but single/double quotes and backslash
      value += this.$consumeRegex(/[^'"\\]*/g);
      switch (this.$peekChar()) {
        case '"':
        case "'":
          if (this.$peekChar() !== quot) {
            // False alarm, this is not the same kind of quote character that
            // was used to start this quoted string
            value += this.$nextChar();
          } else {
            // All done--discard the close quote and return!
            this.$nextChar();
            return value;
          }
          break;
        case '\\':
          this.$nextChar(); // consume backslash
          if (this.$eod())
            throw new Error('Closing ' + quot + ' character was not found');
          else
            value += this.$nextChar();
          break;
        case null:
          throw new Error('Closing ' + quot + ' character was not found');
      }
    }
  };

this.$consumeRegex = function(re) {
    re.lastIndex = this.$pos;
    var m = re.exec(this.$data);
    assert(m);
    assert.equal(m.index, this.$pos);
    this.$advanceBy(m[0].length);
    return m[0];
  }

/**
   * Get the next char and advance the cursor.
   */
  this.$nextChar = function() {
    if (this.$eod())
      return null;
    var c = this.$data.charAt(this.$pos++);
    if (c === '\n') {
      this.$line++;
      this.$col = 1;
    } else {
      this.$col++;
    }
    return c;
  };

/**
   * Skip over n characters.
   *
   * This uses a slow implementation of calling $nextChar() n times rather than
   * just `this.$pos += n` because $line and $col need to be kept in sync with
   * $pos. There are faster ways than this of course, but would take more code
   * than seems worth it.
   */
  this.$advanceBy = function(n) {
    assert(n >= 0);

var c;
    for (var i = 0; i < n; i++) {
      c = this.$nextChar();
      assert.notEqual(c, null);
    }
  };

/**
   * Get the next character but don't advance the cursor.
   */
  this.$peekChar = function() {
    if (this.$eod())
      return null;
    return this.$data.charAt(this.$pos);
  };

// Return true if we're done
  this.$eod = function() {
    return this.$pos < 0 || this.$pos >= this.$data.length;
  };

}).call(Lexer.prototype);