EVOLUTION-MANAGER

Edit File: Token.h

#ifndef SOURCETOOLS_TOKENIZATION_TOKEN_H
#define SOURCETOOLS_TOKENIZATION_TOKEN_H

#include <cstring>
#include <cstdio>
#include <cwchar>

#include <vector>
#include <string>
#include <map>
#include <sstream>

#include <sourcetools/core/core.h>
#include <sourcetools/tokenization/Registration.h>
#include <sourcetools/collection/Position.h>
#include <sourcetools/cursor/TextCursor.h>

namespace sourcetools {
namespace tokens {

// A single token: a [begin, end) range of characters together with the
// offset, position and type recorded when the token was created.
//
// The token stores pointers into the text exposed by the cursor it was
// built from; it does not copy that text, so the pointers are only usable
// for as long as that text stays alive. The range is NOT NUL-terminated.
class Token
{
private:
  typedef cursors::TextCursor TextCursor;
  typedef collections::Position Position;

public:

  // Constructs an INVALID token that refers to no text.
  Token()
    : begin_(NULL),
      end_(NULL),
      offset_(0),
      type_(INVALID)
  {
  }

  // Constructs a token of the given type that refers to no text.
  explicit Token(TokenType type)
    : begin_(NULL),
      end_(NULL),
      offset_(0),
      type_(type)
  {
  }

  // Constructs an INVALID token that refers to no text but carries a
  // position. (Left implicit so existing callers relying on the conversion
  // from Position keep compiling.)
  Token(const Position& position)
    : begin_(NULL),
      end_(NULL),
      offset_(0),
      position_(position),
      type_(INVALID)
  {
  }

  // Constructs a token covering 'length' characters starting at the
  // cursor's current offset, recording the cursor's offset and position.
  Token(const TextCursor& cursor, TokenType type, std::size_t length)
    : begin_(cursor.begin() + cursor.offset()),
      end_(cursor.begin() + cursor.offset() + length),
      offset_(cursor.offset()),
      position_(cursor.position()),
      type_(type)
  {
  }

  // Raw access to the token's character range and its offset.
  const char* begin() const { return begin_; }
  const char* end() const { return end_; }
  std::size_t offset() const { return offset_; }
  std::size_t size() const { return end_ - begin_; }

  // Returns a copy of the token's text.
  std::string contents() const
  {
    return std::string(begin_, end_);
  }

  // Returns true when the token's text is exactly the NUL-terminated
  // 'string'. A null 'string' never matches.
  //
  // The comparison is bounded by the token's own length: the token text is
  // not NUL-terminated, so an unbounded strcmp would read past 'end_' (and
  // strcmp returns 0, i.e. 'false', on a match).
  bool contentsEqual(const char* string) const
  {
    if (string == NULL)
      return false;

    const std::size_t length = std::strlen(string);
    if (length != size())
      return false;

    // Skip memcmp for empty tokens, whose 'begin_' may be null.
    return length == 0 || std::memcmp(begin_, string, length) == 0;
  }

  // Returns true when the token's text is exactly 'string'.
  bool contentsEqual(const std::string& string) const
  {
    if (string.size() != size())
      return false;

    // Skip memcmp for empty tokens, whose 'begin_' may be null.
    return size() == 0 || std::memcmp(begin_, string.c_str(), size()) == 0;
  }

  // Position (row / column) recorded for the token.
  const Position& position() const { return position_; }
  std::size_t row() const { return position_.row; }
  std::size_t column() const { return position_.column; }

  // Token type accessors.
  TokenType type() const { return type_; }
  bool isType(TokenType type) const { return type_ == type; }

private:
  const char* begin_;   // first character of the token (may be null)
  const char* end_;     // one past the last character (may be null)
  std::size_t offset_;  // cursor offset at which the token starts

  Position position_;
  TokenType type_;
};

inline bool isBracket(const Token& token)
{
  return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_BRACKET_MASK);
}

inline bool isLeftBracket(const Token& token)
{
  return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_BRACKET_LEFT_MASK);
}

inline bool isRightBracket(const Token& token)
{
  return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_BRACKET_RIGHT_MASK);
}

// Reports whether two token types form a matching bracket pair.
//
// The combined bits of both types must pass the mask check for the
// bracket / left / right bits; when they do, the types are complements if
// SOURCE_TOOLS_LOWER_BITS(..., 4) yields the same value for both.
inline bool isComplement(TokenType lhs, TokenType rhs)
{
  static const TokenType bracketBits =
    SOURCE_TOOLS_BRACKET_BIT | SOURCE_TOOLS_BRACKET_LEFT_BIT | SOURCE_TOOLS_BRACKET_RIGHT_BIT;

  // Bail out early unless the pair passes the bracket-bits check.
  if (!(SOURCE_TOOLS_CHECK_MASK((lhs | rhs), bracketBits)))
    return false;

  return SOURCE_TOOLS_LOWER_BITS(lhs, 4) == SOURCE_TOOLS_LOWER_BITS(rhs, 4);
}

// Returns 'type' with both the left-bracket and right-bracket bits toggled.
inline TokenType complement(TokenType type)
{
  const TokenType sideBits =
    SOURCE_TOOLS_BRACKET_LEFT_BIT | SOURCE_TOOLS_BRACKET_RIGHT_BIT;

  return type ^ sideBits;
}

inline bool isKeyword(const Token& token)
{
  return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_KEYWORD_MASK);
}

inline bool isControlFlowKeyword(const Token& token)
{
  return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_KEYWORD_CONTROL_FLOW_MASK);
}

inline bool isOperator(const Token& token)
{
  return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_OPERATOR_MASK);
}

inline bool isUnaryOperator(const Token& token)
{
  return SOURCE_TOOLS_CHECK_MASK(token.type(), SOURCE_TOOLS_OPERATOR_UNARY_MASK);
}

// True for tokens that are operators but not unary operators.
inline bool isNonUnaryOperator(const Token& token)
{
  if (!isOperator(token))
    return false;

  return !isUnaryOperator(token);
}

inline bool isComparisonOperator(const Token& token)
{
  switch (token.type())
  {
  case OPERATOR_AND_SCALAR:
  case OPERATOR_AND_VECTOR:
  case OPERATOR_OR_SCALAR:
  case OPERATOR_OR_VECTOR:
  case OPERATOR_EQUAL:
  case OPERATOR_NOT_EQUAL:
  case OPERATOR_LESS:
  case OPERATOR_LESS_OR_EQUAL:
  case OPERATOR_GREATER:
  case OPERATOR_GREATER_OR_EQUAL:
    return true;
  default:
    return false;
  }
}

inline bool isWhitespace(const Token& token)
{
  return token.type() == WHITESPACE;
}

inline bool isComment(const Token& token)
{
  return token.type() == COMMENT;
}

inline bool isSymbol(const Token& token)
{
  return token.type() == SYMBOL;
}

inline bool isEnd(const Token& token)
{
  return token.type() == END;
}

inline bool isString(const Token& token)
{
  return token.type() == STRING;
}

inline bool isSymbolic(const Token& token)
{
  static const TokenType mask = SYMBOL | NUMBER | STRING;
  return (token.type() & mask) != 0;
}

inline bool isNumeric(const Token& token)
{
  return (token.type() & NUMBER) != 0;
}

inline bool isCallOperator(const Token& token)
{
  return token.type() == LPAREN ||
         token.type() == LBRACKET ||
         token.type() == LDBRACKET;
}

inline bool isAssignmentOperator(const Token& token)
{
  switch (token.type())
  {
  case OPERATOR_ASSIGN_LEFT:
  case OPERATOR_ASSIGN_LEFT_COLON:
  case OPERATOR_ASSIGN_LEFT_EQUALS:
  case OPERATOR_ASSIGN_LEFT_PARENT:
  case OPERATOR_ASSIGN_RIGHT:
  case OPERATOR_ASSIGN_RIGHT_PARENT:
    return true;
  default:
    return false;
  }
}

// Helpers for decoding escape sequences. Each parseXxx() function takes the
// input iterator and the output pointer by reference: on success it
// advances both past what it consumed / produced and returns true; on
// failure it returns false and leaves both untouched.
//
// The parsers peek a few characters ahead of 'it', so the input must be
// followed by readable memory (for example a closing quote or a NUL).
namespace detail {

// Returns true when 'c' is an ASCII hexadecimal digit (0-9, a-f, A-F).
inline bool isHexDigit(char c)
{
  return (c >= '0' && c <= '9') ||
         (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F');
}

// Returns the numeric value (0-15) of the hexadecimal digit 'c'.
//
// Returns 0 for characters that are not hex digits, so the result alone
// cannot distinguish '0' from an invalid character; use isHexDigit() for
// validation.
inline int hexValue(char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  else if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  else if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;

  return 0;
}

// Parses an octal escape sequence, e.g. '\012'.
//
// Consumes the backslash and between one and three octal digits and writes
// a single byte. The value is accumulated in an unsigned char, so
// sequences above '\377' are truncated to their low 8 bits.
inline bool parseOctal(const char*& it, char*& output)
{
  // Check for opening escape
  if (*it != '\\')
    return false;

  // Check for number following
  char lookahead = *(it + 1);
  if (lookahead < '0' || lookahead > '7')
    return false;
  ++it;

  // Begin parsing. Consume up to three numbers.
  unsigned char result = 0;
  const char* end = it + 3;
  for (; it != end; ++it)
  {
    char ch = *it;
    if ('0' <= ch && ch <= '7')
      result = 8 * result + ch - '0';
    else
      break;
  }

  // Assign result, and return.
  *output++ = result;
  return true;
}

// Parse a hex escape sequence, e.g. '\xFF'.
//
// Consumes the backslash, the 'x' and one or two hex digits and writes a
// single byte.
inline bool parseHex(const char*& it, char*& output)
{
  // Check for opening escape.
  if (*it != '\\')
    return false;

  if (*(it + 1) != 'x')
    return false;

  // At least one hex digit is required.
  if (!isHexDigit(*(it + 2)))
    return false;

  // Begin parsing. Consume up to two digits.
  it += 2;
  unsigned char value = 0;
  const char* end = it + 2;
  for (; it != end; ++it)
  {
    // Validate with isHexDigit() rather than testing hexValue() against
    // zero: hexValue() also returns 0 for the digit '0', which would cut
    // sequences such as '\x0A' or '\x10' short.
    if (!isHexDigit(*it))
      break;

    value = 16 * value + hexValue(*it);
  }

  *output++ = value;
  return true;
}

// Parse a unicode escape sequence.
//
// Accepts '\u' followed by up to 4 hex digits or '\U' followed by up to 8,
// optionally wrapped in braces (e.g. '\u{e9}'). The code point is converted
// to a multibyte sequence with std::wcrtomb (declared in <cwchar>), so the
// bytes written depend on the current C locale. Returns false if the
// sequence is malformed or the conversion fails.
inline bool parseUnicode(const char*& it, char*& output)
{
  if (*it != '\\')
    return false;

  // Maximum number of hex digits allowed by this escape form.
  char lookahead = *(it + 1);
  int size;
  if (lookahead == 'u')
    size = 4;
  else if (lookahead == 'U')
    size = 8;
  else
    return false;

  // Clone the input iterator (only set it on success)
  const char* clone = it;
  clone += 2;

  // Check for e.g. '\u{...}'
  //                   ^
  bool delimited = *clone == '{';
  clone += delimited;

  // Check for a hex digit.
  if (!isHexDigit(*clone))
    return false;

  // Begin parsing hex digits
  wchar_t value = 0;
  const char* end = clone + size;
  for (; clone != end; ++clone)
  {
    if (!isHexDigit(*clone))
      break;

    int hex = hexValue(*clone);
    value = 16 * value + hex;
  }

  // Eat a closing '}' if we had a starting '{'.
  if (delimited)
  {
    if (*clone != '}')
      return false;
    ++clone;
  }

  // Convert starting from a zeroed (initial) conversion state.
  std::mbstate_t state;
  std::memset(&state, 0, sizeof(state));
  std::size_t bytes = std::wcrtomb(output, value, &state);
  if (bytes == static_cast<std::size_t>(-1))
    return false;

  // Update iterator state
  it = clone;
  output += bytes;
  return true;
}

} // namespace detail

// Decodes the escape sequences in the character range [begin, end) and
// returns the resulting text.
//
// Octal ('\012'), hex ('\xFF') and unicode ('\u....' / '\U........')
// escapes are handled by the detail:: parsers; the single-character escapes
// \a \b \f \n \r \t \v \\ are mapped to their control characters; any other
// escaped character is copied as-is with the backslash dropped. A lone
// backslash as the very last character of the range is copied literally.
//
// An empty or inverted range yields an empty string. The returned string
// contains exactly the decoded characters (no trailing NUL is included in
// its length).
//
// The detail:: parsers peek a few characters ahead, so the range is
// expected to be followed by readable memory (e.g. a closing quote).
inline std::string stringValue(const char* begin, const char* end)
{
  if (begin >= end)
    return std::string();

  // Decoding never produces more bytes than it consumes, so the input
  // length (plus one byte of headroom) bounds the output.
  const std::size_t n = static_cast<std::size_t>(end - begin);
  std::vector<char> buffer(n + 1);

  const char* it = begin;
  char* const start = &buffer[0];
  char* output = start;

  while (it < end)
  {
    // Ordinary characters are copied through unchanged.
    if (*it != '\\')
    {
      *output++ = *it++;
      continue;
    }

    // A backslash with nothing after it inside the range: keep it
    // literally rather than reading the character at 'end'.
    if (it + 1 == end)
    {
      *output++ = *it++;
      continue;
    }

    // Multi-character escapes; each parser advances 'it' and 'output'
    // itself on success.
    if (detail::parseOctal(it, output) ||
        detail::parseHex(it, output) ||
        detail::parseUnicode(it, output))
    {
      continue;
    }

    // Handle the rest: skip the backslash and translate the next character.
    ++it;
    switch (*it)
    {
    case 'a':  *output++ = '\a'; break;
    case 'b':  *output++ = '\b'; break;
    case 'f':  *output++ = '\f'; break;
    case 'n':  *output++ = '\n'; break;
    case 'r':  *output++ = '\r'; break;
    case 't':  *output++ = '\t'; break;
    case 'v':  *output++ = '\v'; break;
    case '\\': *output++ = '\\'; break;
    default:   *output++ = *it;  break;
    }
    ++it;
  }

  // Build the result from the decoded bytes only.
  return std::string(start, static_cast<std::size_t>(output - start));
}

inline std::string stringValue(const Token& token)
{
  switch (token.type())
  {
  case STRING:
    return stringValue(token.begin() + 1, token.end() - 1);
  case SYMBOL:
    if (*token.begin() == '`')
      return stringValue(token.begin() + 1, token.end() - 1);
  default:
    return stringValue(token.begin(), token.end());
  }
}

} // namespace tokens

// Returns a short lowercase label for a token type.
//
// Exact type matches are tried first; bracket, keyword and operator types
// are then recognised through their masks. Anything else is "unknown".
inline std::string toString(tokens::TokenType type)
{
  using namespace tokens;

  // Types identified by an exact match.
  if (type == INVALID)    return "invalid";
  if (type == END)        return "end";
  if (type == EMPTY)      return "empty";
  if (type == MISSING)    return "missing";
  if (type == SEMI)       return "semi";
  if (type == COMMA)      return "comma";
  if (type == SYMBOL)     return "symbol";
  if (type == COMMENT)    return "comment";
  if (type == WHITESPACE) return "whitespace";
  if (type == STRING)     return "string";
  if (type == NUMBER)     return "number";

  // Families identified by mask.
  if (SOURCE_TOOLS_CHECK_MASK(type, SOURCE_TOOLS_BRACKET_MASK))
    return "bracket";
  if (SOURCE_TOOLS_CHECK_MASK(type, SOURCE_TOOLS_KEYWORD_MASK))
    return "keyword";
  if (SOURCE_TOOLS_CHECK_MASK(type, SOURCE_TOOLS_OPERATOR_MASK))
    return "operator";

  return "unknown";
}

// Formats a token as "[ row: col]: contents".
//
// END, EMPTY and MISSING tokens are shown with a placeholder instead of
// their text. The text is cut to half the buffer size so the formatted
// line always fits in the fixed-size buffer.
inline std::string toString(const tokens::Token& token)
{
  static const int kBufferSize = 1024;
  static const std::size_t kMaxContents = kBufferSize / 2;

  std::string text;
  if (token.isType(tokens::END))
    text = "<END>";
  else if (token.isType(tokens::EMPTY))
    text = "<empty>";
  else if (token.isType(tokens::MISSING))
    text = "<missing>";
  else
    text = token.contents();

  // Keep only the leading part of overly long contents.
  if (text.size() > kMaxContents)
    text.erase(kMaxContents);

  const unsigned long row = static_cast<unsigned long>(token.row());
  const unsigned long column = static_cast<unsigned long>(token.column());

  char buffer[kBufferSize];
  std::sprintf(buffer, "[%4lu:%4lu]: %s", row, column, text.c_str());
  return std::string(buffer);
}

// Streams the toString() representation of a token.
inline std::ostream& operator<<(std::ostream& os, const tokens::Token& token)
{
  const std::string text = toString(token);
  os << text;
  return os;
}

// Streams every token in the vector, one per line. Each line is ended with
// std::endl, so the stream is flushed after every token.
inline std::ostream& operator<<(std::ostream& os, const std::vector<tokens::Token>& tokens)
{
  const std::size_t count = tokens.size();
  for (std::size_t index = 0; index < count; ++index)
  {
    os << tokens[index] << std::endl;
  }

  return os;
}

} // namespace sourcetools

#endif /* SOURCETOOLS_TOKENIZATION_TOKEN_H */