// lexer2.h see license.txt for copyright and terms of use // 2nd phase lexical analyzer; see lexer2.txt #ifndef __LEXER2_H #define __LEXER2_H #include "lexer1.h" // Lexer1 #include "srcloc.h" // SourceLoc (r) #include "strtable.h" // StringRef, StringTable #include "useract.h" // SemanticValue #include "lexerint.h" // LexerInterface class CCLang; // cc_lang.h // this enumeration defines the terminal symbols that the parser // deals with enum Lexer2TokenType { // I've avoided collapsing these onto fewer lines because // it makes systematic modification (search & replace) // more difficult // end of file L2_EOF=0, // non-keyword name L2_NAME, // classified name (for e.g. cdecl2) L2_TYPE_NAME, L2_VARIABLE_NAME, // literals L2_INT_LITERAL, L2_FLOAT_LITERAL, L2_STRING_LITERAL, L2_CHAR_LITERAL, // dsw: user-defined qualifiers L2_UDEF_QUAL, // keywords L2_ASM, L2_AUTO, L2_BREAK, L2_BOOL, L2_CASE, L2_CATCH, L2_CDECL, L2_CHAR, L2_CLASS, L2_CONST, L2_CONST_CAST, L2_CONTINUE, L2_DEFAULT, L2_DELETE, L2_DO, L2_DOUBLE, L2_DYNAMIC_CAST, L2_ELSE, L2_ENUM, L2_EXPLICIT, L2_EXPORT, L2_EXTERN, L2_FALSE, L2_FLOAT, L2_FOR, L2_FRIEND, L2_GOTO, L2_IF, L2_INLINE, L2_INT, L2_LONG, L2_MUTABLE, L2_NAMESPACE, L2_NEW, L2_OPERATOR, L2_PASCAL, L2_PRIVATE, L2_PROTECTED, L2_PUBLIC, L2_REGISTER, L2_REINTERPRET_CAST, L2_RETURN, L2_SHORT, L2_SIGNED, L2_SIZEOF, L2_STATIC, L2_STATIC_CAST, L2_STRUCT, L2_SWITCH, L2_TEMPLATE, L2_THIS, L2_THROW, L2_TRUE, L2_TRY, L2_TYPEDEF, L2_TYPEID, L2_TYPENAME, L2_UNION, L2_UNSIGNED, L2_USING, L2_VIRTUAL, L2_VOID, L2_VOLATILE, L2_WCHAR_T, L2_WHILE, // operators L2_LPAREN, L2_RPAREN, L2_LBRACKET, L2_RBRACKET, L2_ARROW, L2_COLONCOLON, L2_DOT, L2_BANG, L2_TILDE, L2_PLUS, L2_MINUS, L2_PLUSPLUS, L2_MINUSMINUS, L2_AND, L2_STAR, L2_DOTSTAR, L2_ARROWSTAR, L2_SLASH, L2_PERCENT, L2_LEFTSHIFT, L2_RIGHTSHIFT, L2_LESSTHAN, L2_LESSEQ, L2_GREATERTHAN, L2_GREATEREQ, L2_EQUALEQUAL, L2_NOTEQUAL, L2_XOR, L2_OR, L2_ANDAND, L2_OROR, L2_QUESTION, L2_COLON, L2_EQUAL, L2_STAREQUAL, L2_SLASHEQUAL, L2_PERCENTEQUAL, L2_PLUSEQUAL, L2_MINUSEQUAL, L2_ANDEQUAL, L2_XOREQUAL, L2_OREQUAL, L2_LEFTSHIFTEQUAL, L2_RIGHTSHIFTEQUAL, L2_COMMA, L2_ELLIPSIS, L2_SEMICOLON, L2_LBRACE, L2_RBRACE, // GNU extensions L2___ATTRIBUTE__, L2___FUNCTION__, L2___LABEL__, L2___PRETTY_FUNCTION__, L2___TYPEOF__, // my extensions L2_OWNER, // dummy terminals used for precedence games L2_PREFER_REDUCE, L2_PREFER_SHIFT, // theorem prover extensions L2_THMPRV_ASSERT, L2_THMPRV_ASSUME, L2_THMPRV_INVARIANT, L2_IMPLIES, L2_THMPRV_PRE, L2_THMPRV_POST, L2_THMPRV_LET, L2_THMPRV_ATTR, L2_THMPRV_FORALL, L2_THMPRV_EXISTS, L2_THMPRV_PURE_ASSERT, L2_THMPRV_BIND, L2_THMPRV_DECL, L2_THMPRV_PREDICATE, L2_NUM_TYPES }; // yield name as above from the int value char const *l2Tok2String(Lexer2TokenType type); // represent a unit of input to the parser class Lexer2Token { public: // kind of token Lexer2TokenType type; // semantic value; 'sval' is essentially an owner until the parser // reads it, and then it's a serf; so in fact it's treated as a serf // throughout, with corresponding leak potential union { int intValue; // for L2_INT_LITERALs // this is an owner pointer.. I'll fix this when I overhaul L2 float *floatValue; // for L2_FLOAT_LITERALs char charValue; // for L2_CHAR_LITERALs StringRef strValue; // for L2_NAMEs and L2_STRING_LITERALs; refers to Lexer2::idTable SemanticValue sval; // union with above means we can extract from this }; // TODO: handle strings with embedded nulls // where token appears, or where macro reference which produced it appears SourceLoc loc; // macro definition that produced this token, or NULL Lexer1Token *sourceMacro; // (serf) public: Lexer2Token(Lexer2TokenType type, SourceLoc loc); ~Lexer2Token(); // debugging void print() const; string toString(bool asSexp=false) const; string toStringType(bool asSexp, Lexer2TokenType type) const; string unparseString() const; // return the source text that generated this token }; // lexing state class Lexer2 : public LexerInterface { private: // locally-created string table, if we're not given one explicitly StringTable *myIdTable; public: // language options CCLang ⟨ // storage of all the identifiers we encounter StringTable &idTable; // output token stream ObjList tokens; // for appending new tokens ObjListMutator tokensMut; // for reading the token stream ObjListIter currentToken; private: // copy from currentToken to LexerInterface fields void copyFields(); // shared piece of ctor void init(); public: Lexer2(CCLang &lang); // table is created locally Lexer2(CCLang &lang, StringTable &externalTable); // table given externally ~Lexer2(); SourceLoc startLoc() const; void addToken(Lexer2Token *tok) { tokensMut.append(tok); } void addEOFToken() { addToken(new Lexer2Token(L2_EOF, SL_UNKNOWN)); } // reset the 'currentToken' so the parser can begin reading tokens void beginReading(); // get next token static void nextToken(Lexer2 *ths); // LexerInterface functions virtual NextTokenFunc getTokenFunc() const; virtual string tokenDesc() const; virtual string tokenKindDesc(int kind) const; }; // interface to 2nd phase lexical analysis // (will change; for now I'm only going to process single files) void lexer2_lex(Lexer2 &dest, Lexer1 const &src, char const *fname); // parser's interface to lexer2 (experimental) extern Lexer2Token const *yylval; // semantic value for returned token, or NULL for L2_EOF extern "C" { Lexer2TokenType lexer2_gettoken(); } #endif // __LEXER2_H