// gramlex.h see license.txt for copyright and terms of use // GrammarLexer: a c++ lexer class for use with Flex's generated c++ scanner // this lexer class is used both for parsing both AST and grammar descriptions; // they differ in their .lex description, but their lexing state is the same #ifndef __GRAMLEX_H #define __GRAMLEX_H // This included file is part of the Flex distribution. It is // installed in /usr/include on my Linux machine. By including it, we // get the declaration of the yyFlexLexer class. Note that the file // that flex generates, gramlex.yy.cc, also #includes this file. // Perhaps also worth mentioning: I'm developing this with flex 2.5.4. // // update: This approach was too problematic. I've taken to distributing // FlexLexer.h myself. #include "sm_flexlexer.h" // yyFlexLexer #include // istream // token code definitions #define TOK_EOF 0 // better name #define TOK_INCLUDE 1 // not seen by parser // other includes #include "str.h" // string #include "objlist.h" // ObjList #include "srcloc.h" // SourceLoc #include "embedded.h" // EmbeddedLang #include "strtable.h" // StringTable, StringRef // this class just holds the lexer state so it is properly encapsulated // (and therefore, among other things, re-entrant) class GrammarLexer : public yyFlexLexer, public ReportError { public: // types enum Constants { lexBufferSize = 4096, // size of new lex buffers }; // return true if the given token code is one of those representing // embedded text typedef bool (*isEmbedTok)(int tokCode); // error reporter that uses fileState instead of tokenStartLoc class AltReportError : public ReportError { GrammarLexer &lexer; public: AltReportError(GrammarLexer &L) : lexer(L) {} virtual void reportError(rostring msg); virtual void reportWarning(rostring msg); }; friend class AltReportError; public: // data // exposed so a user-provided 'embedded' can use it AltReportError altReporter; private: // data // state of a file we were or are lexing struct FileState { SourceLoc loc; // location in the file istream *source; // (owner?) source stream yy_buffer_state *bufstate; // (owner?) flex's internal buffer state public: FileState(rostring filename, istream *source); ~FileState(); FileState(FileState const &obj); FileState& operator= (FileState const &obj); }; FileState fileState; // state for file we're lexing now ObjList fileStack; // stack of files we will return to SourceLoc tokenStartLoc; // location of start of current token // support for embedded code char embedStart; // if nonzero, punctuation that triggers // embedded processing char embedFinish; // which character ends the embedded section int embedMode; // TOK_FUNDECL_BODY or TOK_FUN_BODY EmbeddedLang *embedded; // (owner) the processor isEmbedTok embedTokTest; // for printing diagnostics bool allowInit; // true if embedded can have an initializer int prevState; // so /**/ doesn't change start state int prevToken; // last token code yielded (ugly hack) public: // data // todo: can eliminate commentStartLine in favor of tokenStartLoc? //int commentStartLine; // for reporting unterminated C comments int integerLiteral; // to store number literal value StringRef stringLiteral; // string in quotes, minus the quotes StringRef includeFileName; // name in an #include directive // defined in the base class, FlexLexer: // const char *YYText(); // start of matched text // int YYLeng(); // number of matched characters StringTable &strtable; // string table // count of errors encountered int errors; private: // funcs // disallowed GrammarLexer(GrammarLexer const &); // called to advance the column count void advCol(int n) { fileState.loc = sourceLocManager->advCol(fileState.loc, n); } // called when a newline is encountered void newLine() { fileState.loc = sourceLocManager->advLine(fileState.loc); } // adds a string with only the specified # of chars; writes (but // then restores) a null terminator if necessary, so 'str' isn't const StringRef addString(char *str, int len) const; // nominally true if 'ch' equals 'embedFinish', but with a niggle bool embedFinishMatches(char ch) const; public: // funcs // create a new lexer that will read from to named stream, // or stdin if it is NULL GrammarLexer(isEmbedTok embedTokTest, StringTable &strtable, char const *fname = "", istream * /*owner*/ source = NULL, EmbeddedLang * /*owner*/ embedded = NULL /*i.e. assume C lexics*/); // clean up ~GrammarLexer(); // get current token as a string StringRef curToken() const; int curLen() const { return const_cast(this)->YYLeng(); } // current token's embedded text StringRef curFuncBody() const; StringRef curDeclBody() const { return curFuncBody(); } // implementation artifact StringRef curDeclName() const; // read the next token and return its code; returns TOK_EOF for end of file; // this function is defined in flex's output source code; this one // *does* return TOK_INCLUDE virtual int yylex(); // similar to yylex, but process TOK_INCLUDE internally int yylexInc(); // begin an embedded sequence void beginEmbed(char finish, int mode, int initNest = 0) { embedded->reset(initNest); embedFinish = finish; embedMode = mode; } // info about location of current token char const *curFname() const { return sourceLocManager->getFile(tokenStartLoc); } int curLine() const { return sourceLocManager->getLine(tokenStartLoc); } int curCol() const { return sourceLocManager->getCol(tokenStartLoc); } SourceLoc curLoc() const { return tokenStartLoc; } string curLocStr() const; // string with file/line/col // error reporting; called by the lexer code void err(rostring msg) { reportError(msg); } // msg should not include a newline void errorUnterminatedComment(); void errorMalformedInclude(); void errorIllegalCharacter(char ch); void printError(SourceLoc loc, rostring msg); void printWarning(SourceLoc loc, rostring msg); // for processing includes void recursivelyProcess(rostring fname, istream * /*owner*/ source); void popRecursiveFile(); bool hasPendingFiles() const; // ReportError funcs virtual void reportError(rostring msg); virtual void reportWarning(rostring msg); }; #endif // __GRAMLEX_H