#ifndef lexer_h
#define lexer_h

#include <cstdlib>
#include <exception>
#include <functional>
#include <set>
#include <string>
#include <typeinfo>
#include <vector>

namespace parser
{
    struct node_t;
    struct lexer_t;

    /* All parse errors result in this class being thrown. */
    struct error_t : std::exception
    {
        error_t( lexer_t& lexer, const std::string& text_, bool dont_call_peeknext=false);
        error_t( lexer_t& lexer, const parser::node_t& node, bool dont_call_peeknext=false);
        virtual ~error_t() throw() {}
        virtual const char* what() const throw();

    private:
        std::string text;
    };

    /* Loads the specified file into a string, and returns this string by
    value. Might be better to take a ref to a string to avoid the copy. */
    std::string LoadFile( const char* filename);

    struct lexer_mark_t;

    struct lexer_t
    {
        /* The `show_*' parameters affect how much detail is included in
        parse errors. See implementation of AddPosInfo in lexer.cpp for the
        details. */
        explicit lexer_t(
                const std::string&  filename,
                const char*         text,   // not copied.
                bool                autoblocks=false,
                bool                verbose=false,
                bool                show_failed_terminals=false,
                bool                show_all_errorpos=false,
                bool                show_startpos0=false,
                bool                no_cplusplus0=false
                );

        explicit lexer_t(
                const std::string&  filename,
                bool                autoblocks=false,
                bool                verbose=false,
                bool                show_failed_terminals=false,
                bool                show_all_errorpos=false,
                bool                show_startpos0=false,
                bool                no_cplusplus0=false
                );

        friend struct lexer_mark_t;

        void Backtrack( const lexer_mark_t& lexmark);

        // The next two never return NULL. They return a pointer to a node on
        // the heap, allocated with new.
        node_t* GetNext();      // Returns next token, and consumes it.
        node_t* PeekNext();     // Returns the next token, and leaves lexer in current state.

        node_t* PeekNext0();    // Internal use only, to allow extra indirection for autoblocks.

        void Skip( const node_t* node);
        // Checks that node is same as PeekNext() (only in debug builds), and
        // consumes it.

        const char* GetPos() const;
        // Returns pointer to current position in input text.

        int GetLineNumber( const char* p=NULL) const;
        // Returns line number in input. Takes #line directives into account.

        int GetColumnNumber( const char* p=NULL) const;
        // Returns column number in input.

        /* If p is NULL, then the current position is used. The above two fns
        work only when p is after the last-seen #line directive and before
        any subsequent #line directive. Otherwise they return incorrect
        info. */

        std::string GetLine( const char* p=NULL) const;
        // Returns (by value) a copy of the line containing the current
        // position.

        const std::string& GetFilename() const;
        // Returns filename of current position in input. Takes #line
        // directives into account.

        const char* GetLinePos( const char* p=NULL) const;

        struct BracketNesting
        {
            BracketNesting( const char* text, const char* pos, const std::type_info& closetype);
            const std::type_info*   closetype;
            const char*             pos;
            int                     indentation;
        };

        bool    show_failed_terminals;
        bool    show_all_errorpos;
        bool    show_startpos;
        bool    check_indentation;
        bool    autoblocks;
        int     current_indentation;    // only used if autoblocks is true.
        std::vector< BracketNesting>    indentation_nesting;

        struct FailedInfo
        {
            const std::type_info*   type;           // type that was expected.
            const char*             description;    // description of type that was expected.
            const char*             backtrackpos;   // where lexer would backtrack to if this parse failed.
                                                    // only makes sense if a lexer_mark_t is active.
            const char*             pos;            // position in input where failure occurred.

            FailedInfo(
                    const std::type_info&   type0,
                    const char*             description0,
                    const char*             pos0,
                    const char*             backtrackpos0
                    );
        };

        /* Strict weak ordering for FailedInfo: by backtrackpos, then pos,
        then address of the expected type's type_info.

        std::less is used rather than the built-in `<' because the built-in
        relational operators give an unspecified result for pointers that do
        not point into the same object (distinct type_info objects, or a
        position compared against a null pointer), whereas std::less
        guarantees a total order over pointers. */
        friend inline bool operator < ( const FailedInfo& a, const FailedInfo& b)
        {
            if ( a.backtrackpos != b.backtrackpos)
                return std::less< const char*>()( a.backtrackpos, b.backtrackpos);
            if ( a.pos != b.pos)
                return std::less< const char*>()( a.pos, b.pos);
            return std::less< const std::type_info*>()( a.type, b.type);
        }

        typedef std::set< FailedInfo>   FailedTerminals;

        const FailedTerminals& GetFailedTerminals() const { return this->failed; }
        /* Whenever the parser calls TryPeek<>/TryGet<> and the next token
        doesn't match the requested type, a FailedInfo can be created, which
        encapsulates information about the failed type, and the position at
        which it wasn't found. this->failed contains all of these
        FailedInfo's, ordered by operator< above (backtrack position first,
        then position in the lexer's input text).

        This information is used by AddPosInfo in lexer.cpp to generate very
        detailed information about what made a parse fail. See `make test46',
        which runs cmm on deliberately incorrect syntax with different
        error-generating levels. */

        void AddFailed( const std::type_info& ti, const char* description);
        void ClearFailed();

    private:
        //lexer_t( const lexer_t& rhs);
        //lexer_t& operator = ( const lexer_t& rhs);
        // does shallow copy. should probably not use operator= here.

    private:
        void Init(
                const std::string&  filename_,
                const char*         text_,
                bool                autoblocks_,
                bool                verbose_,
                bool                show_failed_terminals0,
                bool                show_all_errorpos0,
                bool                show_startpos0,
                bool                no_cplusplus
                );

        const char*     pos;    // Current position in text.
        const char*     text;   // Pointer to start of (zero terminated) text.
        const char*     end;    // end of text.
        std::string     text_we_own;
        /* this->text points to this if we own the text.
        NOTE(review): an earlier comment here said a shared ptr was used so
        that copying lexer_t is fast, but this member is a plain std::string
        and the copy constructor below aborts - confirm and drop if stale. */

        const char*     nextpos;    // where to increment when GetNext() is called.
        std::string     filename;

    public:
        bool            verbose;

    private:
        friend node_t* peeknext_internal( lexer_t& lexer);

        std::vector< const char*>   backtrackpositions;

        // The next two identify the most recent #line directive. Initially
        // linechar0 points to start of text and line0 is zero.
        const char*     linechar0;
        int             line0;

        node_t*         peeked;
        // Most recent node returned. Cached here so that multiple calls to
        // PeekNext() are fast.

        bool            no_cplusplus;   // if true, lexer doesn't look for C++ keywords.

        FailedTerminals failed;

        #ifdef NDEBUG
            inline void Check() const {}
        #else
            void Check() const;     // does simple assertions on state of lexer.
        #endif

        /* Copying a lexer_t is not supported: the copy constructor is private
        and aborts the program if it is ever reached (e.g. from a member or
        friend). Declared without the `lexer_t::' qualifier, which is
        ill-formed on an in-class member declaration. */
        lexer_t( const lexer_t&) { std::abort(); }
    };

    struct lexer_mark_t
    {
        explicit lexer_mark_t( lexer_t& lexer);
        // stores pos of lexer.

        void Backtrack();
        // restores lexer to original state when the lexer_mark_t was created.
        // error if called more than once.

        ~lexer_mark_t();
        // if Backtrack hasn't been called, removes all failed terminals since
        // lexer_mark_t was made. Else does nothing.

    private:
        lexer_t&    lexer;
        //lexer_t   lexer0; // wasteful to have whole copy of lexer here.
        bool        have_backtracked;

        // Fields mirroring the lexer_t members of the same names.
        bool        show_failed_terminals;
        bool        show_all_errorpos;
        bool        show_startpos;
        bool        check_indentation;
        bool        autoblocks;
        int         current_indentation;
        std::vector< lexer_t::BracketNesting>   indentation_nesting;
        const char* pos;
        const char* text;
        const char* end;
        const char* nextpos;
        bool        verbose;
        std::vector< const char*>   backtrackpositions;
        const char* linechar0;
        int         line0;
        node_t*     peeked;
        bool        no_cplusplus;

    private:
        // Not copyable: declared private and never defined.
        lexer_mark_t& operator = ( const lexer_mark_t&);
        lexer_mark_t( const lexer_mark_t&);
    };

    /* If the next token is of type T, returns the next token (without
    consuming it), else records the failure with lexer.AddFailed() and
    returns NULL. T must provide a static static_description(). */
    template< class T>
    T* TryPeek( lexer_t& lexer)
    {
        T* ret = dynamic_cast< T*>( lexer.PeekNext());
        if ( !ret) lexer.AddFailed( typeid( T), T::static_description());
        return ret;
    }

    /* Like TryPeek<T>(), but also consumes the token when it matches.
    Returns NULL, leaving the token unconsumed, when it does not. */
    template< class T>
    T* TryGet( lexer_t& lexer)
    {
        T* ret = TryPeek< T>( lexer);
        if ( !ret) return NULL;
        lexer.GetNext();
        return ret;
    }

    /* Like TryGet<T>(), but throws error_t instead of returning NULL when
    the next token is not of type T. */
    template< class T>
    T* Get( lexer_t& lexer)
    {
        T* ret = TryPeek< T>( lexer);
        if ( !ret)
            throw error_t(
                    lexer,
                    std::string( "Parse error - expecting `") + T::static_description() + "'"
                    );
        lexer.GetNext();
        return ret;
    }
}

#endif