#ifdef WIN32
#pragma warning(disable: 4786) // Stop warnings about truncated identifiers in debug info
#endif

// NOTE(review): the next five #include directives have no header name in this
// copy of the file - the names appear to have been lost and must be restored
// from the original source.
#include
#include
#include
#include
#include
#include "lexer.h"
#include "parser.h"
#include "exceptionstream.h"
#include "debug.h"

// (1) turns debug output off, (0) turns debug output on.
//#define debug (0) ? (void)std::cerr : std::cerr

using namespace parser;

/* Records one failed terminal: its type, a description (the empty string is
stored when description0 is null), the position at which matching failed and
the backtrack position that was current at that time. Asserts that the failure
position is not before the backtrack position. */
parser::lexer_t::FailedInfo::FailedInfo( const std::type_info& type0, const char* description0, const char* pos0, const char* backtrackpos0)
:
    type( &type0),
    description( (description0) ? description0 : ""),
    backtrackpos( backtrackpos0),
    pos( pos0)
{
    //debug << "Created FailedInfo: backtrackpos=" << ((void*)backtrackpos) << ", pos=" << ((void*)pos) << "\n";
    assert( pos>=backtrackpos);
}

namespace
{
    std::ostream& OutputLinePos( std::ostream& out, int column, const std::string& line)
    /* Prints a `^' in column `column', taking care to match the tabs in `line'.
    Tabs are assumed to align to the next 8-character column. */
    {
        if ( column > (int) line.size())
        {
            // A column beyond the end of the line is reported on std::cerr and clamped.
            std::cerr << "** correcting internal error - column " << column << " in line of length " << line.size() << "\n";
            column = line.size();
        }
        // NOTE(review): text is missing from the next line in this copy of the
        // file. The rest of OutputLinePos and the beginning of the following
        // function (the one that writes position information for `lexer' to
        // `out') are lost; the line is kept exactly as found.
        for ( int i=0, col=0; idescription() << "'\n"; }
        else
        {
            if ( lexer.GetFailedTerminals().empty()) out << "No known failed terminals";
            else
            {
                // State remembered from the previous iteration so that a line, a
                // start position or a caret is only printed again when it changes.
                const char* last_error_pos = NULL;
                const char* last_linestart = NULL;
                const char* last_backtrackpos = NULL;
                std::string line;
                lexer_t::FailedTerminals::const_iterator it;
                if ( lexer.show_all_errorpos)
                {
                    it = lexer.GetFailedTerminals().begin();
                }
                else
                {
                    /* We show only the last of failed terminals that have same pos.
                    Probably a way of doing this using algorithms, but I can't be
                    bothered writing all that mem_fn_ref stuff...
                    */
                    // After this loop `it' refers to the first entry of the final
                    // run of entries that share one position.
                    const char* lastpos = NULL;
                    for ( lexer_t::FailedTerminals::const_iterator it2 = lexer.GetFailedTerminals().begin(); it2!=lexer.GetFailedTerminals().end(); ++it2)
                    {
                        if ( it2->pos != lastpos)
                        {
                            lastpos = it2->pos;
                            it = it2;
                        }
                    }
                }
                for ( /**/; it!=lexer.GetFailedTerminals().end(); ++it)
                {
                    const char* backtrackpos = it->backtrackpos;
                    const char* linestart = lexer.GetLinePos( it->pos);
                    if ( backtrackpos != last_backtrackpos || it==lexer.GetFailedTerminals().begin())
                    {
                        //const char* backtrackpos_linestart = lexer.GetLinePos( backtrackpos);
                        std::string backtrackpos_line = lexer.GetLine( backtrackpos);
                        if ( lexer.show_startpos)
                        {
                            out << "\nstarting at:\n" << backtrackpos_line << "\n";
                            //debug << __FILE__ << ":" << __LINE__ << ", linepos=" << backtrackpos-linestart << ", it->backtracepos=" << ((void*)it->backtrackpos) << ", it->pos=" << ((void*)it->pos) << "\n";
                            // NOTE(review): the column is measured from the start of the
                            // line holding it->pos, not the line holding backtrackpos;
                            // this looks wrong when the two are on different lines - confirm.
                            OutputLinePos( out, backtrackpos-linestart, backtrackpos_line);
                        }
                        last_backtrackpos = backtrackpos;
                    }
                    if ( linestart != last_linestart || it==lexer.GetFailedTerminals().begin())
                    {
                        line = lexer.GetLine( it->pos);
                        if ( lexer.show_startpos)
                        {
                            out << "\nat:\n";
                        }
                        out << line;
                        last_linestart = linestart;
                        // should probably output intervening lines too if errors are long way apart.
} if ( it->pos != last_error_pos) { out << "\n"; //debug << __FILE__ << ":" << __LINE__ << ", linepos=" << it->pos - linestart << "\n"; OutputLinePos( out, it->pos - linestart, line); last_error_pos = it->pos; } if ( lexer.show_failed_terminals) { out << " `" << it->description << "'"; } } } } return out; } } namespace parser { error_t::error_t( lexer_t& lexer, const std::string& text_, bool dont_call_peeknext) : text() { //debug << "error_t::error_t:\n"; //debug_output_backtraces(); std::stringstream ss; AddPosInfo( ss, lexer, text_, dont_call_peeknext); ss << "\n"; #ifndef NDEBUG ss << "Backtrace is:\n"; debug_output_backtraces( ss); #endif this->text = ss.str(); } error_t::error_t( lexer_t& lexer, const parser::node_t& node, bool dont_call_peeknext) : text( NULL) { //debug << "error_t::error_t:\n"; //debug_output_backtraces(); std::stringstream ss; std::string message = "Can't handle node: " + node.output_tostring(); AddPosInfo( ss, lexer, message, dont_call_peeknext); this->text = ss.str(); } const char* error_t::what() const throw() { return this->text.c_str(); } } namespace { void EatWhite( const char*& text) { while ( *text && isspace( *text)) ++text; } bool IsAl_( char c) { return c=='_' || isalpha( c); } bool IsAlNum_( char c) { return c=='_' || isalnum( c); } inline int StartsWith( const char* text, const char* first) /* if_t text starts with the complete string 'first', returns length of first. Else returns 0. Eg. StartsWith( "abcdefghijk", "abc") returns 3. */ { //Profile::HierScope prof( "StartsWith"); int len; for ( len=0; *text==*first && *text; ++text, ++first, ++len) {} if ( *first==0) return len; else return 0; } bool Match( const char*& text, const char* first) /* if_t text starts with first, increments text by length of first and returns true. Else returns false with text unchanged. Eg. char* text="abcdefghijklm"; char* first="abc"; Match( text, first) increments text by 3, and returns true. 
*/
    {
        // StartsWith() gives 0 on a mismatch, so text only moves on success.
        int len = StartsWith( text, first);
        text += len;
        return (len>0) ? true : false;
    }

    bool MatchAlNum_( const char*& text, const char* name)
    /* As Match, but only succeeds if next character in text is not alphanumeric
    or '_'. Used for matching identifiers: eg
        text="abcdef.foo", name="abcdef" - succeeds
        text="abcdef.foo", name="abc" - fails. */
    {
        int len = StartsWith( text, name);
        if ( len && !IsAlNum_( text[len]))
        {
            text += len;
            return true;
        }
        return false;
    }

    keyword_t* GetAlNumKeyword( const char*& text, bool no_cplusplus)
    /* Looks for an alphanumeric keyword that matches the start of text, with the
    following character being non-alphanumeric_. If one is found, text is
    incremented to point to next unused character and a newly allocated
    keyword_<name> object constructed from (begin, text) is returned; otherwise
    NULL is returned and text is unchanged. When no_cplusplus is true the
    keywords in the second group below are not recognised. */
    {
        if ( text[0]==0) return NULL;
        const char* begin = text;
        // f( struct) expands to:
        //   if ( MatchAlNum_( text, "struct")) return new keyword_struct( begin, text);
        #define JOIN( a, b) a ## b
        #define g( id, name) if ( MatchAlNum_( text, name)) return new JOIN( keyword_, id)( begin, text);
        #define f( name) g( name, #name)
        // This declaration is never called here. NOTE(review): presumably a marker
        // read by another tool - confirm.
        extern void cmm_pragma_detailedparse_off();
        {
            f( struct)
            f( private)
            f( enum)
            f( for)
            f( while)
            f( const)
            f( volatile)
            f( char)
            f( int)
            f( double)
            f( float)
            f( long)
            f( short)
            f( typedef)
            f( inline)
            f( extern)
            f( signed)
            f( unsigned)
            f( static)
            f( union)
            f( return)
            f( if)
            f( else)
            f( switch)
            f( case)
            f( default)
            f( do)
            f( mutable)
            f( register)
            f( void)
            f( asm)
            f( sizeof)
            if (!no_cplusplus)
            {
                f( bool)
                f( class)
                f( protected)
                f( public)
                f( virtual)
                f( operator)
                f( namespace)
                f( template)
                f( typename)
                f( using)
                f( new)
                f( throw)
                f( delete)
                f( friend)
                f( explicit)
                f( catch)
                f( try)
            }
        }
        extern void cmm_pragma_detailedparse_on();
        #undef f
        #undef g
        #undef JOIN
        return NULL;
    }

    keyword_t* GetNonAlNumKeyword( const char*& text)
    /* Looks for a non-alphanumeric keyword that matches start of text. If one is
    found, text is incremented to point to next unused character.
    A newly allocated keyword object constructed from (begin, text) is returned
    on success, NULL otherwise. The candidates are tried strictly in the order
    written below and the first one that matches wins, so a keyword must be
    listed before any other keyword that is a prefix of it.
*/
    {
        if ( text[0]==0) return NULL;
        const char* begin = text;
        // k( T) expands to: if ( Match( text, T::name_t())) return new T( begin, text);
        #define k( type) if ( Match( text, type::name_t())) return new type( begin, text);
        #define JOIN( a, b) a ## b
        // This declaration is never called here. NOTE(review): presumably a marker
        // read by another tool - confirm.
        extern void cmm_pragma_detailedparse_off();
        {
            k( keyword_COLONCOLON);
            k( keyword_COLON);
            k( keyword_OPENROUND);
            k( keyword_CLOSEROUND);
            k( keyword_OPENSQUARE);
            k( keyword_CLOSESQUARE);
            k( keyword_OPENCURLY);
            k( keyword_CLOSECURLY);
            k( keyword_QUESTION);
            k( keyword_SEMICOLON);
            k( keyword_COMMA);
            k( keyword_DOTDOTDOT);
            k( keyword_DOT);
            k( keyword_ARROW);
            // NOTE(review): if keyword_AMPERSAND's text is "&" it is tried before the
            // keyword_AMPAMP / keyword_AMP entries below and would shadow them -
            // verify against the keyword definitions.
            k( keyword_AMPERSAND);
            // Next few keywords can have a '=' after them. Have to look for '*=' before '*' for example:
            // keq( X) tries XEQ first and then X.
            #define keq( name)\
                k( JOIN( name, EQ))\
                k( name)
            keq( keyword_GTGT);
            keq( keyword_GT);
            keq( keyword_LTLT);
            keq( keyword_LT);
            keq( keyword_TILDE);
            keq( keyword_EQ);
            keq( keyword_NOT);
            keq( keyword_AMPAMP);
            keq( keyword_AMP);
            keq( keyword_AND);
            keq( keyword_OROR);
            keq( keyword_OR);
            keq( keyword_XOR);
            keq( keyword_DIV);
            keq( keyword_STAR);
            keq( keyword_PLUSPLUS);
            keq( keyword_MINUSMINUS);
            keq( keyword_PLUS);
            keq( keyword_MINUS);
            keq( keyword_PERCENT);
        }
        extern void cmm_pragma_detailedparse_on();
        #undef keq
        #undef k
        #undef JOIN
        // k0 is not defined anywhere in this function; undefining it has no effect.
        #undef k0
        return NULL;
    }

    /* Returns a pointer to the first character of the line that contains pos:
    the character after the nearest '\n' found strictly before pos, or start if
    there is none. start must not be after pos (asserted). */
    const char* GetLineStart( const char* start, const char* pos)
    {
        const char* ret;
        assert( pos>=start);
        if ( pos==start) return start;
        // Scan backwards from the character just before pos.
        for ( const char* s=pos-1; ; --s)
        {
            if ( *s=='\n')
            {
                ret = s+1;
                break;
            }
            if ( s==start)
            {
                ret = start;
                break;
            }
        }
        return ret;
    }

    int GetIndentation( const char* start, const char* pos)
    /* we assume tabs are to multiples of 4 characters.
*/ { const char* s = GetLineStart( start, pos); for ( int i=0, indentation=0; ; ++i) { if ( s[i]==0 || !isspace( s[i])) return indentation; if ( s[i]=='\t') indentation = ( indentation + 5) / 4 * 4; else indentation += 1; } } bool IsFirstNonSpaceOnLine( const char* start, const char* pos) { if ( pos==start) return true; assert( pos > start); for ( --pos; ; --pos) { if ( pos==start || *pos=='\n') return true; if ( !isspace( *pos)) return false; } } } namespace parser { std::string LoadFile( const char* filename) { if ( 0==strcmp( filename, "-")) { //in3 << std::cin.rdbuf(); std::string ret; for(;;) { int c = getchar(); if ( c==EOF) break; ret += c; } return ret; } else { /*`in2 << in.rdbuf()' hangs with VC++'s STL, and takes ages in some implementations of stringstream, so we always do it the long way: */ #if 1 FILE* in2 = fopen( filename, "r"); if ( !in2) throw exception_stream() << "Can't open `" << filename << "' for reading"; std::string ret; for( unsigned int i=0;; ++i) { int c = getc( in2); //if ( (i%100000)==0) std::cerr << i << ": " << c << "\n"; if ( c==EOF) break; /* Have to override std::string's allocator strategy to be like std::vector, otherwise it's painfully slow for large input files. 
*/ if ( ret.capacity() <= i) ret.reserve( ret.capacity() * 2); ret += static_cast< char>( c); } return ret; #else /*std::ifstream in( filename); if ( !in) throw exception_stream() << "Can't open `" << filename << "' for reading"; std::stringstream in2; in2 << in.rdbuf(); return in2.str();*/ #endif } } void lexer_t::Init( const std::string& filename_, const char* text_, bool autoblocks_, bool verbose_, bool show_failed_terminals0, bool show_all_errorpos0, bool show_startpos0, bool no_cplusplus0 ) { show_failed_terminals = show_failed_terminals0; show_all_errorpos = show_all_errorpos0; show_startpos = show_startpos0; check_indentation = false; autoblocks = autoblocks_; current_indentation = 0; pos = text_; text = text_; end = text_+strlen( text_); nextpos = NULL; filename = filename_; verbose = verbose_; linechar0 = text_; line0 = 1; peeked = NULL; no_cplusplus = no_cplusplus0; this->backtrackpositions.push_back( this->text); } lexer_t::lexer_t( const std::string& filename_, const char* text_, bool autoblocks_, bool verbose_, bool show_failed_terminals0, bool show_all_errorpos0, bool show_startpos0, bool no_cplusplus0 ) : show_failed_terminals( false), show_all_errorpos( false), show_startpos( false), check_indentation( false), autoblocks( false), current_indentation( 0), indentation_nesting(), pos( NULL), text( NULL), end( NULL), text_we_own(), nextpos( NULL), filename(), verbose( false), backtrackpositions(), linechar0( NULL), line0( 0), peeked( NULL), no_cplusplus( false), failed() { Init( filename_, text_, autoblocks_, verbose_, show_failed_terminals0, show_all_errorpos0, show_startpos0, no_cplusplus0); } lexer_t::lexer_t( const std::string& filename_, bool autoblocks_, bool verbose_, bool show_failed_terminals0, bool show_all_errorpos0, bool show_startpos0, bool no_cplusplus0 ) : show_failed_terminals( false), show_all_errorpos( false), show_startpos( false), check_indentation( false), autoblocks( false), current_indentation( 0), indentation_nesting(), pos( 
NULL),
        text( NULL),
        end( NULL),
        text_we_own( LoadFile( filename_.c_str())),     // the file's contents, kept alive by the lexer
        nextpos( NULL),
        filename(),
        verbose( false),
        backtrackpositions(),
        linechar0( NULL),
        line0( 0),
        peeked( NULL),
        no_cplusplus( false),
        failed()
    {
        // The lexer scans the buffer held by text_we_own.
        Init( filename_, text_we_own.c_str(), autoblocks_, verbose_, show_failed_terminals0, show_all_errorpos0, show_startpos0, no_cplusplus0);
    }

    /* Asserts the lexer's pointer invariants: pos and linechar0 lie within
    [text, end], pos is not before linechar0, and nextpos is either NULL or
    within [text, end]. Has no effect when assert is compiled out. */
    void lexer_t::Check() const
    {
        assert( this->pos >= this->text);
        assert( this->pos <= end);
        assert( this->linechar0 >= this->text);
        assert( this->linechar0 <= end);
        assert( this->pos >= this->linechar0);
        assert( this->nextpos==NULL || (this->nextpos>=this->text && this->nextpos<=this->end));
    }

    /* A null p means the current position. */
    int lexer_t::GetLineNumber( const char* p) const
    {
        if ( !p) p = this->pos;
        Check();
        int l = line0;
        // NOTE(review): text is missing from the next line in this copy of the
        // file. The remainder of GetLineNumber and the beginning of the function
        // that follows it (one that returns the offset of p from the start of its
        // line) are lost; the line is kept exactly as found.
        for ( const char* c=this->linechar0; cpos;
        Check();
        return p - GetLineStart( this->text, p);
    }

    /* Returns the stored filename. PeekNext() replaces it when it reads a
    line directive that names a file. */
    const std::string& lexer_t::GetFilename() const
    {
        Check();
        return this->filename;
    }

    /* Returns a pointer to the start of the line containing p; a null p means
    the current position. */
    const char* lexer_t::GetLinePos( const char* p) const
    {
        if ( !p) p = this->pos;
        Check();
        return GetLineStart( this->text, p);
    }

    /* Returns a copy of the line containing p, without its '\n'. A null p is
    passed on to GetLinePos(), which then uses the current position. */
    std::string lexer_t::GetLine( const char* p) const
    {
        Check();
        const char* s = this->GetLinePos( p);
        std::string ss;
        for ( ; *s && *s!='\n'; ++s) ss += *s;
        //debug << "GetLine returning " << ss << "\n";
        return ss;
    }

    /* Returns the current position. Asserts that it is at the very start of
    the text, at the terminating 0, or on a non-space character. */
    const char* lexer_t::GetPos() const
    {
        Check();
        assert( this->pos==this->text || *this->pos==0 || !isspace( *this->pos));
        return this->pos;
    }

    /* Records an open bracket found at pos_: the type of the keyword that
    closes it and the indentation of the line it is on (computed with
    GetIndentation( text, pos_)). */
    lexer_t::BracketNesting::BracketNesting( const char* text, const char* pos_, const std::type_info& closetype_)
    :
        closetype( &closetype_),
        pos( pos_),
        indentation( 0)
    {
        this->indentation = GetIndentation( text, pos_);
    }

    /* Returns the next lexical item without consuming it. The item is stored
    in this->peeked, so repeated calls return the same node until it is
    cleared. */
    node_t* lexer_t::PeekNext()
    {
        Check();
        if ( this->peeked) return this->peeked;
        /*static Profile::PNamedStatistics profstats( "PeekNext");
        Profile::PStatScope prof( profstats);*/
        EatWhite( this->pos);

        if ( autoblocks)
        {
            if ( this->verbose)
            {
                // Show up to four characters of what is being looked at.
                char buffer[5]="";
                strncat( buffer, this->pos, 4);
                std::cerr << "Looking at " << buffer << "\n";
            }
            int new_indentation =
GetIndentation( this->text, this->pos); /* we only attempt to invent a '{' or '}' if we are at top level or in a {...} block. So we don't insert '{' or '}' when inside (...) for example. */ if ( new_indentation != this->current_indentation && ( this->indentation_nesting.empty() || *this->indentation_nesting.back().closetype==typeid( keyword_CLOSECURLY)) && ( this->pos[0]!='#')) { if ( new_indentation > this->current_indentation) { if ( this->verbose) std::cerr << "*pos=" << *pos << "*********inserting `{'\n"; this->peeked = new keyword_OPENCURLY; this->peeked->set_text( "{"); current_indentation = new_indentation; } else if ( new_indentation < this->current_indentation && !this->indentation_nesting.empty() && new_indentation < this->indentation_nesting.back().indentation ) { if ( this->verbose) std::cerr << "*pos=" << *pos << "**********inserting `}'\n"; this->peeked = new keyword_CLOSECURLY; /* Ensure that we have a newline between an inserted '}' and a #define. This spoils the line numbering (should be poss to fix), but won't occur very often.*/ if ( *this->pos=='#') this->peeked->set_text( "}\n"); else this->peeked->set_text( "}"); if ( this->indentation_nesting.size()==1) current_indentation = 0; else { assert( this->indentation_nesting.size() >= 2); current_indentation = this->indentation_nesting.end()[-2].indentation; } } else { assert( false); } } } if ( this->peeked) { /* from autoblocks code, so for the next lexical item, we need to start looking from the current position. */ this->nextpos = this->pos; Check(); } else { const char* itemend = this->pos; // modified when item is found, to point to end of item. 
const char* begin = itemend; if ( *itemend==0) { this->peeked = new endoffile_t(); } else if ( begin[0]=='/' && begin[1]=='*') { for (;; ++itemend) { if ( itemend[0]=='*' && itemend[1]=='/') { itemend += 2; break; } if ( !itemend[0]) throw error_t( *this, "Unmatched C-style comment", true); } this->peeked = new c_comment_t; //std::cerr << "Found C comment\n"; } else if ( begin[0]=='/' && begin[1]=='/') { for (;; ++itemend) { if ( itemend[0]=='\n') { ++itemend; break; } if ( !itemend[0]) break; } this->peeked = new cpp_comment_t; //std::cerr << "Found C++ comment\n"; } else if ( begin[0]=='"' || begin[0]=='\'' || ( begin[0]=='L' && ( begin[1]=='"' || begin[1]=='\''))) { char quote = begin[0]; if ( quote!='"' && quote!='\'') { ++itemend; quote=begin[1]; } for(;;) { for ( ++itemend; *itemend!=quote; ++itemend) { if ( *itemend==0) throw error_t( *this, "End of file reached inside string"); if ( *itemend=='\\') ++itemend; } ++itemend; // now points to char after closing quote. assert( itemend[-1] == quote); // look for another string after white space. const char* c=itemend; for ( ; c && isspace( *c); ++c) {} if ( *c==quote) { itemend = c; continue; } else { break; } } this->peeked = new string_t;//( begin, end); } else if ( ( this->peeked = GetAlNumKeyword( itemend, this->no_cplusplus))) { } else if ( ( this->peeked = GetNonAlNumKeyword( itemend))) { } else if ( IsAl_( *itemend)) { for ( ++itemend; *itemend!=0 && IsAlNum_( *itemend); ++itemend) {} this->peeked = new identifier_t; } else if ( StartsWith( itemend, "0x")) { for ( itemend+=2; *itemend!=0 && isxdigit( *itemend); ++itemend) {} for ( ; *itemend!=0 && isalpha( *itemend); ++itemend) {} this->peeked = new number_t; } else if ( isdigit( *itemend)) { // look for digits followed by any characters (e.g. 
1234ul) for ( ++itemend; *itemend!=0 && isdigit( *itemend); ++itemend) {} for ( ; *itemend!=0 && isalpha( *itemend); ++itemend) {} this->peeked = new number_t; } else if ( *begin=='#' && (IsFirstNonSpaceOnLine( this->text, begin))) { /* This handles the following two cases: # line [] [anything else] # [] [anything else] The second format is output by the GNU C Preprocessor. See http://gcc.gnu.org/onlinedocs/cpp_1.html#SEC44 */ Check(); assert( begin <= this->end); assert( this->pos == begin); std::string line = this->GetLine(); //debug << "Looking at preproc line:\n" << line; for ( const char* linestart=begin;;) { itemend = strchr( linestart, '\n'); if ( itemend && itemend>linestart && itemend[-1]=='\\') { // last char on line is back-slash. linestart = itemend + 1; continue; } if (!itemend) itemend = this->end; assert( itemend <= this->end); EatWhite( itemend); assert( itemend <= this->end); break; } /* Make temporary buffer for use by scanf. we're assuming that std::vector's storage is contiguous, which isn't strictly guaranteed (yet). */ std::vector< char> newfilename_buffer( 1+line.size()); char* newfilename = &newfilename_buffer[0]; int n; int linenumber; n = sscanf( line.c_str(), "# line %i \"%s\"", &linenumber, newfilename); if ( !n) n = sscanf( line.c_str(), "# %i \"%s\"", &linenumber, newfilename); //std::cerr << "n=" << n << "\n"; if ( n==0) // not #line - some other preprocessor directive. 
{ hash_t* hash = new hash_t; hash->begin = begin; hash->end = itemend; this->peeked = hash; //debug << "Found hash: " << *hash; } else { hash_line_t* hashline = new hash_line_t; hashline->linenumber = linenumber; this->line0 = linenumber; //std::cerr << "new linenumber: " << this->line0 << "\n"; if ( n>=2) // we read a filename { // replace '\\' by '\' in newfilename const char* from = newfilename; char* to = newfilename; for ( from=newfilename, to=newfilename;; ++from, ++to) { if ( from[0]=='\\' && from[1]=='\\') ++from; *to = *from; if ( *to == 0) break; } this->filename = newfilename; hashline->filename = newfilename; // takes copy of string pointed to by newfilename //std::cerr << "new filename: " << this->filename << "\n"; } this->peeked = hashline; //debug << "Found hashline: " << *this->peeked; } Check(); } else { //throw error_t( *this, "Unrecognised lexer item"); } if ( !this->peeked) throw error_t( *this, "lexer_t error", true); this->peeked->begin = begin; this->peeked->end = itemend; this->nextpos = itemend; Check(); //debug << "Found " << typeid( *this->peeked).name() << ": " << this->peeked->end-this->peeked->begin << " `" << *this->peeked << "'\n"; //std::cerr << "lexer_t now at line " << this->GetLineNumber() << "\n"; if (this->peeked->end <= this->pos && !dynamic_cast< endoffile_t*>( this->peeked)) { std::cerr << "this->peeked->end-this->pos=" << this->peeked->end-this->pos << "\n"; std::cerr << "this->pos-this->text=" << this->pos-this->text << "\n"; throw std::logic_error( "internal_t lexer error: 1. 
end <= pos\n"); } } if ( check_indentation || autoblocks) { if ( false) {} else if ( typeid( *this->peeked)==typeid( keyword_OPENCURLY)) indentation_nesting.push_back( BracketNesting( this->text, this->pos, typeid( keyword_CLOSECURLY))); else if ( typeid( *this->peeked)==typeid( keyword_OPENSQUARE)) indentation_nesting.push_back( BracketNesting( this->text, this->pos, typeid( keyword_CLOSESQUARE))); else if ( typeid( *this->peeked)==typeid( keyword_OPENROUND)) indentation_nesting.push_back( BracketNesting( this->text, this->pos, typeid( keyword_CLOSEROUND))); else if ( typeid( *this->peeked)==typeid( keyword_CLOSECURLY) || typeid( *this->peeked)==typeid( keyword_CLOSESQUARE) || typeid( *this->peeked)==typeid( keyword_CLOSEROUND) ) { if ( check_indentation) { if ( indentation_nesting.size()==0 || typeid( *this->peeked)!=*indentation_nesting.back().closetype) { AddPosInfo( std::cerr, *this, "Unmatched close bracket", true); } int indentation_open = indentation_nesting.back().indentation; int indentation_close = GetIndentation( this->text, this->peeked->begin); if ( indentation_open != indentation_close) { std::cerr << "Warning: mismatched indentation of lines containing open/close brackets:\n"; std::stringstream buffer1; buffer1 << "Line containing opening bracket is indented by " << indentation_open; const char* oldpos = this->pos; this->pos = indentation_nesting.back().pos; AddPosInfo( std::cerr, *this, buffer1.str(), true, false); //std::cerr << "\n"; this->pos = oldpos; std::stringstream buffer2; buffer2 << "Line containing closing bracket is indented by " << indentation_close; AddPosInfo( std::cerr, *this, buffer2.str(), true, false); //std::cerr << "\n"; } } if ( indentation_nesting.size()>0) indentation_nesting.pop_back(); } } Check(); return this->peeked; } node_t* lexer_t::GetNext() { Check(); /*if ( this->verbose) { static clock_t t = 0; if ( clock()-t > 4*CLOCKS_PER_SEC) std::cerr << "lexer_t pos is " << this->GetFilename() << ": " << this->GetLineNumber() 
                    << "\n";
            t = clock();
        }*/
        node_t* node = this->PeekNext();
        if ( this->verbose)
        {
            debug0 << "lexer_t read token " << typeid( *node).name() << "`" << node->output_compact_tostring() << "', end-begin=" << node->end - node->begin << "\n";
        }
        // The node now belongs to the caller; forget it so the next PeekNext() scans afresh.
        this->peeked = NULL;

        if ( node->end <= this->pos && !dynamic_cast< const endoffile_t*>( node))
        {
            if ( autoblocks
                    && ( typeid( *node)==typeid( keyword_OPENCURLY)
                        || typeid( *node)==typeid( keyword_CLOSECURLY)
                        )
                    )
            {
                // this code could be an extra one inserted by the autoblocks code,
                // in which case the node pos is allowed to be outside of the input text.
            }
            else
            {
                std::cerr << typeid( *node).name() << ": " << *node << "\n";
                throw std::logic_error( "internal_t lexer error: 2. end <= pos\n");
            }
        }

        // Move past the item (nextpos was set by PeekNext) and any white space after it.
        Check();
        this->pos = this->nextpos;
        Check();
        EatWhite( this->pos);
        Check();

        if ( hash_line_t* hashline = dynamic_cast< hash_line_t*>( node))
        {
            // A line directive: its end becomes the new value of linechar0
            // (PeekNext has already stored the directive's number in line0).
            this->linechar0 = hashline->end;
        }
        else if ( simplenode_t* simplenode=dynamic_cast< simplenode_t*>( node))
        {
            // add all trailing #... lines or comments to end as hidden node.
            // Note that gcc -O3 seems to generate incorrect code here, so
            //std::cerr << "Found simple node - looking for trailing #line/comments. this->peeked=" << ((void*)this->peeked) << "\n";
            //std::cerr << "this->pos=" << ((void*)this->pos) << ", node->end=" << ((void*)node->end) << "\n";
            //std::cerr << "Next node is " << ((void*) next) << ", type " << typeid( *node).name() << "\n";
            for ( comment_or_hash_t* commentorhash = dynamic_cast< comment_or_hash_t*>( this->PeekNext());
                    commentorhash;
                    commentorhash = dynamic_cast< comment_or_hash_t*>( this->PeekNext())
                    )
            {
                //debug << "Found trailing coment/hash: " << *commentorhash << "\n";
                // `extra' spans from the first trailing item to the last one seen so far.
                if ( !simplenode->extra)
                {
                    simplenode->extra = new misc_t;
                    simplenode->extra->begin = commentorhash->begin;
                }
                simplenode->extra->end = commentorhash->end;
                //std::cerr << "found trailing # or comment: " << *simplenode_t->extra;
                // Consume the trailing item, exactly as was done for `node' above.
                // NOTE(review): the consumed node is not deleted here and no other
                // pointer to it is kept in this function - possible leak, confirm
                // who owns lexer nodes.
                this->peeked = NULL;
                //this->pos = commentorhash->end;
                Check();
                this->pos = this->nextpos;
                Check();
                EatWhite( this->pos);
                Check();
                if ( hash_line_t* trailing_hashline = dynamic_cast< hash_line_t*>( commentorhash))
                {
                    this->linechar0 = trailing_hashline->end;
                }
            }
            if ( simplenode->extra)
            {
                //debug << "FOund extra " << (this->pos-node->end) << ", total node is: `" << *node << "'";
            }
        }
        else
        {
            // The call is kept although its result is only tested: it leaves the
            // following item in this->peeked.
            if ( dynamic_cast< comment_or_hash_t*>( this->PeekNext()))
            {
                //debug << "Can't handle comment/hash after node: " << typeid( *node).name() << ": " << *node << "\n";
            }
        }
        //debug << "lexer_t returning token `" << *node << "'\n";
        //debug << *node;
        //std::cerr << "Line " << GetLineNumber() << "\n";
        Check();
        //debug << "lexer_t::GetNext() returning " << *node << "\n";
        //debug << "End\n";
        return node;
    }

    /* Consumes the item that has already been peeked. node must be that item;
    this is only verified by assert. */
    void lexer_t::Skip( const node_t* node)
    {
        (void) node;    // unused in non-debug builds.
Check(); assert( this->peeked); assert( node == this->peeked); this->GetNext(); } void lexer_t::AddFailed( const std::type_info& ti, const char* description) { Check(); if ( !this->show_all_errorpos) { if ( !this->failed.empty()) { //assert( this->failed.size()==1); if ( this->pos < this->failed.begin()->pos) return; // we only keep one failed pos if !this->detailederrors if ( !this->show_failed_terminals || this->pos > this->failed.begin()->pos) this->failed.clear(); } } //std::cerr << "Adding failure " << description << " at " << (this->pos-this->text) << "\n"; assert ( !this->backtrackpositions.empty()); this->failed.insert( FailedInfo( ti, description, this->pos, this->backtrackpositions.back())); Check(); } void lexer_t::ClearFailed() { this->failed.clear(); } lexer_mark_t::lexer_mark_t( lexer_t& lexer_) : lexer( lexer_), lexer0( lexer_), have_backtracked( false) { lexer.backtrackpositions.push_back( lexer.GetPos()); } void lexer_mark_t::Backtrack() { /* We allow multiple calls to this function. used by the parser e.g. in TryGetFnParams(). otherwise we could do: assert( !this->have_backtracked) */ this->have_backtracked = true; // copy all except the failed terminals. lexer_t::FailedTerminals failedterminals = this->lexer.GetFailedTerminals(); this->lexer = this->lexer0; this->lexer.failed = failedterminals; debug0 << "Backtracked to " << this->lexer.pos[0] << this->lexer.pos[1] << this->lexer.pos[2] << this->lexer.pos[3] << " peeked=" << *this->lexer.peeked << "\n"; } lexer_mark_t::~lexer_mark_t() { if ( !have_backtracked) { assert( lexer.backtrackpositions.back() == this->lexer0.GetPos()); lexer.backtrackpositions.pop_back(); /* Used to have this code to remove knowledge of all failed terminals after the current position. 
But these failed terminals are actually what the failedterminal system is all about - giving maximum info about what happened in a failed parse.*/ /*for ( lexer_t::FailedTerminals::iterator it=this->lexer.failed.begin(); it!=this->lexer.failed.end();) { if ( it->pos > this->lexer0.pos) { lexer_t::FailedTerminals::iterator next = it; ++next; this->lexer.failed.erase( it); it = next; } else ++it; }*/ /* The following failes to compile with g++ 2.95.2 STLport 4.1b2 - remove_if seems to return const_iterator? */ /*IfLater iflater( this->lexer0.pos); this->lexer.failed.erase( std::remove_if( this->lexer.failed.begin(), this->lexer.failed.end(), iflater ), this->lexer.failed.end() );*/ } } }