Lex.cpp
Go to the documentation of this file.
1 #include "Lex.hpp"
2 
3 #include "../Defs.hpp"
4 
5 #include "../Basics/SS.hpp"
6 #include "../Instancing/Instance.hpp"
7 
8 #include <cctype>
9 
10 namespace iv
11 {
12 
13 Lex::Lex( Instance * inst ) :
14  cm( inst, this, "Lex" ),
15  inst( inst ),
16  _single_line( false ),
17  _numbers_enabled( true ),
18  in( nullptr ),
19  line( 0 ),
20  column( 0 ),
21  prev_line( 0 ),
22  prev_column( 0 ),
23  failed( false ),
24  newlines( 0 ),
25  newline_skipping( true ),
26  token( Eof )
27 {
28 }
29 
31 {
32  return this->inst;
33 }
34 
35 void Lex::DefineKeyword( const char * keyword )
36 {
37  this->keywords.insert( keyword );
38 }
39 
40 void Lex::DefineOperator( const char * op )
41 {
42  this->operators.insert( op );
43 }
44 
46 {
47  this->newline_skipping = false;
48 }
49 
51 {
52  this->newline_skipping = true;
53 }
54 
55 void Lex::numbers_enabled( bool enabled )
56 {
57  this->_numbers_enabled = enabled;
58 }
59 
60 void Lex::Init( std::istream & in, bool single_line )
61 {
62  this->cm.log( SRC_INFO, Defs::Log::Lex, "Init", ( single_line ? " single_line" : "" ), "." );
63 
64  this->_single_line = single_line;
65 
66  this->in = &in;
67  this->line = 0;
68  this->column = 0;
69  this->failed = false;
70 
71  this->PrecomputeOperators();
72 
73  this->NextChar();
74  this->NextToken();
75 }
76 
77 void Lex::Close()
78 {
79  this->cm.log( SRC_INFO, Defs::Log::Lex, "Close." );
80 
81  this->in = nullptr;
82  this->token = Eof;
83  this->newlines = 0;
84 }
85 
86 //------------------------------------------ lexan ----------------------------------------------------------------
87 void Lex::NextChar()
88 {
89  if( this->failed || !this->in || !this->in->good() )
90  {
91  this->c = std::char_traits< char >::eof();
92  return;
93  }
94 
95  this->c = this->in->get();
96 
97  this->column++;
98  if( this->c == '\n' )
99  {
100  this->line++;
101  this->column = 0;
102  }
103 
104  if( !this->in->eof() && this->in->fail() )
105  {
106  this->c = std::char_traits< char >::eof();
107  this->Error( "Input error: Error reading input stream." );
108  this->failed = true;
109  this->Close();
110  }
111 }
112 
113 void Lex::PrecomputeOperators()
114 {
115  for( auto & op : this->operators )
116  {
117  this->operators_begins.insert( op[0] );
118 
119  std::string prefix;
120  for( char a : op )
121  {
122  prefix += a;
123  this->operators_prefixes.insert( prefix );
124  }
125  }
126 }
127 
128 void Lex::ReadNumber( bool negative )
129 {
130  std::string str;
131 
132  if( negative )
133  str += '-';
134 
135  // read numbers
136  while( this->c >= '0' && this->c <= '9' )
137  {
138  str += this->c;
139  this->NextChar();
140  }
141 
142  if( this->c != '.' )
143  { // Integer
144  this->token = Integer;
145  this->token_str = str;
146  }
147  else
148  { // Double
149  str += '.';
150  this->NextChar();
151 
152  // read numbers after dot
153  while( this->c >= '0' && this->c <= '9' )
154  {
155  str += this->c;
156  this->NextChar();
157  }
158 
159  if( this->c == 'e' )
160  {
161  this->NextChar();
162  if( this->c == '-' )
163  {
164  str += this->c;
165  NextChar();
166  }
167 
168  if( this->c < '0' || this->c > '9' )
169  {
170  this->Error( "Lex error: No number after 'e' in floating point literal." );
171  this->failed = true;
172  this->Close();
173  return;
174  }
175 
176  while( this->c >= '0' && this->c <= '9' )
177  {
178  str += this->c;
179  this->NextChar();
180  }
181  }
182 
183  this->token = Double;
184  this->token_str = str;
185  }
186 
187  if( !this->isspace( this->c ) && !this->operators_begins.count( this->c ) )
188  {
189  this->Error( "Lex error: Number must be followed by a whitespace character or by an operator." );
190  this->failed = true;
191  this->Close();
192  return;
193  }
194 }
195 
196 void Lex::ReadIdentifier()
197 {
198  char first = this->c;
199  this->NextChar();
200 
201  std::string str;
202  str += first;
203 
204  if( this->operators_begins.count( first ) )
205  {
206  while( this->c != std::char_traits< char >::eof() && this->operators_prefixes.count( str + (char)this->c ) )
207  {
208  str += this->c;
209  this->NextChar();
210  }
211 
212  this->token = Operator;
213  this->token_str = str;
214  return;
215  }
216  else
217  {
218  while( this->c != std::char_traits< char >::eof() && !this->isspace( this->c ) && !this->operators_begins.count( this->c ) )
219  {
220  str += this->c;
221  this->NextChar();
222  }
223 
224  this->token_str = str;
225  if( this->keywords.count( str ) )
226  this->token = Keyword;
227  else
228  this->token = Name;
229  return;
230  }
231 }
232 
233 bool Lex::isspace( char c )
234 {
235  return c==' ' || c=='\t' || c=='\r' || c=='\n';
236 }
237 
238 void Lex::NextToken()
239 {
240  this->prev_line = this->line;
241  this->prev_column = this->column;
242  this->NextTokenImpl();
243  this->cm.log( SRC_INFO, Defs::Log::Lex, "NextToken: ", this->TokenToStr( this->token, this->token_str.c_str() ), " ", this->token_str );
244 }
245 
246 void Lex::NextTokenImpl()
247 {
248  // reset things
249  this->newlines = 0;
250  this->tested_tokens.clear();
251  this->tested_keywords.clear();
252  this->tested_operators.clear();
253 
254  // read next token
255  while( 1 )
256  {
257  if( this->failed )
258  { // fail
259  return;
260  }
261  else if( this->c == std::char_traits< char >::eof() )
262  { // eof
263  this->token = Eof;
264  this->token_str = "";
265  this->NextChar();
266  return;
267  }
268  else if( this->c == '\n' )
269  { // newline
270  this->newlines++;
271  if( this->_single_line )
272  {
273  this->Close();
274  this->newlines = 1;
275  return;
276  }
277  else
278  {
279  this->NextChar();
280  }
281  }
282  else if( this->c == '#' )
283  { // comment
284  while( this->c != '\n' && this->c != std::char_traits< char >::eof() )
285  this->NextChar();
286  }
287  else if( this->isspace( this->c ) )
288  { // whitespace
289  this->NextChar();
290  }
291  else if( this->_numbers_enabled && !this->operators_begins.count( '-' ) && this->c == '-' )
292  {
293  this->NextChar();
294  this->ReadNumber( true );
295  return;
296  }
297  else if( this->_numbers_enabled && this->c >= '0' && this->c <= '9' )
298  { // number
299  this->ReadNumber( false );
300  return;
301  }
302  else if( this->c == '"' || this->c == '\'' )
303  { // string
304  std::string str;
305  char end = this->c;
306  this->NextChar();
307  bool escaped = false;
308  while( this->c != end || escaped )
309  {
310  if( escaped )
311  {
312  escaped = false;
313  if( this->c == '\\' )
314  str += '\\';
315  else if( this->c == 'n' )
316  str += '\n';
317  else if( this->c == 'r' )
318  str += '\r';
319  else if( this->c == 't' )
320  str += '\t';
321  else if( this->c == '"' )
322  str += '"';
323  else if( this->c != std::char_traits< char >::eof() )
324  {
325  this->Error( SS() << "Lex error: Invalid escaped character in string: \\" << this->c << "." << SS::c_str() );
326  this->failed = true;
327  this->Close();
328  return;
329  }
330  }
331  else
332  {
333  if( this->c == std::char_traits< char >::eof() )
334  {
335  this->Error( "Lex error: Unexpected end of input, string not closed." );
336  this->failed = true;
337  this->Close();
338  return;
339  }
340  else if( this->c == '\n' )
341  {
342  this->Error( "Lex error: Unexpected end of line, string not closed." );
343  this->failed = true;
344  this->Close();
345  return;
346  }
347  else if( end == '"' && this->c == '\\' )
348  {
349  escaped = true;
350  }
351  else
352  {
353  str += this->c;
354  }
355  }
356 
357  this->NextChar();
358  }
359 
360  this->NextChar();
361  this->token = String;
362  this->token_str = str;
363  return;
364  }
365  else
366  { // something else
367  this->ReadIdentifier();
368  return;
369  }
370  }
371 }
372 
373 //------------------------------------------ parser -------------------------------------------
374 
375 bool Lex::IsNext( Token token )
376 {
377  if( this->failed )
378  return token==Eof;
379 
380  this->tested_tokens.push_back( token );
381 
382  if( !this->newline_skipping && this->newlines && token != Newline )
383  return false;
384 
385  if( token == Newline )
386  return this->newlines;
387  else if( token == Double && this->token == Integer )
388  return true;
389  else
390  return this->token == token;
391 }
392 
393 bool Lex::IsNextKeyword( const char * keyword )
394 {
395  if( this->failed )
396  return false;
397 
398  this->tested_keywords.push_back( keyword );
399 
400  if( !this->newline_skipping && this->newlines )
401  return false;
402 
403  return this->token == Keyword && this->token_str == keyword;
404 }
405 
406 bool Lex::IsNextOperator( const char * op )
407 {
408  if( this->failed )
409  return false;
410 
411  this->tested_operators.push_back( op );
412 
413  if( !this->newline_skipping && this->newlines )
414  return false;
415 
416  return this->token == Operator && this->token_str == op;
417 }
418 
420 {
421  if( !this->newline_skipping && this->newlines )
422  return Newline;
423 
424  return this->token;
425 }
426 
428 {
429  if( !this->newline_skipping && this->newlines )
430  return "\n";
431 
432  return this->token_str;
433 }
434 
436 {
437  if( this->failed )
438  return;
439 
440  this->ParserFail();
441 }
442 
443 void Lex::Accept( Token token )
444 {
445  if( this->failed )
446  return;
447 
448  if( !this->newline_skipping && token != Newline && this->newlines )
449  {
450  this->tested_tokens.push_back( token );
451  this->ParserFail();
452  return;
453  }
454 
455  if( this->token == token )
456  {
457  this->NextToken();
458  return;
459  }
460  else if( this->token == Integer && token == Double )
461  {
462  this->NextToken();
463  return;
464  }
465  else if( token == Newline && this->newlines )
466  {
467  this->newlines--;
468  return;
469  }
470  else
471  {
472  this->tested_tokens.push_back( token );
473  this->ParserFail();
474  return;
475  }
476 }
477 
478 void Lex::AcceptKeyword( const char * keyword )
479 {
480  if( !this->keywords.count( keyword ) )
481  {
482  this->Error( SS()<<"Trying to accept keyword '"<<keyword<<"' that was not registered using method Lex::DefineKeyword."<<SS::c_str() );
483  return;
484  }
485 
486  if( this->failed )
487  return;
488 
489  if( !this->newline_skipping && this->newlines )
490  {
491  this->tested_keywords.push_back( keyword );
492  this->ParserFail();
493  return;
494  }
495 
496  if( this->token == Keyword && this->token_str == keyword )
497  {
498  this->NextToken();
499  }
500  else
501  {
502  this->tested_keywords.push_back( keyword );
503  this->ParserFail();
504  return;
505  }
506 }
507 
508 void Lex::AcceptOperator( const char * op )
509 {
510  if( !this->operators.count( op ) )
511  {
512  this->Error( SS()<<"Trying to accept operator '"<<op<<"' that was not registered using method Lex::DefineOperator."<<SS::c_str() );
513  return;
514  }
515 
516  if( this->failed )
517  return;
518 
519  if( !this->newline_skipping && this->newlines )
520  {
521  this->tested_operators.push_back( op );
522  this->ParserFail();
523  return;
524  }
525 
526  if( this->token == Operator && this->token_str == op )
527  {
528  this->NextToken();
529  }
530  else
531  {
532  this->tested_operators.push_back( op );
533  this->ParserFail();
534  return;
535  }
536 }
537 
538 std::string Lex::AcceptString()
539 {
540  if( this->failed )
541  return "";
542 
543  if( !this->newline_skipping && this->newlines )
544  {
545  this->tested_tokens.push_back( String );
546  this->ParserFail();
547  return "";
548  }
549 
550  if( this->token == String )
551  {
552  std::string result = this->token_str;
553  this->NextToken();
554  return result;
555  }
556  else
557  {
558  this->tested_tokens.push_back( String );
559  this->ParserFail();
560  return "";
561  }
562 }
563 
564 std::string Lex::AcceptName()
565 {
566  if( this->failed )
567  return "";
568 
569  if( !this->newline_skipping && this->newlines )
570  {
571  this->tested_tokens.push_back( Name );
572  this->ParserFail();
573  return "";
574  }
575 
576  if( this->token == Name )
577  {
578  auto result = this->token_str;
579  this->NextToken();
580  return result;
581  }
582  else
583  {
584  this->tested_tokens.push_back( Name );
585  this->ParserFail();
586  return "";
587  }
588 }
589 
591 {
592  if( this->failed )
593  return 0;
594 
595  if( !this->newline_skipping && this->newlines )
596  {
597  this->tested_tokens.push_back( Integer );
598  this->ParserFail();
599  return 0;
600  }
601 
602  if( this->token == Integer )
603  {
604  auto result = atoi( this->token_str.c_str() );
605  this->NextToken();
606  return result;
607  }
608  else
609  {
610  this->tested_tokens.push_back( Integer );
611  this->ParserFail();
612  return 0;
613  }
614 }
615 
617 {
618  if( this->failed )
619  return 0.0;
620 
621  if( !this->newline_skipping && this->newlines )
622  {
623  this->tested_tokens.push_back( Double );
624  this->ParserFail();
625  return 0.0;
626  }
627 
628  if( this->token == Double )
629  {
630  auto result = atof( this->token_str.c_str() );
631  this->NextToken();
632  return result;
633  }
634  else if( this->token == Integer )
635  {
636  auto result = atoi( this->token_str.c_str() );
637  this->NextToken();
638  return double( result );
639  }
640  else
641  {
642  this->tested_tokens.push_back( Double );
643  this->ParserFail();
644  return 0.0;
645  }
646 }
647 
648 
649 int Lex::GetLine() const
650 {
651  return this->prev_line;
652 }
653 
654 int Lex::GetColumn() const
655 {
656  return this->prev_column;
657 }
658 
659 std::string Lex::TokenToStr( Token token, const char * str )
660 {
661  switch( token )
662  {
663  case Keyword:
664  return SS()<<"Keyword( "<<str<<" )"<<SS::str();
665  case Operator:
666  return SS()<<"Operator( "<<str<<" )"<<SS::str();
667  case Newline:
668  return "Newline";
669  case Eof:
670  return "Eof";
671  case String:
672  return SS()<<"String \""<<str<<"\""<<SS::str();
673  case Name:
674  return SS()<<"Name( "<<str<<" )"<<SS::str();
675  case Integer:
676  return SS()<<"Integer "<<str<<SS::str();
677  case Double:
678  return SS()<<"Double "<<str<<SS::str();
679  default:
680  return "-unknown-";
681  }
682 }
683 
684 void Lex::LogicFail( const char * message )
685 {
686  if( this->failed )
687  return;
688 
689  this->Error( message );
690  this->failed = true;
691 }
692 
693 void Lex::ParserFail( const char * prefix )
694 {
695  // we only report the first error
696  if( this->failed )
697  return;
698 
699  std::string current_token_string;
700  if( !this->newline_skipping && this->newlines )
701  current_token_string = this->TokenToStr( Newline, "\n" );
702  else
703  current_token_string = this->TokenToStr( this->token, this->token_str.c_str() );
704 
705  SS ss;
706  if( prefix )
707  ss << prefix << ": ";
708  ss << "Unexpected token " << current_token_string << ". Expecting one of these tokens: ";
709 
710  bool first = true;
711 
712  for( Token t : this->tested_tokens )
713  {
714  if( first )
715  first = false;
716  else
717  ss << ", ";
718 
719  ss << this->TokenToStr( t, "" );
720  }
721 
722  for( auto & str : this->tested_keywords )
723  {
724  if( first )
725  first = false;
726  else
727  ss << ", ";
728 
729  ss << this->TokenToStr( Keyword, str.c_str() );
730  }
731 
732  for( auto & str : this->tested_operators )
733  {
734  if( first )
735  first = false;
736  else
737  ss << ", ";
738 
739  ss << this->TokenToStr( Operator, str.c_str() );
740  }
741 
742  ss << ".";
743  this->Error( ss << SS::c_str() );
744 
745  this->failed = true;
746 }
747 
749 {
750  return this->failed;
751 }
752 
753 void Lex::Error( const char * msg )
754 {
755  this->cm.warning( SRC_INFO, msg );
756 }
757 
758 void Lex::Recover( Token token )
759 {
760  if( !this->failed )
761  return;
762 
763  if( !this->in )
764  return;
765 
766  this->failed = false;
767 
768  if( token == Newline )
769  {
770  while( this->newlines==0 && this->token != Eof )
771  this->NextToken();
772  }
773  else if( token == Double )
774  {
775  while( this->token != Double && this->token != Integer && this->token != Eof )
776  this->NextToken();
777  }
778  else
779  {
780  while( this->token != token && this->token != Eof )
781  this->NextToken();
782  }
783 }
784 
785 void Lex::RecoverKeyword( const char * keyword )
786 {
787  if( !this->failed )
788  return;
789 
790  if( !this->in )
791  return;
792 
793  this->failed = false;
794 
795  while( !( this->token == Keyword && this->token_str == keyword ) && this->token != Eof )
796  this->NextToken();
797 }
798 
799 void Lex::RecoverOperator( const char * op )
800 {
801  if( !this->failed )
802  return;
803 
804  if( !this->in )
805  return;
806 
807  this->failed = false;
808 
809  while( !( this->token == Operator && this->token_str == op ) && this->token != Eof )
810  this->NextToken();
811 }
812 
813 }