Back to index

supertuxkart  0.5+dfsg1
lexer.cpp
Go to the documentation of this file.
00001 //  $Id: lexer.cpp 2111 2008-05-31 07:04:30Z cosmosninja $
00002 //
00003 //  TuxKart - a fun racing game with go-kart
00004 //  Copyright (C) 2004 Matthias Braun <matze@braunis.de>
00005 //  code in this file based on lispreader from Mark Probst
00006 //
00007 //  This program is free software; you can redistribute it and/or
00008 //  modify it under the terms of the GNU General Public License
00009 //  as published by the Free Software Foundation; either version 2
00010 //  of the License, or (at your option) any later version.
00011 //
00012 //  This program is distributed in the hope that it will be useful,
00013 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 //  GNU General Public License for more details.
00016 //
00017 //  You should have received a copy of the GNU General Public License
00018 //  along with this program; if not, write to the Free Software
00019 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00020 #include <sstream>
00021 #include <stdexcept>
00022 #include <cstring>
00023 
00024 #include "lexer.hpp"
00025 #include "translation.hpp"
00026 #if defined(WIN32) && !defined(__CYGWIN__)
00027 #  define snprintf _snprintf
00028 #endif
00029 namespace lisp
00030 {
00031 
00032     class EOFException
00033         {}
00034     ;
00035 
00036     Lexer::Lexer(std::istream& newstream)
00037             : m_stream(newstream), m_is_eof(false)
00038     {
00039         try
00040         {
00041             // trigger a refill of the m_buffer
00042             m_c = 0;
00043             m_buffer_end = m_c + 1;
00044             nextChar();
00045         }
00046         catch(EOFException& e)
00047         {(void)e;  // avoid 'unreferenced local variable' warning
00048         }
00049     }
00050 
00051 //-----------------------------------------------------------------------------
00052 
00053     Lexer::~Lexer()
00054     {}
00055 
00056 //-----------------------------------------------------------------------------
00057 
00058     void
00059     Lexer::nextChar()
00060     {
00061         ++m_c;
00062         if(m_c >= m_buffer_end)
00063         {
00064             if(m_is_eof)
00065                 throw EOFException();
00066             m_stream.read(m_buffer, LEXER_BUFFER_SIZE);
00067             std::streamsize n = m_stream.gcount();
00068 
00069             m_c = m_buffer;
00070             m_buffer_end = m_buffer + n;
00071 
00072             // the following is a hack that appends an additional ' ' at the end of
00073             // the file to avoid problems when parsing symbols/elements and a sudden
00074             // EOF. This is faster than relying on unget and IMO also nicer.
00075             if(n < LEXER_BUFFER_SIZE || n == 0)
00076             {
00077                 *m_buffer_end = ' ';
00078                 ++m_buffer_end;
00079                 m_is_eof = true;
00080             }
00081         }
00082     }
00083 
00084 //-----------------------------------------------------------------------------
00085 
00086     Lexer::TokenType
00087     Lexer::getNextToken()
00088     {
00089         static const char* delims = "\"();";
00090 
00091         try
00092         {
00093             while(isspace(*m_c))
00094             {
00095                 nextChar();
00096                 if(*m_c == '\n')
00097                     ++m_line_number;
00098             };
00099 
00100             m_token_length = 0;
00101 
00102             switch(*m_c)
00103             {
00104             case ';': // comment
00105                 while(true)
00106                 {
00107                     nextChar();
00108                     if(*m_c == '\n')
00109                     {
00110                         ++m_line_number;
00111                         break;
00112                     }
00113                 }
00114                 return getNextToken(); // and again
00115             case '(':
00116                 nextChar();
00117                 return TOKEN_OPEN_PAREN;
00118             case ')':
00119                 nextChar();
00120                 return TOKEN_CLOSE_PAREN;
00121             case '"': // string
00122                 {
00123                     const int STARTLINE = m_line_number;
00124                     try
00125                     {
00126                         while(1)
00127                         {
00128                             nextChar();
00129                             if(*m_c == '"')
00130                                 break;
00131 
00132                             if(*m_c == '\\')
00133                             {
00134                                 nextChar();
00135                                 switch(*m_c)
00136                                 {
00137                                 case 'n':
00138                                     *m_c = '\n';
00139                                     break;
00140                                 case 't':
00141                                     *m_c = '\t';
00142                                     break;
00143                                 }
00144                             }
00145                             if(m_token_length < MAX_TOKEN_LENGTH)
00146                                 m_token_string[m_token_length++] = *m_c;
00147                         }
00148                         m_token_string[m_token_length] = 0;
00149                     }
00150                     catch(EOFException& )
00151                     {
00152                         char msg[MAX_ERROR_MESSAGE_LENGTH];
00153                         snprintf(msg, sizeof(msg),
00154                                  "Parse error in line %d: EOF while parsing string.",
00155                                  STARTLINE);
00156                         throw std::runtime_error(msg);
00157                     }
00158                     nextChar();
00159                     return TOKEN_STRING;
00160                 }
00161             case '#': // constant
00162                 try
00163                 {
00164                     nextChar();
00165 
00166                     while(isalnum(*m_c) || *m_c == '_')
00167                     {
00168                         if(m_token_length < MAX_TOKEN_LENGTH)
00169                             m_token_string[m_token_length++] = *m_c;
00170                         nextChar();
00171                     }
00172                     m_token_string[m_token_length] = 0;
00173                 }
00174                 catch(EOFException& )
00175                 {
00176                     char msg[MAX_ERROR_MESSAGE_LENGTH];
00177                     snprintf(msg, sizeof(msg), 
00178                              "Parse Error in line %d: EOF while parsing constant.",
00179                              m_line_number);
00180                     throw std::runtime_error(msg);
00181                 }
00182 
00183                 if(strcmp(m_token_string, "t") == 0)
00184                     return TOKEN_TRUE;
00185                 if(strcmp(m_token_string, "f") == 0)
00186                     return TOKEN_FALSE;
00187 
00188                 // this would be the place to add more sophisticated handling of
00189                 // constants
00190 
00191                 {
00192                     char msg[MAX_ERROR_MESSAGE_LENGTH];
00193                     snprintf(msg, sizeof(msg), 
00194                              "Parse Error in line %d: Unknown constant '%s'.",
00195                              m_line_number, m_token_string);
00196                     throw std::runtime_error(msg);
00197                 }
00198 
00199             case '_': // can be begin translation
00200                 try
00201               {
00202                   nextChar();
00203                   if(*m_c == '(')
00204                   {
00205                     nextChar();
00206                     return TOKEN_TRANSLATION;
00207                   }
00208                   m_token_string[m_token_length++] = '_';
00209                   // Fall through to symbol handling
00210               }  
00211                 catch(EOFException& )
00212                 {
00213               }
00214             default:
00215                 if(isdigit(*m_c) || *m_c == '-')
00216                 {
00217                     bool have_nondigits = false;
00218                     bool have_digits = false;
00219                     int have_floating_point = 0;
00220 
00221                     do
00222                     {
00223                         if(isdigit(*m_c))
00224                             have_digits = true;
00225                         else if(*m_c == '.')
00226                             ++have_floating_point;
00227                         else if(isalnum(*m_c) || *m_c == '_')
00228                             have_nondigits = true;
00229 
00230                         if(m_token_length < MAX_TOKEN_LENGTH)
00231                             m_token_string[m_token_length++] = *m_c;
00232 
00233                         nextChar();
00234                     }
00235                     while(!isspace(*m_c) && !strchr(delims, *m_c));
00236 
00237                     m_token_string[m_token_length] = 0;
00238 
00239                     // no nextChar
00240 
00241                     if(have_nondigits || !have_digits || have_floating_point > 1)
00242                         return TOKEN_SYMBOL;
00243                     else if(have_floating_point == 1)
00244                         return TOKEN_REAL;
00245                     else
00246                         return TOKEN_INTEGER;
00247                 }
00248                 else
00249                 {
00250                     do
00251                     {
00252                         if(m_token_length < MAX_TOKEN_LENGTH)
00253                             m_token_string[m_token_length++] = *m_c;
00254                         nextChar();
00255                     }
00256                     while(!isspace(*m_c) && !strchr(delims, *m_c));
00257                     m_token_string[m_token_length] = 0;
00258 
00259                     // no nextChar
00260 
00261                     return TOKEN_SYMBOL;
00262                 }
00263             }
00264         }
00265         catch(EOFException& )
00266         {
00267             return TOKEN_EOF;
00268         }
00269     }
00270 
00271 } // end of namespace lisp
00272