Back to index

wims  3.65+svn20090927
Lexer.java
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2007-2008 Mihai Preda.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00017 package org.javia.arity;
00018 
00019 class Lexer {
00020     static final int
00021         ADD = 1, 
00022         SUB = 2, 
00023         MUL = 3, 
00024         DIV = 4, 
00025         MOD = 5,
00026         UMIN   = 6, 
00027         POWER  = 7, 
00028         FACT   = 8,
00029         NUMBER = 9, 
00030         CONST  = 10,
00031         CALL   = 11, 
00032         COMMA  = 12, 
00033         LPAREN = 13, 
00034         RPAREN = 14,
00035         END    = 15,
00036         SQRT   = 16;
00037         
00038     static final Token
00039         TOK_ADD    = new Token(ADD, 3, Token.LEFT, VM.ADD),
00040         TOK_SUB    = new Token(SUB, 3, Token.LEFT, VM.SUB),
00041 
00042         TOK_MUL    = new Token(MUL, 4, Token.LEFT, VM.MUL),
00043         TOK_DIV    = new Token(DIV, 4, Token.LEFT, VM.DIV),
00044         TOK_MOD    = new Token(MOD, 4, Token.LEFT, VM.MOD),
00045 
00046         TOK_UMIN   = new Token(UMIN, 5, Token.PREFIX, VM.UMIN),
00047 
00048         TOK_POWER  = new Token(POWER, 6, Token.RIGHT, VM.POWER),
00049         TOK_FACT   = new Token(FACT,  7, Token.SUFIX, VM.FACT),
00050         TOK_SQRT   = new Token(SQRT,  8, Token.PREFIX, VM.SQRT),
00051 
00052         TOK_LPAREN = new Token(LPAREN, 1, Token.PREFIX, 0),
00053         TOK_RPAREN = new Token(RPAREN, 2, 0, 0),
00054         TOK_COMMA  = new Token(COMMA,  1, 0, 0),
00055         TOK_END    = new Token(END,    0, 0, 0),
00056 
00057         TOK_NUMBER = new Token(NUMBER, 20, 0, 0),
00058         TOK_CONST  = new Token(CONST,  20, 0, 0);
00059 
00060     private static final String WHITESPACE = " \n\r\t";
00061     private static final char END_MARKER = '$';
00062     private char[] input = new char[32];
00063     private int pos;
00064     private SyntaxException exception;
00065 
00066     Lexer(SyntaxException exception) {
00067         this.exception = exception;
00068         init("");
00069     }
00070 
00071     void scan(String str, TokenConsumer consumer) throws SyntaxException {
00072         init(str);
00073         consumer.start();
00074         Token token;
00075         do {
00076             int savePos = pos;
00077             token = nextToken();
00078             token.position = savePos;
00079             consumer.push(token);
00080         } while (token != TOK_END);
00081     }
00082 
00083     void init(String str) {
00084         exception.expression = str;
00085         int len = str.length();
00086         if (input.length < len + 1) {
00087             input = new char[len+1];
00088         }
00089         str.getChars(0, len, input, 0);
00090         input[len] = END_MARKER;
00091         pos = 0;
00092     }
00093 
00094     Token nextToken() throws SyntaxException {
00095         while (WHITESPACE.indexOf(input[pos]) != -1) {
00096             ++pos;
00097         }
00098 
00099         char c = input[pos];
00100         int begin = pos++;
00101 
00102         switch (c) {
00103         case '!': return TOK_FACT;
00104         case END_MARKER: return TOK_END;
00105         case '%': return TOK_MOD;
00106         case '(': return TOK_LPAREN;
00107         case ')': return TOK_RPAREN;
00108         case '*': return TOK_MUL;
00109         case '+': return TOK_ADD;
00110         case ',': return TOK_COMMA;
00111         case '-': return TOK_SUB;
00112         case '/': return TOK_DIV;
00113         }
00114         if (c == '^') { 
00115             return TOK_POWER;
00116         }
00117 
00118         int p  = pos;
00119         if (('0' <= c && c <= '9') || c == '.') {
00120             while (('0' <= c && c <= '9') || c == '.' || c == 'E' || c == 'e') {
00121                 if ((c == 'E' || c == 'e') && input[p] == '-') { //accept '-' only after E
00122                     ++p; 
00123                 }
00124                 c = input[p++];
00125             } 
00126             pos = p-1;
00127             String nbStr = String.valueOf(input, begin, p-1-begin);
00128             try {
00129                 // parse single dot as 0
00130                 if (nbStr.equals(".")) {
00131                     return TOK_NUMBER.setValue(0);
00132                 } else {
00133                     double numberValue = Double.parseDouble(nbStr);
00134                     return TOK_NUMBER.setValue(numberValue);
00135                 }
00136             } catch (NumberFormatException e) {
00137                 throw exception.set("invalid number '" + nbStr + "'", begin);
00138             }
00139         } else if (('a' <= c && c <= 'z') ||
00140                    ('A' <= c && c <= 'Z')) {
00141             do {
00142                 c = input[p++];
00143             } while (('a' <= c && c <= 'z') ||
00144                      ('A' <= c && c <= 'Z') ||
00145                      ('0' <= c && c <= '9'));
00146             String nameValue = String.valueOf(input, begin, p-1-begin);
00147             while (WHITESPACE.indexOf(c) != -1) {
00148                 c = input[p++];
00149             }
00150             if (c == '(') {
00151                 pos = p;
00152                 return (new Token(CALL, 0, Token.PREFIX, 0)).setAlpha(nameValue);
00153             } else {
00154                 pos = p-1;                
00155                 return TOK_CONST.setAlpha(nameValue);
00156             }
00157         } else if ((c >= '\u0391' && c <= '\u03a9') || (c >= '\u03b1' && c <= '\u03c9')) {
00158             return TOK_CONST.setAlpha(""+c);
00159         } else { 
00160             switch (c) {
00161             case '^':
00162                 return TOK_POWER;
00163             case '\u00d7':
00164                 return TOK_MUL;
00165             case '\u00f7':
00166                 return TOK_DIV;
00167             case '\u2212':
00168                 return TOK_SUB;
00169             case '\u221a':
00170                 return TOK_SQRT;
00171             default:
00172                 throw exception.set("invalid character '" + c + "'", begin); 
00173             }
00174         }
00175     }
00176 }