kspread

formula.cc

00001 /* This file is part of the KDE project
00002    Copyright (C) 2003,2004 Ariya Hidayat <ariya@kde.org>
00003    Copyright (C) 2005 Tomas Mecir <mecirt@gmail.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public License
00016    along with this library; see the file COPYING.LIB.  If not, write to
00017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include "formula.h"
00022 
00023 #include "kspread_cell.h"
00024 #include "kspread_sheet.h"
00025 #include "kspread_doc.h"
00026 #include "kspread_util.h"
00027 #include "kspread_value.h"
00028 
00029 #include "valuecalc.h"
00030 #include "valueconverter.h"
00031 #include "valueparser.h"
00032 
00033 #include "functions.h"
00034 
00035 #include <limits.h>
00036 
00037 #include <qregexp.h>
00038 #include <qstring.h>
00039 #include <qvaluevector.h>
00040 #include <qvaluestack.h>
00041 
00042 #include <klocale.h>
00043 
00044 /*
00045   To understand how this formula engine works, please refer to the documentation
00046   in file DESIGN.html.
00047 
00048   Useful references:
00049    - "Principles of Compiler Design", A.V.Aho, J.D.Ullman, Addison Wesley, 1978
00050    - "Writing Interactive Compilers and Interpreters", P.J. Brown,
00051      John Wiley and Sons, 1979.
00052    - "The Theory and Practice of Compiler Writing", J.Tremblay, P.G.Sorenson,
00053      McGraw-Hill, 1985.
00054    - "The Java(TM) Virtual Machine Specification", T.Lindholm, F.Yellin,
00055      Addison-Wesley, 1997.
00056    - "Java Virtual Machine", J.Meyer, T.Downing, O'Reilly, 1997.
00057 
00058  */
00059 
00060 
00061  /*
00062 TODO - features:
00063  - handle Intersection
00064  - cell reference is made relative (absolute now)
00065  - shared formula (different owner, same data)
00066  - relative internal representation (independent of owner)
00067  - OASIS support
00068 TODO - optimizations:
00069  - handle initial formula marker = (and +)
00070  - reuse constant already in the pool
00071  - reuse references already in the pool
00072  - expression optimization (e.g. 1+2+A1 becomes 3+A1)
00073  */
00074 
00075 namespace KSpread
00076 {
00077 
00078 class Opcode
00079 {
00080 public:
00081 
00082   enum { Nop = 0, Load, Ref, Cell, Range, Function, Add, Sub, Neg, Mul, Div,
00083     Pow, Concat, Not, Equal, Less, Greater };
00084 
00085   unsigned type;
00086   unsigned index;
00087 
00088   Opcode(): type(Nop), index(0) {};
00089   Opcode( unsigned t ): type(t), index(0) {};
00090   Opcode( unsigned t, unsigned i ): type(t), index(i) {};
00091 };
00092 
00093 class Formula::Private
00094 {
00095 public:
00096   Formula *formula;
00097   Cell *cell;
00098   Sheet *sheet;
00099   bool dirty;
00100   bool valid;
00101   QString expression;
00102   QValueVector<Opcode> codes;
00103   QValueVector<Value> constants;
00104 };
00105 
00106 class TokenStack : public QValueVector<Token>
00107 {
00108 public:
00109   TokenStack();
00110   bool isEmpty() const;
00111   unsigned itemCount() const;
00112   void push( const Token& token );
00113   Token pop();
00114   const Token& top();
00115   const Token& top( unsigned index );
00116 private:
00117   void ensureSpace();
00118   unsigned topIndex;
00119 };
00120 
00121 }
00122 
00123 using namespace KSpread;
00124 
00125 // for null token
00126 const Token Token::null;
00127 
00128 // helper function: return operator of given token text
00129 // e.g. "*" yields Operator::Asterisk, and so on
00130 Token::Op KSpread::matchOperator( const QString& text )
00131 {
00132   Token::Op result = Token::InvalidOp;
00133 
00134   if( text.length() == 1 )
00135   {
00136     QChar p = text[0];
00137     switch( p.unicode() )
00138     {
00139         case '+': result = Token::Plus; break;
00140         case '-': result = Token::Minus; break;
00141         case '*': result = Token::Asterisk; break;
00142         case '/': result = Token::Slash; break;
00143         case '^': result = Token::Caret; break;
00144         case ',': result = Token::Comma; break;
00145         case ';': result = Token::Semicolon; break;
00146         case '(': result = Token::LeftPar; break;
00147         case ')': result = Token::RightPar; break;
00148         case '&': result = Token::Ampersand; break;
00149         case '=': result = Token::Equal; break;
00150         case '<': result = Token::Less; break;
00151         case '>': result = Token::Greater; break;
00152         case '%': result = Token::Percent; break;
00153         default : result = Token::InvalidOp; break;
00154     }
00155   }
00156 
00157   if( text.length() == 2 )
00158   {
00159     if( text == "<>" ) result = Token::NotEqual;
00160     if( text == "<=" ) result = Token::LessEqual;
00161     if( text == ">=" ) result = Token::GreaterEqual;
00162     if( text == "==" ) result = Token::Equal;
00163   }
00164 
00165   return result;
00166 }
00167 
00168 // helper function: give operator precedence
00169 // e.g. "+" is 1 while "*" is 3
00170 static int opPrecedence( Token::Op op )
00171 {
00172   int prec = -1;
00173   switch( op )
00174   {
00175     case Token::Percent      : prec = 8; break;
00176     case Token::Caret        : prec = 7; break;
00177     case Token::Asterisk     : prec = 5; break;
00178     case Token::Slash        : prec = 6; break;
00179     case Token::Plus         : prec = 3; break;
00180     case Token::Minus        : prec = 3; break;
00181     case Token::Ampersand    : prec = 2; break;
00182     case Token::Equal        : prec = 1; break;
00183     case Token::NotEqual     : prec = 1; break;
00184     case Token::Less         : prec = 1; break;
00185     case Token::Greater      : prec = 1; break;
00186     case Token::LessEqual    : prec = 1; break;
00187     case Token::GreaterEqual : prec = 1; break;
00188     case Token::Semicolon    : prec = 0; break;
00189     case Token::RightPar     : prec = 0; break;
00190     case Token::LeftPar      : prec = -1; break;
00191     default: prec = -1; break;
00192   }
00193   return prec;
00194 }
00195 
00196 // helper function
00197 static Value tokenAsValue( const Token& token )
00198 {
00199   Value value;
00200   if( token.isBoolean() ) value = Value( token.asBoolean() );
00201   else if( token.isInteger() ) value = Value( token.asInteger() );
00202   else if( token.isFloat() ) value = Value( token.asFloat() );
00203   else if( token.isString() ) value = Value( token.asString() );
00204   return value;
00205 }
00206 
00207 /**********************
00208     Token
00209  **********************/
00210 
00211 // creates a token
00212 Token::Token( Type type, const QString& text, int pos )
00213 {
00214   m_type = type;
00215   m_text = text;
00216   m_pos = pos;
00217 }
00218 
00219 // copy constructor
00220 Token::Token( const Token& token )
00221 {
00222   m_type = token.m_type;
00223   m_text = token.m_text;
00224   m_pos = token.m_pos;
00225 }
00226 
00227 // assignment operator
00228 Token& Token::operator=( const Token& token )
00229 {
00230   m_type = token.m_type;
00231   m_text = token.m_text;
00232   m_pos = token.m_pos;
00233   return *this;
00234 }
00235 
00236 bool Token::asBoolean() const
00237 {
00238   if( !isBoolean() ) return false;
00239   return m_text.lower() == "true";
00240   // FIXME check also for i18n version
00241 }
00242 
00243 int Token::asInteger() const
00244 {
00245   if( isInteger() ) return m_text.toInt();
00246   else return 0;
00247 }
00248 
00249 double Token::asFloat() const
00250 {
00251   if( isFloat() ) return m_text.toDouble();
00252   else return 0.0;
00253 }
00254 
00255 QString Token::asString() const
00256 {
00257   if( isString() ) return m_text.mid( 1, m_text.length()-2 );
00258   else return QString::null;
00259 }
00260 
00261 Token::Op Token::asOperator() const
00262 {
00263   if( isOperator() ) return matchOperator( m_text );
00264   else return InvalidOp;
00265 }
00266 
00267 QString Token::sheetName() const
00268 {
00269   if( !isCell() && !isRange() ) return QString::null;
00270   int i = m_text.find( '!' );
00271   if( i < 0 ) return QString();
00272   QString sheet = m_text.left( i );
00273   if( sheet[0] == QChar(39) )
00274     sheet = sheet.mid( 1, sheet.length()-2 );
00275   return sheet;
00276 }
00277 
00278 QString Token::description() const
00279 {
00280   QString desc;
00281 
00282   switch (m_type )
00283   {
00284     case  Boolean:    desc = "Boolean"; break;
00285     case  Integer:    desc = "Integer"; break;
00286     case  Float:      desc = "Float"; break;
00287     case  String:     desc = "String"; break;
00288     case  Identifier: desc = "Identifier"; break;
00289     case  Cell:       desc = "Cell"; break;
00290     case  Range:      desc = "Range"; break;
00291     case  Operator:   desc = "Operator"; break;
00292     default:          desc = "Unknown"; break;
00293   }
00294 
00295   while( desc.length() < 10 ) desc.prepend( ' ' );
00296   desc.prepend( "  " );
00297   desc.prepend( QString::number( m_pos ) );
00298   desc.append( " : " ).append( m_text );
00299 
00300   return desc;
00301 }
00302 
00303 
00304 /**********************
00305     TokenStack
00306  **********************/
00307 
00308 TokenStack::TokenStack(): QValueVector<Token>()
00309 {
00310   topIndex = 0;
00311   ensureSpace();
00312 }
00313 
00314 bool TokenStack::isEmpty() const
00315 {
00316   return topIndex == 0;
00317 }
00318 
00319 unsigned TokenStack::itemCount() const
00320 {
00321   return topIndex;
00322 }
00323 
00324 void TokenStack::push( const Token& token )
00325 {
00326   ensureSpace();
00327   at( topIndex++ ) = token;
00328 }
00329 
00330 Token TokenStack::pop()
00331 {
00332   return (topIndex > 0 ) ? Token( at( --topIndex ) ) : Token();
00333 }
00334 
00335 const Token& TokenStack::top()
00336 {
00337   return top( 0 );
00338 }
00339 
00340 const Token& TokenStack::top( unsigned index )
00341 {
00342   if( topIndex > index )
00343     return at( topIndex-index-1 );
00344   return Token::null;
00345 }
00346 
00347 void TokenStack::ensureSpace()
00348 {
00349   while( topIndex >= size() )
00350     resize( size() + 10 );
00351 }
00352 
00353 /**********************
00354     FormulaPrivate
00355  **********************/
00356 
00357 // helper function: return true for valid identifier character
00358 bool KSpread::isIdentifier( QChar ch )
00359 {
00360   return ( ch.unicode() == '_' ) || (ch.unicode() == '$' ) || ( ch.isLetter() );
00361 }
00362 
00363 
00364 
00365 
00366 /**********************
00367     Formula
00368  **********************/
00369 
00370 // Constructor
00371 
00372 Formula::Formula (Sheet *sheet, Cell *cell)
00373 {
00374   d = new Private;
00375   d->cell = cell;
00376   d->sheet = sheet;
00377   clear();
00378 }
00379 
00380 Formula::Formula()
00381 {
00382   d = new Private;
00383   d->cell = 0;
00384   d->sheet = 0;
00385   clear();
00386 }
00387 
00388 // Destructor
00389 
00390 Formula::~Formula()
00391 {
00392   delete d;
00393 }
00394 
00395 Cell* Formula::cell() const
00396 {
00397   return d->cell;
00398 }
00399 
00400 Sheet* Formula::sheet() const
00401 {
00402   return d->sheet;
00403 }
00404 
00405 // Sets a new expression for this formula.
00406 // note that both the real lex and parse processes will happen later on
00407 // when needed (i.e. "lazy parse"), for example during formula evaluation.
00408 
00409 void Formula::setExpression( const QString& expr )
00410 {
00411   d->expression = expr;
00412   d->dirty = true;
00413   d->valid = false;
00414 }
00415 
00416 // Returns the expression associated with this formula.
00417 
00418 QString Formula::expression() const
00419 {
00420   return d->expression;
00421 }
00422 
00423 // Returns the validity of the formula.
00424 // note: empty formula is always invalid.
00425 
00426 bool Formula::isValid() const
00427 {
00428   if( d->dirty )
00429   {
00430     KLocale* locale = d->cell ? d->cell->locale() : 0;
00431     if ((!locale) && d->sheet)
00432       locale = d->sheet->doc()->locale();
00433     Tokens tokens = scan( d->expression, locale );
00434     if( tokens.valid() )
00435       compile( tokens );
00436     else
00437       d->valid = false;
00438   }
00439   return d->valid;
00440 }
00441 
00442 // Clears everything, also mark the formula as invalid.
00443 
00444 void Formula::clear()
00445 {
00446   d->expression = QString::null;
00447   d->dirty = true;
00448   d->valid = false;
00449   d->constants.clear();
00450   d->codes.clear();
00451 }
00452 
00453 // Returns list of token for the expression.
00454 // this triggers again the lexical analysis step. it is however preferable
00455 // (even when there's small performance penalty) because otherwise we need to
00456 // store parsed tokens all the time which serves no good purpose.
00457 
00458 Tokens Formula::tokens() const
00459 {
00460   KLocale* locale = d->cell ? d->cell->locale() : 0;
00461   if ((!locale) && d->sheet)
00462     locale = d->sheet->doc()->locale();
00463   return scan( d->expression, locale );
00464 }
00465 
00466 Tokens Formula::scan( const QString& expr, KLocale* locale ) const
00467 {
00468   // to hold the result
00469   Tokens tokens;
00470 
00471   // parsing state
00472   enum { Start, Finish, Bad, InNumber, InDecimal, InExpIndicator, InExponent,
00473     InString, InIdentifier, InCell, InRange, InSheetName } state;
00474 
00475   // use locale settings if specified
00476   QString thousand = locale ? locale->thousandsSeparator() : "";
00477   QString decimal = locale ? locale->decimalSymbol() : ".";
00478 
00479   // initialize variables
00480   state = Start;
00481   unsigned int i = 0;
00482   QString ex = expr;
00483   QString tokenText;
00484   int tokenStart = 0;
00485 
00486   // first character must be equal sign (=)
00487   if( ex[0] != '=' )
00488     return tokens;
00489 
00490   // but the scanner should not see this equal sign
00491   ex.remove( 0, 1 );
00492 
00493   // force a terminator
00494   ex.append( QChar() );
00495 
00496   // main loop
00497   while( (state != Bad) && (state != Finish) && (i < ex.length()) )
00498   {
00499     QChar ch = ex[i];
00500 
00501     switch( state )
00502     {
00503 
00504     case Start:
00505 
00506        tokenStart = i;
00507 
00508        // skip any whitespaces
00509        if( ch.isSpace() ) i++;
00510 
00511        // check for number
00512        else if( ch.isDigit() )
00513        {
00514          state = InNumber;
00515        }
00516 
00517        // a string ?
00518        else if ( ch == '"' )
00519        {
00520          tokenText.append( ex[i++] );
00521          state = InString;
00522        }
00523 
00524        // beginning with alphanumeric ?
00525        // could be identifier, cell, range, or function...
00526        else if( isIdentifier( ch ) )
00527        {
00528          state = InIdentifier;
00529        }
00530 
00531        // aposthrophe (') marks sheet name for 3-d cell, e.g 'Sales Q3'!A4
00532        else if ( ch.unicode() == 39 )
00533        {
00534          i++;
00535          state = InSheetName;
00536          tokenText.append( QChar( 39 ) );
00537        }
00538 
00539        // decimal dot ?
00540        else if ( ch == decimal )
00541        {
00542          tokenText.append( ex[i++] );
00543          state = InDecimal;
00544        }
00545 
00546        // terminator character
00547        else if ( ch == QChar::null )
00548           state = Finish;
00549 
00550        // look for operator match
00551        else
00552        {
00553          int op;
00554          QString s;
00555 
00556          // check for two-chars operator, such as '<=', '>=', etc
00557          s.append( ch ).append( ex[i+1] );
00558          op = matchOperator( s );
00559 
00560          // check for one-char operator, such as '+', ';', etc
00561          if( op == Token::InvalidOp )
00562          {
00563            s = QString( ch );
00564            op = matchOperator( s );
00565          }
00566 
00567          // any matched operator ?
00568          if( op != Token::InvalidOp )
00569          {
00570            int len = s.length();
00571            i += len;
00572            tokens.append( Token( Token::Operator, s.left( len ), tokenStart ) );
00573          }
00574          else state = Bad;
00575         }
00576        break;
00577 
00578     case InIdentifier:
00579 
00580        // consume as long as alpha, dollar sign, underscore, or digit
00581        if( isIdentifier( ch )  || ch.isDigit() ) tokenText.append( ex[i++] );
00582 
00583        // a '!' ? then this must be sheet name, e.g "Sheet4!"
00584        else if( ch == '!' )
00585        {
00586           tokenText.append( ex[i++] );
00587           state = InCell;
00588        }
00589 
00590        // we're done with identifier
00591        else
00592        {
00593          // check for cell reference,  e.g A1, VV123, ...
00594          QRegExp exp("(\\$?)([a-zA-Z]+)(\\$?)([0-9]+)$");
00595          int n = exp.search( tokenText );
00596          if( n >= 0 )
00597            state = InCell;
00598          else
00599          {
00600            bool gotNamed = false;
00601            // check for named areas ...
00602            if (d->sheet) {
00603              const QValueList<Reference> areas = d->sheet->doc()->listArea();
00604              QValueList<Reference>::const_iterator it;
00605              for (it = areas.begin(); it != areas.end(); ++it) {
00606                if ((*it).ref_name.lower() == tokenText.lower()) {
00607                  // we got a named area
00608                  tokens.append (Token (Token::Range, tokenText, tokenStart));
00609                  gotNamed = true;
00610                  break;
00611                 }
00612               }
00613            }
00614            if (!gotNamed)
00615              tokens.append (Token (Token::Identifier, tokenText,
00616                tokenStart));
00617            tokenStart = i;
00618            tokenText = "";
00619            state = Start;
00620          }
00621        }
00622        break;
00623 
00624     case InCell:
00625 
00626        // consume as long as alpha, dollar sign, underscore, or digit
00627        if( isIdentifier( ch )  || ch.isDigit() ) tokenText.append( ex[i++] );
00628 
00629        // we're done with cell ref, possibly with sheet name (like "Sheet2!B2")
00630        // note that "Sheet2!TotalSales" is also possible, in which "TotalSales" is a named area
00631        else
00632        {
00633 
00634          // check if it's a cell ref like A32, not named area
00635          QString cell;
00636          for( int j = tokenText.length()-1; j>=0; j-- )
00637            if( tokenText[j] == '!' )
00638                break;
00639            else
00640                cell.prepend( tokenText[j] );
00641          QRegExp exp("(\\$?)([a-zA-Z]+)(\\$?)([0-9]+)$");
00642          if( exp.search( cell ) != 0 )
00643          {
00644 
00645            // we're done with named area
00646            // (Tomas) huh? this doesn't seem to check for named areas ...
00647            tokens.append( Token( Token::Range, tokenText, tokenStart ) );
00648            tokenText = "";
00649            state = Start;
00650          }
00651 
00652          else
00653          {
00654 
00655            // so up to now we've got something like A2 or Sheet2!F4
00656            // check for range reference
00657            if( ch == ':' )
00658            {
00659              tokenText.append( ex[i++] );
00660              state = InRange;
00661            }
00662            else
00663            {
00664              // we're done with cell reference
00665              tokens.append( Token( Token::Cell, tokenText, tokenStart ) );
00666              tokenText = "";
00667              state = Start;
00668            }
00669          }
00670        }
00671        break;
00672 
00673     case InRange:
00674 
00675        // consume as long as alpha, dollar sign, underscore, or digit
00676        if( isIdentifier( ch )  || ch.isDigit() ) tokenText.append( ex[i++] );
00677 
00678        // we're done with range reference
00679        else
00680        {
00681          tokens.append( Token( Token::Range, tokenText, tokenStart ) );
00682          tokenText = "";
00683          state = Start;
00684        }
00685        break;
00686 
00687     case InSheetName:
00688 
00689        // consume until '
00690        if( ch.unicode() != 39 ) tokenText.append( ex[i++] );
00691 
00692        else
00693        {
00694          // must be followed by '!', otherwise we have a string in ''
00695          i++;
00696          if( ex[i] == '!' )
00697          {
00698            tokenText.append( ex[i++] );
00699            state = InCell;
00700          }
00701          else {
00702        // this is the same as the check in InIdentifier ... TODO merge
00703            bool gotNamed = false;
00704            // check for named areas ...
00705            if (d->sheet) {
00706          QString txt = tokenText.mid(1, tokenText.length() - 2).lower();
00707              const QValueList<Reference> areas = d->sheet->doc()->listArea();
00708              QValueList<Reference>::const_iterator it;
00709              for (it = areas.begin(); it != areas.end(); ++it) {
00710                if ((*it).ref_name.lower() == txt) {
00711                  // we got a named area
00712                  tokens.append (Token (Token::Range, tokenText, tokenStart));
00713                  gotNamed = true;
00714                  break;
00715                 }
00716               }
00717            }
00718            if (!gotNamed)
00719              tokens.append (Token (Token::Identifier, tokenText,
00720                tokenStart));
00721            tokenStart = i;
00722            tokenText = "";
00723            state = Start;
00724      }
00725        }
00726        break;
00727 
00728     case InNumber:
00729 
00730        // consume as long as it's digit
00731        if( ch.isDigit() ) tokenText.append( ex[i++] );
00732 
00733        // skip thousand separator
00734        else if( !thousand.isEmpty() && ( ch ==thousand[0] ) ) i++;
00735 
00736        // convert decimal separator to '.', also support '.' directly
00737        // we always support '.' because of bug #98455
00738        else if(( !decimal.isEmpty() && ( ch == decimal[0] ) ) || (ch == '.'))
00739        {
00740          tokenText.append( '.' );
00741          i++;
00742          state = InDecimal;
00743        }
00744 
00745        // exponent ?
00746        else if( ch.upper() == 'E' )
00747        {
00748          tokenText.append( 'E' );
00749          i++;
00750          state = InExpIndicator;
00751        }
00752 
00753        // we're done with integer number
00754        else
00755        {
00756          tokens.append( Token( Token::Integer, tokenText, tokenStart ) );
00757          tokenText = "";
00758          state = Start;
00759        };
00760        break;
00761 
00762     case InDecimal:
00763 
00764        // consume as long as it's digit
00765        if( ch.isDigit() ) tokenText.append( ex[i++] );
00766 
00767        // exponent ?
00768        else if( ch.upper() == 'E' )
00769        {
00770          tokenText.append( 'E' );
00771          i++;
00772          state = InExpIndicator;
00773        }
00774 
00775        // we're done with floating-point number
00776        else
00777        {
00778          tokens.append( Token( Token::Float, tokenText, tokenStart ) );
00779          tokenText = "";
00780          state = Start;
00781        };
00782        break;
00783 
00784     case InExpIndicator:
00785 
00786        // possible + or - right after E, e.g 1.23E+12 or 4.67E-8
00787        if( ( ch == '+' ) || ( ch == '-' ) ) tokenText.append( ex[i++] );
00788 
00789        // consume as long as it's digit
00790        else if( ch.isDigit() ) state = InExponent;
00791 
00792        // invalid thing here
00793        else state = Bad;
00794 
00795        break;
00796 
00797     case InExponent:
00798 
00799        // consume as long as it's digit
00800        if( ch.isDigit() ) tokenText.append( ex[i++] );
00801 
00802        // we're done with floating-point number
00803        else
00804        {
00805          tokens.append( Token( Token::Float, tokenText, tokenStart ) );
00806          tokenText = "";
00807          state = Start;
00808        };
00809        break;
00810 
00811     case InString:
00812 
00813        // consume until "
00814        if( ch != '"' ) tokenText.append( ex[i++] );
00815 
00816        else
00817        {
00818          tokenText.append( ch ); i++;
00819          tokens.append( Token( Token::String, tokenText, tokenStart ) );
00820          tokenText = "";
00821          state = Start;
00822        }
00823        break;
00824 
00825     case Bad:
00826     default:
00827        break;
00828     };
00829   };
00830 
00831   if( state == Bad )
00832     tokens.setValid( false );
00833 
00834   return tokens;
00835 }
00836 
00837 // will affect: dirty, valid, codes, constants
00838 void Formula::compile( const Tokens& tokens ) const
00839 {
00840   // initialize variables
00841   d->dirty = false;
00842   d->valid = false;
00843   d->codes.clear();
00844   d->constants.clear();
00845 
00846   // sanity check
00847   if( tokens.count() == 0 ) return;
00848 
00849   TokenStack syntaxStack;
00850   QValueStack<int> argStack;
00851   unsigned argCount = 1;
00852 
00853   for( unsigned i = 0; i <= tokens.count(); i++ )
00854   {
00855     // helper token: InvalidOp is end-of-formula
00856     Token token =  ( i < tokens.count() ) ? tokens[i] : Token( Token::Operator );
00857     Token::Type tokenType = token.type();
00858 
00859     // unknown token is invalid
00860     if( tokenType == Token::Unknown ) break;
00861 
00862     // for constants, push immediately to stack
00863     // generate code to load from a constant
00864     if ( ( tokenType == Token::Integer ) || ( tokenType == Token::Float ) ||
00865     ( tokenType == Token::String ) || ( tokenType == Token::Boolean ) )
00866     {
00867       syntaxStack.push( token );
00868       d->constants.append( tokenAsValue( token ) );
00869       d->codes.append( Opcode( Opcode::Load, d->constants.count()-1 ) );
00870     }
00871 
00872     // for cell, range, or identifier, push immediately to stack
00873     // generate code to load from reference
00874     if( ( tokenType == Token::Cell ) || ( tokenType == Token::Range ) ||
00875     ( tokenType == Token::Identifier ) )
00876     {
00877       syntaxStack.push( token );
00878       d->constants.append( Value( token.text() ) );
00879       if (tokenType == Token::Cell)
00880         d->codes.append( Opcode( Opcode::Cell, d->constants.count()-1 ) );
00881       else if (tokenType == Token::Range)
00882         d->codes.append( Opcode( Opcode::Range, d->constants.count()-1 ) );
00883       else
00884         d->codes.append( Opcode( Opcode::Ref, d->constants.count()-1 ) );
00885     }
00886 
00887     // are we entering a function ?
00888     // if token is operator, and stack already has: id ( arg
00889     if( tokenType == Token::Operator )
00890     if( syntaxStack.itemCount() >= 3 )
00891     {
00892         Token arg = syntaxStack.top();
00893         Token par = syntaxStack.top( 1 );
00894         Token id = syntaxStack.top( 2 );
00895         if( !arg.isOperator() )
00896         if( par.asOperator() == Token::LeftPar )
00897         if( id.isIdentifier() )
00898         {
00899           argStack.push( argCount );
00900           argCount = 1;
00901         }
00902      }
00903 
00904      // special case for percentage
00905     if( tokenType == Token::Operator )
00906     if( token.asOperator() == Token::Percent )
00907     if( syntaxStack.itemCount() >= 1 )
00908     if( !syntaxStack.top().isOperator() )
00909     {
00910       d->constants.append( Value( 0.01 ) );
00911       d->codes.append( Opcode( Opcode::Load, d->constants.count()-1 ) );
00912       d->codes.append( Opcode( Opcode::Mul ) );
00913     }
00914 
00915     // for any other operator, try to apply all parsing rules
00916     if( tokenType == Token::Operator )
00917     if( token.asOperator() != Token::Percent )
00918     {
00919       // repeat until no more rule applies
00920       for( ; ; )
00921       {
00922         bool ruleFound = false;
00923 
00924         // rule for function arguments, if token is ; or )
00925         // id ( arg1 ; arg2 -> id ( arg
00926         if( !ruleFound )
00927         if( syntaxStack.itemCount() >= 5 )
00928         if( ( token.asOperator() == Token::RightPar ) ||
00929         ( token.asOperator() == Token::Semicolon ) )
00930         {
00931           Token arg2 = syntaxStack.top();
00932           Token sep = syntaxStack.top( 1 );
00933           Token arg1 = syntaxStack.top( 2 );
00934           Token par = syntaxStack.top( 3 );
00935           Token id = syntaxStack.top( 4 );
00936           if( !arg2.isOperator() )
00937           if( sep.asOperator() == Token::Semicolon )
00938           if( !arg1.isOperator() )
00939           if( par.asOperator() == Token::LeftPar )
00940           if( id.isIdentifier() )
00941           {
00942             ruleFound = true;
00943             syntaxStack.pop();
00944             syntaxStack.pop();
00945             argCount++;
00946           }
00947         }
00948 
00949         // rule for function last argument:
00950         //  id ( arg ) -> arg
00951         if( !ruleFound )
00952         if( syntaxStack.itemCount() >= 4 )
00953         {
00954           Token par2 = syntaxStack.top();
00955           Token arg = syntaxStack.top( 1 );
00956           Token par1 = syntaxStack.top( 2 );
00957           Token id = syntaxStack.top( 3 );
00958           if( par2.asOperator() == Token::RightPar )
00959           if( !arg.isOperator() )
00960           if( par1.asOperator() == Token::LeftPar )
00961           if( id.isIdentifier() )
00962           {
00963             ruleFound = true;
00964             syntaxStack.pop();
00965             syntaxStack.pop();
00966             syntaxStack.pop();
00967             syntaxStack.pop();
00968             syntaxStack.push( arg );
00969             d->codes.append( Opcode( Opcode::Function, argCount ) );
00970             argCount = argStack.empty() ? 0 : argStack.pop();
00971           }
00972         }
00973 
00974         // rule for function call with parentheses, but without argument
00975         // e.g. "2*PI()"
00976         if( !ruleFound )
00977         if( syntaxStack.itemCount() >= 3 )
00978         {
00979           Token par2 = syntaxStack.top();
00980           Token par1 = syntaxStack.top( 1 );
00981           Token id = syntaxStack.top( 2 );
00982           if( par2.asOperator() == Token::RightPar )
00983           if( par1.asOperator() == Token::LeftPar )
00984           if( id.isIdentifier() )
00985           {
00986             ruleFound = true;
00987             syntaxStack.pop();
00988             syntaxStack.pop();
00989             syntaxStack.pop();
00990             syntaxStack.push( Token( Token::Integer ) );
00991             d->codes.append( Opcode( Opcode::Function, 0 ) );
00992           }
00993         }
00994 
00995         // rule for parenthesis:  ( Y ) -> Y
00996         if( !ruleFound )
00997         if( syntaxStack.itemCount() >= 3 )
00998         {
00999           Token right = syntaxStack.top();
01000           Token y = syntaxStack.top( 1 );
01001           Token left = syntaxStack.top( 2 );
01002           if( right.isOperator() )
01003           if( !y.isOperator() )
01004           if( left.isOperator() )
01005           if( right.asOperator() == Token::RightPar )
01006           if( left.asOperator() == Token::LeftPar )
01007           {
01008             ruleFound = true;
01009             syntaxStack.pop();
01010             syntaxStack.pop();
01011             syntaxStack.pop();
01012             syntaxStack.push( y );
01013           }
01014         }
01015 
01016         // rule for binary operator:  A (op) B -> A
01017         // conditions: precedence of op >= precedence of token
01018         // action: push (op) to result
01019         // e.g. "A * B" becomes "A" if token is operator "+"
01020         // exception: for caret (power operator), if op is another caret
01021         // then the rule doesn't apply, e.g. "2^3^2" is evaluated as "2^(3^2)"
01022         if( !ruleFound )
01023         if( syntaxStack.itemCount() >= 3 )
01024         {
01025           Token b = syntaxStack.top();
01026           Token op = syntaxStack.top( 1 );
01027           Token a = syntaxStack.top( 2 );
01028           if( !a.isOperator() )
01029           if( !b.isOperator() )
01030           if( op.isOperator() )
01031           if( token.asOperator() != Token::LeftPar )
01032           if( token.asOperator() != Token::Caret )
01033           if( opPrecedence( op.asOperator() ) >= opPrecedence( token.asOperator() ) )
01034           {
01035             ruleFound = true;
01036             syntaxStack.pop();
01037             syntaxStack.pop();
01038             syntaxStack.pop();
01039             syntaxStack.push( b );
01040             switch( op.asOperator() )
01041             {
01042               // simple binary operations
01043               case Token::Plus:         d->codes.append( Opcode::Add ); break;
01044               case Token::Minus:        d->codes.append( Opcode::Sub ); break;
01045               case Token::Asterisk:     d->codes.append( Opcode::Mul ); break;
01046               case Token::Slash:        d->codes.append( Opcode::Div ); break;
01047               case Token::Caret:        d->codes.append( Opcode::Pow ); break;
01048               case Token::Ampersand:    d->codes.append( Opcode::Concat ); break;
01049 
01050               // simple value comparisons
01051               case Token::Equal:        d->codes.append( Opcode::Equal ); break;
01052               case Token::Less:         d->codes.append( Opcode::Less ); break;
01053               case Token::Greater:      d->codes.append( Opcode::Greater ); break;
01054 
01055               // NotEqual is Equal, followed by Not
01056               case Token::NotEqual:
01057                 d->codes.append( Opcode::Equal );
01058                 d->codes.append( Opcode::Not );
01059                 break;
01060 
01061               // LessOrEqual is Greater, followed by Not
01062               case Token::LessEqual:
01063                 d->codes.append( Opcode::Greater );
01064                 d->codes.append( Opcode::Not );
01065                 break;
01066 
01067               // GreaterOrEqual is Less, followed by Not
01068               case Token::GreaterEqual:
01069                 d->codes.append( Opcode::Less );
01070                 d->codes.append( Opcode::Not );
01071                 break;
01072               default: break;
01073             };
01074           }
01075          }
01076 
01077          // rule for unary operator:  (op1) (op2) X -> (op1) X
01078          // conditions: op2 is unary
01079          // action: push (op2) to result
01080          // e.g.  "* - 2" becomes "*"
01081          if( !ruleFound )
01082          if( syntaxStack.itemCount() >= 3 )
01083          {
01084             Token x = syntaxStack.top();
01085             Token op2 = syntaxStack.top( 1 );
01086             Token op1 = syntaxStack.top( 2 );
01087             if( !x.isOperator() )
01088             if( op1.isOperator() )
01089             if( op2.isOperator() )
01090             if( ( op2.asOperator() == Token::Plus ) ||
01091                ( op2.asOperator() == Token::Minus ) )
01092             {
01093               ruleFound = true;
01094               syntaxStack.pop();
01095               syntaxStack.pop();
01096               syntaxStack.push( x );
01097               if( op2.asOperator() == Token::Minus )
01098                 d->codes.append( Opcode( Opcode::Neg ) );
01099             }
01100           }
01101 
01102          // auxilary rule for unary operator:  (op) X -> X
01103          // conditions: op is unary, op is first in syntax stack
01104          // action: push (op) to result
01105          if( !ruleFound )
01106          if( syntaxStack.itemCount() == 2 )
01107          {
01108             Token x = syntaxStack.top();
01109             Token op = syntaxStack.top( 1 );
01110             if( !x.isOperator() )
01111             if( op.isOperator() )
01112             if( ( op.asOperator() == Token::Plus ) ||
01113                ( op.asOperator() == Token::Minus ) )
01114             {
01115               ruleFound = true;
01116               syntaxStack.pop();
01117               syntaxStack.pop();
01118               syntaxStack.push( x );
01119               if( op.asOperator() == Token::Minus )
01120                 d->codes.append( Opcode( Opcode::Neg ) );
01121             }
01122           }
01123 
01124         if( !ruleFound ) break;
01125       }
01126 
01127       // can't apply rules anymore, push the token
01128       if( token.asOperator() != Token::Percent )
01129         syntaxStack.push( token );
01130     }
01131   }
01132 
01133   // syntaxStack must left only one operand and end-of-formula (i.e. InvalidOp)
01134   d->valid = false;
01135   if( syntaxStack.itemCount() == 2 )
01136   if( syntaxStack.top().isOperator() )
01137   if( syntaxStack.top().asOperator() == Token::InvalidOp )
01138   if( !syntaxStack.top(1).isOperator() )
01139     d->valid = true;
01140 
01141   // bad parsing ? clean-up everything
01142   if( !d->valid )
01143   {
01144     d->constants.clear();
01145     d->codes.clear();
01146   }
01147 }
01148 
01149 
01150 // Evaluates the formula, returns the result.
01151 
01152 struct stackEntry {
01153   void reset () { row1 = col1 = row2 = col2 = -1; };
01154   Value val;
01155   int row1, col1, row2, col2;
01156 };
01157 
01158 Value Formula::eval() const
01159 {
01160   QValueStack<stackEntry> stack;
01161   stackEntry entry;
01162   unsigned index;
01163   Value val1, val2;
01164   QString c;
01165   QValueVector<Value> args;
01166 
01167   Sheet *sheet = 0;
01168   ValueParser* parser = 0;
01169   ValueConverter* converter = 0;
01170   ValueCalc* calc = 0;
01171 
01172   if (d->sheet)
01173   {
01174     sheet = d->sheet;
01175     converter = sheet->doc()->converter();
01176     calc = sheet->doc()->calc();
01177   }
01178   else
01179   {
01180     parser = new ValueParser( KGlobal::locale() );
01181     converter = new ValueConverter( parser );
01182     calc = new ValueCalc( converter );
01183   }
01184 
01185   Function* function;
01186   FuncExtra fe;
01187   fe.mycol = fe.myrow = 0;
01188   if (d->cell) {
01189     fe.mycol = d->cell->column();
01190     fe.myrow = d->cell->row();
01191   }
01192 
01193   if( d->dirty )
01194   {
01195     Tokens tokens = scan( d->expression );
01196     d->valid = tokens.valid();
01197     if( tokens.valid() )
01198       compile( tokens );
01199   }
01200 
01201   if( !d->valid )
01202     return Value::errorVALUE();
01203 
01204   for( unsigned pc = 0; pc < d->codes.count(); pc++ )
01205   {
01206     Value ret;   // for the function caller
01207     Opcode& opcode = d->codes[pc];
01208     index = opcode.index;
01209     switch( opcode.type )
01210     {
01211       // no operation
01212       case Opcode::Nop:
01213         break;
01214 
01215       // load a constant, push to stack
01216       case Opcode::Load:
01217         entry.reset();
01218         entry.val = d->constants[index];
01219         stack.push (entry);
01220         break;
01221 
01222       // unary operation
01223       case Opcode::Neg:
01224         entry.reset();
01225         entry.val = stack.pop().val;
01226         if (!entry.val.isError()) // do nothing if we got an error
01227           entry.val = calc->mul (entry.val, -1);
01228         stack.push (entry);
01229         break;
01230 
01231       // binary operation: take two values from stack, do the operation,
01232       // push the result to stack
01233       case Opcode::Add:
01234         entry.reset();
01235         val2 = stack.pop().val;
01236         val1 = stack.pop().val;
01237         val2 = calc->add( val1, val2 );
01238         entry.reset();
01239         entry.val = val2;
01240         stack.push (entry);
01241         break;
01242 
01243       case Opcode::Sub:
01244         val2 = stack.pop().val;
01245         val1 = stack.pop().val;
01246         val2 = calc->sub( val1, val2 );
01247         entry.reset();
01248         entry.val = val2;
01249         stack.push (entry);
01250         break;
01251 
01252       case Opcode::Mul:
01253         val2 = stack.pop().val;
01254         val1 = stack.pop().val;
01255         val2 = calc->mul( val1, val2 );
01256         entry.reset();
01257         entry.val = val2;
01258         stack.push (entry);
01259         break;
01260 
01261       case Opcode::Div:
01262         val2 = stack.pop().val;
01263         val1 = stack.pop().val;
01264         val2 = calc->div( val1, val2 );
01265         entry.reset();
01266         entry.val = val2;
01267         stack.push (entry);
01268         break;
01269 
01270       case Opcode::Pow:
01271         val2 = stack.pop().val;
01272         val1 = stack.pop().val;
01273         val2 = calc->pow( val1, val2 );
01274         entry.reset();
01275         entry.val = val2;
01276         stack.push (entry);
01277         break;
01278 
01279       // string concatenation
01280       case Opcode::Concat:
01281         val1 = converter->asString (stack.pop().val);
01282     val2 = converter->asString (stack.pop().val);
01283         if (val1.isError() || val2.isError())
01284       val1 = Value::errorVALUE();
01285     else
01286           val1.setValue( val2.asString().append( val1.asString() ) );
01287         entry.reset();
01288         entry.val = val1;
01289         stack.push (entry);
01290         break;
01291 
01292       // logical not
01293       case Opcode::Not:
01294         val1 = converter->asBoolean (stack.pop().val);
01295         if( val1.isError() )
01296       val1 = Value::errorVALUE();
01297     else
01298       val1.setValue( !val1.asBoolean() );
01299         entry.reset();
01300         entry.val = val1;
01301         stack.push (entry);
01302         break;
01303 
01304       // comparison
01305       case Opcode::Equal:
01306         val1 = stack.pop().val;
01307         val2 = stack.pop().val;
01308         if( !val1.allowComparison( val2 ) )
01309           val1 = Value::errorNA();
01310     else if( val2.compare( val1 ) == 0 )
01311           val1 = Value (true);
01312         else
01313           val1 = Value (false);
01314         entry.reset();
01315         entry.val = val1;
01316         stack.push (entry);
01317         break;
01318 
01319       // less than
01320       case Opcode::Less:
01321         val1 = stack.pop().val;
01322         val2 = stack.pop().val;
01323         if( !val1.allowComparison( val2 ) )
01324           val1 = Value::errorNA();
01325     else if( val2.compare( val1 ) < 0 )
01326           val1 = Value (true);
01327         else
01328           val1 = Value (false);
01329         entry.reset();
01330         entry.val = val1;
01331         stack.push (entry);
01332         break;
01333 
01334       // greater than
01335       case Opcode::Greater:
01336         val1 = stack.pop().val;
01337         val2 = stack.pop().val;
01338         if( !val1.allowComparison( val2 ) )
01339           val1 = Value::errorNA();
01340     else if( val2.compare( val1 ) > 0 )
01341           val1 = Value (true);
01342         else
01343           val1 = Value (false);
01344         entry.reset();
01345         entry.val = val1;
01346         stack.push (entry);
01347         break;
01348 
01349 
01350       case Opcode::Cell:
01351         c = d->constants[index].asString();
01352         val1 = Value::empty();
01353         entry.reset();
01354         if (sheet)
01355         {
01356           Point cell (c, sheet->workbook(), sheet);
01357           if (cell.isValid())
01358           {
01359             val1 = cell.sheet()->value (cell.column(), cell.row());
01360             // store the reference, so we can use it within functions
01361             entry.col1 = entry.col2 = cell.column();
01362             entry.row1 = entry.row2 = cell.row();
01363           }
01364         }
01365         entry.val = val1;
01366         stack.push (entry);
01367         break;
01368 
01369       case Opcode::Range:
01370         c = d->constants[index].asString();
01371         val1 = Value::empty();
01372         entry.reset();
01373         if (sheet)
01374         {
01375           Range range (c, sheet->workbook(), sheet);
01376           if (range.isValid())
01377           {
01378             val1 = range.sheet()->valueRange (range.startCol(), range.startRow(),
01379                 range.endCol(), range.endRow());
01380             // store the reference, so we can use it within functions
01381             entry.col1 = range.startCol();
01382             entry.row1 = range.startRow();
01383             entry.col2 = range.endCol();
01384             entry.row2 = range.endRow();
01385           }
01386         }
01387         entry.val = val1;
01388         stack.push (entry);
01389         break;
01390 
01391       case Opcode::Ref:
01392         val1 = d->constants[index];
01393         entry.reset();
01394         entry.val = val1;
01395         stack.push (entry);
01396         break;
01397 
01398       // calling function
01399       case Opcode::Function:
01400         if( stack.count() < index )
01401           // (Tomas) umm, how could that be ? I mean, the index value
01402           //  is computed from the stack *confused*
01403           return Value::errorVALUE(); // not enough arguments
01404 
01405         args.clear();
01406         fe.ranges.clear ();
01407         fe.ranges.reserve (index);
01408         fe.sheet = sheet;
01409         for( ; index; index-- )
01410         {
01411           stackEntry e = stack.pop();
01412           args.insert (args.begin(), e.val);
01413           // TODO: create and fill a FunctionExtra object, if needed
01414           // problem: we don't know if we need it, as we don't have the
01415           // fuction name yet ...
01416           fe.ranges[index - 1].col1 = e.col1;
01417           fe.ranges[index - 1].row1 = e.row1;
01418           fe.ranges[index - 1].col2 = e.col2;
01419           fe.ranges[index - 1].row2 = e.row2;
01420         }
01421 
01422         // function name as string value
01423         val1 = converter->asString (stack.pop().val);
01424         if( val1.isError() )
01425           return Value::errorVALUE();
01426         function = FunctionRepository::self()->function ( val1.asString() );
01427         if( !function )
01428           return Value::errorVALUE(); // no such function
01429 
01430         ret = function->exec (args, calc, &fe);
01431         entry.reset();
01432         entry.val = ret;
01433         stack.push (entry);
01434 
01435         break;
01436 
01437       default:
01438         break;
01439     }
01440   }
01441 
01442   if (!d->sheet) {
01443     delete parser;
01444     delete converter;
01445     delete calc;
01446   }
01447 
01448   // more than one value in stack ? unsuccesful execution...
01449   if( stack.count() != 1 )
01450     return Value::errorVALUE();
01451 
01452   return stack.pop().val;
01453 
01454 }
01455 
01456 // Debugging aid
01457 
01458 QString Formula::dump() const
01459 {
01460   QString result;
01461 
01462   if( d->dirty )
01463   {
01464     Tokens tokens = scan( d->expression );
01465     compile( tokens );
01466   }
01467 
01468   result = QString("Expression: [%1]\n").arg( d->expression );
01469 #if 0
01470   Value value = eval();
01471   result.append( QString("Result: %1\n").arg(
01472       converter->asString(value).asString() ) );
01473 #endif
01474 
01475   result.append("  Constants:\n");
01476   for( unsigned c = 0; c < d->constants.count(); c++ )
01477   {
01478     QString vtext;
01479     Value val = d->constants[c];
01480     if( val.isString() ) vtext = QString("[%1]").arg( val.asString() );
01481     else if( val.isNumber() ) vtext = QString("%1").arg( val.asFloat() );
01482     else if( val.isBoolean() ) vtext = QString("%1").arg( val.asBoolean() ? "True":"False");
01483     else if( val.isError() ) vtext = "error";
01484     else vtext = "???";
01485     result += QString("    #%1 = %2\n").arg(c).arg( vtext );
01486   }
01487 
01488   result.append("\n");
01489   result.append("  Code:\n");
01490   for( unsigned i = 0; i < d->codes.count(); i++ )
01491   {
01492     QString ctext;
01493     switch( d->codes[i].type )
01494     {
01495       case Opcode::Load:     ctext = QString("Load #%1").arg( d->codes[i].index ); break;
01496       case Opcode::Ref:      ctext = QString("Ref #%1").arg( d->codes[i].index ); break;
01497       case Opcode::Function: ctext = QString("Function (%1)").arg( d->codes[i].index ); break;
01498       case Opcode::Add:      ctext = "Add"; break;
01499       case Opcode::Sub:      ctext = "Sub"; break;
01500       case Opcode::Mul:      ctext = "Mul"; break;
01501       case Opcode::Div:      ctext = "Div"; break;
01502       case Opcode::Neg:      ctext = "Neg"; break;
01503       case Opcode::Concat:   ctext = "Concat"; break;
01504       case Opcode::Pow:      ctext = "Pow"; break;
01505       case Opcode::Equal:    ctext = "Equal"; break;
01506       case Opcode::Not:      ctext = "Not"; break;
01507       case Opcode::Less:     ctext = "Less"; break;
01508       case Opcode::Greater:  ctext = "Greater"; break;
01509       default: ctext = "Unknown"; break;
01510     }
01511     result.append( "   " ).append( ctext ).append("\n");
01512   }
01513 
01514   return result;
01515 }
01516 
01517 QTextStream& operator<<( QTextStream& ts, Formula formula )
01518 {
01519   ts << formula.dump();
01520   return ts;
01521 }
KDE Home | KDE Accessibility Home | Description of Access Keys