ETHZ_Logo RAMSES_Logo_Right   RAMSES   RAMSES_Logo_Left Systems Ecology  
Start    search button      Modules:   A-Z   Function   Layer        QuickRefs:   DM   AuxLib   AuxLibE   SciLib   EasyMW   MW   ISIS   RMSLib

DEFINITION MODULE  Scanner;

  (*******************************************************************

    Module  Scanner     (Version 1.0)

      Copyright (c) 1994-2006 by Dimitrios Gyalistras, Juerg Thoeny and
      ETH Zurich.

    Purpose   Maintains several symbol tables and scans sequentially
              the contents of a string buffer or a file.

    Remarks   - The symbols recognized by the scanner are a set of
                predefined symbols (see CONST-declarations below),
                and (2) the symbols in the currently used symbol
                table.

              - The following definitions hold:

                1.  INTEGER     =  [0-9]+
                2.  LONGINT     =  [0-9]+ "D"
                3.  REAL        =  [0-9]+ "." [0-9]+ (("E"|"e")[+-]?[0-9]+)?
                4.  LONGREAL    =  [0-9]+ "." [0-9]+ (("D"|"d")[+-]?[0-9]+)?
                5.  IDENTIFIER  =  [a-zA-Z]+ [_a-zA-Z0-9]*
                6.  STRING      =  ('.*')|(".*")

                Above definitions are given in regular expression notation,i.e.:
                      exp?           =  0 or 1 occurrence of exp,
                      exp*           =  0..N occurrences of exp,
                      exp+           =  1..N occurrences of exp,
                      "str"          =  mandatory occurrence of string str.
                      (exp)          =  contents of bracket are grouped
                      exp1|exp2      =  exp1 or exp2
                      .              =  any character except newline
                      [minCh-maxCh]  =  a single char from the set of
                                        characters from minCh to maxCh

                NOTE: INTEGER and REAL carry NO SIGN ("+"/"-"). The sign must
                      be scanned separately (sym=specialCharSym).
                      Reading of signed numbers is supported by procedures
                      TryXYZ (see below).

             - White spaces are all characters <=' ' and are skipped.

             - Symbols of the types 1-6 must be separated by white spaces
               or special characters, i.e. can not follow immediately
               upon each other.

             - Special characters are all non-"white space" characters,
               which are not attributed to a symbol of type 1-6.
               (e.g. "+" or "-" before a REAL, "/", ":" etc.)

             - The scanner skips comments starting with "(*" and closing
               with "*)".  Recursive comments are possible.


    Programming

      o Design
        Dimitrios Gyalistras      04/01/1994
        Juerg Thoeny              04/01/1994

      o Implementation
        Dimitrios Gyalistras      04/01/1994


    ETH Zurich
    Systems Ecology
    CHN E 35.1
    Universitaetstrasse 16
    8092 Zurich
    SWITZERLAND

    URLs:
        <mailto:RAMSES@env.ethz.ch>
        <http://www.sysecol.ethz.ch>
        <http://www.sysecol.ethz.ch/SimSoftware/RAMSES>


    Last revision of definition:  20/03/2003  AF

  *******************************************************************)


  CONST
    MaxChars     = 1024;
    LgthResWord  = 32;    (* maximum length of a reserved word in a symbol table *)
    MaxResWords  = 128;   (* maximum number of reserved words per symbol table *)

  TYPE
    SymTable;
    Symbol = INTEGER;
    Str1024 = ARRAY [0..MaxChars-1] OF CHAR;
    Str256 = ARRAY [0..256-1] OF CHAR;

  CONST
    (* symbols common to all symbol tables *)
    nul             =  0;  (* a bad file or number syntax or an overflow occurred *)
    eodSym          = -1;  (* end of data or of file reached *)

    unknownIdent    = -2;  (* token has syntax of an ident, but is not in the symbol table*)
    integerSym      = -3;  (* an integer number *)
    longIntSym      = -4;  (* a longinteger number *)
    realSym         = -5;  (* a real number *)
    longRealSym     = -6;  (* a longreal number *)
    strSym          = -7;  (* a string  *)

    specialCharSym  = -8;  (* a non-"white space" character, given that none of
                             the above symbols can be returned *)



  (* the following type is used ot indicate the reason for sym=nul *)

  TYPE
    ScannerError =
       (
       noError,

       unexpectedEOD,    (* unexpected end of data/file reached *)

       badSeparation,    (* Two symbols of type 1-6 were not separated by a
                            white space or a special character *)

       badRealSyntax,    (* A (long)real number with bad syntax was encountered *)

       badIdentSyntax,   (* Bad syntax within an ident was encountered *)

       numOverflow,      (* sym = (integerSym, longIntSym, realSym, or longRealSym)
                            was detected, but the resulting number was too large *)

       identOverflow,    (* (sym = unknownIdent was detected, but the ident was
                            too long (>LgthResWord) *)

       strOverflow,      (* sym = strSym was detected, but the read string was longer
                            then MaxChars. In this case, curStr will contain only
                            the first MaxChars+1 of the string. *)

       strContainsEOL,   (* sym = strSym was detected, but an EOL-character was
                            found within the string*)

       commentNotClosed  (* a comment was opened and never closed *)

       );

  (*--------------------------------------------------------------------------*)


  VAR
    str        : Str1024;         (* The last scanned token *)

    sym        : Symbol;          (* Symbolic meaning of the token *)


    scanErr    : ScannerError;    (* Will be <>noError if sym=nul *)

    scanErrStr : Str256;          (* Will contain an error text, if error<>noError *)


    int        : INTEGER;
    lint       : LONGINT;         (* Will contain str converted to integer (longInt),
                                  if sym = integerSym (longIntSym). *)

    real       : REAL;
    lreal      : LONGREAL;        (* Will contain str converted to real (longreal),
                                  if sym = realSym (longRealSym). *)

    whiteChFollows: BOOLEAN ;     (* Will be true, if the symbol is followed by
                                  a white character *)


  (*--------------------------------------------------------------------------*)


  PROCEDURE IsIdentifier( str: ARRAY OF CHAR ): BOOLEAN;
    (*
      Returns TRUE if str has an identifier syntax.
    *)


  PROCEDURE IsAllowedResWord( str: ARRAY OF CHAR ): BOOLEAN;
    (*
      Returns TRUE if str has an identifier syntax, and its
      length is less than LgthResWord.
    *)


  VAR
    notDeclaredSymTable: SymTable; (* read only *)


  PROCEDURE NewSymTable( VAR symTbl: SymTable;
                         VAR errTxt: ARRAY OF CHAR ):BOOLEAN ;
    (*
      Instanciates a new symbol table. If this is not possible,
      FALSE is returned and an error message is given in errTxt
      If symTbl already exists, its contents are cleared.
    *)


  PROCEDURE RemoveSymTable( VAR symTbl: SymTable );
    (*
      Removes symTbl if it exists.
    *)


  PROCEDURE InsertSymbol( symTbl    : SymTable;
                          resWrd    : ARRAY OF CHAR;
                          sym       : Symbol;
                          VAR errTxt: ARRAY OF CHAR):BOOLEAN ;
    (*
      Inserts reserved word resWrd associated with symbol sym
      in symTbl. If this is not possible, FALSE is returned and
      an error message is given in errTxt.
    *)


  (*--------------------------------------------------------------------------*)


  PROCEDURE InitFileScan(fileName  : ARRAY OF CHAR;
                         symTbl    : SymTable;
                         VAR errTxt: ARRAY OF CHAR): BOOLEAN;
    (*
      Prepare scanning of file fileName using symbol table
      symTbl. From here on, calling GetSym (see below) will
      operate on this file this file.
      NOTE: The file is opened for scanning and is left open until
      StopScanner is called succesfully.
    *)


  PROCEDURE InitBufferScan(VAR buff  : ARRAY OF CHAR;
                           symTbl    : SymTable;
                           VAR errTxt: ARRAY OF CHAR): BOOLEAN;
    (*
      Prepare scanning of buffer buff using symbol table
      symTbl.  From here on, calling GetSym (see below) will
      operate on this buffer.  NOTE: The buffer variable passed
      should persist after calling this procedure, as long as
      its contents are scanned.  Its content may adhere
      to the end of line convention according to the currently
      running platform (as returned by routines RunsOnAUnixMachine,
      RunsOnAnIBMPC, and RunsOnAUnixMachine from DMSystem) or
      may mark end of lines with the Modula-2 symbol EOL from
      module DMFiles in any combination.
    *)


  PROCEDURE StopScanner;
    (*
      Closes a file possibly opened by a call to InitFileScan and
      disables GetSym (see below), i.e. sym=nul is always returned.
      NOTE: This procedure must be called before calling Init...Scan
      a second time.
    *)


  (*--------------------------------------------------------------------------*)


  PROCEDURE SetSymTable( symTbl : SymTable;
                         VAR errTxt: ARRAY OF CHAR): BOOLEAN;
    (*
      Sets the currently used symbol table for scanning to symTbl.
      A call to this procedure will have no effect, if the scanner
      is currently stopped.
    *)


  PROCEDURE CurSymTable(): SymTable;
    (*
      Returns the current symbol table.
    *)


  PROCEDURE SetCaseSensitivity( cs: BOOLEAN );
    (*
      Sets whether identifiers are scanned case sensitively.
      The default is TRUE.
      NOTE: In case that case sensitivity is switched off, make
      sure that reserved words in the symbol table contain ONLY
      capitalized characters, otherwise the scanner will not
      operate correctly!
    *)


  (*--------------------------------------------------------------------------*)


  PROCEDURE GetSym;
    (*
      Returns the next symbol read from the current input (file or
      buffer) into the variable sym and sets the variables str,
      int or real to the newest value(s).
    *)


  PROCEDURE ReGetSym;
    (*
      Forces that next call to GetSym will return the last read symbol.
      The 2dn next call to GetSym will then continue normally.
    *)


  PROCEDURE GetResWord( symTbl: SymTable; sym: Symbol;
                        VAR str: ARRAY OF CHAR );
    (*
      Returns a descriptor of the predefined symbol sym (in this case
      symTbl is irrelevant) or the reserved word for the symbol sym
      from table symTbl.
    *)


  PROCEDURE GetLineCount( VAR lineNr: LONGINT ) ;
    (*
      Returns line of last scanned symbol.
    *)


  PROCEDURE GetCharCount( VAR charPos: INTEGER );
    (*
      Returns position of last read character on current line.
    *)


 (*--------------------------------------------------------------------------*)

  (*
    The following procedures are particularly usefull for reading
    data files.
    All procedures attempt to read a respective predefined symbol
    followed by a white character.  If succesfull, the result is
    retuned in the formal parameter.
    The respective variables (i.e. str, int, lint, real, lreal).
    NOTE: Signed or unsigned numbers are recognized.
          The respective symbol is also returned in variable sym.
          The global variables int, lint, real, lreal (see above)
          will however NOT have the correct sign.
  *)

  PROCEDURE TryGetString ( VAR ss: ARRAY OF CHAR ):BOOLEAN;

  PROCEDURE TryGetInteger( VAR ii: INTEGER ):BOOLEAN;
  PROCEDURE TryGetLongInt( VAR li: LONGINT ):BOOLEAN;


  PROCEDURE TryGetReal    ( VAR rr: REAL     ):BOOLEAN;
  PROCEDURE TryGetLongReal( VAR lr: LONGREAL ):BOOLEAN;


 (*--------------------------------------------------------------------------*)


  PROCEDURE TryGetText( untilSym: Symbol; VAR txt: ARRAY OF CHAR ):INTEGER;
  (*
    Reads into variable txt all text found from the current
    position until symbol untilSym.
    Returns -1 if EOD was reached before finding untilSym,
    0 if everything was all right, and +1 if reading is stopped
    because txt is full.
  *)


END Scanner.

  Contact RAMSES@env.ethz.ch Last updated: 25-Jul-2011 [Top of page]