DEFINITION MODULE Scanner;
(*******************************************************************
Module Scanner (Version 1.0)
Copyright (c) 1994-2006 by Dimitrios Gyalistras, Juerg Thoeny and
ETH Zurich.
Purpose Maintains several symbol tables and scans sequentially
the contents of a string buffer or a file.
Remarks - The symbols recognized by the scanner are (1) a set of
predefined symbols (see CONST-declarations below),
and (2) the symbols in the currently used symbol
table.
- The following definitions hold:
1. INTEGER = [0-9]+
2. LONGINT = [0-9]+ "D"
3. REAL = [0-9]+ "." [0-9]+ (("E"|"e")[+-]?[0-9]+)?
4. LONGREAL = [0-9]+ "." [0-9]+ (("D"|"d")[+-]?[0-9]+)?
5. IDENTIFIER = [a-zA-Z]+ [_a-zA-Z0-9]*
6. STRING = ('.*')|(".*")
The above definitions are given in regular expression notation, i.e.:
exp? = 0 or 1 occurrence of exp,
exp* = 0..N occurrences of exp,
exp+ = 1..N occurrences of exp,
"str" = mandatory occurrence of string str.
(exp) = the contents of the brackets are grouped
exp1|exp2 = exp1 or exp2
. = any character except newline
[minCh-maxCh] = a single char from the set of
characters from minCh to maxCh
NOTE: INTEGER and REAL carry NO SIGN ("+"/"-"). The sign must
be scanned separately (sym=specialCharSym).
Reading of signed numbers is supported by procedures
TryXYZ (see below).
- White spaces are all characters <=' ' and are skipped.
- Symbols of the types 1-6 must be separated by white spaces
or special characters, i.e. can not follow immediately
upon each other.
- Special characters are all non-"white space" characters,
which are not attributed to a symbol of type 1-6.
(e.g. "+" or "-" before a REAL, "/", ":" etc.)
- The scanner skips comments starting with "(*" and closing
with "*)". Nested comments are possible.
Programming
o Design
Dimitrios Gyalistras 04/01/1994
Juerg Thoeny 04/01/1994
o Implementation
Dimitrios Gyalistras 04/01/1994
ETH Zurich
Systems Ecology
CHN E 35.1
Universitaetstrasse 16
8092 Zurich
SWITZERLAND
URLs:
<mailto:RAMSES@env.ethz.ch>
<http://www.sysecol.ethz.ch>
<http://www.sysecol.ethz.ch/SimSoftware/RAMSES>
Last revision of definition: 20/03/2003 AF
*******************************************************************)
CONST
MaxChars = 1024; (* number of characters in Str1024; a scanned string longer than this is reported as strOverflow *)
LgthResWord = 32; (* maximum length of a reserved word in a symbol table *)
MaxResWords = 128; (* maximum number of reserved words per symbol table *)
TYPE
SymTable; (* opaque handle of a symbol table; see NewSymTable, RemoveSymTable, InsertSymbol *)
Symbol = INTEGER; (* symbol code: either one of the predefined constants below or a code associated with a reserved word via InsertSymbol *)
Str1024 = ARRAY [0..MaxChars-1] OF CHAR; (* type of the token variable str *)
Str256 = ARRAY [0..256-1] OF CHAR; (* type of the error text variable scanErrStr *)
CONST
(* symbols common to all symbol tables *)
(* NOTE(review): all predefined codes are <= 0; presumably symbols inserted
with InsertSymbol should use positive values to avoid clashes - confirm
against the implementation. *)
nul = 0; (* a bad file or number syntax or an overflow occurred; the reason is given in scanErr *)
eodSym = -1; (* end of data or of file reached *)
unknownIdent = -2; (* token has syntax of an ident, but is not in the symbol table *)
integerSym = -3; (* an integer number *)
longIntSym = -4; (* a longinteger number *)
realSym = -5; (* a real number *)
longRealSym = -6; (* a longreal number *)
strSym = -7; (* a string *)
specialCharSym = -8; (* a non-"white space" character, given that none of
the above symbols can be returned *)
(* the following type is used to indicate the reason for sym=nul *)
TYPE
ScannerError =
(
noError,
unexpectedEOD, (* unexpected end of data/file reached *)
badSeparation, (* Two symbols of type 1-6 were not separated by a
white space or a special character *)
badRealSyntax, (* A (long)real number with bad syntax was encountered *)
badIdentSyntax, (* Bad syntax within an ident was encountered *)
numOverflow, (* sym = (integerSym, longIntSym, realSym, or longRealSym)
was detected, but the resulting number was too large *)
identOverflow, (* sym = unknownIdent was detected, but the ident was
too long (>LgthResWord) *)
strOverflow, (* sym = strSym was detected, but the read string was longer
than MaxChars. In this case only the leading part of the
string is kept.
NOTE(review): the original text said "curStr will contain
only the first MaxChars+1 of the string"; no variable
curStr is exported by this module (presumably str is
meant) and Str1024 holds only MaxChars characters -
confirm the exact count against the implementation. *)
strContainsEOL, (* sym = strSym was detected, but an EOL-character was
found within the string *)
commentNotClosed (* a comment was opened and never closed *)
);
(*--------------------------------------------------------------------------*)
VAR
(* Result variables of the scanner; they are updated by GetSym. *)
str : Str1024; (* The last scanned token *)
sym : Symbol; (* Symbolic meaning of the token *)
scanErr : ScannerError; (* Will be <>noError if sym=nul *)
scanErrStr : Str256; (* Will contain an error text, if scanErr<>noError *)
int : INTEGER; (* Will contain str converted to integer, if sym = integerSym. *)
lint : LONGINT; (* Will contain str converted to integer (longInt),
if sym = integerSym (longIntSym). *)
real : REAL; (* Will contain str converted to real, if sym = realSym. *)
lreal : LONGREAL; (* Will contain str converted to real (longreal),
if sym = realSym (longRealSym). *)
whiteChFollows: BOOLEAN ; (* Will be true, if the symbol is followed by
a white character *)
(*--------------------------------------------------------------------------*)
PROCEDURE IsIdentifier( str: ARRAY OF CHAR ): BOOLEAN;
(*
Returns TRUE if str has the syntax of an identifier, i.e. matches
IDENTIFIER = [a-zA-Z]+ [_a-zA-Z0-9]* as defined in the module
header.
*)
PROCEDURE IsAllowedResWord( str: ARRAY OF CHAR ): BOOLEAN;
(*
Returns TRUE if str has an identifier syntax, and its
length is less than LgthResWord.
NOTE(review): the comment on identOverflow calls an ident too
long only if its length is >LgthResWord, which would allow a
length equal to LgthResWord; confirm against the implementation
which of the two limits ("<" or "<=") actually applies.
*)
VAR
notDeclaredSymTable: SymTable; (* read only: clients must not assign to this
variable. Presumably the SymTable value that
denotes a not (yet) existing symbol table -
TODO confirm against the implementation. *)
PROCEDURE NewSymTable( VAR symTbl: SymTable;
VAR errTxt: ARRAY OF CHAR ):BOOLEAN ;
(*
Instantiates a new symbol table and returns it in symTbl. If
this is not possible, FALSE is returned and an error message
is given in errTxt.
If symTbl already exists, its contents are cleared.
*)
PROCEDURE RemoveSymTable( VAR symTbl: SymTable );
(*
Removes symTbl if it exists.
NOTE(review): symTbl is a VAR parameter, so the procedure can
change the caller's variable; presumably it is reset to
notDeclaredSymTable after removal - confirm against the
implementation.
*)
PROCEDURE InsertSymbol( symTbl : SymTable;
resWrd : ARRAY OF CHAR;
sym : Symbol;
VAR errTxt: ARRAY OF CHAR):BOOLEAN ;
(*
Inserts reserved word resWrd associated with symbol sym
in symTbl. If this is not possible, FALSE is returned and
an error message is given in errTxt.
NOTE(review): presumably insertion fails if resWrd is not an
allowed reserved word (see IsAllowedResWord) or if symTbl
already holds MaxResWords entries - confirm against the
implementation.
*)
(*--------------------------------------------------------------------------*)
PROCEDURE InitFileScan(fileName : ARRAY OF CHAR;
symTbl : SymTable;
VAR errTxt: ARRAY OF CHAR): BOOLEAN;
(*
Prepare scanning of file fileName using symbol table
symTbl. From here on, calling GetSym (see below) will
operate on this file.
NOTE: The file is opened for scanning and is left open until
StopScanner is called successfully.
NOTE(review): the meaning of the result is not stated here;
presumably FALSE is returned and an error message is given in
errTxt if scanning could not be prepared (as for NewSymTable) -
confirm against the implementation.
*)
PROCEDURE InitBufferScan(VAR buff : ARRAY OF CHAR;
symTbl : SymTable;
VAR errTxt: ARRAY OF CHAR): BOOLEAN;
(*
Prepare scanning of buffer buff using symbol table
symTbl. From here on, calling GetSym (see below) will
operate on this buffer. NOTE: The buffer variable passed
should persist after calling this procedure, as long as
its contents are scanned. Its content may adhere
to the end of line convention of the currently running
platform (as reported by the platform-query routines of
DMSystem, e.g. RunsOnAnIBMPC and RunsOnAUnixMachine) or
may mark end of lines with the Modula-2 symbol EOL from
module DMFiles in any combination.
NOTE(review): the original text listed RunsOnAUnixMachine
twice; a third DMSystem routine was presumably meant - confirm
its name in DMSystem.
NOTE(review): the meaning of the result is not stated here;
presumably FALSE is returned and an error message is given in
errTxt on failure - confirm against the implementation.
*)
PROCEDURE StopScanner;
(*
Closes a file possibly opened by a call to InitFileScan and
disables GetSym (see below), i.e. from then on GetSym always
returns sym=nul.
NOTE: This procedure must be called before calling InitFileScan
or InitBufferScan a second time.
*)
(*--------------------------------------------------------------------------*)
PROCEDURE SetSymTable( symTbl : SymTable;
VAR errTxt: ARRAY OF CHAR): BOOLEAN;
(*
Sets the currently used symbol table for scanning to symTbl.
A call to this procedure will have no effect, if the scanner
is currently stopped.
NOTE(review): the meaning of the result is not stated here;
presumably FALSE is returned and an error message is given in
errTxt if symTbl could not be set - confirm against the
implementation.
*)
PROCEDURE CurSymTable(): SymTable;
(*
Returns the symbol table currently used for scanning (see
SetSymTable).
NOTE(review): the result while the scanner is stopped is not
specified here - confirm against the implementation.
*)
PROCEDURE SetCaseSensitivity( cs: BOOLEAN );
(*
Sets whether identifiers are scanned case sensitively
(cs = TRUE) or not (cs = FALSE). The default is TRUE.
NOTE: If case sensitivity is switched off, make sure that
the reserved words in the symbol table contain ONLY
upper-case (capital) letters, otherwise the scanner will not
operate correctly!
*)
(*--------------------------------------------------------------------------*)
PROCEDURE GetSym;
(*
Reads the next symbol from the current input (file or buffer)
and returns it in the variable sym. The variable str and,
depending on sym, one of the variables int, lint, real or lreal
are set to the newest value(s). If sym=nul, the reason is given
in scanErr and scanErrStr.
After StopScanner has been called, sym=nul is always returned.
*)
PROCEDURE ReGetSym;
(*
Forces the next call to GetSym to return the last read symbol
once more. The 2nd next call to GetSym will then continue
normally.
*)
PROCEDURE GetResWord( symTbl: SymTable; sym: Symbol;
VAR str: ARRAY OF CHAR );
(*
Returns in str a descriptor of the predefined symbol sym (in
this case symTbl is irrelevant) or the reserved word associated
with the symbol sym in table symTbl.
NOTE(review): the content of str for a sym that is neither
predefined nor contained in symTbl is not specified here -
confirm against the implementation.
*)
PROCEDURE GetLineCount( VAR lineNr: LONGINT ) ;
(*
Returns in lineNr the line of the last scanned symbol.
NOTE(review): whether line counting starts at 0 or 1 is not
specified here - confirm against the implementation.
*)
PROCEDURE GetCharCount( VAR charPos: INTEGER );
(*
Returns in charPos the position of the last read character on
the current line.
NOTE(review): whether positions start at 0 or 1 is not
specified here - confirm against the implementation.
*)
(*--------------------------------------------------------------------------*)
(*
The following procedures are particularly useful for reading
data files.
All procedures attempt to read a respective predefined symbol
followed by a white character. If successful, the result is
returned in the formal parameter and in the respective global
variable (i.e. str, int, lint, real, lreal).
NOTE: Signed or unsigned numbers are recognized.
The respective symbol is also returned in variable sym.
The global variables int, lint, real, lreal (see above)
will however NOT have the correct sign.
*)
(*
Each TryGetXYZ procedure attempts to read one predefined symbol
of the kind named in its identifier and, if successful, returns
the value in its VAR parameter.
NOTE(review): the BOOLEAN result presumably reports whether the
attempt was successful - confirm against the implementation.
*)
PROCEDURE TryGetString ( VAR ss: ARRAY OF CHAR ):BOOLEAN; (* ss: the string read *)
PROCEDURE TryGetInteger( VAR ii: INTEGER ):BOOLEAN; (* ii: the (possibly signed) integer read *)
PROCEDURE TryGetLongInt( VAR li: LONGINT ):BOOLEAN; (* li: the (possibly signed) longint read *)
PROCEDURE TryGetReal ( VAR rr: REAL ):BOOLEAN; (* rr: the (possibly signed) real read *)
PROCEDURE TryGetLongReal( VAR lr: LONGREAL ):BOOLEAN; (* lr: the (possibly signed) longreal read *)
(*--------------------------------------------------------------------------*)
PROCEDURE TryGetText( untilSym: Symbol; VAR txt: ARRAY OF CHAR ):INTEGER;
(*
Reads into variable txt all text found from the current
position until symbol untilSym.
Result:
-1 EOD was reached before finding untilSym,
 0 everything was all right,
+1 reading was stopped because txt is full.
*)
END Scanner.