DEFINITION MODULE PrinComp;
(*******************************************************************
Module PrinComp (Version 1.0)
Copyright (c) 1993-2006 by Dimitrios Gyalistras and ETH Zurich.
Purpose Perform Principal Component Analysis (PCA).
Remarks --
Programming
o Design
Dimitrios Gyalistras 28/07/1993
o Implementation
Dimitrios Gyalistras 28/07/1993
ETH Zurich
Systems Ecology
CHN E 35.1
Universitaetstrasse 16
8092 Zurich
SWITZERLAND
URLs:
<mailto:RAMSES@env.ethz.ch>
<http://www.sysecol.ethz.ch>
<http://www.sysecol.ethz.ch/SimSoftware/RAMSES>
Last revision of definition: 24/05/2002 AF
*******************************************************************)
FROM DMFiles IMPORT TextFile;
IMPORT Errors;
FROM LgMatrices IMPORT LMatrix, LVector;
(******************************************************************)
CONST (* result codes returned *)
allOk = Errors.allOk;
notDone = Errors.onlyAnInsert;
CONST (* types of matrices supported *)
muMatType = 10; (* column means used for calculation of anomalies *)
wghtMatType = 20; (* column weights used for weighting of data columns *)
eigValMatType = 101; (* Eigenvalues *)
tvxMatType = 102; (* Total variances explained by PCs *)
eofMatType = 103; (* EOFs *)
lvxMatType = 104; (* Local variances explained by PCs *)
pcMatType = 105; (* PCs *)
evREBMatType = 106; (* Eigenvalues relative error bounds *)
(*
Note: the dimensions of all LVector and LMatrix objects used below
are determined automatically via procedures LgMatrices.NElems,
LgMatrices.NRows and LgMatrices.NCols.
*)
(*********************************)
(*##### Main procedures #####*)
(*********************************)
TYPE (* methods available for calculation of covariance matrix *)
CovCalcMethod = ( covUsesDivN, (* default *)
covUsesDivNLess1 );
PROCEDURE SetCovCalcMethod( ccm: CovCalcMethod );
PROCEDURE CurCovCalcMethod(): CovCalcMethod;
TYPE (* weighting of variables *)
WeightingType = ( noWeighting, (* data matrix is used unmodified: w(i) = 1.0 *)
equalWeights, (* all columns are standardized: w(i) = 1.0/stdev(i) *)
userSpecfdWeights ); (* columns are rescaled according to: w(i) = u(i) *)
PROCEDURE MakePCA(
data : LMatrix; (* in : data matrix to analyze (nRows x nCols) *)
mu : LMatrix; (* out: subtracted column means (1 x nCols ) *)
wType : WeightingType; (* in : kind of weighting of data columns prior to PCA *)
wght : LMatrix; (* in / out: input factors u(i) / used weights w(i) (1 x nCols ) *)
eigVal : LMatrix; (* out: eigenvalues of the weighted covariance matrix (1 x nEofs ) *)
eof : LMatrix; (* out: empirical orthog. functions (= EOFs,loadings) (nEofs x nEofs) *)
pc : LMatrix; (* out: principal components (= PCs, scores) (nRows x nCols) *)
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE CalcPCsGlobExplVar(
data : LMatrix; (* in : data matrix, needed only because of descriptor *)
eigVal : LMatrix; (* in : eigenvalues of the covariance matrix (1 x nEofs ) *)
tvx : LMatrix; (* out: percentages of total variance explained by PCs (1 x nEofs ) *)
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE CalcEigValsRelErrBounds(
data : LMatrix; (* in : data matrix, needed only because of descriptor *)
eigVal : LMatrix; (* in : eigenvalues of the covariance matrix (1 x nEofs ) *)
evREB : LMatrix; (* out: eigenvalues' relative error bounds (1 x nEofs ) *)
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
(*
Relative error bounds are defined as follows
absErrBound(k) = eigVal(k)*Sqrt(2/N)
relErrBound(k) = absErrBound(k)/((eigVal(k)-eigVal(k+1))
where N is the effective number of degrees of freedom, here approximated
by the number of rows of the matrix "data".
If relErrBound(k) < 1 the eigenvalues k and k+1 are probably degenerated,
i.e. the corresponding EOFs do not differ significantly from each other.
Reference:
G. R. North, T. L. Bell, R. F. Cahalan, and F. J. Moeng (1982):
"Sampling errors in the estimation of empirical orthogonal functions"
Mon. Weather Rev. 110, 699-706.
*)
PROCEDURE CalcPCsLocalExplVar(
data : LMatrix; (* in : data matrix to analyze (nRows x nCols) *)
pc : LMatrix; (* in : principal components (= PCs, scores) (nRows x nCols) *)
lvx : LMatrix; (* out: percentages of local variances explained by PCs (nCols x nCols) *)
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE GetNumEofs1( eigVal : LMatrix;
minCumulVar : LONGREAL;
VAR nEofs : INTEGER;
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE GetNumEofs2( data : LMatrix;
eigVal : LMatrix;
maxEigValREB : LONGREAL; (* maximum value for eigenvalue relative error bound *)
VAR nEofs : INTEGER;
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
(**************************)
(*##### Auxilary #####*)
(**************************)
PROCEDURE NEofs( data: LMatrix ): INTEGER;
PROCEDURE NEofsOK( data : LMatrix;
nEofs : INTEGER;
genericDescr : ARRAY OF CHAR; (* used if data has no descriptor attached to it *)
callee : ARRAY OF CHAR;
VAR errTxt : ARRAY OF CHAR ): BOOLEAN;
PROCEDURE WeightsOK( wght : LMatrix;
genericDescr : ARRAY OF CHAR; (* used if wght has no descriptor attached to it *)
callee : ARRAY OF CHAR;
VAR errTxt : ARRAY OF CHAR ): BOOLEAN;
TYPE
RescalingType = ( normalize, denormalize );
PROCEDURE RescalePCs( eigVal : LMatrix; (* in *)
pc : LMatrix; (* in *)
rscldPC : LMatrix; (* out *)
doWhat : RescalingType;
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE RescaleEOFs( eigVal : LMatrix; (* in *)
eof : LMatrix; (* in *)
rscldEof : LMatrix; (* out *)
doWhat : RescalingType;
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE AllocPCAResVars( data : LMatrix;
VAR mu : LMatrix;
VAR wght : LMatrix;
VAR eigVal : LMatrix;
VAR eof : LMatrix;
VAR pc : LMatrix;
allocAlsoVX : BOOLEAN;
VAR tvx : LMatrix;
VAR lvx : LMatrix;
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE DeallocPCAResVars( VAR mu : LMatrix;
VAR wght : LMatrix;
VAR eigVal : LMatrix;
VAR eof : LMatrix;
VAR pc : LMatrix;
VAR tvx : LMatrix;
VAR lvx : LMatrix );
(************************************)
(*##### I/O of PCA results #####*)
(************************************)
PROCEDURE WritePCAResults( VAR outF : TextFile;
wrMu : BOOLEAN; mu : LMatrix;
wrWght : BOOLEAN; wght : LMatrix;
wrEigVal : BOOLEAN; eigVal : LMatrix;
wrEof : BOOLEAN; eof : LMatrix;
wrPc : BOOLEAN; pc : LMatrix;
wrTvx : BOOLEAN; tvx : LMatrix;
wrLvx : BOOLEAN; lvx : LMatrix;
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
PROCEDURE ReadPCAResults( inFName : ARRAY OF CHAR;
tryOpenFile : BOOLEAN;
keepFileOpen : BOOLEAN;
rdMu : BOOLEAN; VAR mu : LMatrix;
rdWght : BOOLEAN; VAR wght : LMatrix;
rdEigVal : BOOLEAN; VAR eigVal : LMatrix;
rdEof : BOOLEAN; VAR eof : LMatrix;
rdPc : BOOLEAN; VAR pc : LMatrix;
rdTvx : BOOLEAN; VAR tvx : LMatrix;
rdLvx : BOOLEAN; VAR lvx : LMatrix;
VAR endOfFile : BOOLEAN;
VAR resCode : INTEGER;
VAR errTxt : ARRAY OF CHAR );
END PrinComp.