偏最小二乘法PLS回归NIPALS算法的Matlab程序及例子②
function [T,P,W,Wstar,U,b,C,B_pls,...
Bpls_star,Xori_rec,Yori_rec,...
R2_X,R2_Y]=PLS_nipals(X,Y,nfactor)
% USAGE: [T,P,W,Wstar,U,b,C,Bpls,Bpls_star,Xhat,Yhat,R2X,R2Y]=PLS_nipals(X,Y,nfact)
% PLS regression NIPALS algorithm PLS回归NIPALS算法
% Compute the PLS regression coefficients PLS回归系数的计算
% X=T*P' Y=T*B*C'=X*Bpls X and Y being Z-scores
% B=diag(b)
% Y=X*Bpls_star with X being augmented with a col of ones
% and Y and X having their original units
% T'*T=I (NB normalization <> SAS)
% W'*W=I
%
% Test for PLS regression
% Herve Abdi November 2002/rev November 2004
%
%
% Version with T, W, and C being unit normalized
% U, P are not
% nfact=number of latent Variables to keep 保持潜在变量的数量
% default = rank(X)
X_ori=X;
Y_ori=Y;
if exist('nfactor')~=1;nfactor=rank(X);end
M_X=mean(X);
M_Y=mean(Y);
S_X=std(X);
S_Y=std(Y);
X=zscore(X);
Y=zscore(Y);
[nn,np]=size(X) ;
[n,nq]=size(Y) ;
if nn~= n;
error(['Incompatible # of rows for X and Y']);
end
% Precision for convergence
epsilon=eps;
% # of components kepts
% Initialistion
% The Y set
U=zeros(n,nfactor);
C=zeros(nq,nfactor);
% The X set
T=zeros(n,nfactor);
P=zeros(np,nfactor);
W=zeros(np,nfactor);
b=zeros(1,nfactor);
R2_X=zeros(1,nfactor);
R2_Y=zeros(1,nfactor);
Xres=X;
Yres=Y;
SS_X=sum(sum(X.^2));
SS_Y=sum(sum(Y.^2));
for l=1:nfactor
t=normaliz(Yres(:,1));
t0=normaliz(rand(n,1)*10);
u=t;
nstep=0;
maxstep=100;
while ( ( (t0-t)'*(t0-t) > epsilon/2) & (nstep < maxstep));
nstep=nstep+1;
disp(['Latent Variable #',int2str(l),' Iteration #:',int2str(nstep)])
t0=t;
w=normaliz(Xres'*u);
t=normaliz(Xres*w);
% t=Xres*w;
c=normaliz(Yres'*t);
u=Yres*c;
end;
disp(['Latent Variable #',int2str(l),', convergence reached at step ',...
int2str(nstep)]);
%X loadings
p=Xres'*t;
% b coef
b_l=((t'*t)^(-1))*(u'*t);
b_1=u'*t;
% Store in matrices
b(l)=b_l;
P(:,l)=p;
W(:,l)=w;
T(:,l)=t;
U(:,l)=u;
C(:,l)=c;
% deflation of X and Y
Xres=Xres-t*p';
Yres=Yres-(b(l)*(t*c'));
R2_X(l)=(t'*t)*(p'*p)./SS_X;
R2_Y(l)=(t'*t)*(b(l).^2)*(c'*c)./SS_Y;
end
%Yhat=X*B_pls;
X_rec=T*P';
Y_rec=T*diag(b)*C';
%Y_residual=Y-Y_rec;
%% Bring back X and Y to their original units
%
Xori_rec=X_rec*diag(S_X)+(ones(n,1)*M_X);
Yori_rec=Y_rec*diag(S_Y)+(ones(n,1)*M_Y);
%Unscaled_Y_hat=Yhat*diag(S_Y)+(ones(n,1)*M_Y);
% The Wstart weights gives T=X*Wstar
%
Wstar=W*inv(P'*W);
B_pls=Wstar*diag(b)*C';
%% Bpls_star Y=[1|1|X]*Bpls_star
%Bpls_star=[M_Y;[-M_X;eye(np,np)]*diag(S_X.^(-1))*B_pls*diag(S_Y)]
Bpls_star=[[-M_X;eye(np,np)]*diag(S_X.^(-1))*B_pls*diag(S_Y)];
Bpls_star(1,:)=Bpls_star(1,:)+M_Y;
%%%%%%%%%%%%% functions Here %%%%%%%%%%%%%%%%%%%%%%%
function [f]=normaliz(F);
%USAGE: [f]=normaliz(F);
% normalize send back a matrix normalized by column
% (i.e., each column vector has a norm of 1)
[ni,nj]=size(F);
v=ones(1,nj) ./ sqrt(sum(F.^2));
f=F*diag(v);
function z=zscore(x);
% USAGE function z=zscore(x);
% gives back the z-normalization for x
% if X is a matrix Z is normalized by column
% Z-scores are computed with
% sample standard deviation (i.e. N-1)
% see zscorepop
[ni,nj]=size(x);
m=mean(x);
s=std(x);
un=ones(ni,1);
z=(x-(un*m))./(un*s);
应用例子
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% example_pls.m: PLS example
% RM3 Fall 2004
% November 16 2004
%
% This script shows how to run
% a Partial least square regression
% (PLS).
% Need Programs: PLS_nipals plotxyha
% The example is the
% Wine example from Abdi H. (2003)
% See www.utd.edu/~herve
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear
clc;
disp(['Example of a PLS program. See Abdi H. (2003)']);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%************************************************************
%% -> This is your title.
%% -> Change it to fit your data
ze_title=['PLS. Wines. '];
%% **********************************************************
%% This is the data matrix of the Predictors (IV)
%% -> Change it for your example
X=[...
7 7 13 7
4 3 14 7
10 5 12 5
16 7 11 3
13 3 10 3
];
%%% get the # of rows and columns %%%%%%%%%%%%%%%%%%%%%%%%%%
[nI,nJ]=size(X);
%************************************************************
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% -> These are your predictors names.
% -> Change them to fit your data
% You need as many names as mcX has columns
n=0;
%
n=n+1;nom_x(n)={' Price'};
n=n+1;nom_x(n)={' Sugar'};
n=n+1;nom_x(n)={' Alcohol'};
n=n+1;nom_x(n)={' Acidity'};
%%% Test the # of colums names
if nJ~=n;
erreur(['Error -> (Wrong # of column names)!']);
end
%%***********************************************************
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% -> These are your observations names.
% -> Change them to fit your data
l=0;
l=l+1;nom_r{l}={'W_1'};
l=l+1;nom_r{l}={'W_2'};
l=l+1;nom_r{l}={'W_3'};
l=l+1;nom_r{l}={'W_4'};
l=l+1;nom_r{l}={'W_5'};
if nI~=l;
erreur(['Error -> (Wrong # of row names)!']);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% **********************************************************
%% This is the data matrix of the Dependant Variables (DV)
%% -> Change it for your example
Y=[...
14 7 8
10 7 6
8 5 5
2 4 7
6 2 4
];
%%% get the # of rows and columns %%%%%%%%%%%%%%%%%%%%%%%%%%
[nI2,nK]=size(Y);
%************************************************************
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% -> These are your predictors names.
% -> Change them to fit your data
% You need as many names as mcX has columns
m=0;
%
m=m+1;nom_y(m)={' Hedonic'};
m=m+1;nom_y(m)={' Meat'};
m=m+1;nom_y(m)={' Dessert'};
%%% Test the # of colums names
if nK~=m;
erreur(['Error -> (Wrong # of column names)!']);
end
%%***********************************************************
%%%%%%%%%%%% Call PLS_nipals
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
nfact2keep=2 ;
%%% nfact gives the number of latent variables
%%% the default is equal to the rank of X
[T,P,W,Wstar,U,b,C,Bpls,Bpls_star,Xhat,Yhat,R2X,R2Y]=...
PLS_nipals(X,Y,nfact2keep)
%%%%%%%%%%%% Plot the Observations (T vectors)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
percent_of_inertiaX=100*R2X;
percent_of_inertiaY=100*R2Y;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The axes to keep for the plots
axe_horizontal=1;
axe_vertical=2;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
le_taux=[...
' {\tau_X}_',int2str(axe_horizontal),'=',...
int2str(percent_of_inertiaX(axe_horizontal)),'%,', ...
' {\tau_X}_',int2str(axe_vertical),'=',...
int2str(percent_of_inertiaX(axe_vertical)),'%', ...
' {\tau_Y}_',int2str(axe_horizontal),'=',...
int2str(percent_of_inertiaY(axe_horizontal)),'%,', ...
' {\tau_Y}_',int2str(axe_vertical),'=',...
int2str(percent_of_inertiaY(axe_vertical)),'%'];
%%%% Plot here %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
figure(1);clf
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Now plot the Observations Scores T
%%
ze_tRC=[ze_title,' Observations (T).'];
titre=[ze_tRC, le_taux];
plotxyha(T,1,2,titre,nom_r');
axis('equal') ;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Now plot the X Scores W
%%
figure(2);clf
ze_tRC=[ze_title,' Predictors (W).'];
titre=[ze_tRC, le_taux];
plotxyha(W,1,2,titre,nom_x');
axis('equal') ;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Now plot the Y Scores U
%%
figure(3);clf
ze_tRC=[ze_title,' DV (C -> Non Ortho).'];
titre=[ze_tRC, le_taux];
plotxyha(C,1,2,titre,nom_y');
% axis('equal') ;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%