function [FPE,deff,varest,H] = nnfpe(method,NetDef,W1,W2,U,Y,NN,trparms,Chat)
%  NNFPE
%  ----- 
%           This function calculates Akaike's final prediction error
%           estimate of the average generalization error for network
%           models generated by NNARX, NNOE, NNARMAX1+2, or their recursive
%           counterparts.
%
%  [FPE,deff,varest,H] = nnfpe(method,NetDef,W1,W2,U,Y,NN,trparms,Chat)
%  produces the final prediction error estimate (FPE), the effective number
%  of weights in the network if it has been trained with weight decay,
%  an estimate of the noise variance, and the Gauss-Newton Hessian.
%  
%  INPUT:
%  See the function used for creating the model. The argument
%  'Chat' must be included (and non-empty) if method='nnarmax1' or
%  method='nnrarmx1'; it is not needed for the other methods.
%  If 'trparms' is omitted or empty, the defaults returned by SETTRAIN
%  are used.
%  
%  OUTPUT:
%  FPE    : The final prediction error estimate 
%  deff   : The effective number of weights
%  varest : Estimate of the noise variance
%  H      : The Gauss-Newton Hessian (weight decay added to the diagonal),
%           restricted to the non-zero weights
%
%  The first trparms.skip samples are excluded from the criterion, the
%  Hessian, and the sample count used in the estimates.
%
%  REFERENCE:
%       J. Larsen & L.K. Hansen:
%       "Generalization Performance of Regularized Neural Network Models"
%        Proc. of the IEEE Workshop on Neural networks for Signal Proc. IV,
%        Piscataway, New Jersey, pp.42-51, 1994
 
%  Programmed by : Magnus Norgaard, IAU/IMM, Technical Univ. of Denmark
%  LastEditDate  : Jan. 15, 2000

%----------------------------------------------------------------------------------
%--------------             NETWORK INITIALIZATIONS                   -------------
%----------------------------------------------------------------------------------
[outputs,N] = size(Y);                  % # of outputs and # of data
[hidden,inputs] = size(W1);             % # of hidden units 
inputs=inputs-1;                        % # of inputs (last column of W1 excluded)
parameters1= hidden*(inputs+1);         % # of input-to-hidden weights
parameters2= outputs*(hidden+1);        % # of hidden-to-output weights

% Default Chat before its first use so a missing argument is reported clearly
if nargin<9, Chat=[]; end

if strcmp(method,'nnarmax1') | strcmp(method,'nnrarmx1'),
  if isempty(Chat),
    error('''Chat'' must be supplied when method is ''nnarmax1'' or ''nnrarmx1''.');
  end
  nc = length(Chat)-1;                  % # of C-coefficients besides the leading one
                                        % Parameter vector containing all weights
  theta = [reshape(W2',parameters2,1) ; reshape(W1',parameters1,1) ; ...
           reshape(Chat(2:nc+1),nc,1)];
else
  theta = [reshape(W2',parameters2,1) ; reshape(W1',parameters1,1)];
end
theta_index = find(theta);              % Index to weights<>0
reduced  = length(theta_index);         % The # of non-zero parameters

if nargin<8 | isempty(trparms)          % Default training parameters
  trparms = settrain;
  skip    = trparms.skip+1;
  D       = trparms.D;
else                                    % User specified values
  if ~isstruct(trparms),
     error('''trparms'' must be a structure variable.');
  end
  if ~isfield(trparms,'skip')
     trparms= settrain(trparms,'skip','default');
  end
  skip=trparms.skip+1;
  if ~isfield(trparms,'D')
     trparms = settrain(trparms,'D','default');
     D = trparms.D;
  else
    if length(trparms.D)==1,              % Scalar weight decay parameter
      D = trparms.D(ones(1,reduced))';      
    elseif length(trparms.D)==2,          % Two weight decay parameters
      % NOTE(review): for the ARMAX methods theta also holds the nc
      % C-polynomial coefficients, which this expansion does not cover;
      % confirm the intended decay for those against the training routine.
      D = trparms.D([ones(1,parameters2) 2*ones(1,parameters1)])';
      D = D(theta_index);
    elseif length(trparms.D)>2,           % Individual weight decay
      D = trparms.D(:);
    else
      error('''trparms.D'' must not be empty.');
    end
  end
end
D = D(:);


% >>>>>>>>>>>>>>>>>>>>>>  GET NETWORK OUTPUT AND GRADIENT   <<<<<<<<<<<<<<<<<<<<<<
[PSI,E] = getgrad(method,NetDef,NN,W1,W2,Chat,Y,U);
N  = length(E);
N2 = N-skip+1;                         % # of samples left after skipping
PI = E(skip:N)*E(skip:N)'/(2*N2);      % Normalized SSE
PSI_red = PSI(theta_index,skip:N);     % Gradient for non-zero weights, same samples as PI

        
% >>>>>>>>>>>>>>>>>>>>>>>>    COMPUTE THE HESSIAN MATRIX   <<<<<<<<<<<<<<<<<<<<<<
% --- Calculate the Hessian matrix ---
% Built from the same skip:N samples as PI so that R, H and N2 are consistent.
R     = PSI_red*PSI_red';
H     = R;
index3   = 1:(reduced+1):(reduced^2);       % Linear indices of the diagonal of H
H(index3) = H(index3) + D';                 % Add weight decay to diagonal

% --- FPE in case of no weight decay ---
if all(D==0),
  FPE  = PI*(N2 + reduced) / (N2 - reduced);
  deff = reduced;
  % Use N2 (not N): PI is normalized by N2, and this is the D->0 limit of
  % the weight decay expression below.
  varest = 2*N2*PI/(N2-reduced);
else

  % --- FPE in case of weight decay ---
  RHinv  = R/H;                               % R*inv(H) without forming the inverse
  gamma1 = trace(RHinv*RHinv);                % Effective # of parameters
  gamma2 = trace(RHinv);        
  varest  = 2*N2*PI/ (N2 + gamma1 - 2*gamma2);
  FPE = (N2+gamma1)*PI/(N2+gamma1-2*gamma2);  % FPE estimate
  deff = gamma1;                              % Effective # of parameters
end


