% adapted from npi_em_4.gss
% NPI .. EM Factor Model
% Example 4
% Impose NPI Constraint
% Model has the form
% pi(it) = n(t) + lamba'f(t) + u(it)
%
% This version of the program allows user determined unit root
% for each factor
%
%
%     Constraints and normalizations:
%     columns of lambda sum to zero
%     lambda is lower triangular
%     part of Q corresponding to f has the identity covariance
%     matrix
%

workspacedir='.\workspace\';

%% control parameters
nfac=3;                             % Total number of factors %
n_i1_vec=[1;1;0];                   % Indicator for I1 factors %
nar=4;                              % Number of AR lags, including first difference lags %
em_it_max=1000;                      % Number of EM Iterations %
ifirst=1;                           % = 1, on first run %
newdata=1;                          % =1, clean data again from its original format.
small = 1.0e-6;                     % Small Number %
experiments={'original','USdataNew','EUdata','USCPIData4','USCPIData6'}; %three possible cases;
experiment=5; %the second experiment

%construct model identifier string
id_str=['workspace_' num2str(nfac) '_' num2str(nar) '_' num2str(n_i1_vec)'];

%% -read in data & set up calendars-
if newdata==1 %if first time to run, Prepare data.
    dataPrep;
end

switch experiment
    case 1
        % original US data
        range='a1:ge190';
        xlsname='..\data\datat';
        yfirst = 1965.0;
        ylast = 1999.99;
        nfirst=1959;
        sheet=1;
        xdata=xlsread(xlsname,sheet,range);
    case 2
        % new US data
        range='d2:hz177';
        xlsname='..\data\tableExtractedDataUS.xls';
        yfirst = 1965.0;
        ylast = 1999.99;
        nfirst=1959;
        sheet=1;
        xdata=xlsread(xlsname,sheet,range);
        xdata=xdata';
        %xdata=xdata(1:190,:);
    case 3
        %EU data
        range='d2:ch75';
        xlsname='..\data\tableExtractedDataEU.xls';
        yfirst = 1996;
        ylast = 2015.99;
        nfirst=1996;
        sheet=1;
        xdata=xlsread(xlsname,sheet,range);
        xdata=xdata';
    case 4
        %US CPI data 4-character disaggregation
        range='d2:ci55';
        xlsname='..\data\tableExtractedDataUS4CPI.xls';
        yfirst = 1996;
        ylast = 2015.99;
        nfirst=1996;
        sheet=1;
        xdata=xlsread(xlsname,sheet,range);
        xdata=xdata';
    case 5
        %US CPI data 6-character disaggregation
        range='d2:ci76';
        xlsname='..\data\tableExtractedDataUS6CPI.xls';
        yfirst = 1996;
        ylast = 2015.99;
        nfirst=1996;
        sheet=1;
        xdata=xlsread(xlsname,sheet,range);
        xdata=xdata';
        
end


% xdata=xdata[.,1:20]; %
[nobs,nseries]=size(xdata);


%process data for outliers
thr=6;     % Threshold multiple for IQR %
% "Series, Number of outliers"; %

% clean data
xdata_adj=nan(nobs,nseries);
for i=1:nseries
    y=xdata(:,i);
    xdata_adj(:,i)=adjout(y,thr,3); %adjout is a user defined function for missing values.
end

%keep ~nan data
xdata_adj=xdata_adj(:,~isnan(sum(xdata_adj)));

xm=mean(xdata);
sm=std(xdata);
km=mean(xdata.^4)./(sm.^4);
xm_adj=mean(xdata_adj);
xdata_adj=xdata_adj-xm_adj;
sm_adj=std(xdata_adj);
km_adj=mean(xdata_adj.^4)./(sm_adj.^4);


xdata=xdata_adj;  % Use adjusted data %
bpdata=xdata; %backup data


%% -initial estimates of parameters-
nfac1=nfac-1;
if sum(n_i1_vec)>0
    %compute matrix for computing sum of coefficients for i(1)regressors
    sum_coef=0;
    
    for i=1:nfac
        if n_i1_vec(i)==1
            tmp=zeros(1,nfac*nar);
            for j=1:nar
                k=(j-1)*nfac;
                tmp(1,k+i)=1;
            end
            if size(sum_coef,2)==1
                sum_coef=tmp;
            else
                sum_coef=[sum_coef;tmp];
            end
        end
    end
    
    %compute matrix of constrants for each equation
    r_const=zeros(size(sum_coef,1),nfac);
    j=1;
    
    for i=1:nfac
        if n_i1_vec(i)==1
            r_const(j,i)=1;
            j=j+1;
        end
    end
end

if ifirst==1
    %st&ardize data & form principal components
    npi=mean(bpdata,2);
    r=bpdata-npi;
    
    
    [f1,lam1,eigval,ssr_co]=pc_factor(r,nfac1);
    tmp=lam1(1:nfac1,:)*lam1(1:nfac1,:)';
    ct=chol(tmp)';
    hin=ct\lam1(1:nfac1,:);
    
    lam1=lam1/hin;
    f1=f1*hin';      % Normalizes f to have unit variance; upper triangle of lambda to be zero %
    f=[npi f1];
    lam=[ones(nseries,1) lam1];
    alpha=zeros(nseries,1);
    %get uniqueness properties
    u=bpdata-f*lam';
    u1=u(2:end,:);
    u2=u(1:end-1,:);
    u1u1=sum(u1.^2)';
    u1u2=sum(u1.*u2)';
    u2u2=sum(u2.^2)';
    rho=u1u2./u2u2;
    var_eps=(u1u1-u1u2.*rho)/size(u1,1);
    x=f;
    if nar>0
        y=x(nar+1:size(x,1),:);
        z=nan(size(y,1),nar*nfac);
        
        for i=1:nar
            jj=(i-1)*nfac;
            z(:,jj+1:jj+nfac)=x(nar+1-i:size(x,1)-i,:);
        end
        zzi=inv(z'*z);
        phi=zzi*z'*y;
        if sum(n_i1_vec)>0
            phi=phi-zzi*sum_coef'*(inv(sum_coef*zzi*sum_coef'))*(sum_coef*phi-r_const);
        end
        e=y-z*phi;
        var_e=(e'*e)/size(z,1);
    else
        var_e=(x'*x)/size(x,1);
    end
    
    var_e(2:nfac,2:nfac)=eye(nfac1);   % Normalization %
    em_it_completed=0;
    npi_llf_sum=[0 0];
    
    
elseif ifirst==0
    load([workspacedir id_str]);
end


%% -em iteration
%initial set up of system matrices
nobs=size(bpdata,1)-1;
nstate=nfac*(nar+1);


tstart=tic;
for em_it=1+em_it_completed: em_it_max+em_it_completed
    f=zeros(nstate,nstate);
    f(nfac+1:nstate,1:nstate-nfac)=eye(nstate-nfac);
    q=zeros(nstate,nstate);
    h=zeros(nstate,nseries);
    p1t=zeros(nobs+1,nstate*nstate);
    p2t=zeros(nobs+1,nstate*nstate);
    p3t=zeros(nobs+1,nstate*nstate);
    x1t=zeros(nobs+1,nstate);
    x2t=zeros(nobs+1,nstate);
    x3t=zeros(nobs+1,nstate);
    %-set up data & update system matrices-
    y=bpdata(2:end,:)-bpdata(1:end-1,:).*rho'-alpha';
    h(1:nfac,:)=lam';
    h(nfac+1:2*nfac,:)=-(rho.*lam)';
    r=diag(var_eps);
    f(1:nfac,1:nar*nfac)=phi';
    q(1:nfac,1:nfac)=var_e;
    
    %kalman filter-
    llf=0;
    x1=zeros(nstate,1);
    p1=small*eye(nstate);
    x1t(1,:)=x1';
    p1t(1,:)=(p1(:))';
    for t=1:size(y,1)
        yt=y(t,:)';
        [x1,p1,x2,p2,llft]=kfilt_rdiag_llf(yt,x1,p1,h,f,var_eps,q);
        x1t(t+1,:)=x1';
        p1t(t+1,:)=(p1(:))';
        x2t(t+1,:)=x2';
        p2t(t+1,:)=(p2(:))';
        llf=llf+llft;
    end
    
    em_it
    toc(tstart)
    disp(['likelihood value: ' num2str(llf)]);
    
    %kalman smoother
    x3t(nobs+1,:)=x1';
    p3t(nobs+1,:)=(p1(:))';
    x3=x1;p3=p1;
    
    for t=nobs:-1:2
        x2=x2t(t+1,:)';
        p2=p2t(t+1,:)';
        p2=reshape(p2,nstate,nstate);
        x1=x1t(t,:)';
        p1=p1t(t,:)';
        p1=reshape(p1,nstate,nstate);
        [x3,p3]=Ksmooth(x1,x2,x3,p1,p2,p3,f);
        x3t(t,:)=x3';
        p3t(t,:)=(p3(:))';
    end
    %construct various moment matrices
    zm_ss=zeros(nstate,nstate);      % Mean part of Matrix for State Vector %
    zv_ss=zeros(nstate,nstate);      % Variance part of Matrix for State Vector %
    for t=1:nobs
        st=x3t(t+1,:)';
        p3=p3t(t+1,:)';
        p3=reshape(p3,nstate,nstate);
        zm_ss=zm_ss+st*st';
        zv_ss=zv_ss+p3;
    end
    z_ss=zm_ss+zv_ss;
    fac=x3t(2:nobs+1,1:nfac);
    fac_lag=x3t(2:nobs+1,nfac+1:2*nfac);
    %construct updated estimates of state equation
    w_ss=z_ss;
    if nar>0
        yy=w_ss(1:nfac,1:nfac);
        xx=w_ss(nfac+1:nfac+nfac*nar,nfac+1:nfac+nfac*nar);
        xy=w_ss(nfac+1:nfac+nfac*nar,1:nfac);
        xxi=inv(xx);
        phi_em=xxi*(xy);
        if sum(n_i1_vec)>0
            phi_em=phi_em-xxi*sum_coef'*(inv(sum_coef*xxi*sum_coef'))*(sum_coef*phi_em-r_const);
        end
        uu=yy-xy'*phi_em-phi_em'*xy+phi_em'*xx*phi_em;
    elseif nar==0
        uu=w_ss(1:nfac,1:nfac);
    end
    var_e_em=uu/nobs;
    
    if nar > 0
        phi=phi_em;
    end
    % var_e=var_e_em; %
    %notenormalization has factors except first with identity covariance matrix ...
    % impose normalization
    s11=var_e_em(1,1);
    s22=var_e_em(2:nfac,2:nfac);
    s12=var_e_em(1,2:nfac);
    bhat=s12/s22;
    s1_2=s11-bhat*s22*bhat';
    sighat_11=s1_2+bhat*bhat';
    sighat_12=bhat;
    var_e(1,1)=sighat_11;
    var_e(1,2:nfac)=sighat_12;
    var_e(2:nfac,1)=sighat_12';
    var_e(2:nfac,2:nfac)=eye(nfac1);
    
    
    
    
    %update estimates of lambda
    lam_em=zeros(nseries,nfac);
    alpha_em=zeros(nseries,1);
    vb_save=zeros(nseries,nfac1*nfac1);
    for i=1:nseries
        j=min([nfac;i+1]);
        yi=bpdata(2:end,i)-rho(i)*bpdata(1:end-1,i);
        xi=[ones(nobs,1) (fac(:,1:j)-rho(i)*fac_lag(:,1:j))];
        xy=xi'*yi;
        xx=xi'*xi;
        g=zeros(nfac,nstate);
        g(:,1:nfac)=eye(nfac);
        g(:,nfac+1:2*nfac)=-rho(i)*eye(nfac);
        tmp=g*z_ss*g';
        xx(2:j+1,2:j+1)=tmp(1:j,1:j);
        xxi=inv(xx);
        b=xxi*xy;
        %impose constraint that b(2)=1
        tmp=zeros(j+1,1);
        tmp(2)=1;
        tmp=xxi*tmp;
        b=b-tmp*(b(2)-1)/xxi(2,2);
        vb=(xxi-tmp*tmp'/xxi(2,2))*var_eps(i);
        tmp=zeros(nfac1,nfac1);
        tmp(1:j-1,1:j-1)=vb(3:size(vb,2),3:size(vb,2));
        vb_save(i,:)=tmp(:)';
        alpha_em(i)=b(1);
        lam_em(i,1:j)=b(2:end)';
    end
    %impose constraint that cols of lambda sum to 0
    tmp=sum(vb_save);
    tmp=reshape(tmp,nfac1,nfac1);
    
    sum_lam=sum(lam_em(:,2:nfac));
    for i=1:nseries
        vi=reshape(vb_save(i,:),nfac1,nfac1);
        lam_em(i,2:nfac)=lam_em(i,2:nfac)-sum_lam/tmp*vi;
    end
    lam=lam_em;
    alpha=alpha_em;
    %update estimate of rho
    rho_em=zeros(nseries,1);
    var_eps_em=zeros(nseries,1);
    for i=1:nseries
        const=alpha(i)/(1-rho(i));
        u=bpdata(2:end,i)-const*ones(size(bpdata,1)-1,1)-fac*lam(i,:)';
        ulag=bpdata(1:end-1,i)-const*ones(size(bpdata,1)-1,1)-fac_lag*lam(i,:)';
        uu=u'*u;
        ulul=ulag'*ulag;
        uul=u'*ulag;
        uu=uu+lam(i,:)*zv_ss(1:nfac,1:nfac)*lam(i,:)';
        ulul=ulul+lam(i,:)*zv_ss(nfac+1:2*nfac,nfac+1:2*nfac)*lam(i,:)';
        uul=uul+lam(i,:)*zv_ss(1:nfac,nfac+1:2*nfac)*lam(i,:)';
        rho_em(i)=uul/ulul;
        var_eps_em(i)=(uu-uul*rho_em(i))/(size(u,1));
    end
    rho=rho_em;
    var_eps=var_eps_em;
    
end

%load PCE data for figures
switch experiment
    case 1
        % Quarterly Data 1958:1 -- 2006:4
        [~,~,pce]=xlsread('..\data\pcepi',1,'a1:a196');
        for i=1:length(pce)
            if pce{i}=='NA'
                pce{i}=nan;
            end
        end
        nmac=length(pce);
        cal_mac=1958:0.25:1958+(nmac-1)*0.25;
        ns_i=9;  % Series to graph %
    case 2
        [~,~,pce]=xlsread('..\data\PCEUS.xls',1,'d3:ia3');
        pce=pce';
        nmac=length(pce);
        cal_mac=nfirst:0.25:nfirst+(nmac-1)*0.25;
        ns_i=12;  % Series to graph %
    case 3
        [~,~,pce]=xlsread('..\data\HCIPEU.xls',1,'d2:ci2');
        pce=pce';
        nmac=length(pce);
        cal_mac=nfirst:0.25:nfirst+(nmac-1)*0.25;
        ns_i=33;  % Series to graph %
    case 4
        [~,~,pce]=xlsread('..\data\CPIUS.xls',1,'d2:ib2');
        pce=pce';
        nmac=length(pce);
        cal_mac=nfirst:0.25:nfirst+(nmac-1)*0.25;
        ns_i=9;  % Series to graph %
    case 5
        [~,~,pce]=xlsread('..\data\CPIUS.xls',1,'d2:ib2');
        pce=pce';
        nmac=length(pce);
        cal_mac=nfirst:0.25:nfirst+(nmac-1)*0.25;
        ns_i=9;  % Series to graph %
end
pce=cell2mat(pce);
d_pcep=pce;
d_pcep(2:nmac)=400*log(pce(2:nmac)./pce(1:nmac-1));
d_pcep(1)=nan;

%save results
em_it_completed=em_it_completed+em_it_max;
npi_llf_sum=[npi_llf_sum; ([em_it_completed llf])];
save([workspacedir id_str],'em_it_completed','npi_llf_sum','alpha','lam',...
    'rho','phi','var_eps','var_e','x3t','p3t','f','xdata','bpdata'...
    ,'nfirst','yfirst','ylast','experiment','experiments','cal_mac','d_pcep','ns_i');

%% plot
figure3;