The first part is simulation data in which,
1) 4 time series (y1 y2 y3 y4) are generated with ar(1) error process each,
2) there is a hidden factor x which drives y1, y2, y3, y4
The second part is SAS/IML program using Kalman's filter call to make a likelihood estimation.
We only observe ys but not x. The state variables are x + ar processes. We want to estimate all coefficients in the simulation model. Note some variables are not identifiable.
data simudata;
n=500;
seed=1230;
e10=rannor(seed);
e20=rannor(seed);
e30=rannor(seed);
e40=rannor(seed);
x0=0;
do i=-200 to n;
x=0.9*x0+2*rannor(seed);
e1=0.95*e10+rannor(seed);
y1=2.5+0.95*x+e1;
e2=0.9*e20+rannor(seed);
y2=2.0+1.1*x+e2;
e3=0.85*e30+rannor(seed);
y3=1.5+1.25*x+e3;
e4=0.8*e40+rannor(seed);
y4=2.5+0.7*x+e4;
if i>0 then output;
x0=x; e10=e1; e20=e2; e30=e3; e40=e4;
end;
keep y1-y4 x;
run;
proc corr data=simudata;
var y1-y4 x;
run;
proc iml;
start lik(pr) global(y,z0,vz0);
f=diag(pr[1:5]);
a = j(nrow(f),1,0);
h=pr[6:9]||I(4);
b =pr[10:13] ;
var1=(pr[14:18]// {0.000,0.000,0.000,0.000} )##2;
var= diag(var1);
nz = nrow(f);
n = nrow(y);
k = ncol(y);
*print f a h b var z0 vz0;
const = k*log(8*atan(1));
call kalcvf(pred,vpred,filt,vfilt,y,0,a,f,b,h,var,z0,vz0);
et = y - pred*h` - repeat(b`,n);
*et = y - pred*h`;
sum1 = 0;
sum2 = 0;
do i = 1 to n;
vpred_i = vpred[(i-1)*nz+1:i*nz,];
et_i = et[i,];
ft = h*vpred_i*h` + var[nz+1:nz+k,nz+1:nz+k];
sum1 = sum1 + log(det(ft));
sum2 = sum2 + et_i*inv(ft)*et_i`;
end;
*return(-.5*const-.5*(sum1+sum2)/n);
return(-1*(sum1+sum2));
finish;
use simudata var{y1 y2 y3 y4};
read all into y;
pr={ 0.9
0.8 0.8 0.8 0.8
0.95 0.9 0.9 0.9
2 2 2 2
1.5
1 1 1 1
};
z0 = j(1,5,0);
vz0= I(5);
logl = lik(pr);
print logl;
optn = {1 2 };
con={ . . . . . 0.95 . . . 2.5 . . . . . . . .,
. . . . . 0.95 . . . 2.5 . . . . . . . .
};
call nlpdd(rc, xr,"lik", pr , optn, con) ;
logl = lik(xr);
print logl;
quit;