data tem;
set tem;
array m[&m] a1-a&m;
retain a1-a&m;
by code day;
if first.code then count=.;
count+1;
if count<=&m then m[count]=x;
else do;
do i=1 to &n;
m[i]=m[i+1];
end;
m[&m]=x;
end;
Hope answer your question and get the rewards.
A general method.
%let n=2;
%let m=%eval(&n+1);
data tem;
input code day event x;
datalines;
1 02 0 1.9
1 03 0 3.3
1 04 1 -4
1 05 1 18
2 01 0 6
2 02 0 4
2 03 1 7
;
run;
proc sort data=tem;
by code day;
run;
data tem;
set tem;
array m[&m] a1-a&m;
retain a1-a&m;
by code day;
if first.code then do;
count=.;
do i=1 to &m;
m[i]=.;
end;
end;
count+1;
if count<=&m then m[count]=x;
else do;
do i=1 to &n;
m[i]=m[i+1];
end;
m[&m]=x;
end;
do j=1 to &n;
sum=sum+m[j];
end;
avg=sum/&n;
run;
data out;
set tem;
if event=1;
run;
The SQL method code is short but have a problem for some special situation.
for example:
day=04 event=1 before this record there are two records before it. if we want to calculate the 3 records before it. because there are only two records, the mean should be missing but the SQL code will just calculate the mean of two records before it.
This example is just calculate the moving average. Let's say 10 days average, before 10 days there are no 10 days average.
just for discussion. SQL sometimes can give a quick answer. but we need to think some special situation and to make the code robust and strong
right, It depends what you want. My program actually can do both. you just need a little bit change in the part of sum and mean calculation.
Let say a group of data
day 1 2 3 4 5 6 7 8 9 10
X 3 5 6 8 6 9 2 10 11 12
if calculate 5 days mean
the data sould looks like:
day 1 2 3 4 5 6 7 8 9 10
X 3 5 6 8 6 9 2 10 11 12
mn . . . . . (3+5+6+8+6)/5 on day 5
(5+6+8+6+9)/5 on day 6 etc
if you want just calculate whatever data within 5 days before(like missing data or only 2 or 3 data
you just calculate of those data .
*间隔的天数;
%let interval=2;
data a;
input code day event x;
datalines;
1 03 0 3.3
1 04 1 -4
1 05 1 18
2 01 0 6
2 02 0 4
2 03 1 7
;
run;
proc sql;
create table result as
select code,day,event,x,
(select std(x)
from a t2
where t2.code=t1.code and t2.day<t1.day and t2.day>=t1.day-&interval)
*(case (exists (select 1 from a t3 where t3.code=t1.code and t3.day=t1.day-&interval))
when 0 then . else 1 end)
as mean_&interval._includemissing
from a t1
where event=1
;
quit;
The new SQL code still doesnot cover the situation if one of the data in the middle is missing.
If you consider and think thoroughly, the SQL will become complicated and run slowly.
anyway proc sql and data step each has strength and weakness.
My opinion is data step is more stronger but proc sql sometimes is good for quick and dirty solution and sometimes proc sql has strength for some problems solving.
as to std, from statistics, you know how to calculate the standard deviation. it is
square root of [sum (x-mean)*(x-mean)/&n], so you can add code for std calculation.
* the program includine mean and standar deviation;
revised total program.
Hope answer your question and get the rewards.
A general method.
%let n=2;
%let m=%eval(&n+1);
data tem;
input code day event x;
datalines;
1 02 0 1.9
1 03 0 3.3
1 04 1 -4
1 05 1 18
2 01 0 6
2 02 0 4
2 03 1 7
;
run;
proc sort data=tem;
by code day;
run;
data tem;
set tem;
array m[&m] a1-a&m;
retain a1-a&m;
by code day;
if first.code then do;
count=.;
do i=1 to &m;
m[i]=.;
end;
end;
count+1;
if count<=&m then m[count]=x;
else do;
do i=1 to &n;
m[i]=m[i+1];
end;
m[&m]=x;
end;
do j=1 to &n;
sum=sum+m[j];
end;
avg=sum/&n;
do j=1 to &n;
sumsq=sumsq+(m[j]-avg)**2;
end;
std=sumsq/&n;
*间隔的天数;
%let interval=2;
data a;
input code day event x;
datalines;
1 02 0 .
1 03 0 3.3
1 04 1 .
1 05 1 18
2 01 0 6
2 02 0 4
2 03 1 7
;
run;
proc sql;
create table result as
select code,day,event,x,
(select std(x)
from a t2
where t2.code=t1.code and t2.day<t1.day and t2.day>=t1.day-&interval)
*(case (exists (select 1 from a t3 where t3.code=t1.code and t3.day=t1.day-&interval))
when 0 then . else 1 end)
*(case t1.x when . then . else 1 end)
as mean_&interval._includemissing
from a t1
where event=1
;
quit;