/* 一个参考 (a reference example) */
/*
 * Flag extreme outliers of one numeric variable in TRAIN using the 3*IQR rule.
 *
 * Input:
 *   TRAIN      dataset containing the numeric variable named by &var.
 * Output:
 *   OUTLIER    one row holding p25 and p75 of &var
 *   A          OUTLIER plus IQR3 = 3 * (p75 - p25)
 *   OUTLIER1   TRAIN plus the flag variable "outlier" (0, 1 or missing)
 *   OUTLIER2   the rows of OUTLIER1 where outlier = 1
 *   Macro variables IQR3, p25 and p75.
 */

/* Name of the analysis variable; change it here and every step below follows. */
%let var = variable;

/* NOPRINT suppresses the printed report; only the OUT= dataset is needed. */
proc means data=train noprint;
    var &var;
    /* p25 is the lower quartile (Q1), p75 is the upper quartile (Q3). */
    output out=outlier p25=p25 p75=p75;
run;

data a;
    set outlier;
    /* Three times the interquartile range: the distance beyond Q1/Q3 that
       an observation must exceed to be treated as an outlier. */
    IQR3 = 3 * (p75 - p25);
    /* Publish the quartiles and the fence width as macro variables so the
       next DATA step can use them as constants. */
    call symputx('IQR3', IQR3);
    call symputx('p75', p75);
    call symputx('p25', p25);
run;

data outlier1;
    set train;
    /* SAS sorts a missing value below every number, so without this guard
       every missing observation would fail the range test and be flagged
       as an outlier. Leave the flag missing instead. */
    if missing(&var) then outlier = .;
    /* Inside [Q1 - 3*IQR, Q3 + 3*IQR] -> 0, outside -> 1. */
    else if (&p25 - &IQR3) <= &var <= (&p75 + &IQR3) then outlier = 0;
    else outlier = 1;
run;

/* Keep only the observations flagged as outliers. */
data outlier2;
    set outlier1;
    where outlier = 1;
run;