/**********************************************************
Purpose: trim or winsorize SAS dataset to remove the impact from extreme values
Input
dsetin : dataset to winsorize/trim
byvar : define subset to winsorize/trim,e.g. 'date'. type 'none' for the whole dataset
type : 'delete' or 'winsor' (delete will trim, winsor will winsorize)
vars : variables to winsorize/trim (for byvar, type 'none' when there is no by-variable)
pctl : the percentage of left and right tails to trim/winsorize
Output
dsetout : dataset to output with winsorized/trimmed values
************************************************************/
/* Compute percentile cutoffs for every variable in &vars, separately for each
   &byvar group of xtemp, and store them in xtemp_pctl.
   PCTLPRE= supplies one name prefix per analysis variable and PCTLNAME=
   supplies the suffixes, so the cutoff columns are named <var>L and <var>H.
   NOTE(review): this presumes &pctl lists exactly two percentiles (low, high)
   and that xtemp is already sorted by &byvar - neither is visible here. */
proc univariate noprint data=xtemp;
  by &byvar;
  var &vars;
  output out=xtemp_pctl
         pctlpts=&pctl
         pctlpre=&vars
         pctlname=L H;
run;
/* Attach the BY-group percentile cutoffs in xtemp_pctl to every observation
   of xtemp, then winsorize or trim each variable listed in &vars.
   NOTE(review): &xn, &varL and &varH are not defined in the visible source;
   presumably &xn is the number of names in &vars and &varL / &varH hold the
   matching cutoff variable names (<var>L / <var>H) - confirm.
   NOTE(review): the visible text has no END for the do-xi loop and no RUN
   for this step, and the %IF / %ELSE logic would normally sit inside a
   %MACRO ... %MEND definition that is not shown - the fragment looks
   truncated. */
data &dsetout;
merge xtemp xtemp_pctl;
by &byvar;
/* Three parallel arrays: element xi is the value, its low cutoff and its
   high cutoff for the same variable. */
array trimvars{&xn} &vars;
array trimvarl{&xn} &varL;
array trimvarh{&xn} &varH;
do xi = 1 to dim(trimvars);
/* The macro processor resolves &type, so only one of the two branches below
   is generated into the DATA step. */
%if &type = winsor %then %do;
/* Winsorize: clamp non-missing values into [low cutoff, high cutoff]. */
if not missing(trimvars{xi}) then do;
if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
end;
%end;
%else %do;
/* Trim: DELETE drops the whole observation as soon as any one non-missing
   variable falls outside its cutoffs. */
if not missing(trimvars{xi}) then do;
if (trimvars{xi} < trimvarl{xi}) then delete;
if (trimvars{xi} > trimvarh{xi}) then delete;
end;
%end;
/* This is the plain SAS (non-macro) approach: */
/* Demo input: dataset a with 200 observations (i = 1..200), each holding one
   uniform(0,1) draw in x. RANUNI is called with seed 0, which takes the seed
   from the system clock, so the values differ from run to run. */
data a;
  do i = 1 to 200;
    x = ranuni(0);
    output;   /* write one observation per loop iteration */
  end;
run;
/* Compute the 1st and 99th percentiles of x over all of dataset a and write
   them to the one-row dataset b as p_1 and p_99.
   PCTLPTS= takes a single list of percentiles; it was previously given twice
   (pctlpts=1 pctlpts=99), which is not the documented form and risks only one
   of the two cutoffs being produced. */
proc univariate data=a noprint;
  var x;
  output out=b pctlpts=1 99 pctlpre=p_;
run;
/* Winsorize x from dataset a at the cutoffs stored in dataset b.
   The result goes into y; the raw value in x is left untouched.
   Fix: the low-tail branch used to assign x = p_1, which overwrote the raw
   value and left y missing; it now assigns y = p_1 like the other branches. */
data c;
  set a;
  /* b has a single row; reading it once is enough because variables read by
     SET are retained, so p_1 and p_99 stay available on every observation. */
  if _n_ = 1 then set b;
  /* A missing x compares lower than any number, so guard it explicitly
     instead of letting it be pulled up to p_1. */
  if missing(x) then y = .;
  else if x gt p_99 then y = p_99;
  else if x lt p_1 then y = p_1;
  else y = x;
run;