昨晚把以前的代码整理了一下,你参考一下:
data Remission;
input remiss cell smear infil li blast temp;
label remiss='Complete Remission';
datalines;
1 .8 .83 .66 1.9 1.1 .996
1 .9 .36 .32 1.4 .74 .992
0 .8 .88 .7 .8 .176 .982
0 1 .87 .87 .7 1.053 .986
1 .9 .75 .68 1.3 .519 .98
0 1 .65 .65 .6 .519 .982
1 .95 .97 .92 1 1.23 .992
0 .95 .87 .83 1.9 1.354 1.02
0 1 .45 .45 .8 .322 .999
0 .95 .36 .34 .5 0 1.038
0 .85 .39 .33 .7 .279 .988
0 .7 .76 .53 1.2 .146 .982
0 .8 .46 .37 .4 .38 1.006
0 .2 .39 .08 .8 .114 .99
0 1 .9 .9 1.1 1.037 .99
1 1 .84 .84 1.9 2.064 1.02
0 .65 .42 .27 .5 .114 1.014
0 1 .75 .75 1 1.322 1.004
0 .5 .44 .22 .6 .114 .99
1 1 .63 .63 1.1 1.072 .986
0 1 .33 .33 .4 .176 1.01
0 .9 .93 .84 .6 1.591 1.02
1 1 .58 .58 1 .531 1.002
0 .95 .32 .3 1.6 .886 .988
1 1 .6 .6 1.7 .964 .99
1 1 .69 .69 .9 .398 .986
0 1 .73 .73 .7 .398 .986
;
run;
%let pi1=.2;/*总体先验概率*/
/* 计算样本的先验概率:*/
proc SQL noprint;
select mean(remiss) into :rho1 from Remission;
quit;
%put &rho1;
%let selected =
cell smear infil li blast temp
;
/*1.全模型训练*/
ods listing close;
ods output bestsubsets=score;
proc logistic data=Remission des;
model remiss=&selected
/ selection=SCORE best=2;
run;
ods listing;
proc print data=score(obs=12);
run;
/*2.全模型评估*/
proc sql noprint;
select variablesinmodel into :inputs1 - :inputs99999
from score;
select NumberOfVariables into :ic1 - :ic99999
from score;
quit;
%put &inputs1;
%put &ic1;
%let lastindx = &SQLOBS;
%let rho0 = %sysevalf(1-&rho1);
%let pi0 = %sysevalf(1-&pi1);
%put &rho0;
%put &pi0;
%let pf11 = 99; %let pf10 = 0;
%let pf01 = -1; %let pf00 = 0;
%let valf1=%sysevalf(&pi0/&rho0);
%let valf2=%sysevalf(&pi1/&rho1);
%put &valf1;
%put &valf2;
proc logistic data=Remission des;
model remiss=&inputs1;
score data=Remission
out=scoredtrain(keep=remiss p_1 p_0)
priorevent=&pi1;
run;
/*此宏为全模型训练和全模型评估,目的是求C统计量和总收益*/
proc sort data=scoredtrain;
by descending p_1;
run;
data assess;
retain sse 0 csum 0 ;
/* 2 x 2 count array, or count matrix */
array n[0:1,0:1] _temporary_ (0 0 0 0);
/* sample weights array */
array w[0:1] _temporary_ (&valf1 &valf2);
set scoredtrain end=last;
/* profit associated with each decision */
d1=&PF11*p_1+&PF01*p_0;
d0=&PF10*p_1+&PF00*p_0;
/* T is a flag for response */
t=(strip(remiss)="1");
/* D is the decision, based on profit. */
d=(d1>d0);
/* update the count matrix, sse, and c */
n[t,d] + w[t];
n_td=n[t,d];
n_1=n[0,0];
n_2=n[0,1];
n_3=n[1,0];
n_4=n[1,1];
w_1=w[0];
w_2=w[1];
sse + (remiss-p_1)**2;
csum + ((n[1,1]+n[1,0])*(1-t)*w[0]);
if last then do;
INPUT_COUNT=&ic1;
TOTAL_PROFIT = sum(&PF11*n[1,1],&PF10*n[1,0],&PF01*n[0,1],&PF00*n[0,0]);
OVERALL_AVG_PROFIT = TOTAL_PROFIT/sum(n[0,0],n[1,0],n[0,1],n[1,1]);
ASE = sse/sum(n[0,0],n[1,0],n[0,1],n[1,1]);
C = csum/(sum(n[0,0],n[0,1])*sum(n[1,0],n[1,1]));
end;
run;
自己慢慢体会吧。其中的TOTAL_PROFIT应该是你关心的。
[此贴子已经被作者于2009-6-6 13:08:40编辑过]