以下宏就是计算WOE的方法,关于其应用和介绍可以看:
1、
http://blog.sina.com.cn/s/blog_8813a3ae0102uyo3.html
2、
http://blog.csdn.net/kevin7658/article/details/50780391
本质上,IV、WOE、熵以及 logit(即 log-odds,对数几率)这几个概念是相通的。
/*取自《信用风险评分卡研究_基于SAS的开发与实施》*/
%macro calcwoe(dsin,ivvar,dvvar,woeds,woevar,dsout,daccess);
/*
  Compute the weight of evidence (WOE) of every level of &ivvar against the
  target &dvvar, store the level-to-WOE map, and append the WOE as a new
  column to one or two data sets.

  Parameters (positional):
    dsin    - input data set used to estimate the WOE map
    ivvar   - independent variable whose levels are scored
    dvvar   - target variable; its lowest value is counted as "good" (c0)
              and its highest value as "bad" (c1)
    woeds   - output data set: one row per &ivvar level, columns &ivvar and WOE
    woevar  - name of the WOE column added to the output data sets
    dsout   - output data set: &dsin joined with the WOE map
    daccess - optional; an existing data set that is replaced by itself
              joined with the WOE map. Skipped when left blank.

  Notes:
    - WOE = log(bad distribution / good distribution). It is set to missing
      when either distribution is zero for a level.
    - Both joins are inner joins: rows whose &ivvar value has no entry in
      &woeds do not appear in the output.
    - Work tables temp_freqs, temp_dvrange, temp_woe1 and temp_access are
      created in WORK and deleted before the macro returns.
*/
  /* Keep the totals out of the caller's symbol table; default to 0 so the
     macro variables resolve even if the accumulation step reads no rows. */
  %local c0t c1t;
  %let c0t = 0;
  %let c1t = 0;

  /* Cross-tabulate level x target to get one COUNT per combination. */
  proc freq data=&dsin noprint;
    tables &ivvar * &dvvar / out=temp_freqs;
  run;

  /* Order by level, then target. The BY statement takes a plain variable
     list; the '*' crossing operator is only valid in TABLES. */
  proc sort data=temp_freqs;
    by &ivvar &dvvar;
  run;

  /* Find the target values that represent good (lowest) and bad (highest). */
  proc sql noprint;
    create table temp_dvrange as
    select min(&dvvar) as _dv_lo, max(&dvvar) as _dv_hi
    from temp_freqs;
  quit;

  /* Collapse to one row per level holding its good count (c0) and bad count
     (c1); c0t and c1t accumulate the grand totals. Rows are classified by
     the target VALUE, not by their position inside the BY group, so a level
     that contains only one target class is not counted on both sides. */
  data temp_woe1;
    if _n_ = 1 then set temp_dvrange;
    set temp_freqs end=_last;
    by &ivvar;
    retain c0 c1 c0t c1t 0;
    if first.&ivvar then do;
      c0 = 0;
      c1 = 0;
    end;
    if &dvvar = _dv_lo then do;
      c0  = c0  + count;
      c0t = c0t + count;
    end;
    else if &dvvar = _dv_hi then do;
      c1  = c1  + count;
      c1t = c1t + count;
    end;
    if last.&ivvar then output;
    /* Publish the totals once, after the final row has been accumulated. */
    if _last then do;
      call symputx('c0t', c0t, 'L');
      call symputx('c1t', c1t, 'L');
    end;
    drop count percent &dvvar _dv_lo _dv_hi;
  run;

  /* Build the WOE map. WOE is only defined when both the good and the bad
     distribution of a level are strictly positive. */
  data &woeds;
    set temp_woe1;
    /* Guard against a zero total (target has a single class overall). */
    if &c0t > 0 then goodist = c0 / &c0t;
    else goodist = .;
    if &c1t > 0 then badist = c1 / &c1t;
    else badist = .;
    if (goodist > 0 and badist > 0) then woe = log(badist / goodist);
    else woe = .;
    keep &ivvar woe;
  run;

  /* Attach the WOE to the estimation data. */
  proc sql noprint;
    create table &dsout as
    select a.*, b.woe as &woevar
    from &dsin a, &woeds b
    where a.&ivvar = b.&ivvar;
  quit;

  /* Attach the WOE to the accumulating data set, if one was supplied.
     The join is staged in a temporary table because CREATE TABLE x AS
     SELECT ... FROM x references its own target recursively. */
  %if %length(&daccess) %then %do;
    proc sql noprint;
      create table temp_access as
      select a.*, b.woe as &woevar
      from &daccess a, &woeds b
      where a.&ivvar = b.&ivvar;
    quit;

    data &daccess;
      set temp_access;
    run;

    proc delete data=temp_access;
    run;
  %end;

  /* Clean up the work tables. */
  proc delete data=temp_freqs temp_dvrange temp_woe1;
  run;
%mend;