*本程序提供SAS爱好者交流学习,禁止商业用途;
*算法参考冯定<神经网络专家系统>;
*案例DeepLearning
http://blog.sina.com.cn/s/blog_593af2a70101endk.html;
*作者:keming;
*开发于2008年7月.;
*读者可先阅读
http://blog.sina.com.cn/s/blog_593af2a70101endk.html,再理解程序;
/*************************************/
*本程序提供SAS爱好者交流学习,禁止商业用途;
*算法参考冯定<神经网络专家系统>;
*案例DeepLearning
http://blog.sina.com.cn/s/blog_593af2a70101endk.html;
*作者:keming;
*开发于2008年7月.;
/*************************************/
/* Session setup: the X command issues a shell "cd ." (stays in the current directory). */
X CD ".";
/* MPRINT echoes macro-generated code to the log; the other options are session-wide settings. */
OPTIONS MPRINT CENTER ERRORS=0 LS=255 COMPRESS=YES;

/* RAWDATA: four input rows of four 0/1 indicators each; EVENT_ID is the row number. */
DATA RAWDATA;
    LENGTH EVENT_ID 8;          /* declared first so EVENT_ID is the leading column */
    INPUT TMPV1-TMPV4;
    EVENT_ID=_N_;
    DATALINES;
1 0 0 0
0 1 0 0
0 0 1 0
0 0 0 1
;
RUN;
%MACRO TMPTARGET;
/*
 * Build NETWORK_BASE from RAWDATA: every input variable (all columns except
 * EVENT_ID) is copied into a matching TARGETn column, so the network is
 * trained to reproduce its own inputs.
 *
 * Reads : RAWDATA
 * Writes: INPUTVARLST (PROC CONTENTS listing), NETWORK_BASE
 * Macro variables created: VAR1..VARn (input variable names) and NVAR (their count).
 */
    PROC CONTENTS DATA=RAWDATA(DROP=EVENT_ID) NOPRINT OUT=INPUTVARLST;
    RUN;

    /* One macro variable per listed variable name, plus the total count. */
    DATA _NULL_;
        SET INPUTVARLST END=LAST;
        CALL SYMPUT(CATS("VAR",_N_),COMPRESS(NAME));
        IF LAST THEN CALL SYMPUT("NVAR",CATS(_N_));
    RUN;

    DATA NETWORK_BASE;
        SET RAWDATA;
        /* TARGETn is a copy of the n-th listed input variable. */
        %DO I=1 %TO &NVAR.;
            TARGET&I.=&&VAR&I..;
        %END;
        KEEP EVENT_ID
            %DO I=1 %TO &NVAR.;
                TARGET&I. &&VAR&I..
            %END;
        ;
    RUN;
%MEND TMPTARGET;
%TMPTARGET;
/*
 * NW_LAYER: network topology, one row per neuron.
 *   LAYER   - layer number
 *   CEIL    - neuron index within the layer
 *   CWGT    - number of weights of the neuron, including the constant term
 *   ACTFUNC - activation function name (LOGSIG in every row below)
 * Rows must stay ordered by LAYER and CEIL: later steps read this table BY LAYER
 * and take the last row of each layer as its neuron count.
 */
DATA NW_LAYER;
    INPUT LAYER CEIL CWGT ACTFUNC $;
    LABEL LAYER  ="层数"
          CEIL   ="神经元个数"
          CWGT   ="神经元权重个数包含常数"
          ACTFUNC="激活函数";
    DATALINES;
1 1 5 LOGSIG
1 2 5 LOGSIG
2 1 3 LOGSIG
2 2 3 LOGSIG
2 3 3 LOGSIG
2 4 3 LOGSIG
;
RUN;

/* Show the topology with its labels. */
PROC PRINT DATA=NW_LAYER LABEL;
RUN;
%MACRO RNAME(INDATA=);
/*
 * Prepare the modelling table NWDATA_MODEL from INDATA.
 *
 * INDATA - data set holding EVENT_ID, the input variables and TARGETn columns.
 *
 * Steps
 *   1. NCNT lists every variable of INDATA except EVENT_ID and TARGET*, and
 *      assigns each one the network input name ACT_0_<n>.
 *   2. NWDATA_MODEL holds every INDATA row three times, tagged DT="DP", "DV"
 *      and "TV", with the input variables renamed to ACT_0_<n>.
 *   3. The rows are sorted by DT and by a random key, which is then dropped.
 */
    PROC CONTENTS DATA=&INDATA.(DROP=EVENT_ID TARGET:) OUT=NCNT NOPRINT;
    RUN;

    DATA NCNT;
        SET NCNT(KEEP=NAME) END=LAST;
        ID=_N_;                         /* sequence number of the current record */
        LENGTH ACT_VAR $ 32;
        ACT_VAR=CATS("ACT_0_",ID);
        /* OLDVARn -> NEWVARn is the rename map used in the next step. */
        CALL SYMPUTX(CATS("OLDVAR",ID),COMPRESS(NAME));
        CALL SYMPUTX(CATS("NEWVAR",ID),COMPRESS(ACT_VAR));
        IF LAST THEN CALL SYMPUTX("NVAR",ID);
    RUN;

    DATA NWDATA_MODEL;
        SET &INDATA.;
        RENAME
            %DO I=1 %TO &NVAR.;
                &&OLDVAR&I..=&&NEWVAR&I..
            %END;
        ;
        /* One random key per source row; its three copies share the same key. */
        TMPRANUNI=RANUNI(123456);
        LENGTH DT $ 8;
        DO DT="DP","DV","TV";
            OUTPUT;
        END;
    RUN;

    PROC SORT DATA=NWDATA_MODEL OUT=NWDATA_MODEL(DROP=TMPRANUNI);
        BY DT TMPRANUNI;
    RUN;
%MEND RNAME;
%RNAME(INDATA=NETWORK_BASE);
%MACRO ACTSLT(VAR=,FUNC=,D=);
/*
 * Expand to a parenthesised DATA step expression for an activation function
 * or its first derivative. Intended for use inside an expression, so no
 * statement terminator is generated.
 *
 * VAR  - name (or expression) of the neuron net input
 * FUNC - LOGSIG  : 1/(1+exp(-x))
 *        TANSIG  : 2/(1+exp(-2x))-1
 *        PURELIN : x
 * D    - N expands to the function value, Y expands to the derivative
 *
 * FUNC and D are compared case-insensitively. An unsupported value writes an
 * ERROR line to the log instead of silently expanding to nothing, which would
 * otherwise only show up as a syntax error at the call site.
 */
    %LOCAL FN MODE;
    %LET FN=%UPCASE(&FUNC.);
    %LET MODE=%UPCASE(&D.);
    %IF &MODE.=N %THEN %DO;
        %IF &FN.=LOGSIG %THEN (1/(1+EXP(-&VAR.)));
        %ELSE %IF &FN.=TANSIG %THEN (2/(1+EXP(-2*&VAR.))-1);
        %ELSE %IF &FN.=PURELIN %THEN (&VAR.);
        %ELSE %PUT ERROR: ACTSLT received unsupported FUNC=&FUNC. (expected LOGSIG, TANSIG or PURELIN);
    %END;
    %ELSE %IF &MODE.=Y %THEN %DO;
        %IF &FN.=LOGSIG %THEN (EXP(-&VAR.)/((1+EXP(-&VAR.))**2));
        %ELSE %IF &FN.=TANSIG %THEN (4*EXP(-2*&VAR.)/((1+EXP(-2*&VAR.))**2));
        %ELSE %IF &FN.=PURELIN %THEN (1);
        %ELSE %PUT ERROR: ACTSLT received unsupported FUNC=&FUNC. (expected LOGSIG, TANSIG or PURELIN);
    %END;
    %ELSE %PUT ERROR: ACTSLT received unsupported D=&D. (expected N or Y);
%MEND ACTSLT;
%MACRO NETWORK(WGTSEED=12,MINERROR=0.001,ETA=0.001,ALPHA=0.7,MAXLOOPN=40000);
/*
 * Train the feed-forward network described by NW_LAYER on NWDATA_MODEL using
 * per-record back-propagation with a momentum term.
 *
 * Parameters
 *   WGTSEED  - seed passed to RANUNI; initial weights are -1+2*RANUNI(seed)
 *   MINERROR - training stops once DLP_AVGERROR (average squared error over
 *              the DT="DV" rows) falls below this value
 *   ETA      - initial step size; after each epoch it is multiplied by 1.2
 *              (capped at 0.1) when DLP_AVGERROR decreased and by 0.8
 *              (floored at 0.00001) when it increased
 *   ALPHA    - momentum coefficient applied to the previous weight change
 *   MAXLOOPN - maximum number of epochs
 *
 * Reads : NW_LAYER (sorted by LAYER; the last row of a layer supplies its
 *         neuron count CEIL and weight count CWGT), NWDATA_MODEL
 * Writes: NETWORK_T (one row per epoch: errors, ETA, ALPHA and all weights),
 *         ALLWGT, ALLWGT_S (sorted by TEST_AVGERROR), MINWGT (the epoch with
 *         the smallest TEST_AVGERROR) and a GPLOT of both error curves.
 *
 * Row roles inside one epoch, as selected by DT:
 *   "DP" - forward pass followed by a weight update
 *   "DV" - forward pass, squared error accumulated into DLP_ALLERROR
 *   "TV" - forward pass, squared error accumulated into TEST_ALLERROR
 *
 * Notes on this revision
 *   - topology macro variables are written with CALL SYMPUTX, so NLAYER no
 *     longer carries leading blanks from an implicit numeric conversion
 *   - LAST_DLP_AVGERROR has a single RETAIN initial value (it used to be
 *     declared twice, with 100 and with 999)
 *   - the epoch limit is tested with >=, so at most MAXLOOPN epochs run
 *     (the former > test ran MAXLOOPN+1)
 *   - the array of lagged test errors was removed: it was filled every epoch
 *     but never read and never kept
 *   - loop indices are %LOCAL, and neighbouring layer numbers are held in
 *     PREVL and NEXTL instead of being rebuilt inside variable names
 */
    %LOCAL I J K NL PREVL NEXTL;

    /* Publish the topology as macro variables: CEILn, CWGTn, ACTFUNC_n_m, NLAYER. */
    DATA _NULL_;
        SET NW_LAYER END=LAST;
        BY LAYER;
        IF LAST.LAYER THEN DO;
            CALL SYMPUTX(CATS("CEIL",LAYER),CEIL);
            CALL SYMPUTX(CATS("CWGT",LAYER),CWGT);
        END;
        CALL SYMPUTX(CATS("ACTFUNC_",LAYER,"_",CEIL),ACTFUNC);
        IF LAST THEN CALL SYMPUTX("NLAYER",LAYER);
    RUN;

    /* The whole training run is one DATA step iteration: the explicit DO WHILE
       loop drives the epochs and STOP ends the step afterwards. */
    DATA NETWORK_T;
        RETAIN DLP_ALLERROR DLP_AVGERROR 100
               TEST_ALLERROR TEST_AVGERROR 100
               LAST_DLP_AVGERROR 999
               DOBS TOBS 0;

        /* For every weight: the weight itself, its current partial derivative
           and its previous change (momentum memory). All start at 0. */
        RETAIN
            %DO I=1 %TO &NLAYER.;
                %DO J=1 %TO &&CEIL&I..;
                    %DO K=1 %TO &&CWGT&I..;
                        WGT_&I._&J._&K.
                        PARTIAL_P0_WGT_&I._&J._&K.
                        DETAX_WGT_&I._&J._&K.
                    %END;
                %END;
            %END;
            0;

        /* Random initial weights in (-1,1). */
        ARRAY ALLWGT WGT_:;
        DO OVER ALLWGT;
            ALLWGT=-1+2*RANUNI(&WGTSEED.);
        END;

        RETAIN LOOPN 0 ETA &ETA. ALPHA &ALPHA.;

        DO WHILE(NOT (DLP_AVGERROR<&MINERROR. OR LOOPN>=&MAXLOOPN.));
            LOOPN+1;
            DLP_ALLERROR=0;
            TEST_ALLERROR=0;
            DOBS=0;
            TOBS=0;

            /* Read the modelling table record by record. */
            DO PI=1 TO NOBS;
                SET NWDATA_MODEL NOBS=NOBS POINT=PI;

                /* Forward pass: net input COMB and activation ACT per neuron.
                   The last weight of a neuron is the constant term. */
                %DO I=1 %TO &NLAYER.;
                    %LET PREVL=%EVAL(&I.-1);
                    %DO J=1 %TO &&CEIL&I..;
                        COMB_&I._&J.=SUM(
                            %DO K=1 %TO &&CWGT&I..;
                                %IF &K. NE &&CWGT&I.. %THEN WGT_&I._&J._&K.*ACT_&PREVL._&K.,;
                                %ELSE WGT_&I._&J._&K.*1,;
                            %END;
                            0);
                        ACT_&I._&J.=%ACTSLT(VAR=COMB_&I._&J.,FUNC=&&ACTFUNC_&I._&J..,D=N);
                    %END;
                %END;

                IF DT="DP" THEN DO;
                    /* Backward pass, last layer first. D is the derivative of the
                       activation times either the output error (last layer) or the
                       weighted sum of the next layer D values (other layers). */
                    %DO I=&NLAYER. %TO 1 %BY -1;
                        %LET PREVL=%EVAL(&I.-1);
                        %LET NEXTL=%EVAL(&I.+1);
                        %DO J=1 %TO &&CEIL&I..;
                            D_&I._&J.=%ACTSLT(VAR=COMB_&I._&J.,FUNC=&&ACTFUNC_&I._&J..,D=Y)*SUM(
                                %IF &I.=&NLAYER. %THEN -(TARGET&J.-ACT_&I._&J.);
                                %ELSE %DO;
                                    %DO NL=1 %TO &&CEIL&NEXTL..;
                                        WGT_&NEXTL._&NL._&J.*D_&NEXTL._&NL.,
                                    %END;
                                    0
                                %END;
                            );
                            %DO K=1 %TO &&CWGT&I..;
                                %IF &K. NE &&CWGT&I.. %THEN %DO;
                                    PARTIAL_P0_WGT_&I._&J._&K.=D_&I._&J.*ACT_&PREVL._&K.;
                                %END;
                                %ELSE %DO;
                                    PARTIAL_P0_WGT_&I._&J._&K.=D_&I._&J.*1;
                                %END;
                            %END;
                        %END;
                    %END;

                    /* Weight update. It runs only after every D has been computed,
                       so the backward pass above always sees the pre-update weights.
                       The weight is changed before DETAX is refreshed, so it uses
                       the change remembered from the previous update. */
                    %DO I=1 %TO &NLAYER.;
                        %DO J=1 %TO &&CEIL&I..;
                            %DO K=1 %TO &&CWGT&I..;
                                WGT_&I._&J._&K.=WGT_&I._&J._&K.-ETA*PARTIAL_P0_WGT_&I._&J._&K.+ALPHA*DETAX_WGT_&I._&J._&K.;
                                DETAX_WGT_&I._&J._&K.=-ETA*PARTIAL_P0_WGT_&I._&J._&K.+ALPHA*DETAX_WGT_&I._&J._&K.;
                            %END;
                        %END;
                    %END;
                END;
                ELSE IF DT="DV" THEN DO;
                    DOBS+1;
                    %DO J=1 %TO &&CEIL&NLAYER..;
                        DLP_ALLERROR+((ACT_&NLAYER._&J.-TARGET&J.)**2);
                    %END;
                END;
                ELSE IF DT="TV" THEN DO;
                    TOBS+1;
                    %DO J=1 %TO &&CEIL&NLAYER..;
                        TEST_ALLERROR+((ACT_&NLAYER._&J.-TARGET&J.)**2);
                    %END;
                END;
            END;

            DLP_AVGERROR =(DLP_ALLERROR /DOBS);
            TEST_AVGERROR=(TEST_ALLERROR/TOBS);
            OUTPUT;

            /* Adapt the step size from the change in DLP_AVGERROR. */
            IF DLP_AVGERROR<LAST_DLP_AVGERROR THEN ETA=MIN(ETA*1.2,0.1);
            ELSE IF DLP_AVGERROR>LAST_DLP_AVGERROR THEN ETA=MAX(ETA*0.8,0.00001);
            LAST_DLP_AVGERROR=DLP_AVGERROR;
        END;
        STOP;
        KEEP LOOPN DLP_ALLERROR DLP_AVGERROR TEST_ALLERROR TEST_AVGERROR WGT_: ETA ALPHA;
    RUN;

    /* One row per epoch (LOOPN is already unique in NETWORK_T). */
    DATA ALLWGT;
        SET NETWORK_T;
        BY LOOPN;
        IF LAST.LOOPN;
        OUTPUT;
        KEEP LOOPN DLP_ALLERROR DLP_AVGERROR TEST_ALLERROR TEST_AVGERROR WGT_: ETA;
    RUN;

    PROC SORT DATA=ALLWGT OUT=ALLWGT_S;
        BY TEST_AVGERROR;
    RUN;

    /* Weights of the epoch with the smallest TEST_AVGERROR. */
    DATA MINWGT;
        SET ALLWGT_S(OBS=1);
    RUN;

    /* Error curves per epoch: DLP_AVGERROR in red, TEST_AVGERROR in blue. */
    SYMBOL C=RED;
    SYMBOL2 C=BLUE;
    PROC GPLOT DATA=ALLWGT;
        PLOT DLP_AVGERROR*LOOPN=1 TEST_AVGERROR*LOOPN=2/OVERLAY;
    RUN;
    QUIT;
%MEND NETWORK;
%NETWORK;
%MACRO PERFM(NEWKDIS=,INDATA=,DISTCNT=,DISWGT=,OUTDATA=);
/*
 * Score a data set with a trained network (forward pass only).
 *
 * NEWKDIS - layer dictionary (same layout as NW_LAYER, sorted by LAYER)
 * INDATA  - input data to score; must contain EVENT_ID and the input variables
 * DISTCNT - rename dictionary: data set whose NAME column lists the input
 *           variables, in the order they map to ACT_0_1, ACT_0_2, ...
 * DISWGT  - weight dictionary: data set whose first row holds the WGT_ columns
 * OUTDATA - output data set; keeps EVENT_ID and every ACT_ column
 *
 * Side effects: rewrites the work data sets NCNT and PP_DATA.
 */
    %LOCAL PREVL;

    /* Topology macro variables: CEILn, CWGTn, ACTFUNC_n_m and NLAYER. */
    DATA _NULL_;
        SET &NEWKDIS. END=LAST;
        BY LAYER;
        IF LAST.LAYER THEN DO;
            CALL SYMPUTX(CATS("CEIL",LAYER),CEIL);
            CALL SYMPUTX(CATS("CWGT",LAYER),CWGT);
        END;
        CALL SYMPUTX(CATS("ACTFUNC_",LAYER,"_",CEIL),COMPRESS(ACTFUNC));
        IF LAST THEN CALL SYMPUTX("NLAYER",LAYER);
    RUN;

    /* Rename map OLDVARn -> NEWVARn (ACT_0_n) taken from the dictionary. */
    DATA NCNT;
        SET &DISTCNT.(KEEP=NAME) END=LAST;
        ID=_N_;
        LENGTH ACT_VAR $ 32;
        ACT_VAR=CATS("ACT_0_",ID);
        CALL SYMPUTX(CATS("OLDVAR",ID),COMPRESS(NAME));
        CALL SYMPUTX(CATS("NEWVAR",ID),COMPRESS(ACT_VAR));
        IF LAST THEN CALL SYMPUTX("NVAR",ID);
    RUN;

    DATA PP_DATA;
        SET &INDATA.;
        RENAME
            %DO I=1 %TO &NVAR.;
                &&OLDVAR&I..=&&NEWVAR&I..
            %END;
        ;
    RUN;

    DATA &OUTDATA.;
        SET PP_DATA;
        /* Read the weights once; variables read by SET keep their values on later rows. */
        IF _N_=1 THEN SET &DISWGT.;
        %DO I=1 %TO &NLAYER.;
            %LET PREVL=%EVAL(&I.-1);
            %DO J=1 %TO &&CEIL&I..;
                /* Net input: weighted previous-layer activations plus the constant term. */
                COMB_&I._&J.=SUM(
                    %DO K=1 %TO &&CWGT&I..;
                        %IF &K. NE &&CWGT&I.. %THEN WGT_&I._&J._&K.*ACT_&PREVL._&K.,;
                        %ELSE WGT_&I._&J._&K.*1,;
                    %END;
                    0);
                /* Activation of the neuron. */
                ACT_&I._&J.=%ACTSLT(VAR=COMB_&I._&J.,FUNC=&&ACTFUNC_&I._&J..,D=N);
            %END;
        %END;
        KEEP EVENT_ID ACT_:;
    RUN;
%MEND PERFM;
%PERFM(NEWKDIS=NW_LAYER,INDATA=RAWDATA,DISTCNT=NCNT,DISWGT=MINWGT,OUTDATA=TMPDATA);

PROC PRINT DATA=TMPDATA;
RUN;