全部版块 我的主页
论坛 数据科学与人工智能 数据分析与数据科学 SAS专版
7563 4
2013-01-15
北大出版的那本书里的 SAS 程序其实没有忠于原论文,尤其是账面市值比的分组:那个程序是等分的,而不是按照 30%、40%、30% 分组。下面这段程序是外国人写的,供大家参考。我没有调试过,不知对错,请自己仔细检查是否需要修改。
/*----------------------------------------------------------------------------
  This code creates a SAS dataset with the Fama-French monthly factor returns
  purged of firms that have not been listed for at least 60 months.

  Part 1 assigns stocks to NYSE Size/BM-based groups and was written by
         Denys Glushkov at WRDS (with some minor modifications).
  Part 2 extends the first part by computing the resulting Fama-French
         monthly factor returns.

  Conventions used throughout:
    - The size breakpoint for year t is the median NYSE market equity at the
      end of June of year t.
    - BE/ME for June of year t is the book equity for the last fiscal year
      end in t-1 divided by ME for December of t-1.
    - The BE/ME breakpoints are the 30th and 70th NYSE percentiles.
----------------------------------------------------------------------------*/

/**********************************************************
 Part 1 - this part of the code runs on the WRDS server
**********************************************************/

/* Local directory (or whatever directory on your personal computer) where
   the downloaded SAS dataset is saved. */
libname comp 'H:\www\compustat';

%let wrds=wrds.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;

options nodate nocenter nonumber ps=max ls=72 fullstimer;
title ' ';

libname link '/wrds/crsp/sasdata/a_ccm';

data linktable;
    set link.CCMXPF_LINKTABLE;
run;

/*----------------------------------------------------------------------------
  SIZE_BM: assigns every stock to a NYSE-breakpoint size group (Small/Big)
           and book-to-market group (Low/Medium/High).

  Parameters
    bdate=  first date of the sample (SAS date constant)
    edate=  last date of the sample (SAS date constant)
    link=   name of the CRSP/Compustat link table dataset

  Output
    WORK.SIZE_BM_PORT, sorted by permno and size_date, with variables
    permno gvkey bm size size_port bm_port size_date bm_date leaddate.

  NOTE(review): the included msf2a.sas is not visible here. This macro assumes
  it reads the macro variables set below and writes MYLIB.MSF2A (and that the
  MYLIB and COMP librefs exist on the server) - confirm against that program.
----------------------------------------------------------------------------*/
%MACRO SIZE_BM (bdate=, edate=, link=);
    /* Fixed: the original declared "msfars", so msfvars was never local. */
    %local msfvars;
    %local msevars;
    %local sharecode;
    %local exchangecode;
    %local begdate;
    %local enddate;

    /* Selected variables from the CRSP monthly data file (crsp.msf) */
    %let msfvars = prc ret shrout;
    /* Selected variables from the CRSP monthly event file (crsp.mse) */
    %let msevars = exchcd shrcd dlret;
    /* Restriction on the type of shares (e.g. common stocks) */
    %let sharecode = and shrcd in (10,11);
    /* In this case, no restrictions on the exchange codes */
    %let exchangecode = ;

    /* Start one year before bdate so that December t-1 market equity is
       available. Fixed: the original used INTCK, which returns a count of
       intervals rather than a shifted date.
       NOTE(review): the text is resolved wherever msf2a.sas uses the macro
       variable begdate - confirm it is used in an expression context. */
    %let begdate = intnx('year',&bdate,-1);
    %let enddate = &edate;

    /* Call and run the msf2a.sas program */
    %include '/wrds/crsp/samples/msf2a.sas';

    /* Market equity per month; returns are adjusted for delisting returns. */
    data msex2;
        set mylib.msf2a;
        by permno date;
        size     = abs(prc)*shrout;
        size_lag = lag(size);
        ldate    = lag(date);
        /* First record of a security: back out the prior size from the return. */
        if first.permno then size_lag = size / (1+ret);
        ret = sum(ret,dlret);
        if size>0;
        drop prc shrout ldate;
    run;

    /* Assign stocks to NYSE size-based groups (June, exchcd=1 breakpoints) */
    proc sort data=msex2 (keep=date size exchcd) out=msex3;
        where month(date)=6 and exchcd=1;
        by date;
    run;

    proc means data=msex3 noprint;
        var size;
        by date;
        output out=nyse (drop=_freq_ _type_) median=/autoname;
    run;

    proc sql;
        create table size_assign as
        select a.permno, a.date, a.size,
               case when size<=size_median then 'Small'
                    else 'Big'
               end as size_port
        from msex2 (keep=permno date size where=(month(date)=6)) as a
             left join nyse as b
             on a.date = b.date;
    quit;

    /* Create Book Equity (BE) measure from Compustat
       (definition from Daniel and Titman, JF 2006) */
    data comp_extract;
        set comp.funda
            (where=(fyr>0 and at>0 and consol='C' and
                    indfmt='INDL' and datafmt='STD' and popsrc='D'));

        /* Shareholders equity: first available of SEQ, CEQ+PSTK, AT-(LT+MIB) */
        if missing(SEQ)=0 then she=SEQ;
        else if missing(CEQ)=0 and missing(PSTK)=0 then she=CEQ+PSTK;
        else if missing(AT)=0 and missing(LT)=0 and missing(MIB)=0 then she=AT-(LT+MIB);
        else she=.;

        /* Subtract preferred stock: first available of PSTKRV, PSTKL, PSTK */
        if missing(PSTKRV)=0 then BE0=she-PSTKRV;
        else if missing(PSTKL)=0 then BE0=she-PSTKL;
        else if missing(PSTK)=0 then BE0=she-PSTK;
        else BE0=.;

        /* Converts fiscal year into calendar year data */
        if (1<=fyr<=5) then date_fyend=intnx('month',mdy(fyr,1,fyear+1),0,'end');
        else if (6<=fyr<=12) then date_fyend=intnx('month',mdy(fyr,1,fyear),0,'end');
        calyear=year(date_fyend);
        format date_fyend date9.;

        /* Accounting data since calendar year t-1 */
        if (year(date_fyend) >= year(&bdate) - 1) and (year(date_fyend) <= year(&edate) + 1);

        keep gvkey calyear fyr BE0 date_fyend indfmt consol datafmt popsrc datadate TXDITC;
    run;

    /* Add deferred taxes and subtract PRBA when both are available.
       Fixed: the pasted source had fused tokens here ("comp_extractas",
       "as bon"), which made the step syntactically invalid. */
    proc sql;
        create table comp_extract as
        select a.gvkey, a.calyear, a.fyr, a.date_fyend,
               case when missing(TXDITC)=0 and missing(PRBA)=0 then BE0+TXDITC-PRBA
                    else BE0
               end as BE
        from comp_extract as a
             left join
             comp.aco_pnfnda (keep=gvkey indfmt consol datafmt popsrc datadate prba) as b
             on a.gvkey=b.gvkey and a.indfmt=b.indfmt and a.consol=b.consol
                and a.datafmt=b.datafmt and a.popsrc=b.popsrc and a.datadate=b.datadate;
    quit;

    /* Create Book to Market (BM) ratios at December */
    proc sql;
        create table BM0 (where=(BM>0)) as
        select a.gvkey, a.calyear, c.permno, c.exchcd, c.date,
               a.be/(abs(c.prc)*c.shrout/1000) as BM
        from comp_extract as a,
             &link as b,
             mylib.msf2a (where=(month(date)=12)) as c
        where a.gvkey=b.gvkey
          and ((b.linkdt<=c.date<=b.linkenddt)
               or (b.linkdt<=c.date and b.linkenddt=.E)
               or (c.date<=b.linkenddt and b.linkdt=.B))
          and b.lpermno=c.permno
          and a.calyear = year(c.date)
          and (abs(c.prc)*c.shrout)>0;
    quit;

    /* Keep only those cases with valid stock market data in June,
       i.e. six months after the December BM date */
    proc sql;
        create table BM as
        select a.gvkey, a.permno, a.bm, a.calyear, a.date as decdate,
               a.exchcd, b.date, b.size, b.size_port
        from BM0 as a, size_assign as b
        where a.permno=b.permno
          and intck('month',a.date,b.date)=6
          and b.size>0;
    quit;

    /* Assign stocks to NYSE BM-based groups (30th/70th percentiles) */
    proc sort data=BM out=nyse1 (keep=permno bm calyear decdate);
        where exchcd=1;
        by decdate;
    run;

    proc univariate data=nyse1 noprint;
        var bm;
        by decdate;
        output out=nyse2 pctlpts = 30 70 pctlpre=per;
    run;

    /* Merge back with master file that contains all securities from
       NYSE, Nasdaq and AMEX. The date variable refers to June, the decdate
       variable refers to December of the previous year. */
    proc sql;
        create table bm1 as
        select a.permno, a.gvkey, a.bm, a.size, a.size_port, a.date, a.decdate,
               case when bm<=per30 then 'Low'
                    when per30<bm<=per70 then 'Medium'
                    else 'High'
               end as bm_port
        from BM as a, nyse2 as b
        where a.decdate=b.decdate;
    quit;

    /* Sorted descending, so LAG returns the following year June date. */
    proc sort data=bm1;
        by permno descending date;
    run;

    data size_bm_port;
        set bm1;
        by permno;
        leaddate=lag(date);
        if first.permno then leaddate=intnx('month',date,-12,'end');
        format date leaddate decdate date9.;
        rename date=size_date decdate=bm_date;
        label date='Valid date for firm size';
        label decdate='Valid date for Book-to-Market';
    run;

    proc sort data=size_bm_port;
        by permno size_date;
    run;

    /* Clean up intermediate tables */
    proc sql;
        drop table nyse1, nyse2, nyse, size_assign, msex2, msex3, msedata,
                   bm, bm0, bm1, comp_extract;
    quit;
%MEND;

%SIZE_BM (bdate='01Jan1962'd, edate='31DEC2011'd, link=linktable);

proc download data=size_bm_port out=size_bm_port;
run;

endrsubmit;

proc sort data=size_bm_port out=comp.size_bm_port;
    by permno bm_date;
run;

/**********************************************************
 Part 2
**********************************************************/

/* directory where CRSP monthly stock return file is stored */
libname msf  'H:\www\crsp';
/* directory where output file 'size_bm_port' from Part 1 is stored */
libname comp 'H:\www\compustat';

/* CRSP monthly stock return file with permno, date, ret.
   n counts the monthly records that precede the current one for the
   same permno (0 on the first record). */
data crsp;
    set msf.msf;
    by permno date;
    n+1;
    if first.permno then n=0;
    yyyymm = year(date)*100+month(date);
    keep permno date yyyymm ret n;
run;

proc sort data=comp.size_bm_port out=compu;
    by permno bm_date;
run;

data compu;
    set compu;
    cyear = year(bm_date);
    drop leaddate;
run;

/* I match the accounting data for all fiscal year ends in calendar year t-1
   with the returns for July of year t to June of year t+1. */
%let begyear=1962;
%let endyear=2011;
%let begmon=6;
%let endmon=12;     /* kept for compatibility; no longer used by merg */

/*----------------------------------------------------------------------------
  merg: adds cyear (calendar year of the matching accounting data) to WORK.CRSP
        and sorts it by permno cyear.

  Returns from month begmon+1 of year t through month begmon of year t+1 get
  cyear = t-1, for t = begyear..endyear. Rows outside that window keep a
  missing cyear.

  Fixed: the original generated one IF statement per (year, month 6..12)
  pair. The extra month iterations were redundant inside the window and
  assigned a stale cyear to the months after June of endyear+1.

  The st parameter is accepted for backward compatibility and is not used.
----------------------------------------------------------------------------*/
%macro merg(st);
    data crsp;
        set crsp;
        if %eval(&begyear*100 + &begmon + 1) <= yyyymm <= %eval((&endyear+1)*100 + &begmon)
            then cyear = year(date) - 1 - (month(date) <= &begmon);
    run;

    proc sort data=crsp;
        by permno cyear;
    run;
%mend;

%merg(1);

proc sort data=compu;
    by permno cyear;
run;

/*----------------------------------------------------------------------------
  build_factors: computes size-weighted returns of the six size/BM portfolios
                 and derives SMB and HML from them.

  Parameters
    tag=     suffix of the output dataset msf.ff_factors_<tag> and label text
    prefix=  one-letter prefix of the factor variables (<prefix>SMB, <prefix>HML)
    purge=   1 keeps only records with n > 60; 0 keeps all matched records
----------------------------------------------------------------------------*/
%macro build_factors(tag=, prefix=, purge=0);
    data ff;
        merge crsp (in=m1) compu (in=m2);
        by permno cyear;
        if m1 and m2;
        /* Special missing return codes are turned into ordinary missing. */
        if ret = .B then ret = .;
        if ret = .C then ret = .;
        %if &purge %then %do;
        if n > 60;
        %end;
    run;

    proc sort data=ff;
        by date size_port bm_port;
    run;

    proc means data=ff noprint;
        var ret / weight=size;
        by date size_port bm_port;
        output out=ff_factors mean=vwret;
    run;

    /* One column per portfolio, named by concatenating size_port and bm_port */
    proc transpose data=ff_factors out=ff_factors_wide;
        var vwret;
        by date;
        id size_port bm_port;
    run;

    data msf.ff_factors_&tag;
        set ff_factors_wide;
        &prefix.SMB = 1/3*(SmallHigh + SmallMedium + SmallLow) - 1/3*(BigHigh + BigMedium + BigLow);
        &prefix.HML = 1/2*(SmallHigh + BigHigh) - 1/2*(SmallLow + BigLow);
        label &prefix.SMB = "SMB [&tag]";
        label &prefix.HML = "HML [&tag]";
        keep date &prefix.SMB &prefix.HML;
    run;
%mend;

/* Unpurged factors */
%build_factors(tag=unpurged, prefix=u, purge=0);

/* Purged factors */
%build_factors(tag=purged, prefix=p, purge=1);

data msf.ff_factors;
    merge msf.ff_factors_unpurged msf.ff_factors_purged;
    by date;
    if year(date) >= 1963;
    keep date pSMB pHML uSMB uHML;
run;
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

全部回复
2013-1-15 12:25:20
其实分组不是三因素构建的重点,wrds上的程序用CSMAR的数据是需要做一些修改的。
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2013-8-3 20:42:03
你这个代码乱七八糟的,建议重新编辑下,谢谢
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2013-8-13 20:04:01
代码乱就不用说了,而且还缺少一个重要的东西:
*Call and Run the msf2a.sas program;
%include '/wrds/crsp/samples/msf2a.sas';
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2013-8-14 10:09:07
把链接贴过来比较好。
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

相关推荐
栏目导航
热门文章
推荐文章

说点什么

分享

扫码加好友,拉您进群
各岗位、行业、专业交流群