我有一段非常优秀的code如下, 可以实现找到每个变量的missing value的百分比,大家一运行便知结果
data survey;
input id diet exer hours xwk educ;
datalines;
1 1 . 1 3 1
1 . 2 1 4 2
1 . 4 . . .
1 1 5 2 3 .
2 . 9 2 3 .
2 5 9 2 4 .
2 . 3 . 5 3
3 2 . . . .
;
proc sql noprint;
select distinct name into : var_list separated by " "
from dictionary.columns
where libname="WORK" and memname="SURVEY" and upcase(name) ne "ID";
quit;
%put &var_list;
%macro count_miss(ds= , byvar= , vars= );
%let i=1;
%let var1=%sysfunc(scan(&vars,1));
%do %until(&v eq );
%let i=%eval(&i+1);
%let v=%sysfunc(scan(&vars,&i));
%let var&i=&v;
%end;
%let varnum=%eval(&i-1);
proc sql;
create table countmiss as
select &byvar
%do i=1 %to &varnum;
,sum(missing(&&var&i)) as &&var&i.._miss
%end;
from &ds
group by &byvar;
create table counttotal as
select distinct &byvar, count(*) as total
from &ds
group by &byvar;
create table countmiss_pct_&ds as
select a.&byvar
%do i=1 %to &varnum;
,(&&var&i.._miss)/total as &&var&i.._miss_pct
%end;
from countmiss a, counttotal b
where a.&byvar=b.&byvar;
quit;
proc print data=countmiss_pct_&ds; run;
%mend;
%count_miss(ds=survey, byvar=id, vars=&var_list);
但是针对上面这个survey数据没问题,但在我实际工作中的数据却会出错。因为我的变量名字都太长,比如有个变量叫‘TOTAL_CONTROL_DOSAGE_UNITS_MISS_PCT' 在log里面就会显示下面这个样的error:
“Name 'TOTAL_CONTROL_DOSAGE_UNITS_MISS_PCT' is too long for a SAS name in this context”
因为我的变量名字都是这么长,所以想请教一下如何解决这个问题。比如把上面的data set中的变量名educ改成TOTAL_CONTROL_DOSAGE_UNITS_MISS_PCT
data survey;
input id diet exer hours xwk TOTAL_CONTROL_DOSAGE_UNITS_MISS_PCT;
datalines;
1 1 . 1 3 1
1 . 2 1 4 2
1 . 4 . . .
1 1 5 2 3 .
2 . 9 2 3 .
2 5 9 2 4 .
2 . 3 . 5 3
3 2 . . . .
;
在运行宏%count_miss(ds=survey, byvar=id, vars=&var_list); 就会有下面的error
ERROR: The variable named TOTAL_CONTROL_DOSAGE_UNITS_MISS_PCT contains more than 32 characters.
垦请高手如何修改我的%count_miss让程序可以解决变量名长度的问题。