harlon1976 发表于 2010-3-11 15:39 
假设数据集中有变量两个,分别为数值型的x和字符型的class。假设有如下5个观测:
x class
1 A
5 B
4 A
2 B
8 A
在这个数据集中,变量class有两个取值,分别为A、B,如果现在考察对变量class进行重新排序,根据概率论知识:这里有5条观测,分成两类,一类(A类)有3个,第二类(B类)有2个,则可以重新组合的结果为10种,例如下面的结果就是其中的两种:
x class x class
1 A 5 B
4 A 2 B
8 A 8 A
5 B 1 A
2 B 4 A
现在的问题是:如何根据原始的数据集,编写SAS程序生成所有的10种结果。
如果现在不只是A、B两类,比如3类,类似的程序又如何编写呢?请高手给予解决。
The following solution is based on the combination macro provided by SAS. It can be found at:
Reference:
http://support.sas.com/techsup/technote/ts498.html (thanks to jackbt123)
I added the count variable, which numbers the combinations so that a particular combination can be selected by its sequence number.
The macro calculates the observation indexes for a particular combination. An index is the position of an observation in the data set (it is the variable i in the following program).
Once one has the indexes, one can retrieve the observations with those indexes (class A) plus the complement of those indexes (class B).
Hope this helps.
/*
 * %combo(r, item1, item2, ..., itemN)
 *
 * Enumerates every combination of the N listed items taken r at a time and
 * writes them to the data set WORK.COMBO, one combination per observation.
 *
 *   r        number of items per combination (first positional argument)
 *   item...  the items to choose from; they are read from &syspbuff
 *            (PARMBUFF), so any number of items may be passed
 *
 * Output data set COMBO:
 *   v1-v&r   the chosen items (character, length 8 - longer items are
 *            truncated by the $8 array declaration)
 *   count    1-based sequence number of the combination, in the order the
 *            nested loops generate them
 *
 * The data set is also printed with PROC PRINT, and a TITLE is set.
 *
 * All working macro variables are declared %LOCAL so that the macro never
 * overwrites a caller's global macro variable of the same name (for example
 * a global &n or &i set before the call).
 */
%macro combo(r)/parmbuff;
%local i n things;

/* Collect the items: token 1 of &syspbuff is "(r", items start at token 2. */
%let i=2;
%let things=;
%do %while (%Qscan(&syspbuff,&i,%STR(,%))) ne );
    %local p&i;
    %let p&i="%Qscan(&syspbuff,&i,%STR(,%)))";
    %if &i=2 %then %let things=&&p&i;
    %else %let things=&things,&&p&i;
    %let i=%eval(&i+1);
%end;

/* &i is now one past the last item token, and the items began at token 2. */
%let n=%eval(&i-2);

data combo;
    keep v1-v&r count;
    array word $8 w1-w&n (&things);   /* the items, as quoted initial values */
    array rr (*) r1-r&r;              /* current item positions, one per loop */
    array v $8 v1-v&r;                /* the combination that is written out */

    /* Generate r nested DO loops; each inner index starts one past the     */
    /* enclosing index, so positions are strictly increasing and every      */
    /* combination is produced exactly once.                                */
    %do i=1 %to &r;
        %if &i=1 %then %do;
    do r&i=1 to &n-(&r-&i);
        %end;
        %else %do;
    do r&i=r%eval(&i-1)+1 to &n-(&r-&i);
        %end;
    %end;

    count+1;                          /* sum statement: retained running number */
    do k=1 to &r;                     /* copy the selected items into v1-v&r */
        v(k)=word (rr(k));
    end;
    output;

    /* Close the r generated DO loops. */
    %do i=1 %to &r;
    end;
    %end;
run;

proc print uniform data=combo;
    title "combinations of &n items taken &r at a time ";
run;
%mend combo;
/*
 * Driver: assign class 'A' to &n_pick of the 15 observations in T1 and
 * class 'B' to the rest, using the &dsnnum-th combination produced by %combo.
 */
option mprint;

/* Sample data: 15 observations, i = 1..15 is the observation index and     */
/* a = byte(i+64) is a one-character label (letters starting at 'A' on an   */
/* ASCII system).                                                           */
data t1;
    do i=1 to 15;
        a=byte(i+64);
        output;
    end;
run;

/* n_pick : how many observations go into class A.                          */
/* dsnnum : sequence number (COUNT in WORK.COMBO) of the combination wanted. */
/* The name n is deliberately avoided here because %combo assigns its own   */
/* macro variable n (the number of items) and would overwrite a global n.   */
%let n_pick=10;
%let dsnnum=8;

/* Build the comma-separated list of observation indexes to choose from. */
proc sql noprint;
    select i into : vlist separated by ','
        from t1;
quit;
%put &vlist;

/* Enumerate all combinations of the indexes taken &n_pick at a time. */
%combo(&n_pick,&vlist)

/* Pick combination number &dsnnum and store its indexes as a comma-separated */
/* list in the macro variable INDEX. If &dsnnum matches no observation, INDEX */
/* is not created and the next step will fail to resolve it.                  */
data _null_;
    length index $2000;
    set combo(where=(count= &dsnnum));
    array v(*) v1-v&n_pick;
    index=catx(',', of v(*));
    call symputx('index', index);
run;

/* Observations whose index is in the chosen combination become class A; */
/* the complement becomes class B.                                       */
data t2;
    set t1;
    if i in (&index) then class='A';
    else class='B';
run;

proc print data=t2; run;