/* see the notes in the code below */
/* the idea is to add a variable of random number */
/* then sort the dataset by the random number of the new variable */
/* then output the specified number of observations to a new dataset */
/* this can also be done using a few data steps within a macro */
/* Demo input table: 1100 rows standing in for the real source data. */
data allData;
    /* id runs 10001..11100, one row per id. */
    do id = 10001 to 11100;
        /* x and y are uniform draws scaled to 0-100 and rounded to 2 decimals. */
        /* NOTE(review): both calls pass the same seed; per the SAS RANUNI     */
        /* documentation only the first seed in a step starts the stream, so   */
        /* x and y should be successive (different) draws - confirm.           */
        x = round(100 * ranuni(12345), 0.01);
        y = round(100 * ranuni(12345), 0.01);
        output;
    end;
run;
/*
 * %samp - draw repeated simple random samples (without replacement).
 *
 * How it works: ORDER BY ranuni(seed) evaluates the random function once
 * for every row of the input table, so each row receives its own random
 * sort key. Sorting by that key shuffles the rows, and OUTOBS= keeps only
 * the first n rows of the shuffled result. The random key is used for
 * ordering only and is not stored in the output table.
 *
 * Parameters (all keyword, defaults reproduce the original behaviour):
 *   data = input dataset to sample from              (default: alldata)
 *   n    = number of observations in each sample     (default: 100)
 *   reps = number of sample datasets to create       (default: 10)
 *   seed = seed for the first sample; each following
 *          sample uses the previous seed plus 2      (default: 1234)
 *   out  = prefix of the output datasets; sample k
 *          is written to <out>k                      (default: sample)
 *
 * Output: datasets <out>1 ... <out><reps>.
 *
 * Notes:
 *   - PROC SQL writes a WARNING to the log each time OUTOBS= truncates
 *     the result; that is expected here.
 *   - i is declared %local and seed is a parameter, so same-named macro
 *     variables in the calling scope are left untouched.
 */
%macro samp(data=alldata, n=100, reps=10, seed=1234, out=sample);
    %local i;

    /* Fail once, with a clear message, instead of once per repetition. */
    %if not (%sysfunc(exist(&data)) or %sysfunc(exist(&data, view))) %then %do;
        %put ERROR: [samp] input dataset &data does not exist.;
        %return;
    %end;

    %do i = 1 %to &reps;
        proc sql outobs=&n;                 /* keep only the first n rows   */
            create table &out.&i. as
            select *
            from &data
            order by ranuni(&seed);         /* random sort key, one per row */
        quit;
        %let seed = %eval(&seed + 2);       /* different seed for next draw */
    %end;
%mend samp;

/* Parentheses make the parameterized macro execute immediately. */
%samp()
这段程序可以从含有1100个观测的数据集中抽取100个观测的样本，随机抽取10次。程序是人大论坛的一位大神写的，我copy了一下，但对程序不太理解：order by 后面跟着一个随机变量，这个随机变量和数据集alldata没有任何关系，求问这是为什么呢？