duoweisa 发表于 2013-10-20 12:48 
"一种想法是因为这些钱数都比较分散,并不是平均分布的,所以用SRS的方法不太合理",
我觉得你想法挺有趣 ...
谢谢您的建议
我稍微改了一下程序,其实,你是10000个population里面取1000个,可能差别不大,但是如果是1000000个里面取就有一点差别了。
/* Simulate the population: 1,000,000 draws of x from RAND("exponential").
   Output table SIM has one row per draw with the loop counter i and the value x. */
data sim;
   /* Seed the RAND stream so that every run produces the same population;
      without this call the stream is seeded differently on each run. */
   call streaminit(20131020);
   do i = 1 to 1000000;
      x = rand("exponential");
      output;   /* one row per iteration */
   end;
run;
/* Descriptive statistics and a histogram of x for the whole simulated population. */
proc univariate data=sim;
   histogram x;
   var x;
run;
/* Titles stay in effect for this step and for later steps until they are replaced.
   NOTE(review): the title1 text does not describe the simulated data; it looks like
   a leftover from an example -- confirm whether it should be reworded. */
title1 'Customer Satisfaction Survey';
title2 'Simple Random Sampling';

/* Draw a simple random sample (METHOD=SRS) of 1000 rows from SIM into SampleSRS.
   SEED= fixes the random number stream so the same rows are selected on every run;
   when it is omitted the procedure picks its own seed and the sample changes each time. */
proc surveyselect data=sim method=srs n=1000 seed=20131020
                  out=SampleSRS;
run;
/* Same summary as for the population, this time on the 1000-row sample. */
proc univariate data=SampleSRS;
   histogram x;
   var x;
run;
/* Tag each row with its origin so the two tables can be stacked and compared.
   GROUP is declared with the same explicit length in both steps: otherwise its
   length is inferred from the first literal assigned ($10 vs $6), and stacking
   the tables in the other order would truncate 'population'.
   The LENGTH statement follows SET so that GROUP stays the last column. */
data sim2;
   set sim;
   length group $10;
   group = 'population';
run;   /* explicit step boundary instead of relying on the next DATA statement */

data SampleSRS2;
   set SampleSRS;
   length group $10;
   group = 'sample';
run;
/* Stack the tagged population rows followed by the tagged sample rows. */
data GRAPH;
   set sim2
       SampleSRS2;
run;
/* Store the largest x found in GRAPH in the one-row table TEST (column maxx). */
proc sql;
   create table test as
      select max(x) as maxx
      from GRAPH;
quit;

/* Publish two macro variables derived from that maximum:
     maxx  - the maximum formatted with 8.0 (no decimals)
     maxx2 - one tenth of the maximum, formatted the same way */
data _null_;
   set test;
   call symputx('maxx',  put(maxx,       8.0));
   call symputx('maxx2', put(maxx * 0.1, 8.0));
run;
/* Comparative histograms of x, one panel per level of GROUP (population vs sample).
   NOPRINT suppresses the statistics tables; the INSET places mean, standard
   deviation (labelled 'Std Dev') and median in the north-east corner of each panel. */
title '1000 sample out of 1000000 population';
proc univariate data=GRAPH noprint;
   class group;
   histogram x;
   inset mean std='Std Dev' median / pos=ne format=6.3;
run;