soporaeternus 发表于 2011-8-18 16:16 
楼上和楼楼上,请问下
一个data步遍历一次输出到n张表快
还是分n个data步用point输出快啊?
这个问题不懂,应该和数据集大小,和要分成多少块有关系。
做了个小测试:
740 data test;
741 do i=1 to 1000000;
742 varlue=ranuni(0);
743 output;
744 end;
745 run;
746
747 %macro split(origin=test, dsnum=, times=100);
748
749 %put %str( );
750 %put %str(If we want to split a dataset into &dsnum datasets and do it &times times:);
751 *** Split in one data step;
752 %let totaltime=0;
753 %do j=1 %to &times;
754 %let timestart=%sysfunc(time());
755 data %do i=1 %to &dsnum;
756 test&i
757 %end;
758 ;
759 set &origin nobs=n;
760 obs=ceil(n/&dsnum);
761 %do i=1 %to &dsnum;
762 %if &i ne 1 %then %do; else %end;
763 if %eval(&i-1)*obs<_n_<=&i*obs then output test&i;
764 %end;
765 run;
766 %let timestop=%sysfunc(time());
767 %let timeused=%sysevalf(&timestop-&timestart);
768 %let totaltime=%sysevalf(&totaltime+&timeused);
769 %end;
770
771 %let totaltime=%sysfunc(round(&totaltime,0.01));
772 %put %str( when spliting in one data step, we need: &totaltime seconds.);
773
774
775 *** Split in separate data steps;
776 %let totaltime=0;
777
778 %do j=1 %to &times;
779 %do i=1 %to &dsnum;
780 data _null_;
781 if 0 then set &origin nobs=n;
782 obs=ceil(n/&dsnum);
783 call symputx("start",(&i-1)*obs+1);
784 call symputx("end",min(&i*obs,n));
785 run;
786
787 %let timestart=%sysfunc(time());
788 data test&i;
789 do obs=&start to &end;
790 set &origin nobs=n point=obs;
791 output;
792 end;
793 stop;
794 run;
795 %let timestop=%sysfunc(time());
796 %let timeused=%sysevalf(&timestop-&timestart);
797 %let totaltime=%sysevalf(&totaltime+&timeused);
798 %end;
799 %end;
800
801 %let totaltime=%sysfunc(round(&totaltime,0.01));
802 %put %str( when spliting in separate data steps, we need: &totaltime seconds.);
803 %put %str( );
804 %put %Str( );
805 %mend;
806
807 options nonotes;
808 %split(dsnum=10,times=10)
If we want to split a dataset into 10 datasets and do it 10 times:
when spliting in one data step, we need: 4.11 seconds.
when spliting in separate data steps, we need: 6.31 seconds.
809
810
811 %split(dsnum=10,times=100)
If we want to split a dataset into 10 datasets and do it 100 times:
when spliting in one data step, we need: 41.36 seconds.
when spliting in separate data steps, we need: 54.25 seconds.
812
813 %split(dsnum=100,times=10)
If we want to split a dataset into 100 datasets and do it 10 times:
when spliting in one data step, we need: 28.47 seconds.
when spliting in separate data steps, we need: 14.59 seconds.
814 %split(dsnum=1000,times=10)
If we want to split a dataset into 1000 datasets and do it 10 times:
when spliting in one data step, we need: 371.59 seconds.
when spliting in separate data steps, we need: 104.97 seconds.
815 %split(dsnum=4,times=1000)
If we want to split a dataset into 4 datasets and do it 1000 times:
when spliting in one data step, we need: 349.94 seconds.
when spliting in separate data steps, we need: 492.72 seconds.