看到楼主总结的甚好,我来回复个
1 、set a b
纵向合并a、b数据集,先生成pdv,有些变量在另一个数据集里面没有,合并的时候对应的变量为缺失值。
2、双set语句(set a;set b)
先生成pdv,对于a、b数据集里面都有的变量则b数据值代替a里面的数据值。对于没有的变量则保存下来了。而且观测值个数取决于最小的那个。
例:
data a;
input x y;
cards;
1 2
2 3
;
run;
data b;
input x ;
cards;
2
3
;
run;
data c;
set a ;
set b ;
by x;
run;
proc print data=c;
run;
3、merge语句(横向合并)
注意:merge合并的数据必须根据某一变量先排序了,之后才能根据某一变量合并。
Merge语句还有一个重要的关键字可以使用(in)。用in判断可以实现类似于sql里面左联右连。
4、sql语句
Sql语句之强大不用多说,此处列举一些注意地方吧!
(1) merge语句合并与sql/where合并不太一样(由旨多对多的连接)
sql是生成笛卡尔积,之后根据where判断,而merge语句是根据排序后的顺序,merge连接是循序的(根据sas advance书上应该是如此翻译,不太好理解)。请大家指教。
例:为什么不是1-1不是4个,merge到底是怎么弄的?
data a;
input x y;
cards;
1 2
1 3
2 3
;
run;
data b;
input x ;
cards;
1
1
2
3
;
run;
data c;
merge a b;
by x;
run;
proc print data=c;
run;
Data sets
(1)proc sql;
内外连接的实现的基本原理:首先生成两个数据表的迪卡尔积,然后再根据where语句来选择符合条件的数据作为输出结果
inner join: where a.XX=b.XX; 仅返回匹配的数据,最多可以有32个表同时进行内连接
left outer join: on a.XX=b.XX; 左表(a)的记录将会全部表示出来,而右表(b)只会显示符合搜索条件的记录(例子中为: A.aID = B.bID).
B表记录不足的地方均为NULL
right outer join: on a.XX=b.XX;
full outer join: on a.XX=b.XX;
(2) One-to-One Reading:the new data set contains all the variables from all the input data sets. If the data sets contain same-named variables, the values that are read in from the last data set replace those that were read in from earlier ones
data one2one;
set a;
set b;
run; Concatenating:the new data set contains all the variables and the total number of observations from all input data sets
data concat;
set a b;
run; Interleaving:input data set must be sorted or indexed in ascending order based on the BY variable(s),new data set contains all the variables and the total number of observations from all input data sets.
data interlv;
set a b;
by num;
run; Match-Merging: input data set must be sorted or indexed in ascending order based on the BY variable(s). During match-merging, SAS sequentially checks each observation of each data set to see whether the BY values match, then writes the combined observation to the new data set.
data merged;
merge a b;
by num;
run;
Data
Vertically:
Combining Raw Data Files Using a FILENAME Statement
filename qtr1 ('c:\data\month1.dat''c:\data\month2.dat'
'c:\data\month3.dat');
data work.firstqtr;
infile qtr1;
input Flight $ Origin $ Dest $
Date : date9. RevCargo : comma15.2;
run;
Combining Raw Data Files Using an INFILE Statement
data quarter (drop=monthnum midmon lastmon);
monthnum=month(today());
midmon=month(intnx('month',today(),-1));
lastmon=month(intnx('month',today(),-2));
do i = monthnum, midmon, lastmon;
nextfile="c:\sasuser\month"
!!compress(put(i,2.)!!".dat",' ');
do until (lastobs);
infile temp filevar=nextfile end=lastobs;
input Flight $ Origin $ Dest $ Date : date9.
RevCargo : comma15.2;
output;
end;
end;
stop;
run;
Horizontally:
Combining Data with the IF-THEN/ELSE Statement
data mylib.employees_new;
set mylib.employees;
if IDnum=1001 then Birthdate='01JAN1963'd;
else if IDnum=1002 then Birthdate='08AUG1946'd;
else if IDnum=1003 then Birthdate='23MAR1950'd;
else if IDnum=1004 then Birthdate='17JUN1973'd;
run;
Combining Data with the ARRAY Statement
data mylib.employees_new;
array birthdates{1001:1004} _temporary_ ('01JAN1963'd
'08AUG1946'd '23MAR1950'd '17JUN1973'd);
set mylib.employees;
Birthdate=birthdates(IDnum);
run;
Combining Data with the FORMAT Procedure
proc format;
value $birthdate '1001' = '01JAN1963'
'1002' = '08AUG1946'
'1003' = '23MAR1950'
'1004' = '17JUN1973';
run;
data mylib.employees_new;
set mylib.employees;
Birthdate=input(put(IDnum,$birthdate.),date9.);
run;
Performing a DATA Step Match-Merge
proc sort data=sasuser.expenses out=expenses;
by flightid date;
run;
proc sort data=sasuser.revenue out=revenue;
by flightid date;
run;
data revexpns (drop=rev1st revbusiness revecon
expenses);
merge expenses(in=e) revenue(in=r);
by flightid date;
if e and r;
Profit=sum(rev1st, revbusiness, revecon,
-expenses);
run;
proc sort data=revexpns;
by dest;
run;
proc sort data=sasuser.acities out=acities;
by code;
run;
data sasuser.alldata;
merge revexpns(in=r) acities
(in=a rename=(code=dest)
keep=city name code);
by dest;
if r and a;
run;
Performing a PROC SQL Join
proc sql;
create table sqljoin as
select revenue.flightid,
revenue.date format=date9.,
revenue.origin, revenue.dest,
sum(revenue.rev1st,
revenue.revbusiness,
revenue.revecon)
-expenses.expenses as Profit,
acities.city, acities.name
from sasuser.expenses, sasuser.revenue,
sasuser.acities
where expenses.flightid=revenue.flightid
and expenses.date=revenue.date
and acities.code=revenue.dest
order by revenue.dest, revenue.flightid,
revenue.date;
quit;
Working with a Many-to-Many Match
proc sql;
create table flightemp as
select flightschedule.*, firstname, lastname
from sasuser.flightschedule, sasuser.flightattendants
where flightschedule.empid=flightattendants.empid;
quit;
data fightemps3(drop=empnum jobcode)
set sasuser.flightschedule;
do i=1 to num;
set sasuser.flightattendants
(rename=(empid=empnum))
nob=num point=i;
if empid=empnum then output;
end;
run;
Combining Summary Data and Detail Data
proc means data=sasuser.monthsum noprint;
var revcargo;
output out=sasuser.summary sum=Cargosum;
run;
data sasuser.percent1;
if _n_=1 then set sasuser.summary
(keep=cargosum);
set sasuser.monthsum
(keep=salemon revcargo);
PctRev=revcargo/cargosum;
run;
data sasuser.percent2(drop=totalrev);
if _n_=1 then do until(lastobs);
set sasuser.monthsum(keep=revcargo)
end=lastobs;
totalrev+revcargo;
end;
set sasuser.monthsum (keep=salemon revcargo);
PctRev=revcargo/totalrev;
run;
Using an Index to Combine Data
data work.profit work.errors;
set sasuser.dnunder;
set sasuser.sale2000(keep=routeid
flightid date rev1st revbusiness
revecon revcargo)key=flightdate;
if _iorc_=0 then do;
Profit=sum(rev1st, revbusiness, revecon,
revcargo, -expenses);
output work.profit;
end;
else do;
_error_=0;
output work.errors;
end;
run;