全部版块 我的主页
论坛 数据科学与人工智能 数据分析与数据科学 SAS专版
1025 5
2020-03-08
在sas运行时,用merge合并之后,会出现很多重复值,现在代码如下不知道哪里错了:/*调入工作状态*/
QQ图片20200308165403.jpg QQ图片20200308165356.jpg data age;
set SYC.age;
run;
data dbdbum;
set SYC.dbdbum;
run;
data degree;
set SYC.degree;
run;
data earings;
set SYC.earings;
run;
data lev;
set SYC.lev;
run;
data presmn;
set SYC.presmn;
run;
data quality;
set SYC.quality;
run;
data revenuerate;
set SYC.revenuerate;
run;
data roa;
set SYC.roa;
run;
data sex;
set SYC.sex;
run;
data size;
set SYC.size;
run;
data big;
set SYC.big;
run;
data tenure;
set SYC.tenure;
run;


/*合并数据集*/
/*1.删除季度值 取出股票代码*/
data age;
set age;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data dbdbum;
set dbdbum;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data degree;
set degree;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data earings;
set earings;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data lev;
set lev;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data presmn;
set presmn;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data revenuerate;
set revenuerate;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data roa;
set roa;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data sex;
set sex;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data size;
set size;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data big;
set big;
month=substr(year,6,2);
year=substr(year,1,4);
run;
data tenure;
set tenure;
month=substr(year,6,2);
year=substr(year,1,4);
run;




/*2.选取合并变量,统一合并变量的格式,age rate education earingspershare lev presmn result result1 growthrateoftotalrevenue roa sex logarithmoftotalassets totalassets  tenure*/
data age;
set age;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data age;
set age;
drop code year;
rename code1=code;
rename year1=year;
run;
data dbdbum;
set dbdbum;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data dbdbum;
set dbdbum;
drop code year;
rename code1=code;
rename year1=year;
run;
data degree;
set degree;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data degree;
set degree;
drop code year;
rename code1=code;
rename year1=year;
run;
data earings;
set earings;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data earings;
set earings;
drop code year;
rename code1=code;
rename year1=year;
run;
data lev;
set lev;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data lev;
set lev;
drop code year;
rename code1=code;
rename year1=year;
run;
data presmn;
set presmn;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data presmn;
set presmn;
drop code year;
rename code1=code;
rename year1=year;
run;
data quality;
set quality;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data quality;
set quality;
drop code year;
rename code1=code;
rename year1=year;
run;
data revenuerate;
set revenuerate;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data revenuerate;
set revenuerate;
drop code year;
rename code1=code;
rename year1=year;
run;
data roa;
set roa;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data roa;
set roa;
drop code year;
rename code1=code;
rename year1=year;
run;
data sex;
set sex;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data sex;
set sex;
drop code year;
rename code1=code;
rename year1=year;
run;
data size;
set size;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data size;
set size;
drop code year;
rename code1=code;
rename year1=year;
run;
data big;
set big;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data big;
set big;
drop code year;
rename code1=code;
rename year1=year;
run;
data tenure;
set tenure;
code1=input(code,best12.);
year1=input(year,best12.);
run;
data tenure;
set tenure;
drop code year;
rename code1=code;
rename year1=year;
run;


/*3.依据合并变量排序*/
proc sort data=age;
by code year;
run;
proc sort data=dbdbum;
by code year;
run;
proc sort data=degree;
by code year;
run;
proc sort data=earings;
by code year;
run;
proc sort data=lev;
by code year;
run;
proc sort data=presmn;
by code year;
run;
proc sort data=quality;
by code year;
run;
proc sort data=revenuerate;
by code year;
run;
proc sort data=roa;
by code year;
run;
proc sort data=sex;
by code year;
run;
proc sort data=size;
by code year;
run;
proc sort data=big;
by code year;
run;
proc sort data=tenure;
by code year;
run;

/*4.合并*/
data total;
merge age dbdbum degree earings lev presmn quality revenuerate roa sex size big tenure;
by code year;
run;



二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

全部回复
2020-3-9 09:35:31
有可能每个数据存在多条一模一样的数据,你可以nodupkey一下
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2020-3-10 13:16:05
Jia1Zhao 发表于 2020-3-9 09:35
有可能每个数据存在多条一模一样的数据,你可以nodupkey一下
大神  什么意思啊   新手小白 刚刚接触这个软件-   这个代码还不太会写。 什么地方用你说的那个nodupkey
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2020-3-11 09:45:44
proc sql noprint;
create table final as
select a.*,b.,c.,....,m.
from age a left join dbdbum b on a.code=b.code and a.year=b.year
                left join (之后剩余的数据集,同上写法,b.,c.,e.跟的是你要获得变量,code,year不需要重复获取)
order by code year;
quit;
这里的;,都得是英文,没有具体数据不晓得怎么处理,大致程序是这样这个等同于merge by,可以免去合并前排序的步骤
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2020-3-11 12:18:53
Jia1Zhao 发表于 2020-3-11 09:45
proc sql noprint;
create table final as
select a.*,b.,c.,....,m.
DATA example;
PROC IMPORT OUT= example
            DATAFILE= "C:\Users\Li-ion\Desktop\神经网络模拟0426生物量\数据\生物量0426.xlsx"
            DBMS=EXCEL REPLACE;
     SHEET='SAS数据'n;
     GETNAMES=YES;
     MIXED=NO;
     SCANTEXT=YES;
     USEDATE=YES;
     SCANTIME=YES;
RUN;
PROC PLS data=example cv=one method=pls cvtest details plots=vip;
model biomass=x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 /solution;
RUN;
PROC PRINT data=vip;
RUN;
ODS OUTPUT CLOSE;


大佬请您给看看我这个 我想让他显示变量投影重要性VIP的数值,但是一直报错说
ERROR: 文件“WORK.VIP.DATA”不存在。
我该怎么改呢,谢谢了
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2020-3-11 14:17:05
w无名小卒 发表于 2020-3-11 12:18
DATA example;
PROC IMPORT OUT= example
            DATAFILE= "C:\Users\Li-ion\Desktop\神经网络模 ...
proc pls; run;,在model下面加一个output out=你要命名的sas数据集名
你这个不定义输出的数据集,那肯定是没有的
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

栏目导航
热门文章
推荐文章

说点什么

分享

扫码加好友,拉您进群
各岗位、行业、专业交流群