可以，希望对你有所帮助。下面是我提取双色球开奖数据所用的程序。
/*
 * Macro lehecai: download lottery draw-list pages from baidu.lehecai.com
 * and load the parsed draws into two tables.
 *
 * Positional parameters:
 *   sumpage - number of result pages to request; pages 1 to sumpage are
 *             fetched one after another.
 *   start   - value sent as the "ds" query parameter of the request.
 *   end     - value sent as the "de" query parameter of the request.
 *
 * Output:
 *   cdh.lehe1 - per draw: dt, issue, seven numeric columns c1-c7, sell.
 *   cdh.lehe2 - per draw: dt, issue, the same seven values concatenated
 *               into the character column number, sell.
 *   Both tables are created empty at the start of each call and then
 *   appended to once per page. The libref cdh must already be assigned.
 *
 * Side effects: writes work.a through work.g and uses fileref lehecai.
 */
%macro lehecai(sumpage,start,end);
    /* Keep every helper macro variable out of the caller's symbol table. */
    %local i lehe_url page ds de;

    /* Each of these resolves to its own name with a literal leading
       ampersand, so the query-string separators in the URL below come
       through as plain text instead of unresolved macro references. */
    %let page = %nrstr(&page);
    %let ds = %nrstr(&ds);
    %let de = %nrstr(&de);

    proc sql noprint;
        create table cdh.lehe1(
            dt date label="开奖日期" format=yymmdd10.,
            issue varchar(10) label="期号",
            c1 numeric,
            c2 numeric,
            c3 numeric,
            c4 numeric,
            c5 numeric,
            c6 numeric,
            c7 numeric,
            sell numeric label="销量"
        );
        create table cdh.lehe2(
            dt date label="开奖日期" format=yymmdd10.,
            issue varchar(10) label="期号",
            number varchar(14) label="中奖号码",
            sell numeric label="销量"
        );
    quit;

    %do i = 1 %to &sumpage;
        /* Build the request URL once so the logged text and the URL that
           is actually fetched can never drift apart. */
        %let lehe_url = http://baidu.lehecai.com/lottery/draw/list/50?lottery_type=50&page=&i&ds=&start&de=&end;
        %put &lehe_url;
        filename lehecai url "&lehe_url" recfm=v;

        /* Read every line of the page into one variable-length field. */
        data work.a;
            infile lehecai length=len;
            length content $32766;
            input content $varying32766. len;
        run;

        /* The page has been copied to work.a; release the fileref. */
        filename lehecai clear;

        /* Keep only the lines carrying a draw cell and reduce each of
           them to its digits and hyphens. */
        data work.b;
            length content_final $20;
            set work.a;
            retain tag_rx;
            /* Compile the tag-stripping regex once, not once per row. */
            if _n_ = 1 then tag_rx = prxparse('s/<.+?>//s');

            /* Lines without any of these markers are not draw cells. */
            if not (index(content,'<td class="td1">') > 0
                 or index(content,'<a href="/lottery/draw/view/50?phase=') > 0
                 or index(content,'<span class="ball_1">') > 0
                 or index(content,'<span class="ball_2">') > 0
                 or index(content,'<td class="td4">') > 0) then delete;

            /* The first two matching lines are discarded; presumably they
               are cells that precede the first draw row - TODO confirm
               against the page markup. */
            cell_no + 1;
            if cell_no <= 2 then delete;

            /* Strip every <...> tag, then keep digits and hyphens only. */
            call prxchange(tag_rx, -1, content);
            content_final = compress(content, '0123456789-', 'k');
            keep content_final;
        run;

        /* Number the cells 1..10 repeatedly: ten consecutive cells are
           treated as one draw. */
        data work.c;
            set work.b;
            id = mod(_n_ - 1, 10) + 1;
        run;

        /* Group cells by their position within a draw. EQUALS keeps the
           page order inside each group, which the transposes rely on. */
        proc sort data=work.c equals;
            by id;
        run;

        /* First transpose: one row per cell position, one column per draw. */
        proc transpose data=work.c out=work.d(drop = id _name_);
            var content_final;
            by id;
        run;

        /* Second transpose: one row per draw with columns col1-col10. */
        proc transpose data=work.d out=work.e(drop = _name_);
            var _all_;
        run;

        /* Typed layout with the seven values in separate columns. */
        data work.f(drop = col1-col10 k);
            length dt 8 issue $10 c1-c7 8 sell 8;
            set work.e;
            array raw_val{7} col3-col9;
            array num_val{7} c1-c7;
            format dt yymmdd10.;
            dt = input(col1, yymmdd10.);
            issue = col2;
            do k = 1 to 7;
                num_val{k} = input(raw_val{k}, 8.);
            end;
            sell = input(col10, comma18.);
        run;

        /* Typed layout with the seven values concatenated into one string. */
        data work.g(keep = dt issue number sell);
            length dt 8 issue $10 number $14 sell 8;
            set work.e;
            format dt yymmdd10.;
            dt = input(col1, yymmdd10.);
            issue = col2;
            number = cats(of col3-col9);
            sell = input(col10, comma18.);
        run;

        /* Append this page; explicit column lists keep the inserts
           independent of the physical column order of the targets. */
        proc sql noprint;
            insert into cdh.lehe1 (dt, issue, c1, c2, c3, c4, c5, c6, c7, sell)
                select dt,
                       issue,
                       c1,
                       c2,
                       c3,
                       c4,
                       c5,
                       c6,
                       c7,
                       sell
                from work.f;
            insert into cdh.lehe2 (dt, issue, number, sell)
                select dt,
                       issue,
                       number,
                       sell
                from work.g;
        quit;
    %end;
%mend;
/* Run the scraper: request result pages 1 to 45, passing 2001-01-01 as the
   ds query parameter and 2012-12-31 as the de query parameter. */
%lehecai(45,2001-01-01,2012-12-31);