、用R语言模拟一个六元线性回归模型=β+,i=1...n,其中服从标准正态分布的六维随机数,β=(2,3,0,0,0,3),~N(0,0,).
1.分别用逐步回归法和和Lars方法做n=50,100,200时的变量选择,列表显示正确选择,欠拟合和过度拟合的比例,并分析得到的结果。
这是题目,各位大大们n=50;
loop=100;
Correct=numeric(loop);
Overfit=numeric(loop);
Underfit=numeric(loop);
for (i in 1:loop)
x1<-matrix(rnorm(n,0,1),n,1);
x2<-matrix(rnorm(n,0,1),n,1);
x3<-matrix(rnorm(n,0,1),n,1);
x4<-matrix(rnorm(n,0,1),n,1);
x5<-matrix(rnorm(n,0,1),n,1);
x6<-matrix(rnorm(n,0,1),n,1);
y=rnorm(n,0,0.01)+2*x1+3*x2+3*x6;
fm=lm(y~x1+x2+x3+x4+x5+x6)
x=data.frame(x1,x2,x3,x4,x5,x6)
xx=as.matrix(x)
library(lars)
object <- lars(xx,y,"lar")
小弟初学R语言,写到这里就写不下去了,后面的判断条件什么的我都不清楚,希望大大们能帮看看
这里有份lasso的范例,n=50;
loop=100;
Correct=numeric(loop);
Overfit=numeric(loop);
Underfit=numeric(loop);
for (i in 1:loop)
x1<-matrix(rnorm(n,0,1),n,1);
x2<-matrix(rnorm(n,0,1),n,1);
x3<-matrix(rnorm(n,0,1),n,1);
x4<-matrix(rnorm(n,0,1),n,1);
x5<-matrix(rnorm(n,0,1),n,1);
x6<-matrix(rnorm(n,0,1),n,1);
y=2*x1+3*x2+3*x6+rnorm(n,0,0.01);
fm=lm(y~x1+x2+x3+x4+x5+x6)
x=data.frame(x1,x2,x3,x4,x5,x6)
xx=as.matrix(x);
y=c(y);
library(msgps)
al=msgps(xx,y,penalty="alasso",gamma=1,lambda=0);
a2=coef(al);a2
a3=a2[,4];a3;a3[2]
a4=a3[c(4,5,6)];a4
b=c(2,3,0,0,0,3);
if (a3[2]!=0&&a3[3]!=0&&a3[7]!=0&&sum(a4^2)==0)
{
Correct[i]=1;
}else
{Correct[i]=0;
}
if (a3[2]!=0&&a3[3]!=0&&a3[7]!=0&&sum(a4^2)!=0)
{
Overfit[i]=1;
}else
{Overfit[i]=0;
}
if (a3[2]==0|a3[3]==0|a3[7]==0)
{
Underfit[i]=1;
}else
{Underfit[i]=0;
}}
correct=mean(Correct)
overfit=mean(Overfit)
underfit=mean(Underfit)
data.frame(correct, overfit, underfit)
但是老师要求我们用lars