[backcolor=rgba(255, 255, 255, 0.917969)]求助大家,我需要把boston housing data分成两个set,400个data在training set 里面, 106个在test.set里。
[backcolor=rgba(255, 255, 255, 0.917969)]我需要对trainingset 的data用ordinary least square method和ridge regression method 去fit。程序到最后一步出错了,大家帮忙看看行吗
[backcolor=rgba(255, 255, 255, 0.917969)]
[backcolor=rgba(255, 255, 255, 0.917969)]library(mlbench)
data(BostonHousing)
attach(BostonHousing)
pairs(BostonHousing, main="BostonHousing data",pch=".",cex=3)
BostonHousing$lstat <- log(BostonHousing$lstat)
BostonHousing$rm <- BostonHousing$rm^2
BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1,labels = c("no", "yes"))
BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE)
x=seq(1,506,1)
y=sample(x,size=506,replace=F)
train.set=BostonHousing[y[1:400],]
test.set=BostonHousing[y[401:506],]
#OLS method
boston.lm = lm( medv~.,data=train.set)
summary(boston.lm)
opar=par(mfrow=c(2,2),oma=c(0,0,1.1,0),mar=c(4.1,4.1,2.1,1.1))
plot(boston.lm)
par(opar)
[backcolor=rgba(255, 255, 255, 0.917969)]
[backcolor=rgba(255, 255, 255, 0.917969)]#ridge method
[backcolor=rgba(255, 255, 255, 0.917969)]options(continue=" ")
ridge=function(x,y,lam){
n=length(y)
p=ncol(x)
xm=apply(x,2,mean)
xs=apply(x,2,sd)
ym=mean(y)
ys=sd(y)
X=t((t(x)-xm)/xs)#standardized x matrix
Y=(y-ym)/ys #standardized x matrix
tmp1=t(X)%*%X
tmp2=t(X)%*%Y
l.lam=length(lam)
out=list(gcv=rep(0,l.lam),coef=matrix(0,(p+1),l.lam),coefs=matrix(0,p,l.lam))
for (i in 1:l.lam){
tmp3=solve(tmp1+diag(lam,p))
beta=as.vector(tmp3%*%tmp2) #standardized coefficient
tmp4=X%*%tmp3%*%t(X)
yhat=tmp4%*%Y
out$gcv=mean((Y-yhat)^2)/((1-mean(diag(temp4)))^2) #GCV values
coef=(ys/xs)*beta
intercept=ym-sum(coef*xm)
out$coef[,i]=c(intercept,coef)
out$coefs[,i]=beta
}
out
}
lam=seq(0,0.1,0.001)
try=ridge(train.set[,1:13],train.set[,14],lam)