全部版块 我的主页
论坛 数据科学与人工智能 数据分析与数据科学 R语言论坛
1771 2
2014-07-11
悬赏 50 个论坛币 未解决
## Chapter 11 Predicting Seabed Hardness Using Random Forest in R

setwd("C:/temp/")
hard <- read.table("hardness.csv", header = TRUE, sep = ",")
class(hard)
hard[hard==9999] <- NA
str(hard)
hard$hardness2 <- 0
hard$hardness2[hard$hardness == "hard"] <- 1
hard2 <- na.omit(hard)
dim(hard2)

library(randomForest)
tuneRF(hard2[,-c(1,17,18)], hard2[,17], ntreeTry = 100)
set.seed(123)
rf.1 <- randomForest(hard2[,-c(1,17,18)], hard2[,17], data = hard2,importance = TRUE, ntree = 500, proximity = TRUE)
varImpPlot(rf.1)
names(rf.1)

dev.rf1 <- predict(rf.1, hard2)
table(hard2[,17], dev.rf1)
result1 <- replicate(100,rfcv(hard2[,-c(1,17,18)], hard2[,17], scale ="non.log", cv.fold =10,step = -1), simplify = FALSE)
error.cv <- sapply(result1, "[[", "error.cv")
matplot(result1[[1]]$n.var, cbind(rowMeans(error.cv), error.cv), type = "l",lwd = c(2, rep(1, ncol(error.cv))), col = c(2, rep(1, ncol(error.cv))), lty = 1, xlab = "Number of variables", ylab = "CV Error")

plot(result1[[1]]$n.var, (1-rowMeans(error.cv))*100, type = "l", lwd = 2, col = 2, lty = 1, xlab = "Number of variables", ylab = "Correct classification rate (%)")

rf.cv <- function (trainx, trainy, cv.fold = 10,
    mtry = function(p) max(1, floor(sqrt(p))), ntree=500, ...) {
    classRF <- is.factor(trainy)
    n <- nrow(trainx)
    p <- ncol(trainx)
    cv.pred <- NULL
    if (classRF) {
        f <- trainy
    }     else {
        stop ("This function is only for categorical response variable")
    }
    nlvl <- table(f)
    idx <- numeric(n)
    for (i in 1:length(nlvl)) {
        idx[which(f == levels(f)[i])] <- sample(rep(1:cv.fold,
            length = nlvl[i]))
    }
    for (i in 1:cv.fold) {
        all.rf <- randomForest(trainx[idx != i, , drop = FALSE],
            trainy[idx != i], trainx[idx == i, , drop = FALSE],
            trainy[idx == i], mtry = mtry(p), ntree=ntree)
        cv.pred[idx == i] <- all.rf$test$predicted
    }

    require(psy)
        data1<-as.data.frame(cbind(cv.pred, trainy))
        kappa.cv<-ckappa(data1)$kappa
        ccr.cv<-sum(diag(table(data1)))/sum(table(data1))*100

    list(kappa.cv = kappa.cv, ccr.cv = ccr.cv, predicted = cv.pred)
}

ccr.cv.1 <-NULL; kappa.cv.1<-NULL; mccr.cv.1<-NULL; mkappa.cv.1 <-NULL
for (i in 1:100){
rfcv.1<-rf.cv(hard2[,-c(1,17,18)], hard2[,17],cv.fold =10, ntree = 500)
ccr.cv.1[i]<-rfcv.1$ccr.cv;kappa.cv.1[i]<-rfcv.1$kappa.cv
mccr.cv.1[i]<-mean(ccr.cv.1);mkappa.cv.1[i]<-mean(kappa.cv.1)}

x <- c(1:100)
par(mfrow=c(2,1), font.axis = 2, font.lab = 2)
plot(ccr.cv.1 ~ x, xlab = "Iteration", ylab = "Correct classification rate")
points(mccr.cv.1 ~ x, col = 2)
plot(kappa.cv.1 ~ x, xlab = "Iteration", ylab = "Kappa")
points(mkappa.cv.1 ~ x, col = 2)






二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

全部回复
2014-7-11 12:08:11
要数据和具体章节的可以加我QQ465620024,感激不尽
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

2014-7-11 12:15:55
还没学到能解决楼主问题的程度。。==
二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

相关推荐
栏目导航
热门文章
推荐文章

说点什么

分享

扫码加好友,拉您进群
各岗位、行业、专业交流群