在构造决策树模型时,我的数据量比较大,有几百万条数据。用全部数据建模后画出来的决策树图有点奇怪,但只取样本的前4万条数据建模时,画出来的图又是正常的。这是参数设置的问题,还是其他什么问题?求大神们指教!以下为代码:
# 4. Decision tree models
# 4.1 Model for today's total purchase amount
library(rpart)
library(rpart.plot)

# Fit a regression tree (method = "anova") for total_purchase_amt on the
# yield, interest-rate and indicator columns of purchase_train.
purchase_model2 <- rpart(
  total_purchase_amt ~ mfd_daily_yield + mfd_7daily_yield + Interest_O_N +
    Interest_1_W + Interest_2_W + Interest_1_M + Interest_3_M + Interest_6_M +
    Interest_9_M + Interest_1_Y + 白羊座 + 处女座 + 金牛座 + 巨蟹座 + 摩羯座 +
    射手座 + 狮子座 + 双鱼座 + 双子座 + 水瓶座 + 天秤座 + 天蝎座 + X6081949 +
    X6281949 + X6301949 + X6411949 + X6412149 + X6481949 + X6581949 + X0 + X1,
  data = purchase_train,
  method = "anova"
)

# 4.1.1 Plot the unpruned regression tree.
# faclen must be a number, not a logical: the original TRUE was coerced to 1,
# which replaces factor level names with single letters. 0 keeps full names.
rpart.plot(purchase_model2, type = 2, faclen = 0)

# 4.1.2 Pruning
# 4.1.2.1 Inspect cross-validated error versus cp to choose a cp value.
# `upper` takes a single choice; "size" labels the top axis with tree size.
plotcp(purchase_model2, minline = TRUE, lty = 3, col = 1, upper = "size")

# 4.1.2.2 Prune the tree.
# NOTE(review): cp = 0.038 is hard-coded; confirm it against the plotcp()
# output / purchase_model2$cptable for the data actually used.
p_model <- prune(purchase_model2, cp = 0.038)

# Plot the pruned tree (the original re-plotted the unpruned model here, so
# the effect of pruning was never visible).
rpart.plot(p_model, type = 2, faclen = 0)

# 4.1.3 Predict on the test set with the pruned tree (the original predicted
# with the unpruned model, leaving the pruned tree unused).
purchase_test2 <- predict(p_model, newdata = purchase_test)
# 4.2 Model for today's total redemption amount
# Fit a regression tree (method = "anova") for total_redeem_amt on the
# yield, interest-rate and indicator columns of redeem_train.
redeem_model2 <- rpart(
  total_redeem_amt ~ mfd_daily_yield + mfd_7daily_yield + Interest_O_N +
    Interest_1_W + Interest_2_W + Interest_1_M + Interest_3_M + Interest_6_M +
    Interest_9_M + Interest_1_Y + 白羊座 + 处女座 + 金牛座 + 巨蟹座 + 摩羯座 +
    射手座 + 狮子座 + 双鱼座 + 双子座 + 水瓶座 + 天秤座 + 天蝎座 + X6081949 +
    X6281949 + X6301949 + X6411949 + X6412149 + X6481949 + X6581949 + X0 + X1,
  data = redeem_train,
  method = "anova"
)

# 4.2.1 Plot the unpruned regression tree.
# faclen must be a number, not a logical: the original TRUE was coerced to 1,
# which replaces factor level names with single letters. 0 keeps full names.
rpart.plot(redeem_model2, type = 2, faclen = 0)

# 4.2.2 Pruning
# 4.2.2.1 Inspect cross-validated error versus cp to choose a cp value.
# `upper` takes a single choice; "size" labels the top axis with tree size.
plotcp(redeem_model2, minline = TRUE, lty = 3, col = 1, upper = "size")

# 4.2.2.2 Prune the tree.
# NOTE(review): cp = 0.038 is the same hard-coded value used for the purchase
# model; confirm it against the plotcp() output / redeem_model2$cptable.
p_model <- prune(redeem_model2, cp = 0.038)

# Plot the pruned tree (the original re-plotted the unpruned model here, so
# the effect of pruning was never visible).
rpart.plot(p_model, type = 2, faclen = 0)

# 4.2.3 Predict on the test set with the pruned tree (the original predicted
# with the unpruned model, leaving the pruned tree unused).
redeem_test2 <- predict(p_model, newdata = redeem_test)