原始数据
head(data)
大类 数量 天后下单
1 葡提类 5 1
2 葡提类 9 1
3 葡提类 6 1
4 葡提类 8 1
5 葡提类 9 1
6 枣 3 1
自己编的密度函数
密度
<-function(mydata,x){
myvar=var(mydata)
fx<-(2*pi_set(10000)*myvar)^-0.5*exp(-(x-mean(mydata))^2/2*myvar)
return(fx)}
naive.bayes.prediction<-function(condition.vec){
probility_1_day<-
sum((data[,1])==condition.vec[1]&data[,3]==1)/sum(data[,3]==1)*密度(data[,3],1)*密度(subset(data,天后下单==1)[,2],condition.vec[2])
probility_2_day<-
sum((data[,1])==condition.vec[1]&data[,3]==2)/sum(data[,3]==2)* 密度(data[,3],2) *密度(subset(data,天后下单==2)[,2],condition.vec[2])
probility_3_day<-
sum((data[,1])==condition.vec[1]&data[,3]==3)/sum(data[,3]==3)* 密度(data[,3],3) *密度(subset(data,天后下单==3)[,2],condition.vec[2])
probility_4_day<-
sum((data[,1])==condition.vec[1]&data[,3]==4)/sum(data[,3]==4)* 密度(data[,3],4)*密度(subset(data,天后下单==4)[,2],condition.vec[2])
probility_5_day<-
sum((data[,1])==condition.vec[1]&data[,3]==5)/sum(data[,3]==5)* 密度(data[,3],5) *密度(subset(data,天后下单==5)[,2],condition.vec[2])
probility_6_day<-
sum((data[,1])==condition.vec[1]&data[,3]==6)/sum(data[,3]==6)* 密度(data[,3],6) *密度(subset(data,天后下单==6)[,2],condition.vec[2])
probility_7_day<-
sum((data[,1])==condition.vec[1]&data[,3]==7)/sum(data[,3]==7)* 密度(data[,3],7) *密度(subset(data,天后下单==7)[,2],condition.vec[2])
天数<-c('probility_1_day','probility_2_day','probility_2_day'
)
day<-c('probility_1_day','probility_2_day','probility_3_day','probility_4_day','probility_5_day','probility_6_day','probility_7_day')
dentisty<-c(probility_1_day,probility_2_day,probility_3_day,probility_4_day,probility_5_day,probility_6_day,probility_7_day)
jieguo<-data.frame(day,dentisty)
return(jieguo)}
跑一下
> naive.bayes.prediction(c("葡提类",4))
Error in x - mean(mydata) : 二进列运算符中有非数值参数
老是出现错误
sum((data[,1])=="葡提类"&data[,3]==4)/sum(data[,3]==4)*密度(subset(data,天后下单==4)[,2],4)*密度(data[,3],4)
[1] 1.074741e-06
挨个拎出来又可以,实在是找不出原因,望大神相助