# Show the raw iris data set (shipped with R's `datasets` package).
iris

# Species levels in factor order; the numeric codes below follow this order.
k <- levels(iris$Species)
k

# Turn the Species column of the data frame into numeric codes.
# as.numeric() on a factor returns the level code of every row, so the
# encoding stays correct even when the rows are not grouped by species.
gop <- iris$Species
v <- as.numeric(gop)

# Replace the factor column (column 5) with the numeric codes. Naming the
# argument inside cbind() sets the column name; `names(v) <- "Species"` would
# only name the first element of the vector and leave the column called "v".
iris1 <- cbind(iris[, -5], Species = v)
iris1
##均值的检验##
# Two-sample Hotelling T^2 test for equality of two mean vectors.
#
# The two samples are pooled as if they shared one covariance matrix: the
# within-sample sums of squares and products are added and used to scale the
# difference of the column means.
#
# Arguments:
#   data1, data2  numeric matrix or data frame, one observation per row; both
#                 must have the same number of columns.
#   alpha         significance level used for the critical value.
#
# Returns a list with
#   p.value  upper-tail probability of the F statistic (plain scalar),
#   adjust   named vector holding the F statistic and the critical value,
#   result   decision string (reject / accept the null hypothesis).
two.mu0 <- function(data1, data2, alpha) {
  n1 <- nrow(data1)
  n2 <- nrow(data2)
  p <- ncol(data1)
  if (ncol(data2) != p) {
    stop("data1 and data2 must have the same number of columns", call. = FALSE)
  }

  # Denominator degrees of freedom of the F approximation.
  df2 <- n1 + n2 - p - 1
  if (df2 <= 0) {
    stop("need n1 + n2 - p - 1 > 0 observations for the F test", call. = FALSE)
  }

  # Pooled within-sample sums of squares and products.
  w <- (n1 - 1) * var(data1) + (n2 - 1) * var(data2)
  mean_diff <- colMeans(data1) - colMeans(data2)

  # Quadratic form diff' W^-1 diff, solved directly instead of inverting W;
  # sum() keeps the statistic a plain scalar rather than a 1x1 matrix.
  quad <- sum(mean_diff * solve(w, mean_diff))
  T0 <- (n1 * n2 / (n1 + n2)) * (n1 + n2 - 2) * quad
  F0 <- df2 / ((n1 + n2 - 2) * p) * T0

  a1 <- qf(1 - alpha, p, df2)
  # lower.tail = FALSE keeps tiny p-values; 1 - pf(...) rounds them to 0.
  p_v <- pf(F0, p, df2, lower.tail = FALSE)

  rj <- c(F0, a1)
  names(rj) <- c("F0", "临界值")
  result <- if (F0 > a1) "拒绝原假设" else "接受原假设"

  list(p.value = p_v, adjust = rj, result = result)
}
##马氏距离判别##
library(dplyr)
# Mahalanobis-distance discriminant analysis with a pairwise mean pre-check.
#
# Arguments:
#   data  numeric matrix or data frame; the last column holds the class label
#         coded 1..k, every other column is a feature.
#   k     number of classes.
#
# Steps:
#   1. Every pair of classes is compared with two.mu0() at level 0.01, using
#      the feature columns only.
#   2. If all pairs differ significantly, each row is assigned to the class
#      with the smallest squared Mahalanobis distance, computed with that
#      class's own mean vector and covariance matrix.
#
# Returns a list with the sample size, the predicted class of every row and
# the misclassification rate. If some pair of classes is not significantly
# different, prints "data error" and returns that string invisibly.
dis <- function(data, k) {
  n <- nrow(data)
  m <- ncol(data)
  gop <- data[, m]
  if (!all(gop %in% seq_len(k))) {
    stop("class labels in the last column must be coded 1..k", call. = FALSE)
  }

  # Feature columns only: the label column is constant within a class and
  # would make every covariance matrix singular.
  x <- data[, seq_len(m - 1), drop = FALSE]

  # Pairwise tests of equal class means; the test is symmetric, so each pair
  # is evaluated once and mirrored into the p-value matrix.
  d <- matrix(0, nrow = k, ncol = k)
  for (i in seq_len(k - 1)) {
    for (j in (i + 1):k) {
      pair_test <- two.mu0(
        x[gop == i, , drop = FALSE],
        x[gop == j, , drop = FALSE],
        0.01
      )
      d[i, j] <- pair_test$p.value
      d[j, i] <- d[i, j]
    }
  }

  # Only discriminate when every pair of classes is significantly different.
  if (max(d) >= 0.01) {
    print("data error")
    return(invisible("data error"))
  }

  # r[p, q] = squared Mahalanobis distance from row q to the centre of class p.
  r <- matrix(0, k, n)
  for (p in seq_len(k)) {
    xp <- x[gop == p, , drop = FALSE]
    r[p, ] <- stats::mahalanobis(x, colMeans(xp), var(xp))
  }

  # Predicted class = index of the nearest class centre.
  critical <- apply(r, 2, which.min)

  # Row-wise comparison of true and predicted labels.
  wr <- sum(critical != gop)
  er <- wr / n

  list("样本总量" = n, "判别结果" = critical, "判错率" = er)
}
# Convert the numerically coded data frame to a numeric matrix, then classify
# its rows into the three species groups.
iris1 <- as.matrix(iris1)
dis(data = iris1, k = 3)