我觉得可以有以下几种方法:
1、直接删除
2、均值替代
3、最近邻方法填补:对每条含缺失值的记录,找出与它距离最近的 10 条记录,用这些记录在对应列上的中位数(数值型变量)或众数(分类变量)来填补,代码如下:
library(cluster)
# Work on at most the first 500 rows of tData. Clamping the index with
# seq_len(min(...)) avoids the all-NA padding rows that tData[1:500, ] would
# create when tData has fewer than 500 rows.
tData2 <- tData[seq_len(min(500L, nrow(tData))), ]

# Full square matrix of pairwise dissimilarities between the rows of tData2.
# stand = TRUE asks daisy() to standardise the measurements first.
dist.mtx <- as.matrix(daisy(tData2, stand = TRUE))
# Return a single "typical" value for a vector, ignoring missing entries.
#
# x: a numeric vector, a factor, or any vector that as.factor() accepts.
#
# Returns the median for numeric input; otherwise the most frequent level
# as a character string (the first such level wins on ties). When there is
# no non-missing value to count, NA_character_ is returned instead of an
# arbitrary level or a zero-length result.
central.value <- function(x) {
  if (is.numeric(x)) {
    return(median(x, na.rm = TRUE))
  }
  f <- if (is.factor(x)) x else as.factor(x)
  counts <- table(f)
  # table() drops NA, so all-zero (or empty) counts mean no observed value.
  if (length(counts) == 0L || all(counts == 0L)) {
    return(NA_character_)
  }
  levels(f)[which.max(counts)]
}
# Nearest-neighbour imputation: for every row of tData2 that has missing
# values, fill each missing cell with the central value (see central.value)
# of that column over the rows closest to it according to dist.mtx.
n.neighbours <- 10L
for (r in which(!complete.cases(tData2))) {
  na.cols <- which(is.na(tData2[r, ]))

  # Rank the other rows by dissimilarity to row r. order() yields row
  # positions, so this does not depend on the row names being 1..n. Row r
  # is excluded explicitly rather than assumed to sort first, and pairs
  # with an undefined (NA) dissimilarity are dropped.
  d <- dist.mtx[r, ]
  d[r] <- NA
  nn <- head(order(d, na.last = NA), n.neighbours)

  # Impute one column at a time so each column keeps its own type; calling
  # apply() on the data frame would coerce mixed columns to character.
  for (j in na.cols) {
    fill <- central.value(tData2[[j]][nn])
    if (is.logical(tData2[[j]])) {
      # central.value reports the mode of non-numeric data as a string.
      fill <- as.logical(fill)
    }
    # Leave the cell missing when the neighbours offer no usable value.
    if (length(fill) == 1L && !is.na(fill)) {
      tData2[r, j] <- fill
    }
  }
}