# Method 1: IQR fences. A value is treated as an outlier when it lies more
# than `multiple` interquartile ranges below the first quartile or above the
# third quartile.
#
# Arguments:
#   x         numeric vector. Missing values are ignored when the quartiles
#             are computed and are never flagged as outliers.
#   multiple  width of the fences in IQR units. The default is 1.5; pass 3 to
#             flag only values beyond three IQRs.
#   replace   if TRUE, flagged values are overwritten with `revalue`.
#   revalue   replacement value. Defaults to the mean of the non-missing
#             values of `x`, taken before any replacement is made.
#
# Returns a list with two elements:
#   new.value  `x`, with the outliers replaced when `replace` is TRUE.
#   outlier    data.frame(location, value) holding the positions and original
#              values of the outliers, or a single row of zeros when no
#              outlier was found.
outlier.IQR <- function(x, multiple = 1.5, replace = TRUE,
                        revalue = mean(x, na.rm = TRUE)) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  spread <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - multiple * spread
  upper <- quartiles[2] + multiple * spread

  # which() discards NA comparisons, so missing values are never reported.
  idx <- which(x < lower | x > upper)

  outlier <- if (length(idx) > 0) {
    data.frame(location = idx, value = x[idx])
  } else {
    # Placeholder row kept so callers always receive a one-row-or-more frame.
    data.frame(location = 0, value = 0)
  }

  if (isTRUE(as.logical(replace))) {
    # The right-hand side is evaluated before the assignment, so the default
    # `revalue` is the mean of the original, unmodified `x`.
    x[idx] <- revalue
  }

  list(new.value = x, outlier = outlier)
}
# Apply the IQR-based cleaner (outlier.IQR with its default arguments) to
# columns 3-17 of d2016. The result is read below under the names x1 ... x15,
# so those columns are presumably named x1 ... x15 -- TODO confirm.
lla <- apply(d2016[, 3:17], 2, outlier.IQR)

# Collect the cleaned vector of every column. exact = FALSE keeps the partial
# name matching that `$` performs on lists.
col_ids <- paste0("x", 1:15)
cleaned <- lapply(col_ids, function(id) lla[[id, exact = FALSE]]$new.value)
names(cleaned) <- col_ids

# Still expose each cleaned column as its own variable (x1 ... x15), as the
# former fifteen hand-written assignments did.
for (col_id in col_ids) {
  assign(col_id, cleaned[[col_id]])
}

# Bind the cleaned columns side by side and export them.
# NOTE(review): the file name says 2014 while the data object is d2016 --
# confirm which year is intended.
x_ycz <- do.call(cbind, cleaned)
write.csv(x_ycz, "C:/myR/2018-03-04/x_ycz2014.csv", row.names = TRUE)
# Method 2: 3-sigma detection (the original note refers to Chebyshev's
# inequality). A value is treated as an outlier when it lies more than three
# standard deviations away from the mean.
#
# Arguments:
#   x        numeric vector. Missing values are ignored when the mean and the
#            standard deviation are computed and are never flagged.
#   replace  if TRUE, flagged values are overwritten with `revalue`.
#   revalue  replacement value. Defaults to the mean of the non-missing
#            values of `x`, taken before any replacement is made.
#
# Returns a list with two elements:
#   new.value  `x`, with the outliers replaced when `replace` is TRUE.
#   outlier    data.frame(location, value) holding the positions and original
#              values of the outliers, or a single row of zeros when no
#              outlier was found.
outlier.q <- function(x, replace = TRUE, revalue = mean(x, na.rm = TRUE)) {
  # Without na.rm a single NA would turn both thresholds into NA and no
  # outlier would ever be detected.
  centre <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  upper <- centre + 3 * spread
  lower <- centre - 3 * spread

  # which() discards NA comparisons, so missing values are never reported.
  idx <- which(x < lower | x > upper)

  outlier <- if (length(idx) > 0) {
    data.frame(location = idx, value = x[idx])
  } else {
    # Placeholder row kept so callers always receive a one-row-or-more frame.
    data.frame(location = 0, value = 0)
  }

  if (isTRUE(as.logical(replace))) {
    # The right-hand side is evaluated before the assignment, so the default
    # `revalue` is the mean of the original, unmodified `x`.
    x[idx] <- revalue
  }

  list(new.value = x, outlier = outlier)
}
# Run the 3-sigma cleaner (outlier.q) over columns 3-17 of d2016, one column
# at a time; this rebinds `lla`.
lla <- apply(X = d2016[, 3:17], MARGIN = 2, FUN = outlier.q)
# 说明:以上代码参考了他人的实现,并按照方法1的写法修改了方法2;后面取值与导出的部分仍较繁琐,尚未整理,有需要的读者可作大致参考。revalue 参数默认用 mean()(均值)替换异常值。