# Classifying using the K-Nearest Neighbors (KNN) approach
#----------------------------------------------------------------
library(class)
library(caret)

# Load the data and z-score the two numeric predictors so both contribute
# on the same scale to the Euclidean distance knn() computes.
# as.vector() drops the matrix attributes scale() attaches, keeping the
# new columns as plain numeric vectors.
vac <- read.csv("vacation-trip-classification.csv")
vac$Income.z <- as.vector(scale(vac$Income))
vac$Family_size.z <- as.vector(scale(vac$Family_size))

# Reproducible 50/25/25 train/validation/test split, stratified on Result.
set.seed(1000)
train.idx <- createDataPartition(vac$Result, p = 0.5, list = FALSE)
train <- vac[train.idx, ]
temp <- vac[-train.idx, ]
val.idx <- createDataPartition(temp$Result, p = 0.5, list = FALSE)
val <- temp[val.idx, ]
test <- temp[-val.idx, ]

# Select predictors/target by name rather than position (Income.z and
# Family_size.z are cols 4:5, Result is col 3 in the original layout).
predictors <- c("Income.z", "Family_size.z")

# Baseline 1-NN model: validation-set error matrix ...
pred1 <- knn(train[, predictors], val[, predictors], train$Result, k = 1)
errmat1 <- table(val$Result, pred1, dnn = c("Actual", "Predicted"))

# ... and test-set error matrix.
pred.test <- knn(train[, predictors], test[, predictors], train$Result, k = 1)
errmat.test <- table(test$Result, pred.test, dnn = c("Actual", "Predicted"))
# Run knn() for every k in [start_k, end_k] and print the confusion
# ("error") matrix for each value of k against the validation target.
# Called for its printed output; returns NULL invisibly (via the for loop).
knn.automate <- function(trg_predictors, val_predictors, trg_target,
                         val_target, start_k, end_k) {
  for (neighbors in start_k:end_k) {
    predicted <- knn(trg_predictors, val_predictors, trg_target, neighbors)
    confusion <- table(val_target, predicted, dnn = c("Actual", "Predicted"))
    cat(paste("Error matrix for k=", neighbors, "\n"))
    cat("==========================\n")
    print(confusion)
    cat("--------------------------\n\n\n")
  }
}
# Compare error matrices across k = 1..7 on the validation set.
knn.automate(train[, 4:5], val[, 4:5], train[, 3], val[, 3], 1, 7)

# Final model with k = 5; prob = TRUE attaches the winning-vote proportion
# to the predictions as the "prob" attribute.
# (train[, 4:5] fixes the inconsistent train[4:5] used originally.)
pred5 <- knn(train[, 4:5], val[, 4:5], train[, 3], k = 5, prob = TRUE)
pred5