pacman:: p_load(caret, party, reshape, ggplot2, dplyr)
# Simulate a toy regression data set: four predictors (x, z, a, b) and a
# response (y), each drawn independently from Uniform(0, 100). Because the
# response is independent of the predictors, any model fitted to these data
# is expected to have essentially no predictive power.
#
# Seed the RNG *before* drawing so that the simulated data -- and therefore
# every metric computed from them -- can be reproduced from run to run.
set.seed(100)
n_obs <- 50L
x <- runif(n_obs, min = 0, max = 100)
z <- runif(n_obs, min = 0, max = 100)
a <- runif(n_obs, min = 0, max = 100)
b <- runif(n_obs, min = 0, max = 100)
y <- runif(n_obs, min = 0, max = 100)
# data.frame() takes the column names from the argument names, so there is no
# need to go through an intermediate matrix with cbind().
df <- data.frame(x, z, a, b, y)
# Train model ----
# Split the data into a training set (about 70% of rows) and a test set (the
# remainder). The seed makes the random partition reproducible.
set.seed(100)
# list = FALSE requests the selected row indices in a form that can be used
# directly for row subsetting. Spell out FALSE: `F` is an ordinary variable
# that can be reassigned.
in_training <- createDataPartition(df$y, p = 0.7, list = FALSE)
# NOTE: `train` is also the name of caret's model-fitting function. Calls such
# as train(...) still work because R skips non-function objects when it looks
# up a function name, but the two are easy to confuse when reading the code.
train <- df[in_training, ]
test <- df[-in_training, ]
# Compare several caret model types: fit each one on the training set using
# all predictors, predict the held-out test set, and collect the test metrics.
model_methods <- c("lm", "rf", "knn", "svmLinear", "svmRadial")

# vapply() returns a matrix with one column per method (named after the
# method, in the order given) and one row per metric returned by
# postResample(). This replaces growing the matrix with cbind() and reversing
# it afterwards, which also broke when only a single method was listed.
compare.model <- vapply(
  model_methods,
  function(method_name) {
    # caret:: prefix: `train` is also the name of the training data frame.
    fit <- caret::train(y ~ ., data = train, method = method_name)
    predicted <- predict(fit, newdata = test)
    # postResample() expects predictions first and observed values second.
    caret::postResample(pred = predicted, obs = test$y)
  },
  numeric(3)
)
compare.model
## lm rf knn svmLinear svmRadial
## RMSE 34.07731030 36.55747844 30.86533537 36.84210580 32.27818991
## Rsquared 0.06713161 0.03756654 0.04125198 0.05728815 0.02541492
## MAE 29.93277833 30.00960300 26.24757928 32.63475051 27.23245666
# Reshape the metric-by-model matrix into long format: one row per
# (metric, model) pair with the score in `value`, which is the layout
# ggplot2 needs for plotting.
compare.model.melt <- as.data.frame(
  melt(compare.model, varnames = c("metric", "model"))
)
compare.model.melt
## metric model value
## 1 RMSE lm 34.07731030
## 2 Rsquared lm 0.06713161
## 3 MAE lm 29.93277833
## 4 RMSE rf 36.55747844
## 5 Rsquared rf 0.03756654
## 6 MAE rf 30.00960300
## 7 RMSE knn 30.86533537
## 8 Rsquared knn 0.04125198
## 9 MAE knn 26.24757928
## 10 RMSE svmLinear 36.84210580
## 11 Rsquared svmLinear 0.05728815
## 12 MAE svmLinear 32.63475051
## 13 RMSE svmRadial 32.27818991
## 14 Rsquared svmRadial 0.02541492
## 15 MAE svmRadial 27.23245666
# Draw one bar chart per metric, with one bar per model, titled by the metric.
for (metric_name in c("RMSE", "Rsquared", "MAE")) {
  # Keep only the rows of the long table that belong to this metric.
  metric_rows <- filter(compare.model.melt, metric == metric_name)
  bar_chart <- ggplot(metric_rows, aes(model, value)) +
    geom_bar(stat = "identity") +
    ggtitle(metric_name)
  # Plots are not auto-printed inside a loop, so print explicitly.
  print(bar_chart)
}
# Compare different predictor subsets for a linear model: fit each formula on
# the training set, predict the test set, and collect the test metrics.
var_formulas <- c("y ~ x + a", "y ~ z + a", "y ~ a")

# vapply() returns a matrix with one column per formula (named by the formula
# text, in the order given) and one row per metric, so there is no need to
# grow the result with cbind() and reverse the columns afterwards.
compare.var <- vapply(
  var_formulas,
  function(formula_text) {
    model_formula <- stats::as.formula(formula_text)
    # caret:: prefix: `train` is also the name of the training data frame.
    fit <- caret::train(model_formula, data = train, method = "lm")
    predicted <- predict(fit, newdata = test)
    # postResample() expects predictions first and observed values second.
    caret::postResample(pred = predicted, obs = test$y)
  },
  numeric(3)
)
compare.var
## y ~ x + a y ~ z + a y ~ a
## RMSE 31.41514469 35.6888815 31.0029861
## Rsquared 0.05745741 0.2978386 0.1308906
## MAE 27.21851663 31.6735311 27.1076748
# Compare every combination of predictor subset and model type: fit each
# (formula, method) pair on the training set, predict the test set, and
# collect the test metrics.
var_formulas <- c("y ~ x + a", "y ~ z + a", "y ~ a")
model_methods <- c("lm", "knn")

# expand.grid() varies its first factor fastest, so listing `method` first
# gives formula-major order: (f1, lm), (f1, knn), (f2, lm), (f2, knn), ...
# Deriving both the fits and the column labels from this one table keeps them
# aligned for any number of formulas or methods, with no hand-written
# column permutation to maintain.
fit_grid <- expand.grid(
  method = model_methods,
  formula = var_formulas,
  stringsAsFactors = FALSE
)

# One column per (formula, method) pair, one row per metric.
compare.var.mod <- vapply(
  seq_len(nrow(fit_grid)),
  function(k) {
    model_formula <- stats::as.formula(fit_grid$formula[k])
    # caret:: prefix: `train` is also the name of the training data frame.
    fit <- caret::train(
      model_formula,
      data = train,
      method = fit_grid$method[k]
    )
    predicted <- predict(fit, newdata = test)
    # postResample() expects predictions first and observed values second.
    caret::postResample(pred = predicted, obs = test$y)
  },
  numeric(3)
)
# Label columns as "<formula>.<method>", e.g. "y ~ x + a.lm".
colnames(compare.var.mod) <- paste(
  fit_grid$formula,
  fit_grid$method,
  sep = "."
)
compare.var.mod
## y ~ x + a.lm y ~ x + a.knn y ~ z + a.lm y ~ z + a.knn y ~ a.lm
## RMSE 31.41514469 32.14527450 35.6888815 34.36670813 31.0029861
## Rsquared 0.05745741 0.01075993 0.2978386 0.05701168 0.1308906
## MAE 27.21851663 26.79961609 31.6735311 29.78991248 27.1076748
## y ~ a.knn
## RMSE 30.90856184
## Rsquared 0.03143209
## MAE 27.29233553