Memuat Package
library(tidyverse)
library(caret)
Dataset
cars
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
## 7 10 18
## 8 10 26
## 9 10 34
## 10 11 17
## 11 11 28
## 12 12 14
## 13 12 20
## 14 12 24
## 15 12 28
## 16 13 26
## 17 13 34
## 18 13 34
## 19 13 46
## 20 14 26
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
Grafik dari Dataset
cars %>%
ggplot(aes(x = speed, y = dist)) +
geom_point()
Model
set.seed(1)
# Membagi Data Menjadi Training dan Test ----
sampled_cars <- cars %>%
mutate(training = sample(0:1,
nrow(cars),
replace = TRUE))
training_data <- sampled_cars %>%
filter(training == 1)
test_data <- sampled_cars %>%
filter(training == 0)
# Model Regresi Linear ----
model1 <- training_data %>%
lm(dist ~ speed, data = .)
model1
##
## Call:
## lm(formula = dist ~ speed, data = .)
##
## Coefficients:
## (Intercept) speed
## -10.466 3.377
summary(model1)
##
## Call:
## lm(formula = dist ~ speed, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.0646 -7.9998 -0.5707 5.0124 25.6884
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.4658 8.7180 -1.200 0.243
## speed 3.3765 0.4941 6.834 9.33e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.59 on 21 degrees of freedom
## Multiple R-squared: 0.6898, Adjusted R-squared: 0.6751
## F-statistic: 46.7 on 1 and 21 DF, p-value: 9.333e-07
Perbaikan Model
Berdasarkan output diatas terlihat bahwa intercept tidak signifikan (p-value = 0.243). Jadi intercept dihilangkan pada model.
set.seed(2)
training_data %>%
lm(dist ~ speed-1, data = .)
##
## Call:
## lm(formula = dist ~ speed - 1, data = .)
##
## Coefficients:
## speed
## 2.811
model1 <-
training_data %>%
lm(dist ~ speed-1, data = .)
training_data %>%
lm(dist ~ speed-1, data = .)%>%
summary
##
## Call:
## lm(formula = dist ~ speed - 1, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.218 -8.408 -2.542 3.349 25.538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## speed 2.8109 0.1503 18.71 5.36e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.72 on 22 degrees of freedom
## Multiple R-squared: 0.9409, Adjusted R-squared: 0.9382
## F-statistic: 350 on 1 and 22 DF, p-value: 5.36e-15
Validasi Menggunakan RMSE
rmse <- function(x,t) {
sqrt(mean(sum((t - x)^2)))
}
rmse(predict(model1, test_data),
test_data$dist)
## [1] 97.59294
Cross Validasi
set.seed(3)
random_group <-
function(n, probs) {
probs <- probs / sum(probs)
g <- findInterval(seq(0, 1, length = n),
c(0, cumsum(probs)),
rightmost.closed = TRUE)
names(probs)[sample(g)]
}
partition <-
function(df, n, probs) {
replicate(n,
split(df, random_group(nrow(df),
probs)),
FALSE)
}
prediction_accuracy_cars <-
function(test_and_training) {
result <- vector(mode = "numeric",
length = length(test_and_training))
for (i in seq_along(test_and_training)) {
training <-
test_and_training[[i]]$training
test <- test_and_training[[i]]$test
model <- training %>%
lm(dist ~ speed-1, data = .)
predictions <-
predict(model, test)
targets <- test$dist
result[i] <- rmse(targets, predictions) }
result
}
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
## [1] 69.23068 62.94201 93.80765 53.87501 75.10687
akurasi <-
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
mean(akurasi)
## [1] 72.70533
PLot Data dengan Prediksi
cars %>%
ggplot(aes(x = speed, y = dist)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x-1)
Memuat Package
library(party)
Dataset
cars
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
## 7 10 18
## 8 10 26
## 9 10 34
## 10 11 17
## 11 11 28
## 12 12 14
## 13 12 20
## 14 12 24
## 15 12 28
## 16 13 26
## 17 13 34
## 18 13 34
## 19 13 46
## 20 14 26
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
Grafik dari Dataset
cars %>%
ggplot(aes(x = speed, y = dist)) +
geom_point()
Model
set.seed(11)
# Model Regresi Linear ----
model11 <- training_data %>%
ctree(dist ~ speed, data = .)
model11
##
## Conditional inference tree with 2 terminal nodes
##
## Response: dist
## Input: speed
## Number of observations: 23
##
## 1) speed <= 16; criterion = 1, statistic = 15.176
## 2)* weights = 9
## 1) speed > 16
## 3)* weights = 14
# Plot Tree
model11 %>% plot
Validasi menggunakan RMSE
rmse(predict(model11, test_data),
test_data$dist)
## [1] 114.7792
Cross Validations
set.seed(33)
random_group <-
function(n, probs) {
probs <- probs / sum(probs)
g <- findInterval(seq(0, 1, length = n),
c(0, cumsum(probs)),
rightmost.closed = TRUE)
names(probs)[sample(g)]
}
partition <-
function(df, n, probs) {
replicate(n,
split(df, random_group(nrow(df),
probs)),
FALSE)
}
prediction_accuracy_cars <-
function(test_and_training) {
result <- vector(mode = "numeric",
length = length(test_and_training))
for (i in seq_along(test_and_training)) {
training <-
test_and_training[[i]]$training
test <- test_and_training[[i]]$test
model <- ctree(dist ~ speed, data = test)
predictions <-
predict(model, test)
targets <- test$dist
result[i] <- rmse(targets, predictions) }
result
}
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
## [1] 90.30919 89.84260 75.98438 70.58417 79.78826
akurasi1 <-
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
mean(akurasi1)
## [1] 92.27278
Memuat Package
library(randomForest)
Dataset
cars
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
## 7 10 18
## 8 10 26
## 9 10 34
## 10 11 17
## 11 11 28
## 12 12 14
## 13 12 20
## 14 12 24
## 15 12 28
## 16 13 26
## 17 13 34
## 18 13 34
## 19 13 46
## 20 14 26
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
Grafik dari Dataset
cars %>%
ggplot(aes(x = speed, y = dist)) +
geom_point()
Model
set.seed(111)
# Model Regresi Linear ----
model111 <- training_data %>%
randomForest(dist ~ speed, data = .)
model111
##
## Call:
## randomForest(formula = dist ~ speed, data = .)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 1
##
## Mean of squared residuals: 217.039
## % Var explained: 53.48
Validasi menggunakan RMSE
rmse(predict(model111, test_data),
test_data$dist)
## [1] 86.09424
Cross Validations
set.seed(333)
random_group <-
function(n, probs) {
probs <- probs / sum(probs)
g <- findInterval(seq(0, 1, length = n),
c(0, cumsum(probs)),
rightmost.closed = TRUE)
names(probs)[sample(g)]
}
partition <-
function(df, n, probs) {
replicate(n,
split(df, random_group(nrow(df),
probs)),
FALSE)
}
prediction_accuracy_cars <-
function(test_and_training) {
result <- vector(mode = "numeric",
length = length(test_and_training))
for (i in seq_along(test_and_training)) {
training <-
test_and_training[[i]]$training
test <- test_and_training[[i]]$test
model <- randomForest(dist ~ speed, data = test)
predictions <-
predict(model, test)
targets <- test$dist
result[i] <- rmse(targets, predictions) }
result
}
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
## [1] 28.55229 50.76490 35.16914 37.86496 61.55537
akurasi2 <-
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
mean(akurasi2)
## [1] 42.54693
Memuat Package
library(nnet)
Dataset
cars
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
## 7 10 18
## 8 10 26
## 9 10 34
## 10 11 17
## 11 11 28
## 12 12 14
## 13 12 20
## 14 12 24
## 15 12 28
## 16 13 26
## 17 13 34
## 18 13 34
## 19 13 46
## 20 14 26
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
Grafik dari Dataset
cars %>%
ggplot(aes(x = speed, y = dist)) +
geom_point()
Model
set.seed(1111)
# Model Regresi Linear ----
model1111 <- training_data %>%
nnet(dist ~ speed, data = ., size = 5)
## # weights: 16
## initial value 59324.165942
## final value 58029.000000
## converged
model1111
## a 1-5-1 network with 16 weights
## inputs: speed
## output(s): dist
## options were -
Validasi menggunakan RMSE
rmse(predict(model1111, test_data),
test_data$dist)
## [1] 250.2519
Cross Validations
set.seed(3333)
random_group <-
function(n, probs) {
probs <- probs / sum(probs)
g <- findInterval(seq(0, 1, length = n),
c(0, cumsum(probs)),
rightmost.closed = TRUE)
names(probs)[sample(g)]
}
partition <-
function(df, n, probs) {
replicate(n,
split(df, random_group(nrow(df),
probs)),
FALSE)
}
prediction_accuracy_cars <-
function(test_and_training) {
result <- vector(mode = "numeric",
length = length(test_and_training))
for (i in seq_along(test_and_training)) {
training <-
test_and_training[[i]]$training
test <- test_and_training[[i]]$test
model <- nnet(dist ~ speed, data = test, size= 5)
predictions <-
predict(model, test)
targets <- test$dist
result[i] <- rmse(targets, predictions) }
result
}
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
## # weights: 16
## initial value 48451.302343
## final value 47931.000000
## converged
## # weights: 16
## initial value 35428.883955
## final value 34675.000000
## converged
## # weights: 16
## initial value 56878.548802
## final value 55467.000000
## converged
## # weights: 16
## initial value 53226.242589
## final value 52099.000000
## converged
## # weights: 16
## initial value 57069.054848
## final value 56250.000000
## converged
## [1] 218.9315 186.2122 235.5143 228.2521 237.1708
akurasi3 <-
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
## # weights: 16
## initial value 52776.016601
## final value 51875.000000
## converged
## # weights: 16
## initial value 62544.367282
## final value 61402.000000
## converged
## # weights: 16
## initial value 49992.727867
## final value 49180.000000
## converged
## # weights: 16
## initial value 38652.804814
## final value 37643.000000
## converged
## # weights: 16
## initial value 50694.007678
## final value 49907.000000
## converged
mean(akurasi3)
## [1] 222.9475
Memuat Package
library(kernlab)
Dataset
cars
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
## 7 10 18
## 8 10 26
## 9 10 34
## 10 11 17
## 11 11 28
## 12 12 14
## 13 12 20
## 14 12 24
## 15 12 28
## 16 13 26
## 17 13 34
## 18 13 34
## 19 13 46
## 20 14 26
## 21 14 36
## 22 14 60
## 23 14 80
## 24 15 20
## 25 15 26
## 26 15 54
## 27 16 32
## 28 16 40
## 29 17 32
## 30 17 40
## 31 17 50
## 32 18 42
## 33 18 56
## 34 18 76
## 35 18 84
## 36 19 36
## 37 19 46
## 38 19 68
## 39 20 32
## 40 20 48
## 41 20 52
## 42 20 56
## 43 20 64
## 44 22 66
## 45 23 54
## 46 24 70
## 47 24 92
## 48 24 93
## 49 24 120
## 50 25 85
Grafik dari Dataset
cars %>%
ggplot(aes(x = speed, y = dist)) +
geom_point()
Model
set.seed(11111)
# Model Regresi Linear ----
model11111 <- training_data %>%
ksvm(dist ~ speed, data = .)
model11111
## Support Vector Machine object of class "ksvm"
##
## SV type: eps-svr (regression)
## parameter : epsilon = 0.1 cost C = 1
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 2.17636213288387
##
## Number of Support Vectors : 19
##
## Objective Function Value : -8.4675
## Training error : 0.271066
Validasi menggunakan RMSE
rmse(predict(model11111, test_data),
test_data$dist)
## [1] 97.90536
Cross Validations
set.seed(33333)
random_group <-
function(n, probs) {
probs <- probs / sum(probs)
g <- findInterval(seq(0, 1, length = n),
c(0, cumsum(probs)),
rightmost.closed = TRUE)
names(probs)[sample(g)]
}
partition <-
function(df, n, probs) {
replicate(n,
split(df, random_group(nrow(df),
probs)),
FALSE)
}
prediction_accuracy_cars <-
function(test_and_training) {
result <- vector(mode = "numeric",
length = length(test_and_training))
for (i in seq_along(test_and_training)) {
training <-
test_and_training[[i]]$training
test <- test_and_training[[i]]$test
model <- ksvm(dist ~ speed, data = test)
predictions <-
predict(model, test)
targets <- test$dist
result[i] <- rmse(targets, predictions) }
result
}
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
## [1] 58.55086 70.12731 79.32970 40.83409 75.87795
akurasi4 <-
cars %>%
partition(5, c(training = 0.6, test = 0.4)) %>%
prediction_accuracy_cars
mean(akurasi4)
## [1] 65.1345
tribble(
~RegresiLinear, ~DecisionTree, ~RandomForest, ~NeuralNetworks, ~SVM,
mean(akurasi), mean(akurasi1), mean(akurasi2), mean(akurasi3), mean(akurasi4)
)
## # A tibble: 1 x 5
## RegresiLinear DecisionTree RandomForest NeuralNetworks SVM
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 72.7 92.3 42.5 223. 65.1
RMSE paling kecil adalah Random forest, model terbaik menggunakan random forest.