library(readxl)
library(e1071)
library(tidyverse)
library(lmtest)
library(broom)
library(MASS)
library(car)
library(caret)
Pra-proses
data <- read_xlsx("C:/Users/Nabil Izzany/Documents/Kuylah/Semester 6/Teknik Pembelajaran Mesin/data tugas akhir tpm.xlsx", sheet = 1)
data <- na.omit(data)
data$FDI <- scale(data$FDI)
data$GFCF <- scale(data$GFCF)
data$TL <- scale(data$TL)
data$GOED <- scale(data$GOED)
data$INFL <- scale(data$INFL)
set.seed(7)
itrain <- sample(c(TRUE, FALSE), nrow(data), replace=TRUE, prob=c(0.8,0.2))
train <- data[itrain,]
test <- data[!itrain,]
head(data)
## # A tibble: 6 × 7
## `Country Name` GDP FDI[,1] GFCF[,1] TL[,1] GOED[,1] INFL[,1]
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Papua New Guinea 2594. -0.255 -2.26 -0.233 -1.32 -0.0638
## 2 Fiji 5968. -0.255 -1.81 -0.269 0.274 -0.167
## 3 Algeria 4022. -0.228 -1.71 -0.0990 0.760 -0.159
## 4 North America 63199. 8.97 -1.62 2.38 0.186 -0.162
## 5 Ecuador 6233. -0.239 -1.43 -0.156 -0.123 -0.240
## 6 Jamaica 5626. -0.246 -1.38 -0.253 0.319 -0.0649
str(data)
## tibble [123 × 7] (S3: tbl_df/tbl/data.frame)
## $ Country Name: chr [1:123] "Papua New Guinea" "Fiji" "Algeria" "North America" ...
## $ GDP : num [1:123] 2594 5968 4022 63199 6233 ...
## $ FDI : num [1:123, 1] -0.255 -0.255 -0.228 8.966 -0.239 ...
## ..- attr(*, "scaled:center")= num 1.04e+10
## ..- attr(*, "scaled:scale")= num 3.94e+10
## $ GFCF : num [1:123, 1] -2.26 -1.81 -1.71 -1.62 -1.43 ...
## ..- attr(*, "scaled:center")= num 23.9
## ..- attr(*, "scaled:scale")= num 7.33
## $ TL : num [1:123, 1] -0.233 -0.269 -0.099 2.378 -0.156 ...
## ..- attr(*, "scaled:center")= num 19406730
## ..- attr(*, "scaled:scale")= num 70825793
## $ GOED : num [1:123, 1] -1.322 0.274 0.76 0.186 -0.123 ...
## ..- attr(*, "scaled:center")= num 4.49
## ..- attr(*, "scaled:scale")= num 2.12
## $ INFL : num [1:123, 1] -0.0638 -0.1675 -0.1589 -0.1623 -0.24 ...
## ..- attr(*, "scaled:center")= num 5.25
## ..- attr(*, "scaled:scale")= num 20.8
## - attr(*, "na.action")= 'omit' Named int [1:94] 3 6 8 10 22 24 33 37 42 43 ...
## ..- attr(*, "names")= chr [1:94] "3" "6" "8" "10" ...
data1 <- data[,-1]
cor(data1)
## GDP FDI GFCF TL GOED INFL
## GDP 1.000000000 0.32576377 -0.07824416 0.003433851 0.16273770 -0.23705632
## FDI 0.325763768 1.00000000 -0.10368061 0.530536562 0.02284193 -0.04431541
## GFCF -0.078244160 -0.10368061 1.00000000 -0.024194872 0.02109307 0.12290748
## TL 0.003433851 0.53053656 -0.02419487 1.000000000 -0.04209653 0.05534658
## GOED 0.162737698 0.02284193 0.02109307 -0.042096529 1.00000000 -0.11031056
## INFL -0.237056317 -0.04431541 0.12290748 0.055346580 -0.11031056 1.00000000
Regresi Linier
Model
reg <- lm(GDP~ FDI+GFCF+GOED+TL+INFL, data = train)
summary(reg)
##
## Call:
## lm(formula = GDP ~ FDI + GFCF + GOED + TL + INFL, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26482 -11524 -5851 5986 74070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15546 1798 8.647 1.19e-13 ***
## FDI 7282 2085 3.492 0.000726 ***
## GFCF -1941 1812 -1.071 0.286760
## GOED 3508 1740 2.017 0.046536 *
## TL -2813 3198 -0.880 0.381261
## INFL -4446 1879 -2.366 0.019969 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18040 on 96 degrees of freedom
## Multiple R-squared: 0.2311, Adjusted R-squared: 0.1911
## F-statistic: 5.772 on 5 and 96 DF, p-value: 0.0001059
Uji Asumsi
##===== Fungsi Identifikasi Asumsi Klasik Regresi Linier =====
#Install dulu package randtests, lmtest, skedastic, tseries
uji_asumsi <- function(modelreg, autocol.test = c("runs", "dw", "bgodfrey"),
homos.test = c("bp", "glejser", "ncv"),
normal.test = c("ks", "shapiro.w", "jb"), taraf.nyata=0.05){
#Hipoteisis: H0: Asumsi terpenuhi vs H1: Asumsi tidak terpenuhi
sisaan <- modelreg$residuals
uji_t <- c()
autokol <- c()
homoskedastisitas <- c()
ks <- c()
p_value <- matrix(NA, ncol = 1, nrow = 4)
a <- t.test(sisaan,
mu = 0,
conf.level = 0.95)
p_value[1,1] <- round(a$p.value,3)
if(a$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
uji_t <- hasil
if (autocol.test == "runs"){
b <- randtests::runs.test(sisaan)
p_value[2,1] <- round(b$p.value,3)
if(b$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
autokol <- hasil
}else if (autocol.test == "dw") {
b <- lmtest::dwtest(modelreg)
p_value[2,1] <- round(b$p.value,3)
if(b$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
autokol <- hasil
}else if (autocol.test == "bgodfrey") {
b <- lmtest::bgtest(modelreg)
p_value[2,1] <- round(b$p.value,3)
if(b$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
autokol <- hasil
}
if (homos.test == "bp") {
c <- lmtest::bptest(modelreg)
p_value[3,1] <- round(c$p.value,3)
if(c$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
homoskedastisitas <- hasil
} else if (homos.test == "glejser") {
c <- skedastic::glejser(modelreg)
p_value[3,1] <- round(c$p.value,3)
if(c$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
homoskedastisitas <- hasil
} else if (homos.test == "ncv") {
c <- car::ncvTest(modelreg)
p_value[3,1] <- round(c$p,3)
if(c$p<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
homoskedastisitas <- hasil
}
if (normal.test == "ks"){
d <- ks.test(sisaan, "pnorm")
p_value[4,1] <- round(d$p.value,3)
if(d$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
ks <- hasil
} else if (normal.test == "shapiro.w") {
d <- shapiro.test(sisaan)
p_value[4,1] <- round(d$p.value,3)
if(d$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
ks <- hasil
} else if (normal.test == "jb") {
d <- tseries::jarque.bera.test(sisaan)
p_value[4,1] <- round(d$p.value,3)
if(d$p.value<taraf.nyata){hasil = "Tolak H0"}else{hasil="Tak Tolak H0"}
ks <- hasil
}
keputusan <- rbind(uji_t, autokol, homoskedastisitas, ks)
tabel <- data.frame(p_value, keputusan)
colnames(tabel) <- c("P-value", "Keputusan")
rownames(tabel) <- c("E(sisaan) = 0", "Non-autokorelasi", "Homoskedastisitas",
"Normality")
duga <- plot(modelreg,1)
kuantil <- plot(modelreg,2)
urutan <- plot(x = 1:length(sisaan),
y = sisaan,
type = 'b',
ylab = "Residuals",
xlab = "Observation")
print(tabel, duga, kuantil, urutan)
}
uji_asumsi(reg, autocol.test = "dw", homos.test = "bp", normal.test = "shapiro.w")
## P-value Keputusan
## E(sisaan) = 0 1.000 Tak Tolak H0
## Non-autokorelasi 0.929 Tak Tolak H0
## Homoskedastisitas 0.089 Tak Tolak H0
## Normality 0.000 Tolak H0
Penanganan Asumsi
b <- boxcox(lm(GDP~ FDI+GFCF+GOED+TL+INFL, data = train))
lambda <- b$x[which.max(b$y)]
lambda
## [1] -0.02020202
reg.bc <- lm((GDP^-0.02020202)~ FDI+GFCF+GOED+TL+INFL, data = train)
uji_asumsi(reg.bc, autocol.test = "dw", homos.test = "bp", normal.test = "ks")
## P-value Keputusan
## E(sisaan) = 0 1.000 Tak Tolak H0
## Non-autokorelasi 0.606 Tak Tolak H0
## Homoskedastisitas 0.046 Tolak H0
## Normality 0.000 Tolak H0
Ditransformasi semakin membuat uji asumsi semakin banyak yang tidak terpenuhi.
step(lm(GDP~ FDI+GFCF+GOED+TL+INFL, data = data), direction="both", trace = 10)
## Start: AIC=2438.5
## GDP ~ FDI + GFCF + GOED + TL + INFL
##
## Df Sum of Sq RSS AIC
## - GFCF 1 19325557 4.5466e+10 2436.6
## <none> 4.5447e+10 2438.5
## - GOED 1 859849191 4.6307e+10 2438.8
## - TL 1 1695376038 4.7142e+10 2441.0
## - INFL 1 2010155996 4.7457e+10 2441.8
## - FDI 1 7128924742 5.2576e+10 2454.4
##
## Step: AIC=2436.55
## GDP ~ FDI + GOED + TL + INFL
##
## Df Sum of Sq RSS AIC
## <none> 4.5466e+10 2436.6
## - GOED 1 851198476 4.6318e+10 2436.8
## + GFCF 1 19325557 4.5447e+10 2438.5
## - TL 1 1706787453 4.7173e+10 2439.1
## - INFL 1 2087481848 4.7554e+10 2440.1
## - FDI 1 7274242127 5.2741e+10 2452.8
##
## Call:
## lm(formula = GDP ~ FDI + GOED + TL + INFL, data = data)
##
## Coefficients:
## (Intercept) FDI GOED TL INFL
## 16352 9153 2662 -4438 -4181
reg.best <- lm(formula = GDP ~ FDI + GOED + TL + INFL, data = data)
summary(reg.best)
##
## Call:
## lm(formula = GDP ~ FDI + GOED + TL + INFL, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23959 -11703 -6536 5542 95158
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16352 1770 9.239 1.27e-15 ***
## FDI 9153 2107 4.345 2.97e-05 ***
## GOED 2662 1791 1.486 0.1399
## TL -4438 2109 -2.105 0.0374 *
## INFL -4181 1796 -2.328 0.0216 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19630 on 118 degrees of freedom
## Multiple R-squared: 0.2031, Adjusted R-squared: 0.176
## F-statistic: 7.516 on 4 and 118 DF, p-value: 1.984e-05
uji_asumsi(reg.best, autocol.test = "dw", homos.test = "bp", normal.test = "shapiro.w")
## P-value Keputusan
## E(sisaan) = 0 1.000 Tak Tolak H0
## Non-autokorelasi 0.899 Tak Tolak H0
## Homoskedastisitas 0.486 Tak Tolak H0
## Normality 0.000 Tolak H0
Support Vector Machine
svm.model <- svm(GDP~ FDI+GFCF+GOED+TL+INFL, data = train)
summary(svm.model)
##
## Call:
## svm(formula = GDP ~ FDI + GFCF + GOED + TL + INFL, data = train)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 1
## gamma: 0.2
## epsilon: 0.1
##
##
## Number of Support Vectors: 83
Tuning Parameter
tuningsvm <- tune(svm, GDP ~ FDI+GFCF+GOED+TL+INFL ,data=train,
ranges=list(kernel=c("linear","polynomial","radial","sigmoid")))
print(tuningsvm)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## kernel
## radial
##
## - best performance: 369812621
tuningsvm$best.model
##
## Call:
## best.tune(METHOD = svm, train.x = GDP ~ FDI + GFCF + GOED + TL +
## INFL, data = train, ranges = list(kernel = c("linear", "polynomial",
## "radial", "sigmoid")))
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 1
## gamma: 0.2
## epsilon: 0.1
##
##
## Number of Support Vectors: 83
tuningsvm$performances
## kernel error dispersion
## 1 linear 390700962 290340185
## 2 polynomial 12229289683 37094608737
## 3 radial 369812621 275214305
## 4 sigmoid 1377674022 1588647646
tuningsvm2 <- tune(svm, GDP ~ FDI+GFCF+GOED+TL+INFL ,data=train, kernel = "radial",
ranges = list(epsilon = seq(0, 1, 0.1), cost = 2^(-2:12)))
plot(tuningsvm2)
svm.best <- tuningsvm2$best.model
summary(svm.best)
##
## Call:
## best.tune(METHOD = svm, train.x = GDP ~ FDI + GFCF + GOED + TL +
## INFL, data = train, ranges = list(epsilon = seq(0, 1, 0.1), cost = 2^(-2:12)),
## kernel = "radial")
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 2
## gamma: 0.2
## epsilon: 0.3
##
##
## Number of Support Vectors: 62
Perbandingan
library(Metrics)
reg.test <- predict(reg.best, test)
svm.test <- predict(svm.best, test)
mape_reg <- mape(test$GDP, reg.test)
mape_svm <- mape(test$GDP, predict(svm.model, test))
banding <- format(matrix(c(mape_reg, mape_svm), ncol = 2, byrow = TRUE), scientific = T, digits = 5)
colnames(banding) <- c("Regresi", "SVM")
row.names(banding) <- c("MAPE")
knitr::kable(banding)
| Regresi | SVM | |
|---|---|---|
| MAPE | 7.4878e+00 | 5.9006e+00 |