library(readxl)
library(car)
## Loading required package: carData
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
lapply(c("glmnet","lmridge"),library,character.only=T)[[1]]
## Loading required package: Matrix
## Loaded glmnet 4.1-7
##
## Attaching package: 'lmridge'
## The following object is masked from 'package:car':
##
## vif
## [1] "glmnet" "Matrix" "lmtest" "zoo" "car" "carData"
## [7] "readxl" "stats" "graphics" "grDevices" "utils" "datasets"
## [13] "methods" "base"
data<-read_excel("D:/Analisis Regresi/Buat TA Reg/World Happiness Report 2023.xlsx",sheet="2")
x<-data.matrix(data[, c('X1', 'X2', 'X3', 'X4','X5','X6')])
y<-data$Y
X1<-data$X1
X2<-data$X2
X3<-data$X3
X4<-data$X4
X5<-data$X5
X6<-data$X6
modelrlb<-lm(y~X1+X2+X3+X4+X5+X6, data=data)
modelrlb
##
## Call:
## lm(formula = y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## -1.328421 0.252062 0.009613 3.734823 1.916248 0.081909
## X6
## -0.830771
summary(modelrlb)
##
## Call:
## lm(formula = y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.71680 -0.23868 0.04814 0.27118 0.96430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.328421 0.582329 -2.281 0.024214 *
## X1 0.252062 0.063426 3.974 0.000118 ***
## X2 0.009613 0.006557 1.466 0.145091
## X3 3.734823 0.532860 7.009 1.29e-10 ***
## X4 1.916248 0.467708 4.097 7.44e-05 ***
## X5 0.081909 0.307475 0.266 0.790373
## X6 -0.830771 0.269129 -3.087 0.002488 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4657 on 126 degrees of freedom
## Multiple R-squared: 0.8062, Adjusted R-squared: 0.797
## F-statistic: 87.38 on 6 and 126 DF, p-value: < 2.2e-16
car::vif(modelrlb)
## X1 X2 X3 X4 X5 X6
## 3.448872 1.654767 2.498448 1.425609 1.168867 1.401088
Dari pemeriksaan multikolinearitas terbukti nilai VIF tiap peubah penjelas tidak ada yang nilainya \[>10\] sehingga dapat dinyatakan tidak ada multikolinearitas.
cv.r<-cv.glmnet(x,y,alpha=0);plot(cv.r)
### Regresi Ridge
best.lr<-cv.r$lambda.min
bestridge<-glmnet(x,y,alpha=0,lambda=best.lr);coef(bestridge)
## 7 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) -1.19093187
## X1 0.24922223
## X2 0.01141737
## X3 3.43457896
## X4 1.90803971
## X5 0.08135685
## X6 -0.80080299
summary(bestridge)
## Length Class Mode
## a0 1 -none- numeric
## beta 6 dgCMatrix S4
## df 1 -none- numeric
## dim 2 -none- numeric
## lambda 1 -none- numeric
## dev.ratio 1 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 5 -none- call
## nobs 1 -none- numeric
Fungsi Rsquare
rsq<-function(bestmodel,bestlambda,x,y){
#y duga
y.duga <- predict(bestmodel, s = bestlambda, newx = x)
#JKG dan JKT
jkt <- sum((y - mean(y))^2)
jkg <- sum((y.duga- y)^2)
#find R-Squared
rsq <- 1 - jkg/jkt
return(rsq)
}
rsq(bestridge,best.lr,x,y)
## [1] 0.8050545
y_pred_ridge <- predict(bestridge, s = 0.01, newx = x) # Sesuaikan dengan nilai lambda yang sesuai
RSE_ridge <- round(sqrt(sum((y - y_pred_ridge)^2) / (length(y) - ncol(x) - 1)),4)
cv.l<-cv.glmnet(x,y,alpha=1);plot(cv.l)
### Regresi LASSO
best.ll<-cv.l$lambda.min
bestlasso<-glmnet(x,y,alpha=1,lambda=best.ll);coef(bestlasso)
## 7 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) -1.093497317
## X1 0.249866554
## X2 0.007886511
## X3 3.635945532
## X4 1.811847022
## X5 .
## X6 -0.744288580
rsq(bestlasso,best.ll,x,y)
## [1] 0.8045828
y_pred_lasso <- predict(bestlasso, s = 0.01, newx = x) # Sesuaikan dengan nilai lambda yang sesuai
RSE_lasso <- round(sqrt(sum((y - y_pred_lasso)^2) / (length(y) - ncol(x) - 1)),4)
RSE_lasso
## [1] 0.4677
lmr<-lmridge(y~X1+X2+X3+X4+X5+X6,data=data,scaling="centered");plot(lmr)
summary(lmr)
##
## Call:
## lmridge.default(formula = y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data,
## scaling = "centered")
##
##
## Coefficients: for Ridge parameter K= 0
## Estimate Estimate (Sc) StdErr (Sc) t-value (Sc) Pr(>|t|)
## Intercept -1.3284 -1.3284 0.9511 -1.3968 0.1649
## X1 0.2521 0.2521 0.0632 3.9898 0.0001 ***
## X2 0.0096 0.0096 0.0065 1.4720 0.1435
## X3 3.7348 3.7348 0.5308 7.0368 <2e-16 ***
## X4 1.9163 1.9162 0.4659 4.1133 0.0001 ***
## X5 0.0819 0.0819 0.3063 0.2674 0.7896
## X6 -0.8308 -0.8308 0.2681 -3.0991 0.0024 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Ridge Summary
## R2 adj-R2 DF ridge F AIC BIC
## 0.80620 0.79860 6.00004 88.06871 -198.46046 469.29818
## Ridge minimum MSE= 0.668424 at K= 0
## P-value for F-test ( 6.00004 , 126.9999 ) = 7.650318e-43
## -------------------------------------------------------------------
# Membuat data untuk tabel
metode <- c("RLB", "Ridge", "Lasso")
Adj.R2 <- c( 0.797, 0.8050,0.8048)
RSE<- c(0.4657,0.4671,0.4677)
# Membuat data frame
data_tabel <- data.frame(Metode = metode, `Adjusted R-Square` = Adj.R2, RSE=RSE)
data_tabel
## Metode Adjusted.R.Square RSE
## 1 RLB 0.7970 0.4657
## 2 Ridge 0.8050 0.4671
## 3 Lasso 0.8048 0.4677
Berdasarkan perbandingan 3 metode diperoleh model terbaik yaitu Ridge Regression dengan Adjusted R-Square terbesar senilai 0.8050 dan RSE terkecil senilai 0.4671.
Model yang terbentuk adalah : \[-1.19093187+0.24922223X_1+0.01141737X_2+3.43457896X_3+1.90803971X_4+0.08135685X_5-0.80080299X_6\]
PDB per kapita (X1),Dukungan sosial(X3),Kebebasan dalam menentukan pilihan hidup(X4) secara signifikan memengaruhi Skor Kebahagiaan (Y). Sedangkan Persepsi korupsi(X6), Harapan hidup sehat(X2), dan Kemurahan hati(X5) teruji tidak signifikan. Namun pada Regresi Ridge peubah yang tidak signifikan tetap dimasukkan dalam model. Koefisien regresi X1,X2,X3,X4,X5 bernilai positif sehingga semakin tinggi nilai peubah-peubah tersebut maka akan meningkatkan skor kebahagiaan. Sedangkan semakin tinggi nilai peubah X6 maka akan menurunkan skor kebahagiaan.