Tugas 1 PSD

Library

library(readxl)
library(car)

## Loading required package: carData

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

lapply(c("glmnet","lmridge"),library,character.only=T)[[1]]

## Loading required package: Matrix

## Loaded glmnet 4.1-7

## 
## Attaching package: 'lmridge'

## The following object is masked from 'package:car':
## 
##     vif

##  [1] "glmnet"    "Matrix"    "lmtest"    "zoo"       "car"       "carData"  
##  [7] "readxl"    "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [13] "methods"   "base"

Baca Data

data<-read_excel("D:/Analisis Regresi/Buat TA Reg/World Happiness Report 2023.xlsx",sheet="2")

x<-data.matrix(data[, c('X1', 'X2', 'X3', 'X4','X5','X6')])
y<-data$Y
X1<-data$X1
X2<-data$X2
X3<-data$X3
X4<-data$X4
X5<-data$X5
X6<-data$X6

Regresi Linear Berganda

modelrlb<-lm(y~X1+X2+X3+X4+X5+X6, data=data)
modelrlb

## 
## Call:
## lm(formula = y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data)
## 
## Coefficients:
## (Intercept)           X1           X2           X3           X4           X5  
##   -1.328421     0.252062     0.009613     3.734823     1.916248     0.081909  
##          X6  
##   -0.830771

summary(modelrlb)

## 
## Call:
## lm(formula = y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.71680 -0.23868  0.04814  0.27118  0.96430 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.328421   0.582329  -2.281 0.024214 *  
## X1           0.252062   0.063426   3.974 0.000118 ***
## X2           0.009613   0.006557   1.466 0.145091    
## X3           3.734823   0.532860   7.009 1.29e-10 ***
## X4           1.916248   0.467708   4.097 7.44e-05 ***
## X5           0.081909   0.307475   0.266 0.790373    
## X6          -0.830771   0.269129  -3.087 0.002488 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4657 on 126 degrees of freedom
## Multiple R-squared:  0.8062, Adjusted R-squared:  0.797 
## F-statistic: 87.38 on 6 and 126 DF,  p-value: < 2.2e-16

car::vif(modelrlb)

##       X1       X2       X3       X4       X5       X6 
## 3.448872 1.654767 2.498448 1.425609 1.168867 1.401088

Dari pemeriksaan multikolinearitas terbukti nilai VIF tiap peubah penjelas tidak ada yang nilainya \[>10\] sehingga dapat dinyatakan tidak ada multikolinearitas.

CV Ridge

cv.r<-cv.glmnet(x,y,alpha=0);plot(cv.r)

### Regresi Ridge

best.lr<-cv.r$lambda.min
bestridge<-glmnet(x,y,alpha=0,lambda=best.lr);coef(bestridge)

## 7 x 1 sparse Matrix of class "dgCMatrix"
##                      s0
## (Intercept) -1.19093187
## X1           0.24922223
## X2           0.01141737
## X3           3.43457896
## X4           1.90803971
## X5           0.08135685
## X6          -0.80080299

summary(bestridge)

##           Length Class     Mode   
## a0        1      -none-    numeric
## beta      6      dgCMatrix S4     
## df        1      -none-    numeric
## dim       2      -none-    numeric
## lambda    1      -none-    numeric
## dev.ratio 1      -none-    numeric
## nulldev   1      -none-    numeric
## npasses   1      -none-    numeric
## jerr      1      -none-    numeric
## offset    1      -none-    logical
## call      5      -none-    call   
## nobs      1      -none-    numeric

Fungsi Rsquare

rsq<-function(bestmodel,bestlambda,x,y){
 #y duga
 y.duga <- predict(bestmodel, s = bestlambda, newx = x)

 #JKG dan JKT
 jkt <- sum((y - mean(y))^2)
 jkg <- sum((y.duga- y)^2)

#find R-Squared
rsq <- 1 - jkg/jkt
return(rsq) 
}

Rsquare dan RSE Ridge

rsq(bestridge,best.lr,x,y)

## [1] 0.8050545

y_pred_ridge <- predict(bestridge, s = 0.01, newx = x)  # Sesuaikan dengan nilai lambda yang sesuai
RSE_ridge <- round(sqrt(sum((y - y_pred_ridge)^2) / (length(y) - ncol(x) - 1)),4)

CV LASSO

cv.l<-cv.glmnet(x,y,alpha=1);plot(cv.l)

### Regresi LASSO

best.ll<-cv.l$lambda.min
bestlasso<-glmnet(x,y,alpha=1,lambda=best.ll);coef(bestlasso)

## 7 x 1 sparse Matrix of class "dgCMatrix"
##                       s0
## (Intercept) -1.093497317
## X1           0.249866554
## X2           0.007886511
## X3           3.635945532
## X4           1.811847022
## X5           .          
## X6          -0.744288580

Rsquare dan RSE LASSO

rsq(bestlasso,best.ll,x,y)

## [1] 0.8045828

y_pred_lasso <- predict(bestlasso, s = 0.01, newx = x)  # Sesuaikan dengan nilai lambda yang sesuai
RSE_lasso <- round(sqrt(sum((y - y_pred_lasso)^2) / (length(y) - ncol(x) - 1)),4)
RSE_lasso

## [1] 0.4677

Regresi Ridge menggunakan fungsi (lmridge)

lmr<-lmridge(y~X1+X2+X3+X4+X5+X6,data=data,scaling="centered");plot(lmr)

summary(lmr)

## 
## Call:
## lmridge.default(formula = y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data, 
##     scaling = "centered")
## 
## 
## Coefficients: for Ridge parameter K= 0 
##           Estimate Estimate (Sc) StdErr (Sc) t-value (Sc) Pr(>|t|)    
## Intercept  -1.3284       -1.3284      0.9511      -1.3968   0.1649    
## X1          0.2521        0.2521      0.0632       3.9898   0.0001 ***
## X2          0.0096        0.0096      0.0065       1.4720   0.1435    
## X3          3.7348        3.7348      0.5308       7.0368   <2e-16 ***
## X4          1.9163        1.9162      0.4659       4.1133   0.0001 ***
## X5          0.0819        0.0819      0.3063       0.2674   0.7896    
## X6         -0.8308       -0.8308      0.2681      -3.0991   0.0024 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Ridge Summary
##         R2     adj-R2   DF ridge          F        AIC        BIC 
##    0.80620    0.79860    6.00004   88.06871 -198.46046  469.29818 
## Ridge minimum MSE= 0.668424 at K= 0 
## P-value for F-test ( 6.00004 , 126.9999 ) = 7.650318e-43 
## -------------------------------------------------------------------

# Membuat data untuk tabel
metode <- c("RLB", "Ridge", "Lasso")
Adj.R2 <- c( 0.797, 0.8050,0.8048)
RSE<- c(0.4657,0.4671,0.4677)
# Membuat data frame
data_tabel <- data.frame(Metode = metode, `Adjusted R-Square` = Adj.R2, RSE=RSE)
data_tabel

##   Metode Adjusted.R.Square    RSE
## 1    RLB            0.7970 0.4657
## 2  Ridge            0.8050 0.4671
## 3  Lasso            0.8048 0.4677

Berdasarkan perbandingan 3 metode diperoleh model terbaik yaitu Ridge Regression dengan Adjusted R-Square terbesar senilai 0.8050 dan RSE terkecil senilai 0.4671.

Interpretasi Model

Model yang terbentuk adalah : \[-1.19093187+0.24922223X_1+0.01141737X_2+3.43457896X_3+1.90803971X_4+0.08135685X_5-0.80080299X_6\]

PDB per kapita (X1),Dukungan sosial(X3),Kebebasan dalam menentukan pilihan hidup(X4) secara signifikan memengaruhi Skor Kebahagiaan (Y). Sedangkan Persepsi korupsi(X6), Harapan hidup sehat(X2), dan Kemurahan hati(X5) teruji tidak signifikan. Namun pada Regresi Ridge peubah yang tidak signifikan tetap dimasukkan dalam model. Koefisien regresi X1,X2,X3,X4,X5 bernilai positif sehingga semakin tinggi nilai peubah-peubah tersebut maka akan meningkatkan skor kebahagiaan. Sedangkan semakin tinggi nilai peubah X6 maka akan menurunkan skor kebahagiaan.