#Memanggil Packages
library(car)
## Loading required package: carData
library(MASS)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(readxl)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Read the input data from the Excel workbook
data <- read_excel(path = "C:/Users/Asus/Downloads/tugasindividu.xlsx")
data
## # A tibble: 27 × 5
## daerah y x1 x2 x3
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Kabupaten Bogor 5.2 53.0 48.6 89.8
## 2 Kabupaten Sukabumi 3.3 82.3 72.8 92.2
## 3 Kabupaten Cianjur 2.8 87.2 71.8 98.2
## 4 Kabupaten Bandung 4.2 95.4 66.2 98.2
## 5 Kabupaten Garut 2.5 89.1 74.0 97.7
## 6 Kabupaten Tasikmalaya 4.6 90.5 51.0 85
## 7 Kabupaten Ciamis 2.9 93.8 80.4 99.3
## 8 Kabupaten Kuningan 3.6 77 76.3 107.
## 9 Kabupaten Cirebon 4.8 81.4 70.9 96.7
## 10 Kabupaten Majalengka 3 92.9 80.3 99.2
## # ℹ 17 more rows
# Classical (ordinary least squares) regression of y on x1, x2 and x3
model <- lm(formula = y ~ x1 + x2 + x3, data = data)
# Keep the summary object; later steps read values from it
modell <- summary(object = model)
modell
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4864 -0.5304 -0.1503 0.7042 2.7747
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.51932 4.21961 1.545 0.1360
## x1 -0.02633 0.03074 -0.856 0.4006
## x2 -0.04049 0.02278 -1.778 0.0887 .
## x3 0.02441 0.03252 0.751 0.4605
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.364 on 23 degrees of freedom
## Multiple R-squared: 0.1822, Adjusted R-squared: 0.07555
## F-statistic: 1.708 on 3 and 23 DF, p-value: 0.1932
model regresi klasik yang didapatkan yaitu yduga = 6.51932 -0.02633x1 -0.04049x2 + 0.02441x3 dengan R-Square = 0.1822
# R-squared of the classical model, read from the fitted summary instead of
# being typed in by hand, so it is not rounded and stays in sync with the fit
rsqmodel <- modell$r.squared
# Residual standard error (RSE) of the classical model
rsemodel <- modell$sigma
rsemodel
## [1] 1.364007
didapatkan rse model = 1.364007
# Ridge regression
# glmnet takes a numeric predictor matrix and a response vector
x <- data.matrix(data[, c("x1", "x2", "x3")])
y <- data$y
# cv.glmnet assigns observations to folds at random; fix the seed so the
# selected lambda (and every number derived from it) is reproducible
set.seed(123)
cv.r <- cv.glmnet(x, y, alpha = 0)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold
# Refit the ridge model at the lambda stored as lambda.min by cv.glmnet
best.lr <- cv.r$lambda.min
bestridge <- glmnet(x = x, y = y, alpha = 0, lambda = best.lr)
coef(bestridge)
## 4 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 6.05678112
## x1 -0.02069593
## x2 -0.02607311
## x3 0.01378371
model regresi ridge yang didapatkan yaitu yduga = 6.05678112 - 0.02069593x1 - 0.02607311x2 + 0.01378371x3
# Coefficient of determination (R-squared) for a fitted model.
#   bestmodel  : fitted model object handed to predict()
#   bestlambda : penalty value passed to predict() as `s`
#   x          : predictor matrix passed to predict() as `newx`
#   y          : observed response values
# Returns 1 - SSE / SST, where SSE is the sum of squared prediction errors
# and SST is the total sum of squares of y around its mean.
rsq <- function(bestmodel, bestlambda, x, y) {
  fitted_vals <- predict(bestmodel, s = bestlambda, newx = x)
  ss_error <- sum((fitted_vals - y)^2)
  ss_total <- sum((y - mean(y))^2)
  return(1 - ss_error / ss_total)
}
# R-squared of the ridge fit on the training data
R2modelridge <- rsq(bestmodel = bestridge, bestlambda = best.lr, x = x, y = y)
R2modelridge
## [1] 0.1620193
didapatkan R-square model ridge sebesar 0.1620193
# RSE of the ridge model
trainpredictionsr <- predict(bestridge, newx = x)
# Residuals on the training data
residualsr <- y - trainpredictionsr
# Residual degrees of freedom: n minus the number of estimated coefficients,
# i.e. the non-zero slopes reported by glmnet (bestridge$df) plus the
# intercept. The intercept must be counted so this RSE follows the same
# n - p - 1 convention as summary(lm)$sigma and the models compare fairly.
dfr <- length(y) - (bestridge$df + 1)
residualvariancer <- sum(residualsr^2) / dfr
# RSE is the square root of the residual variance
rsemodelridge <- sqrt(residualvariancer)
rsemodelridge
## [1] 1.351677
didapatkan bahwa RSE Model Ridge = 1.351677
# Lasso regression
# Fix the seed: cv.glmnet splits the data into folds at random, so without it
# the selected lambda changes from run to run
set.seed(123)
cv.l <- cv.glmnet(x, y, alpha = 1)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold
# Refit the lasso model at the lambda stored as lambda.min by cv.glmnet
best.ll <- cv.l$lambda.min
bestlasso <- glmnet(x = x, y = y, alpha = 1, lambda = best.ll)
coef(bestlasso)
## 4 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 6.52079370
## x1 -0.02615551
## x2 -0.04026595
## x3 0.02407810
didapatkan model regresi lasso yaitu yduga = 6.52079370 -0.02615551x1 -0.04026595x2 + 0.02407810x3
# R-squared of the lasso fit on the training data
R2modellasso <- rsq(bestmodel = bestlasso, bestlambda = best.ll, x = x, y = y)
R2modellasso
## [1] 0.1822088
didapatkan rsquare model lasso yaitu 0.1822088
# RSE of the lasso model
trainpredictionsLasso <- predict(bestlasso, newx = x)
# Residuals on the training data
residualsLasso <- y - trainpredictionsLasso
# Residual degrees of freedom: n minus the number of estimated coefficients,
# i.e. the non-zero slopes reported by glmnet (bestlasso$df) plus the
# intercept. The intercept must be counted so this RSE follows the same
# n - p - 1 convention as summary(lm)$sigma and the models compare fairly.
dfLasso <- length(y) - (bestlasso$df + 1)
residualvarianceLasso <- sum(residualsLasso^2) / dfLasso
# RSE is the square root of the residual variance
rsemodelLasso <- sqrt(residualvarianceLasso)
rsemodelLasso
## [1] 1.335295
didapatkan bahwa rse model lasso = 1.335295
# Compare all regression models: one row per model, with R-squared and RSE
# as columns. The values are listed column by column, hence byrow = FALSE.
banding <- matrix(
  c(
    rsqmodel, R2modelridge, R2modellasso,
    rsemodel, rsemodelridge, rsemodelLasso
  ),
  ncol = 2,
  byrow = FALSE  # spelled out: `F` is an ordinary variable and can be reassigned
)
row.names(banding) <- c("Model Klasik", "Model Ridge", "Model Lasso")
colnames(banding) <- c("R-Squared", "RSE")
banding
## R-Squared RSE
## Model Klasik 0.1822000 1.364007
## Model Ridge 0.1620193 1.351677
## Model Lasso 0.1822088 1.335295
Perbandingan dilihat dari model yang memiliki nilai RSE terendah, yaitu Model Lasso yang memiliki nilai RSE sebesar 1.335295. Oleh karena itu, model terbaiknya adalah Model Lasso. Selain itu, bisa dilihat juga bahwa nilai R-Squared yang paling tinggi pada tabel adalah 0.1822088 (Model Lasso), yang praktis sama dengan R-Squared model klasik (0.1822, nilai yang sudah dibulatkan).
# Best model: coefficients of the selected lasso fit
coef(object = bestlasso)
## 4 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 6.52079370
## x1 -0.02615551
## x2 -0.04026595
## x3 0.02407810
yduga = 6.52079370 - 0.02615551x1 - 0.04026595x2 + 0.02407810x3 -> nilai dugaan rata-rata prevalensi balita kurang gizi sebesar 6.52079370 jika semua peubah bebas bernilai 0
-> prevalensi balita kurang gizi akan meningkat jika Bayi Baru Lahir Mendapat IMD berkurang, Bayi Diberi ASI Eksklusif berkurang, dan Balita Mendapat Vitamin A bertambah