library(readxl)
# Read the "ini" sheet of the Excel workbook into `data`.
# NOTE(review): the path is an absolute, machine-specific location, so the
# script only runs where this exact file exists -- confirm before sharing.
data <- read_excel("C:/Users/ASUS/Downloads/insurance.xlsx", sheet = "ini")
# Auto-print the loaded table, then show its column structure.
data
str(data)
tibble [150 × 5] (S3: tbl_df/tbl/data.frame)
$ Hours Studied : num [1:150] 7 4 8 5 7 3 7 8 5 4 ...
$ Previous Scores : num [1:150] 99 82 51 52 75 78 73 45 77 89 ...
$ Sleep Hours : num [1:150] 9 4 7 5 8 9 5 4 8 4 ...
$ Sample Question Papers Practiced: num [1:150] 1 2 2 2 5 6 6 6 2 1 ...
$ Index : num [1:150] 9.1 6.5 4.5 3.6 6.6 6.1 6.3 4.2 6.1 6.9 ...
# Classical multiple linear regression of Index on the four predictors.
# `model_lm` is summarised here but is not assigned anywhere in the visible
# script, so the fit is reproduced from the Call echoed in the summary output.
# NOTE(review): the formula addresses columns as data$...; that form is kept
# so the coefficient labels stay identical to the printed summary.
model_lm <- lm(
  data$Index ~ data$`Hours Studied` + data$`Previous Scores` +
    data$`Sleep Hours` + data$`Sample Question Papers Practiced`,
  data = data
)
summary(model_lm)
Call:
lm(formula = data$Index ~ data$`Hours Studied` + data$`Previous Scores` +
data$`Sleep Hours` + data$`Sample Question Papers Practiced`,
data = data)
Residuals:
Min 1Q Median 3Q Max
-0.48396 -0.12654 0.01592 0.14003 0.38801
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.4876985 0.0990613 -35.207 < 2e-16 ***
data$`Hours Studied` 0.2942557 0.0059664 49.319 < 2e-16 ***
data$`Previous Scores` 0.1019539 0.0009303 109.593 < 2e-16 ***
data$`Sleep Hours` 0.0477104 0.0086621 5.508 1.61e-07 ***
data$`Sample Question Papers Practiced` 0.0278128 0.0058683 4.739 5.06e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.191 on 145 degrees of freedom
Multiple R-squared: 0.9907, Adjusted R-squared: 0.9904
F-statistic: 3858 on 4 and 145 DF, p-value: < 2.2e-16
\[R^2 = 0.9907 \] \[ \hat{Y}= -3.4876985 + 0.2942557X_1 + 0.1019539X_2 + 0.0477104X_3 + 0.0278128X_4 \]
library(glmnet)
# Ridge regression (alpha = 0).
# Predictor matrix: one unnamed column per explanatory variable, in the order
# Hours Studied, Previous Scores, Sleep Hours, Sample Question Papers Practiced.
x <- matrix(
  c(
    data$`Hours Studied`,
    data$`Previous Scores`,
    data$`Sleep Hours`,
    data$`Sample Question Papers Practiced`
  ),
  ncol = 4
)
# Response vector.
y <- data$Index

# Cross-validate over the lambda path and plot the cross-validation curve.
# NOTE(review): no seed is set before cv.glmnet, so the fold assignment (and
# therefore lambda.min) may differ between runs.
cv.r <- cv.glmnet(x, y, alpha = 0)
plot(cv.r)

# Lambda selected by cross-validation (the `lambda.min` component).
best.lr <- cv.r$lambda.min

# Refit at that single lambda and display the coefficients.
bestridge <- glmnet(x, y, alpha = 0, lambda = best.lr)
coef(bestridge)
5 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) -2.71314293
V1 0.27337114
V2 0.09361693
V3 0.03812178
V4 0.02479587
# R-squared helper.
# Returns 1 - SSE/SST for the predictions that `bestmodel` produces on `x`
# at penalty `bestlambda`, measured against the observed response `y`.
#   bestmodel  fitted model object passed to predict()
#   bestlambda value forwarded to predict() as `s`
#   x          predictor matrix forwarded to predict() as `newx`
#   y          observed response values
rsq <- function(bestmodel, bestlambda, x, y) {
  # Predicted response
  fitted_y <- predict(bestmodel, s = bestlambda, newx = x)
  # Error sum of squares (JKG) and total sum of squares (JKT)
  ss_error <- sum((fitted_y - y)^2)
  ss_total <- sum((y - mean(y))^2)
  # Coefficient of determination
  1 - ss_error / ss_total
}
# R-squared of the ridge fit on the training data
rsq(bestmodel = bestridge, bestlambda = best.lr, x = x, y = y)
[1] 0.9843622
\[ R^2 = 0.9844 \] \[ \hat{Y}= -2.71314293 + 0.27337114X_1 + 0.09361693X_2 + 0.03812178X_3 + 0.02479587X_4 \]
# Lasso regression (alpha = 1).
# Cross-validate over the lambda path and plot the cross-validation curve.
# NOTE(review): no seed is set before cv.glmnet, so the fold assignment (and
# therefore lambda.min) may differ between runs.
cv.l <- cv.glmnet(x, y, alpha = 1)
plot(cv.l)

# Lambda selected by cross-validation (the `lambda.min` component).
best.ll <- cv.l$lambda.min

# Refit at that single lambda and display the coefficients.
bestlasso <- glmnet(x, y, alpha = 1, lambda = best.ll)
coef(bestlasso)
5 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) -3.4132745
V1 0.2919490
V2 0.1015992
V3 0.0436682
V4 0.0252132
# R-squared of the lasso fit on the training data
rsq(bestmodel = bestlasso, bestlambda = best.ll, x = x, y = y)
[1] 0.9906484
\[ R^2 = 0.9906 \]
\[ \hat{Y}= -3.4132745 + 0.2919490X_1 + 0.1015992X_2 + 0.0436682X_3 + 0.0252132X_4 \]
# Collect the training-data R-squared of the three fitted models.
# The classical value is taken from the fitted lm object rather than typed in
# by hand, so it is unrounded like the ridge/lasso values and cannot go stale
# when the data change.
rsqklasik <- summary(model_lm)$r.squared
rsqridge <- rsq(bestridge, best.lr, x, y)
rsqlasso <- rsq(bestlasso, best.ll, x, y)
# One row per model; row names identify which value belongs to which model.
# The values remain the first (and only) column of the data frame.
rsqgab <- data.frame(
  rsq = c(rsqklasik, rsqridge, rsqlasso),
  row.names = c("klasik", "ridge", "lasso")
)
# Display the comparison table that the conclusion refers to.
rsqgab
Terlihat dari perbandingan di atas bahwa R-square terbesar diperoleh pada model klasik. Maka, model terbaiknya adalah \[ \hat{Y}= -3.4876985 + 0.2942557X_1 + 0.1019539X_2 + 0.0477104X_3 + 0.0278128X_4 \]