library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(mvtnorm)
library(MBESS)
## Warning: package 'MBESS' was built under R version 4.2.3
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.2.3
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.2.3
\[ y = 3 + x_{1}+10x_{2}+0x_{3}-2x_{4}+0x_{5}+\epsilon \] dengan $ $ ~ N(0,2) dan $ x_{j} $ ~ N(0,1) #### Membangkitkan Data
set.seed(1234)
n <- 1000
betaa <- matrix(c(3,1,10,0,-2,0),nrow = 6,ncol = 1)
epsilon <- rnorm(n,mean=0,sd=2)
X <- replicate(5,rnorm(n,mean=0,sd=1))
Xgab <- cbind(1,X)
y <- Xgab%*%betaa+epsilon
datamodel <- data.frame(y,X)
regresi <- lm(y~., data=datamodel)
summary(regresi)
##
## Call:
## lm(formula = y ~ ., data = datamodel)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6562 -1.2780 -0.0338 1.3120 6.3482
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.938611 0.063212 46.488 <2e-16 ***
## X1 1.119032 0.064370 17.384 <2e-16 ***
## X2 10.035844 0.062377 160.890 <2e-16 ***
## X3 -0.003835 0.063512 -0.060 0.952
## X4 -2.083558 0.064980 -32.065 <2e-16 ***
## X5 0.046863 0.064892 0.722 0.470
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.994 on 994 degrees of freedom
## Multiple R-squared: 0.9642, Adjusted R-squared: 0.9641
## F-statistic: 5361 on 5 and 994 DF, p-value: < 2.2e-16
ridge_model1 <- glmnet(X, y, alpha = 0,data=datamodel)
# Cross-validation to find the best lambda
cv_ridge1 <- cv.glmnet(X, y, alpha = 0,data=datamodel)
best_lambda1 <- cv_ridge1$lambda.min
# Build the final model using the best lambda
modelRidge1 <- glmnet(X, y, alpha = 0, lambda = best_lambda1,data=datamodel)
coef(modelRidge1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 2.97391782
## V1 0.98623246
## V2 9.15027341
## V3 -0.01506363
## V4 -1.87815479
## V5 0.05051385
Beta 3 dan Beta 5 tidak signifikan maka berarti bernilai 0
n <- 1000
ulangan <- 100
betaa <- matrix(c(3,1,10,0,-2,0),nrow = 6,ncol = 1)
koef_ulangan_regresi <- NULL
set.seed(1234)
for(i in seq(ulangan)){
epsiloni <- rnorm(n,mean=0,sd=2)
Xi <- replicate(5,rnorm(n,mean=0,sd=1))
Xgab <- cbind(1,Xi)
yi <- Xgab%*%betaa+epsiloni
dataregresii <- data.frame(yi,Xi)
colnames(dataregresii) <-c("Y","X1","X2","X3","X4","X5")
modelRegresi <- lm(Y~.,data=dataregresii)
koef_ulangan_regresi <- rbind(koef_ulangan_regresi,coef(modelRegresi))
}
head(koef_ulangan_regresi)
## (Intercept) X1 X2 X3 X4 X5
## [1,] 2.938611 1.1190325 10.035844 -0.0038349925 -2.083558 0.04686346
## [2,] 2.993374 0.9249880 9.974123 -0.0058226133 -2.159328 0.01133593
## [3,] 3.000601 0.9188709 9.966301 0.1330709367 -2.005644 -0.04573861
## [4,] 2.980008 0.9439232 10.055721 -0.0146016785 -1.917212 0.06669851
## [5,] 3.001763 1.0083136 9.997743 -0.0473483804 -1.944890 -0.01738733
## [6,] 2.963483 0.9806898 10.078451 0.0005788618 -1.929531 -0.04193313
n <- 1000
ulangan <- 100
betaa <- matrix(c(3,1,10,0,-2,0),nrow = 6,ncol = 1)
koef_ulangan_ridge <- NULL
set.seed(1234)
for(i in seq(ulangan)){
epsilon <- rnorm(n,mean=0,sd=2)
X <- replicate(5,rnorm(n,mean=0,sd=1))
Xgab <- cbind(1,X)
y <- Xgab%*%betaa+epsilon
ridge_model <- glmnet(X, y, alpha = 0)
# Cross-validation to find the best lambda
cv_ridge <- cv.glmnet(X, y, alpha = 0)
best_lambda <- cv_ridge$lambda.min
# Build the final model using the best lambda
modelRidge <- glmnet(X, y, alpha = 0, lambda = best_lambda)
coef_ridge <- as.matrix(coef(modelRidge))
coef_df <- as.data.frame(t(coef_ridge))
koef_ulangan_ridge <- rbind(koef_ulangan_ridge,coef_df)
}
head(koef_ulangan_ridge)
## (Intercept) V1 V2 V3 V4 V5
## s0 2.973918 0.9862325 9.150273 -0.015063629 -1.878155 0.050513848
## s01 3.046271 0.7661604 9.149582 0.055031751 -1.753801 -0.037158051
## s02 3.028510 0.8496726 9.046492 0.028898697 -1.803013 0.091560139
## s03 2.934823 0.9481672 9.231119 -0.012795932 -1.758744 0.128228378
## s04 2.918852 0.9130762 9.137625 -0.034434610 -1.836582 -0.004257273
## s05 2.974383 0.9968178 9.151185 0.007138518 -1.789862 0.036559747
beta <- as.data.frame(t(betaa))
colnames(beta) <- c("B0","B1","B2","B3","B4","B5")
biasRegresi <- colMeans(koef_ulangan_regresi)-beta
biasRegresi
## B0 B1 B2 B3 B4 B5
## 1 0.0005749869 -0.002011822 -0.002314742 -0.004011255 -0.006237716 0.005669872
rel_bias_regresi <- biasRegresi/beta
rel_bias_regresi
## B0 B1 B2 B3 B4 B5
## 1 0.0001916623 -0.002011822 -0.0002314742 -Inf 0.003118858 Inf
sdRegresi <- apply(koef_ulangan_regresi,2,sd)
Konsisten <- sdRegresi^2
Konsisten <- as.data.frame(t(as.matrix(Konsisten)))
Konsisten
## (Intercept) X1 X2 X3 X4 X5
## 1 0.004591028 0.004287043 0.003912072 0.003690721 0.005386647 0.005322408
beta <- as.data.frame(t(betaa))
colnames(beta) <- c("B0","B1","B2","B3","B4","B5")
biasRidge <- colMeans(koef_ulangan_ridge)-beta
biasRidge
## B0 B1 B2 B3 B4 B5
## 1 -0.002984019 -0.08744651 -0.8791029 -0.00288925 0.173209 0.007289323
rel_bias_ridge <- biasRidge/beta
rel_bias_ridge
## B0 B1 B2 B3 B4 B5
## 1 -0.0009946731 -0.08744651 -0.08791029 -Inf -0.0866045 Inf
sdRidge <- apply(koef_ulangan_ridge,2,sd)
Konsistenridge <- sdRidge^2
Konsistenridge <- as.data.frame(t(as.matrix(Konsistenridge)))
Konsistenridge
## (Intercept) V1 V2 V3 V4 V5
## 1 0.005320761 0.004682268 0.003640972 0.005058895 0.003130487 0.003582612
analisis1 <- cbind(t(biasRegresi),t(rel_bias_regresi),t(Konsisten))
colnames(analisis1) <- c("Bias","Relative Bias","Konsisten")
analisis1
## Bias Relative Bias Konsisten
## B0 0.0005749869 0.0001916623 0.004591028
## B1 -0.0020118216 -0.0020118216 0.004287043
## B2 -0.0023147420 -0.0002314742 0.003912072
## B3 -0.0040112547 -Inf 0.003690721
## B4 -0.0062377161 0.0031188581 0.005386647
## B5 0.0056698723 Inf 0.005322408
analisis2 <- cbind(t(biasRidge),t(rel_bias_ridge),t(Konsistenridge))
colnames(analisis1) <- c("Bias","Relative Bias","Konsisten")
analisis2
## [,1] [,2] [,3]
## B0 -0.002984019 -0.0009946731 0.005320761
## B1 -0.087446506 -0.0874465061 0.004682268
## B2 -0.879102908 -0.0879102908 0.003640972
## B3 -0.002889250 -Inf 0.005058895
## B4 0.173208998 -0.0866044991 0.003130487
## B5 0.007289323 Inf 0.003582612
analisis <- as.data.frame(rbind(analisis1,analisis2))
analisis
## Bias Relative Bias Konsisten
## B0 0.0005749869 0.0001916623 0.004591028
## B1 -0.0020118216 -0.0020118216 0.004287043
## B2 -0.0023147420 -0.0002314742 0.003912072
## B3 -0.0040112547 -Inf 0.003690721
## B4 -0.0062377161 0.0031188581 0.005386647
## B5 0.0056698723 Inf 0.005322408
## B0.1 -0.0029840192 -0.0009946731 0.005320761
## B1.1 -0.0874465061 -0.0874465061 0.004682268
## B2.1 -0.8791029080 -0.0879102908 0.003640972
## B3.1 -0.0028892501 -Inf 0.005058895
## B4.1 0.1732089981 -0.0866044991 0.003130487
## B5.1 0.0072893232 Inf 0.003582612
A. Apakah OLS dan Ridge tak Bias?
Jawabannya : Tidak, karena metode OLS tidak ada penduga yang bias tetapi untuk metode ridge memiliki penduga yang bias pada beta 2 dan beta 4 dikarenakan nilainya cukup besar tidak mendekati 0.
B. Apakah OLS Lebih Efisen dari Ridge?
Jawabannya : Iya, karena nilai dugaan dari OLS lebih mendekati dengan parameter aktual dibandingkan dengan metode Ridge
C. Apakah OLS dan Ridge Konsisten ?
Jawabannya : Melihat dari nilai kosisten masing masing metode bernilai kecil dan masing masing memiliki nilai yang hampir mirip maka dari situ OLS dan Ridge merupakan penduga yang konsisten
D. Berdasarkan a-c manakah penduga yang lebih baik? Mengapa demikian?
Jawabannya : Setelah melakukan prosedur yang menjawab A-C, metode yang lebih baik untuk soal ini merupakan metode OLS sebab memiliki relative bias yang lebih kecil dibandingkan Ridge, dan OLS lebih tak bias dibandingkan dengan Ridge.