Kelompok 6

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(mvtnorm)
library(MBESS)
## Warning: package 'MBESS' was built under R version 4.2.3
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.2.3
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.2.3

\[ y = 3 + x_{1}+10x_{2}+0x_{3}-2x_{4}+0x_{5}+\epsilon \] dengan $ $ ~ N(0,2) dan $ x_{j} $ ~ N(0,1) #### Membangkitkan Data

set.seed(1234)
n <- 1000
betaa <- matrix(c(3,1,10,0,-2,0),nrow = 6,ncol = 1)
epsilon <- rnorm(n,mean=0,sd=2)
X <- replicate(5,rnorm(n,mean=0,sd=1))
Xgab <- cbind(1,X)
y <- Xgab%*%betaa+epsilon
datamodel <- data.frame(y,X)

Regresi

regresi <- lm(y~., data=datamodel)
summary(regresi)
## 
## Call:
## lm(formula = y ~ ., data = datamodel)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6562 -1.2780 -0.0338  1.3120  6.3482 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.938611   0.063212  46.488   <2e-16 ***
## X1           1.119032   0.064370  17.384   <2e-16 ***
## X2          10.035844   0.062377 160.890   <2e-16 ***
## X3          -0.003835   0.063512  -0.060    0.952    
## X4          -2.083558   0.064980 -32.065   <2e-16 ***
## X5           0.046863   0.064892   0.722    0.470    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.994 on 994 degrees of freedom
## Multiple R-squared:  0.9642, Adjusted R-squared:  0.9641 
## F-statistic:  5361 on 5 and 994 DF,  p-value: < 2.2e-16
Ridge
ridge_model1 <- glmnet(X, y, alpha = 0,data=datamodel)
  # Cross-validation to find the best lambda
cv_ridge1 <- cv.glmnet(X, y, alpha = 0,data=datamodel)
best_lambda1 <- cv_ridge1$lambda.min
# Build the final model using the best lambda
modelRidge1 <- glmnet(X, y, alpha = 0, lambda = best_lambda1,data=datamodel)

coef(modelRidge1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
##                      s0
## (Intercept)  2.97391782
## V1           0.98623246
## V2           9.15027341
## V3          -0.01506363
## V4          -1.87815479
## V5           0.05051385

Beta 3 dan Beta 5 tidak signifikan maka berarti bernilai 0

Melakukan Pengulangan Regresi

n <- 1000
ulangan <- 100
betaa <- matrix(c(3,1,10,0,-2,0),nrow = 6,ncol = 1)

koef_ulangan_regresi <- NULL

set.seed(1234)

for(i in seq(ulangan)){
  epsiloni <- rnorm(n,mean=0,sd=2)
  Xi <- replicate(5,rnorm(n,mean=0,sd=1))
  Xgab <- cbind(1,Xi)
  yi <- Xgab%*%betaa+epsiloni
  dataregresii <- data.frame(yi,Xi)
  colnames(dataregresii) <-c("Y","X1","X2","X3","X4","X5")
  modelRegresi <- lm(Y~.,data=dataregresii)
  koef_ulangan_regresi <- rbind(koef_ulangan_regresi,coef(modelRegresi))
}


head(koef_ulangan_regresi)
##      (Intercept)        X1        X2            X3        X4          X5
## [1,]    2.938611 1.1190325 10.035844 -0.0038349925 -2.083558  0.04686346
## [2,]    2.993374 0.9249880  9.974123 -0.0058226133 -2.159328  0.01133593
## [3,]    3.000601 0.9188709  9.966301  0.1330709367 -2.005644 -0.04573861
## [4,]    2.980008 0.9439232 10.055721 -0.0146016785 -1.917212  0.06669851
## [5,]    3.001763 1.0083136  9.997743 -0.0473483804 -1.944890 -0.01738733
## [6,]    2.963483 0.9806898 10.078451  0.0005788618 -1.929531 -0.04193313
Melakukan Pengulangan Ridge
n <- 1000
ulangan <- 100
betaa <- matrix(c(3,1,10,0,-2,0),nrow = 6,ncol = 1)

koef_ulangan_ridge <- NULL

set.seed(1234)

for(i in seq(ulangan)){
  epsilon <- rnorm(n,mean=0,sd=2)
  X <- replicate(5,rnorm(n,mean=0,sd=1))
  Xgab <- cbind(1,X)
  y <- Xgab%*%betaa+epsilon
  ridge_model <- glmnet(X, y, alpha = 0)
  # Cross-validation to find the best lambda
  cv_ridge <- cv.glmnet(X, y, alpha = 0)
  best_lambda <- cv_ridge$lambda.min
  # Build the final model using the best lambda
  modelRidge <- glmnet(X, y, alpha = 0, lambda = best_lambda)
  coef_ridge <- as.matrix(coef(modelRidge))
  coef_df <- as.data.frame(t(coef_ridge))
  koef_ulangan_ridge <- rbind(koef_ulangan_ridge,coef_df)
}

head(koef_ulangan_ridge)
##     (Intercept)        V1       V2           V3        V4           V5
## s0     2.973918 0.9862325 9.150273 -0.015063629 -1.878155  0.050513848
## s01    3.046271 0.7661604 9.149582  0.055031751 -1.753801 -0.037158051
## s02    3.028510 0.8496726 9.046492  0.028898697 -1.803013  0.091560139
## s03    2.934823 0.9481672 9.231119 -0.012795932 -1.758744  0.128228378
## s04    2.918852 0.9130762 9.137625 -0.034434610 -1.836582 -0.004257273
## s05    2.974383 0.9968178 9.151185  0.007138518 -1.789862  0.036559747
Bias Regresi
beta <- as.data.frame(t(betaa))
colnames(beta) <- c("B0","B1","B2","B3","B4","B5")
biasRegresi <- colMeans(koef_ulangan_regresi)-beta
biasRegresi
##             B0           B1           B2           B3           B4          B5
## 1 0.0005749869 -0.002011822 -0.002314742 -0.004011255 -0.006237716 0.005669872
Relative Bias Regresi
rel_bias_regresi <- biasRegresi/beta 
rel_bias_regresi
##             B0           B1            B2   B3          B4  B5
## 1 0.0001916623 -0.002011822 -0.0002314742 -Inf 0.003118858 Inf
Konsisten Regresi
sdRegresi <- apply(koef_ulangan_regresi,2,sd)
Konsisten <- sdRegresi^2
Konsisten <- as.data.frame(t(as.matrix(Konsisten)))
Konsisten
##   (Intercept)          X1          X2          X3          X4          X5
## 1 0.004591028 0.004287043 0.003912072 0.003690721 0.005386647 0.005322408
Bias Ridge
beta <- as.data.frame(t(betaa))
colnames(beta) <- c("B0","B1","B2","B3","B4","B5")
biasRidge <- colMeans(koef_ulangan_ridge)-beta
biasRidge
##             B0          B1         B2          B3       B4          B5
## 1 -0.002984019 -0.08744651 -0.8791029 -0.00288925 0.173209 0.007289323
Relative Bias Regresi
rel_bias_ridge <- biasRidge/beta 
rel_bias_ridge
##              B0          B1          B2   B3         B4  B5
## 1 -0.0009946731 -0.08744651 -0.08791029 -Inf -0.0866045 Inf
Konsisten Ridge
sdRidge <- apply(koef_ulangan_ridge,2,sd)
Konsistenridge <- sdRidge^2
Konsistenridge <- as.data.frame(t(as.matrix(Konsistenridge)))
Konsistenridge
##   (Intercept)          V1          V2          V3          V4          V5
## 1 0.005320761 0.004682268 0.003640972 0.005058895 0.003130487 0.003582612
analisis1 <- cbind(t(biasRegresi),t(rel_bias_regresi),t(Konsisten))
colnames(analisis1) <- c("Bias","Relative Bias","Konsisten")
analisis1
##             Bias Relative Bias   Konsisten
## B0  0.0005749869  0.0001916623 0.004591028
## B1 -0.0020118216 -0.0020118216 0.004287043
## B2 -0.0023147420 -0.0002314742 0.003912072
## B3 -0.0040112547          -Inf 0.003690721
## B4 -0.0062377161  0.0031188581 0.005386647
## B5  0.0056698723           Inf 0.005322408
analisis2 <- cbind(t(biasRidge),t(rel_bias_ridge),t(Konsistenridge))
colnames(analisis1) <- c("Bias","Relative Bias","Konsisten")
analisis2
##            [,1]          [,2]        [,3]
## B0 -0.002984019 -0.0009946731 0.005320761
## B1 -0.087446506 -0.0874465061 0.004682268
## B2 -0.879102908 -0.0879102908 0.003640972
## B3 -0.002889250          -Inf 0.005058895
## B4  0.173208998 -0.0866044991 0.003130487
## B5  0.007289323           Inf 0.003582612
analisis <- as.data.frame(rbind(analisis1,analisis2))

analisis
##               Bias Relative Bias   Konsisten
## B0    0.0005749869  0.0001916623 0.004591028
## B1   -0.0020118216 -0.0020118216 0.004287043
## B2   -0.0023147420 -0.0002314742 0.003912072
## B3   -0.0040112547          -Inf 0.003690721
## B4   -0.0062377161  0.0031188581 0.005386647
## B5    0.0056698723           Inf 0.005322408
## B0.1 -0.0029840192 -0.0009946731 0.005320761
## B1.1 -0.0874465061 -0.0874465061 0.004682268
## B2.1 -0.8791029080 -0.0879102908 0.003640972
## B3.1 -0.0028892501          -Inf 0.005058895
## B4.1  0.1732089981 -0.0866044991 0.003130487
## B5.1  0.0072893232           Inf 0.003582612

Interpretasi Hasil dan Menjawab Pertanyaan

A. Apakah OLS dan Ridge tak Bias?

Jawabannya : Tidak, karena metode OLS tidak ada penduga yang bias tetapi untuk metode ridge memiliki penduga yang bias pada beta 2 dan beta 4 dikarenakan nilainya cukup besar tidak mendekati 0.

B. Apakah OLS Lebih Efisen dari Ridge?

Jawabannya : Iya, karena nilai dugaan dari OLS lebih mendekati dengan parameter aktual dibandingkan dengan metode Ridge

C. Apakah OLS dan Ridge Konsisten ?

Jawabannya : Melihat dari nilai kosisten masing masing metode bernilai kecil dan masing masing memiliki nilai yang hampir mirip maka dari situ OLS dan Ridge merupakan penduga yang konsisten

D. Berdasarkan a-c manakah penduga yang lebih baik? Mengapa demikian?

Jawabannya : Setelah melakukan prosedur yang menjawab A-C, metode yang lebih baik untuk soal ini merupakan metode OLS sebab memiliki relative bias yang lebih kecil dibandingkan Ridge, dan OLS lebih tak bias dibandingkan dengan Ridge.