Group 6

Packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loaded glmnet 4.1-8

Initial Definitions

set.seed(123)
n_obs <- c(15,30,60,100,10000) # number of observations
beta <- matrix(c(3,1,10,0,-2,0), nrow=6,ncol=1)
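
For reference, the beta vector above encodes the data-generating model used in every simulation below:

Y = 3 + 1*X1 + 10*X2 + 0*X3 - 2*X4 + 0*X5 + epsilon,   epsilon ~ N(0, 2^2)

so X3 and X5 are pure noise predictors (their true coefficients are 0).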

OLS Estimator

simulate_ols <- function(n, n_sim=1000) {
  set.seed(123)  # Set seed for reproducibility
  
  # Initialize a matrix to store the results
  ols_results <- matrix(NA, nrow=n_sim,ncol=6)
  colnames(ols_results) <- c("Intercept", "Beta1", "Beta2", "Beta3", "Beta4", "Beta5")
  
  for(i in 1:n_sim){
    X1 <- rnorm(n,mean=0,sd=1)
    X2 <- rnorm(n,mean=0,sd=1)
    X3 <- rnorm(n,mean=0,sd=1)
    X4 <- rnorm(n,mean=0,sd=1)
    X5 <- rnorm(n,mean=0,sd=1)
    epsilon <- rnorm(n,mean=0,sd=2)
    Y <- beta[1]+beta[2]*X1+beta[3]*X2+beta[4]*X3+beta[5]*X4+beta[6]*X5+epsilon
    
    # Fit the linear regression model
    model <- lm(Y~X1+X2+X3+X4+X5)
    
    # Store the OLS coefficients
    ols_results[i, ] <- coef(model)
  }
  
  return(ols_results)
}
  
  
# OLS for each sample size
ols_results_list <- lapply(n_obs, simulate_ols)

# Label the results by sample size
names(ols_results_list) <- n_obs

# Average estimated coefficients across simulations, for each sample size
n15 <- colMeans(ols_results_list$'15')
n30 <- colMeans(ols_results_list$'30')
n60 <- colMeans(ols_results_list$'60')
n100 <- colMeans(ols_results_list$'100')
n10000 <- colMeans(ols_results_list$'10000')
koefisien_ols <- cbind.data.frame(n15,n30,n60,n100,n10000)
koefisien_ols
##                   n15          n30          n60         n100        n10000
## Intercept  2.96072770  3.005288316  3.008401004  3.015302917  3.0002401249
## Beta1      0.98529883  1.004065690  0.995033477  0.995458484  0.9992025541
## Beta2     10.01181736 10.003472294 10.006818237  9.993575566 10.0000553886
## Beta3     -0.02977120 -0.011153392 -0.009881428  0.002007609  0.0009088149
## Beta4     -1.98044227 -1.989553925 -1.990882066 -1.994487017 -2.0006538343
## Beta5     -0.01447058 -0.003297621  0.021989488  0.001392291  0.0002044333

Ridge Estimator
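
Ridge regression shrinks the coefficients by minimizing RSS(beta) + lambda * sum(beta_j^2); in glmnet this corresponds to alpha = 0, with lambda chosen by cross-validation (cv.glmnet) inside the simulation function below.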

simulate_ridge <- function(n, n_sim=1000) {
  set.seed(123)  # Set seed for reproducibility
  
  # Initialize a matrix to store the results
  ridge_results <- matrix(NA, nrow=n_sim,ncol=6)
  colnames(ridge_results) <- c("Intercept", "Beta1", "Beta2", "Beta3", "Beta4", "Beta5")
  
  for(i in 1:n_sim){
    X1 <- rnorm(n,mean=0,sd=1)
    X2 <- rnorm(n,mean=0,sd=1)
    X3 <- rnorm(n,mean=0,sd=1)
    X4 <- rnorm(n,mean=0,sd=1)
    X5 <- rnorm(n,mean=0,sd=1)
    epsilon <- rnorm(n,mean=0,sd=2)
    Y <- beta[1]+beta[2]*X1+beta[3]*X2+beta[4]*X3+beta[5]*X4+beta[6]*X5+epsilon
    
    # Assemble the predictor matrix X
    X <- cbind(X1,X2,X3,X4,X5)
    
    # Fit ridge regression (alpha = 0) with cross-validation;
    # grouped = FALSE avoids the small-fold warning when n is small
    cv_model <- cv.glmnet(X, Y, alpha = 0, grouped = FALSE)
    
    # Use the optimal lambda from CV
    best_lambda <- cv_model$lambda.min
    
    # Store the ridge coefficients at the optimal lambda
    coef_ridge <- as.vector(coef(cv_model, s = best_lambda))
    ridge_results[i, ] <- coef_ridge
  }
  
  return(ridge_results)
}
  
  
# Ridge for each sample size
ridge_results_list <- lapply(n_obs, simulate_ridge)

# Label the results by sample size
names(ridge_results_list) <- n_obs

# Average estimated coefficients across simulations, for each sample size
n15 <- colMeans(ridge_results_list$'15')
n30 <- colMeans(ridge_results_list$'30')
n60 <- colMeans(ridge_results_list$'60')
n100 <- colMeans(ridge_results_list$'100')
n10000 <- colMeans(ridge_results_list$'10000')
koefisien_ridge <- cbind.data.frame(n15,n30,n60,n100,n10000)
koefisien_ridge
##                    n15        n30          n60         n100        n10000
## Intercept  3.050873957  3.0101885  2.990014771  2.998192669  3.0003106179
## Beta1      0.861482004  0.8967782  0.917133885  0.903452817  0.9133708966
## Beta2      8.826918287  9.0142686  9.076641634  9.093291258  9.1268737586
## Beta3      0.011047060  0.0279180  0.011489023 -0.001956583 -0.0008619910
## Beta4     -1.777115872 -1.7969787 -1.814568506 -1.814887999 -1.8257423180
## Beta5      0.003367171 -0.0169336  0.002754825  0.004876253  0.0008040057

Performance Measures
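
Three measures are computed from the 1000 simulated estimates of each coefficient: bias (mean estimate minus the true beta), relative bias (bias divided by the true beta), and empirical variance (the variance of the estimates across simulations).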

penduga <- c(rep("ols",6),rep("ridge",6))  # estimator labels for the stacked tables

# Bias: mean estimate minus the true beta
bias_ols <- koefisien_ols-beta
bias_ridge <- koefisien_ridge-beta
performa_bias <- rbind.data.frame(bias_ols,bias_ridge)
performa_bias <- data.frame(penduga,performa_bias)
performa_bias
##            penduga          n15          n30          n60         n100
## Intercept      ols -0.039272301  0.005288316  0.008401004  0.015302917
## Beta1          ols -0.014701169  0.004065690 -0.004966523 -0.004541516
## Beta2          ols  0.011817364  0.003472294  0.006818237 -0.006424434
## Beta3          ols -0.029771203 -0.011153392 -0.009881428  0.002007609
## Beta4          ols  0.019557731  0.010446075  0.009117934  0.005512983
## Beta5          ols -0.014470577 -0.003297621  0.021989488  0.001392291
## Intercept1   ridge  0.050873957  0.010188541 -0.009985229 -0.001807331
## Beta11       ridge -0.138517996 -0.103221847 -0.082866115 -0.096547183
## Beta21       ridge -1.173081713 -0.985731355 -0.923358366 -0.906708742
## Beta31       ridge  0.011047060  0.027918003  0.011489023 -0.001956583
## Beta41       ridge  0.222884128  0.203021349  0.185431494  0.185112001
## Beta51       ridge  0.003367171 -0.016933600  0.002754825  0.004876253
##                   n10000
## Intercept   0.0002401249
## Beta1      -0.0007974459
## Beta2       0.0000553886
## Beta3       0.0009088149
## Beta4      -0.0006538343
## Beta5       0.0002044333
## Intercept1  0.0003106179
## Beta11     -0.0866291034
## Beta21     -0.8731262414
## Beta31     -0.0008619910
## Beta41      0.1742576820
## Beta51      0.0008040057
# Relative bias: bias / true beta (Inf for beta3 and beta5, whose true values are 0)
rel_bias_ols <- bias_ols/beta
rel_bias_ridge <- bias_ridge/beta
performa_rel_bias <- rbind.data.frame(rel_bias_ols,rel_bias_ridge)
performa_rel_bias <- data.frame(penduga,performa_rel_bias)
performa_rel_bias
##            penduga          n15           n30           n60          n100
## Intercept      ols -0.013090767  0.0017627720  0.0028003347  0.0051009722
## Beta1          ols -0.014701169  0.0040656897 -0.0049665233 -0.0045415156
## Beta2          ols  0.001181736  0.0003472294  0.0006818237 -0.0006424434
## Beta3          ols         -Inf          -Inf          -Inf           Inf
## Beta4          ols -0.009778866 -0.0052230373 -0.0045589669 -0.0027564914
## Beta5          ols         -Inf          -Inf           Inf           Inf
## Intercept1   ridge  0.016957986  0.0033961802 -0.0033284097 -0.0006024438
## Beta11       ridge -0.138517996 -0.1032218470 -0.0828661154 -0.0965471826
## Beta21       ridge -0.117308171 -0.0985731355 -0.0923358366 -0.0906708742
## Beta31       ridge          Inf           Inf           Inf          -Inf
## Beta41       ridge -0.111442064 -0.1015106746 -0.0927157472 -0.0925560007
## Beta51       ridge          Inf          -Inf           Inf           Inf
##                   n10000
## Intercept   8.004164e-05
## Beta1      -7.974459e-04
## Beta2       5.538860e-06
## Beta3                Inf
## Beta4       3.269171e-04
## Beta5                Inf
## Intercept1  1.035393e-04
## Beta11     -8.662910e-02
## Beta21     -8.731262e-02
## Beta31              -Inf
## Beta41     -8.712884e-02
## Beta51               Inf
# Empirical variance of the estimates across simulations
n15 <- (apply(ols_results_list$'15',2,sd))^2
n30 <- (apply(ols_results_list$'30',2,sd))^2
n60 <- (apply(ols_results_list$'60',2,sd))^2
n100 <- (apply(ols_results_list$'100',2,sd))^2
n10000 <- (apply(ols_results_list$'10000',2,sd))^2
emp_var_ols <- data.frame(n15,n30,n60,n100,n10000)

n15 <- (apply(ridge_results_list$'15',2,sd))^2
n30 <- (apply(ridge_results_list$'30',2,sd))^2
n60 <- (apply(ridge_results_list$'60',2,sd))^2
n100 <- (apply(ridge_results_list$'100',2,sd))^2
n10000 <- (apply(ridge_results_list$'10000',2,sd))^2
emp_var_ridge <- data.frame(n15,n30,n60,n100,n10000)

performa_emp_var <- rbind.data.frame(emp_var_ols,emp_var_ridge)
performa_emp_var <- data.frame(penduga,performa_emp_var)
performa_emp_var
##            penduga       n15       n30        n60       n100       n10000
## Intercept      ols 0.4229590 0.1656609 0.06909034 0.04388779 0.0004225674
## Beta1          ols 0.5460114 0.1707893 0.07544747 0.04597913 0.0003897106
## Beta2          ols 0.4534137 0.1700935 0.07499676 0.04541360 0.0004081909
## Beta3          ols 0.5039179 0.1949367 0.07068743 0.03976492 0.0003660073
## Beta4          ols 0.4678521 0.1779954 0.07480723 0.04491478 0.0004056293
## Beta5          ols 0.4683790 0.1816330 0.08013757 0.04674696 0.0003915692
## Intercept1   ridge 0.4829744 0.1867271 0.08177916 0.05246098 0.0004901708
## Beta11       ridge 0.4770640 0.1853582 0.07972037 0.04123871 0.0004218655
## Beta21       ridge 0.4178909 0.1502685 0.05959936 0.03379853 0.0003222192
## Beta31       ridge 0.4956147 0.1678557 0.07408385 0.04709911 0.0003713489
## Beta41       ridge 0.5518465 0.1714189 0.07760871 0.04294191 0.0004178630
## Beta51       ridge 0.4662750 0.1865809 0.07363998 0.04473904 0.0004231594

Interpretation of Results

  1. Are OLS and Ridge unbiased?

    Answer: An estimator is called unbiased if its bias is (close to) 0. From the bias table:
    - beta 0 (intercept): both the OLS and Ridge estimators are unbiased
    - beta 1: OLS is unbiased; Ridge is biased (its bias does not vanish even at n = 10000)
    - beta 2: OLS is unbiased, whereas Ridge is biased
    - beta 3: both OLS and Ridge are unbiased
    - beta 4: OLS is unbiased, whereas Ridge is biased
    - beta 5: both OLS and Ridge are unbiased
    A rough significance check is sketched below.
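
    One way to make "close to 0" precise is to compare each bias to its Monte Carlo standard error, sqrt(emp_var / n_sim). A rough sketch (my addition, assuming n_sim = 1000 as set in the simulation functions; ratios well above about 2 indicate real bias):

    mc_se <- sqrt(performa_emp_var[, -1] / 1000)  # Monte Carlo SE of each mean estimate
    round(abs(performa_bias[, -1]) / mc_se, 2)    # |bias| in units of its MC standard error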

  2. Is OLS more efficient than Ridge?

    Answer: No. The OLS and Ridge estimators have nearly identical empirical variances for every estimand, and both variances approach 0 (i.e., both become efficient) as the number of observations grows; the ratio computed below makes this explicit.
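
    A direct check is the ratio of the empirical variances (a sketch using the emp_var_ols and emp_var_ridge tables built above); values near 1 mean the two estimators are about equally efficient:

    round(emp_var_ols / emp_var_ridge, 3)  # OLS variance relative to Ridge, per coefficient and n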

  3. Are OLS and Ridge consistent?

    Answer: OLS, yes: its bias and empirical variance both approach 0 as the number of observations grows. Ridge, only partly: its variance also shrinks, but its bias on the nonzero coefficients (beta 1, beta 2, beta 4) persists even at n = 10000, so Ridge with the CV-selected lambda does not appear consistent here; see the summary sketched below.
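
    The convergence can be summarized directly from performa_bias (a sketch using dplyr, already attached via tidyverse):

    performa_bias %>%
      group_by(penduga) %>%
      summarise(across(n15:n10000, ~ max(abs(.x))))  # worst-case |bias| per estimator and n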

  4. Based on questions 1-3, which estimator is better? Why?

    Answer: For this problem OLS is the better method: it has smaller (relative) bias than Ridge and is unbiased where Ridge is not, while being essentially just as efficient. The MSE comparison sketched below combines both criteria.
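
    A compact way to combine bias and variance is the mean squared error, MSE = bias^2 + variance. A minimal sketch from the tables already computed:

    mse_ols <- bias_ols^2 + emp_var_ols        # componentwise MSE for OLS
    mse_ridge <- bias_ridge^2 + emp_var_ridge  # componentwise MSE for Ridge
    data.frame(penduga, rbind.data.frame(mse_ols, mse_ridge))

    Because Ridge's squared bias on the large coefficient beta 2 far exceeds any variance advantage, OLS generally comes out ahead on MSE as well.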