Tugas 2 Analisis MSR

Kelompok 2

Doni Oktavianto (G1401211068)

Ubaidillah Al Hakim (G1401211086)

Hanifa Rahmacindia Nasution (G1401211094)

Jonathan Hizkia Burju Simanjuntak (G1401211104)

Megawati Roito Panjaitan (G1401211106)

Import Library

library(caret)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Loading required package: lattice
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-7

Set Seed

# Set seed untuk reproduksibilitas
set.seed(123)

Jumlah observasi yang dibangun

# Jumlah observasi yang diuji
n_obs_list <- c(5, 15, 30, 60, 100, 1000)

Looping Generate Data

# Loop untuk menghasilkan data dengan jumlah observasi yang berbeda
for (n_obs in n_obs_list) {
  # Generate data
  x1 <- rnorm(n_obs, mean = 0, sd = 1)
  x2 <- rnorm(n_obs, mean = 0, sd = 1)
  x3 <- rnorm(n_obs, mean = 0, sd = 1)
  x4 <- rnorm(n_obs, mean = 0, sd = 1)
  x5 <- rnorm(n_obs, mean = 0, sd = 1)
  eror <- rnorm(n_obs, mean = 0, sd = 2)
}

Skenario 1: Memperhitungkan X4 kedalam model

Membangun Fungsi Model

# Inisialisasi list untuk menyimpan hasil OLS dan Lasso
ols_res <- list()
lasso_res <- list()

# Loop untuk menghasilkan data dengan jumlah observasi yang berbeda
for (n_obs in n_obs_list) {
  y <- 10 + 3*x1 + 5*x2 + 7*x3 + 0*x4  - x5 + eror
  databangkitan <- data.frame(x1 = x1, x2 = x2, x3 = x3, x4 = x4, x5 = x5, y = y)
  
  # Matriks prediktor dan vektor respons
  x <- as.matrix(databangkitan[, c("x1", "x2", "x3", "x4" ,"x5")])
  y <- databangkitan$y
  
  # Estimasi OLS
  ols_model <- lm(y ~ x1 + x2 + x3 + x4+ x5, data = databangkitan)
  
  # Estimasi Lasso
  lasso_model <- cv.glmnet(x, y, alpha = 1)
  best_lambda <- lasso_model$lambda.min
  final_lasso_model <- glmnet(x, y, alpha = 1, lambda = best_lambda)
  lasso_coefficients <- coef(final_lasso_model)

  # Simpan hasil OLS dan Lasso
  ols_res[[as.character(n_obs)]] <- coef(ols_model)
  lasso_res[[as.character(n_obs)]] <- as.vector(lasso_coefficients)
}
# Koefisien sebenarnya
true_coefficients <- c(Intercept = 10, x1 = 3, x2 = 5, x3 = 7, x4 = 0, x5 = -1)

# Cetak hasil per n_obs

relative_bias <- function(estimate, parameter){
  (mean(estimate) - parameter) / parameter
}  

empirical_variance <- function(estimate) {
  var(estimate)
}

for (n_obs in n_obs_list) {
  ols_coefficients <- ols_res[[as.character(n_obs)]]
  lasso_coefficients <- lasso_res[[as.character(n_obs)]]
  
  ols_bias <- ols_coefficients - true_coefficients
  lasso_coefficients_vector <- as.vector(lasso_coefficients)
  names(lasso_coefficients_vector) <- c("Intercept", "x1", "x2", "x3", "x5")
  lasso_bias <- lasso_coefficients_vector - true_coefficients
  
  ols_rel_bias <- relative_bias(ols_coefficients, true_coefficients)
  lasso_rel_bias <- relative_bias(lasso_coefficients_vector, true_coefficients)
  
  ols_emp_var <- empirical_variance(ols_coefficients)
  lasso_emp_var <- empirical_variance(lasso_coefficients_vector)
  
  comparison_bias <- data.frame(
    Koefisien = names(true_coefficients),
    Nilai_Sebenarnya = true_coefficients,
    Estimasi_OLS = ols_coefficients,
    Bias_OLS = ols_bias,
    Relative_Bias_OLS = ols_rel_bias,
    Emp_Variance_OLS = ols_emp_var,
    Estimasi_Lasso = lasso_coefficients_vector,
    Bias_Lasso = lasso_bias, 
    Relative_Bias_Lasso = lasso_rel_bias,
    Emp_Variance_Lasso = lasso_emp_var,
    n_obs=n_obs
  )
  cat("Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs =", n_obs, "):\n")
  print(comparison_bias)
}
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 5 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10   9.98893598 -0.01106402        -0.6021513
## x1               x1                3   3.03038889  0.03038889         0.3261623
## x2               x2                5   5.01378131  0.01378131        -0.2043026
## x3               x3                7   6.92904816 -0.07095184        -0.4316447
## x4               x4                0  -0.07242146 -0.07242146               Inf
## x5               x5               -1  -1.01881070 -0.01881070        -4.9784870
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.63698     9.98849064 -0.011509359          -0.6025522
## x1                17.63698     3.00142184  0.001421841           0.3248261
## x2                17.63698     4.98962847 -0.010371526          -0.2051043
## x3                17.63698     6.89837992 -0.101620082          -0.4322174
## x4                17.63698    -0.04049719 -0.040497187                 Inf
## x5                17.63698    -0.99055376  0.009446242          -4.9744783
##           Emp_Variance_Lasso n_obs
## Intercept            17.4933     5
## x1                   17.4933     5
## x2                   17.4933     5
## x3                   17.4933     5
## x4                   17.4933     5
## x5                   17.4933     5
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 15 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10   9.98893598 -0.01106402        -0.6021513
## x1               x1                3   3.03038889  0.03038889         0.3261623
## x2               x2                5   5.01378131  0.01378131        -0.2043026
## x3               x3                7   6.92904816 -0.07095184        -0.4316447
## x4               x4                0  -0.07242146 -0.07242146               Inf
## x5               x5               -1  -1.01881070 -0.01881070        -4.9784870
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.63698     9.98849064 -0.011509359          -0.6025522
## x1                17.63698     3.00142184  0.001421841           0.3248261
## x2                17.63698     4.98962847 -0.010371526          -0.2051043
## x3                17.63698     6.89837992 -0.101620082          -0.4322174
## x4                17.63698    -0.04049719 -0.040497187                 Inf
## x5                17.63698    -0.99055376  0.009446242          -4.9744783
##           Emp_Variance_Lasso n_obs
## Intercept            17.4933    15
## x1                   17.4933    15
## x2                   17.4933    15
## x3                   17.4933    15
## x4                   17.4933    15
## x5                   17.4933    15
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 30 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10   9.98893598 -0.01106402        -0.6021513
## x1               x1                3   3.03038889  0.03038889         0.3261623
## x2               x2                5   5.01378131  0.01378131        -0.2043026
## x3               x3                7   6.92904816 -0.07095184        -0.4316447
## x4               x4                0  -0.07242146 -0.07242146               Inf
## x5               x5               -1  -1.01881070 -0.01881070        -4.9784870
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.63698     9.98849064 -0.011509359          -0.6025522
## x1                17.63698     3.00142184  0.001421841           0.3248261
## x2                17.63698     4.98962847 -0.010371526          -0.2051043
## x3                17.63698     6.89837992 -0.101620082          -0.4322174
## x4                17.63698    -0.04049719 -0.040497187                 Inf
## x5                17.63698    -0.99055376  0.009446242          -4.9744783
##           Emp_Variance_Lasso n_obs
## Intercept            17.4933    30
## x1                   17.4933    30
## x2                   17.4933    30
## x3                   17.4933    30
## x4                   17.4933    30
## x5                   17.4933    30
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 60 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10   9.98893598 -0.01106402        -0.6021513
## x1               x1                3   3.03038889  0.03038889         0.3261623
## x2               x2                5   5.01378131  0.01378131        -0.2043026
## x3               x3                7   6.92904816 -0.07095184        -0.4316447
## x4               x4                0  -0.07242146 -0.07242146               Inf
## x5               x5               -1  -1.01881070 -0.01881070        -4.9784870
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.63698     9.98849064 -0.011509359          -0.6025522
## x1                17.63698     3.00142184  0.001421841           0.3248261
## x2                17.63698     4.98962847 -0.010371526          -0.2051043
## x3                17.63698     6.89837992 -0.101620082          -0.4322174
## x4                17.63698    -0.04049719 -0.040497187                 Inf
## x5                17.63698    -0.99055376  0.009446242          -4.9744783
##           Emp_Variance_Lasso n_obs
## Intercept            17.4933    60
## x1                   17.4933    60
## x2                   17.4933    60
## x3                   17.4933    60
## x4                   17.4933    60
## x5                   17.4933    60
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 100 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10   9.98893598 -0.01106402        -0.6021513
## x1               x1                3   3.03038889  0.03038889         0.3261623
## x2               x2                5   5.01378131  0.01378131        -0.2043026
## x3               x3                7   6.92904816 -0.07095184        -0.4316447
## x4               x4                0  -0.07242146 -0.07242146               Inf
## x5               x5               -1  -1.01881070 -0.01881070        -4.9784870
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.63698     9.98849064 -0.011509359          -0.6025522
## x1                17.63698     3.00142184  0.001421841           0.3248261
## x2                17.63698     4.98962847 -0.010371526          -0.2051043
## x3                17.63698     6.89837992 -0.101620082          -0.4322174
## x4                17.63698    -0.04049719 -0.040497187                 Inf
## x5                17.63698    -0.99055376  0.009446242          -4.9744783
##           Emp_Variance_Lasso n_obs
## Intercept            17.4933   100
## x1                   17.4933   100
## x2                   17.4933   100
## x3                   17.4933   100
## x4                   17.4933   100
## x5                   17.4933   100
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 1000 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10   9.98893598 -0.01106402        -0.6021513
## x1               x1                3   3.03038889  0.03038889         0.3261623
## x2               x2                5   5.01378131  0.01378131        -0.2043026
## x3               x3                7   6.92904816 -0.07095184        -0.4316447
## x4               x4                0  -0.07242146 -0.07242146               Inf
## x5               x5               -1  -1.01881070 -0.01881070        -4.9784870
##           Emp_Variance_OLS Estimasi_Lasso  Bias_Lasso Relative_Bias_Lasso
## Intercept         17.63698     9.98844722 -0.01155278          -0.6025913
## x1                17.63698     2.99859761 -0.00140239           0.3246958
## x2                17.63698     4.98727360 -0.01272640          -0.2051825
## x3                17.63698     6.89538980 -0.10461020          -0.4322732
## x4                17.63698    -0.03738461 -0.03738461                 Inf
## x5                17.63698    -0.98779874  0.01220126          -4.9740875
##           Emp_Variance_Lasso n_obs
## Intercept           17.47938  1000
## x1                  17.47938  1000
## x2                  17.47938  1000
## x3                  17.47938  1000
## x4                  17.47938  1000
## x5                  17.47938  1000

Ketika memasukkan X4 dalam model yang mana koefisiennya sama dengan 0 membuat hasil relative bias menjadi Inf. Hal ini terjadi karena rumusan relative bias yang dicari melalui rumusan (mean (estimate)-parameter) / parameter membuat bagian penyebut = 0 sehingga memunculkan hasil Inf.

Skenario 2: Menghilangkan X4 dari model

Membangun Fungsi Model

for (n_obs in n_obs_list) {
  y <- 10 + 3*x1 + 5*x2 + 7*x3 - x5 + eror
  databangkitan2 <- data.frame(x1 = x1, x2 = x2, x3 = x3, x5 = x5, y = y)
  
  # Matriks prediktor dan vektor respons
  x <- as.matrix(databangkitan2[, c("x1", "x2", "x3", "x5")])
  y <- databangkitan2$y
  
  # Estimasi OLS
  ols_model <- lm(y ~ x1 + x2 + x3 + x5, data = databangkitan2)
  
  # Estimasi Lasso
  lasso_model <- cv.glmnet(x, y, alpha = 1)
  best_lambda <- lasso_model$lambda.min
  final_lasso_model <- glmnet(x, y, alpha = 1, lambda = best_lambda)
  lasso_coefficients <- coef(final_lasso_model)

  # Simpan hasil OLS dan Lasso
  ols_res[[as.character(n_obs)]] <- coef(ols_model)
  lasso_res[[as.character(n_obs)]] <- as.vector(lasso_coefficients)
}
# Koefisien sebenarnya
true_coefficients <- c(Intercept = 10, x1 = 3, x2 = 5, x3 = 7, x5 = -1)

# Cetak hasil per n_obs

relative_bias <- function(estimate, parameter){
  (mean(estimate) - parameter) / parameter
}  

empirical_variance <- function(estimate) {
  var(estimate)
}

for (n_obs in n_obs_list) {
  ols_coefficients <- ols_res[[as.character(n_obs)]]
  lasso_coefficients <- lasso_res[[as.character(n_obs)]]
  
  ols_bias <- ols_coefficients - true_coefficients
  lasso_coefficients_vector <- as.vector(lasso_coefficients)
  names(lasso_coefficients_vector) <- c("Intercept", "x1", "x2", "x3", "x5")
  lasso_bias <- lasso_coefficients_vector - true_coefficients
  
  ols_rel_bias <- relative_bias(ols_coefficients, true_coefficients)
  lasso_rel_bias <- relative_bias(lasso_coefficients_vector, true_coefficients)
  
  ols_emp_var <- empirical_variance(ols_coefficients)
  lasso_emp_var <- empirical_variance(lasso_coefficients_vector)
  
  comparison_bias <- data.frame(
    Koefisien = names(true_coefficients),
    Nilai_Sebenarnya = true_coefficients,
    Estimasi_OLS = ols_coefficients,
    Bias_OLS = ols_bias,
    Relative_Bias_OLS = ols_rel_bias,
    Emp_Variance_OLS = ols_emp_var,
    Estimasi_Lasso = lasso_coefficients_vector,
    Bias_Lasso = lasso_bias, 
    Relative_Bias_Lasso = lasso_rel_bias,
    Emp_Variance_Lasso = lasso_emp_var,
    n_obs=n_obs
  )
  cat("Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs =", n_obs, "):\n")
  print(comparison_bias)
}
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 5 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10     9.989856 -0.01014409       -0.52119506
## x1               x1                3     3.025001  0.02500091        0.59601647
## x2               x2                5     5.015416  0.01541648       -0.04239012
## x3               x3                7     6.927462 -0.07253756       -0.31599294
## x5               x5               -1    -1.017489 -0.01748876       -5.78804940
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.12505      9.9889221 -0.011077903         -0.52239378
## x1                17.12505      2.9958165 -0.004183542          0.59202074
## x2                17.12505      4.9881176 -0.011882368         -0.04478756
## x3                17.12505      6.8945713 -0.105428745         -0.31770540
## x5                17.12505     -0.9871163  0.012883659         -5.77606222
##           Emp_Variance_Lasso n_obs
## Intercept           17.02261     5
## x1                  17.02261     5
## x2                  17.02261     5
## x3                  17.02261     5
## x5                  17.02261     5
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 15 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10     9.989856 -0.01014409       -0.52119506
## x1               x1                3     3.025001  0.02500091        0.59601647
## x2               x2                5     5.015416  0.01541648       -0.04239012
## x3               x3                7     6.927462 -0.07253756       -0.31599294
## x5               x5               -1    -1.017489 -0.01748876       -5.78804940
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.12505      9.9889221 -0.011077903         -0.52239378
## x1                17.12505      2.9958165 -0.004183542          0.59202074
## x2                17.12505      4.9881176 -0.011882368         -0.04478756
## x3                17.12505      6.8945713 -0.105428745         -0.31770540
## x5                17.12505     -0.9871163  0.012883659         -5.77606222
##           Emp_Variance_Lasso n_obs
## Intercept           17.02261    15
## x1                  17.02261    15
## x2                  17.02261    15
## x3                  17.02261    15
## x5                  17.02261    15
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 30 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10     9.989856 -0.01014409       -0.52119506
## x1               x1                3     3.025001  0.02500091        0.59601647
## x2               x2                5     5.015416  0.01541648       -0.04239012
## x3               x3                7     6.927462 -0.07253756       -0.31599294
## x5               x5               -1    -1.017489 -0.01748876       -5.78804940
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.12505      9.9889221 -0.011077903         -0.52239378
## x1                17.12505      2.9958165 -0.004183542          0.59202074
## x2                17.12505      4.9881176 -0.011882368         -0.04478756
## x3                17.12505      6.8945713 -0.105428745         -0.31770540
## x5                17.12505     -0.9871163  0.012883659         -5.77606222
##           Emp_Variance_Lasso n_obs
## Intercept           17.02261    30
## x1                  17.02261    30
## x2                  17.02261    30
## x3                  17.02261    30
## x5                  17.02261    30
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 60 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10     9.989856 -0.01014409       -0.52119506
## x1               x1                3     3.025001  0.02500091        0.59601647
## x2               x2                5     5.015416  0.01541648       -0.04239012
## x3               x3                7     6.927462 -0.07253756       -0.31599294
## x5               x5               -1    -1.017489 -0.01748876       -5.78804940
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.12505      9.9889221 -0.011077903         -0.52239378
## x1                17.12505      2.9958165 -0.004183542          0.59202074
## x2                17.12505      4.9881176 -0.011882368         -0.04478756
## x3                17.12505      6.8945713 -0.105428745         -0.31770540
## x5                17.12505     -0.9871163  0.012883659         -5.77606222
##           Emp_Variance_Lasso n_obs
## Intercept           17.02261    60
## x1                  17.02261    60
## x2                  17.02261    60
## x3                  17.02261    60
## x5                  17.02261    60
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 100 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10     9.989856 -0.01014409       -0.52119506
## x1               x1                3     3.025001  0.02500091        0.59601647
## x2               x2                5     5.015416  0.01541648       -0.04239012
## x3               x3                7     6.927462 -0.07253756       -0.31599294
## x5               x5               -1    -1.017489 -0.01748876       -5.78804940
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.12505      9.9889221 -0.011077903         -0.52239378
## x1                17.12505      2.9958165 -0.004183542          0.59202074
## x2                17.12505      4.9881176 -0.011882368         -0.04478756
## x3                17.12505      6.8945713 -0.105428745         -0.31770540
## x5                17.12505     -0.9871163  0.012883659         -5.77606222
##           Emp_Variance_Lasso n_obs
## Intercept           17.02261   100
## x1                  17.02261   100
## x2                  17.02261   100
## x3                  17.02261   100
## x5                  17.02261   100
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 1000 ):
##           Koefisien Nilai_Sebenarnya Estimasi_OLS    Bias_OLS Relative_Bias_OLS
## Intercept Intercept               10     9.989856 -0.01014409       -0.52119506
## x1               x1                3     3.025001  0.02500091        0.59601647
## x2               x2                5     5.015416  0.01541648       -0.04239012
## x3               x3                7     6.927462 -0.07253756       -0.31599294
## x5               x5               -1    -1.017489 -0.01748876       -5.78804940
##           Emp_Variance_OLS Estimasi_Lasso   Bias_Lasso Relative_Bias_Lasso
## Intercept         17.12505      9.9889221 -0.011077903         -0.52239378
## x1                17.12505      2.9958165 -0.004183542          0.59202074
## x2                17.12505      4.9881176 -0.011882368         -0.04478756
## x3                17.12505      6.8945713 -0.105428745         -0.31770540
## x5                17.12505     -0.9871163  0.012883659         -5.77606222
##           Emp_Variance_Lasso n_obs
## Intercept           17.02261  1000
## x1                  17.02261  1000
## x2                  17.02261  1000
## x3                  17.02261  1000
## x5                  17.02261  1000

Pada model tanpa X4 tidak didapat nilai Inf lagi pada output relative bias.