Tugas 2 Analisis MSR
Kelompok 2
Doni Oktavianto (G1401211068)
Ubaidillah Al Hakim (G1401211086)
Hanifa Rahmacindia Nasution (G1401211094)
Jonathan Hizkia Burju Simanjuntak (G1401211104)
Megawati Roito Panjaitan (G1401211106)
Import Library
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Loading required package: lattice
## Loading required package: Matrix
## Loaded glmnet 4.1-7
Jumlah observasi yang dibangun
Looping Generate Data
# Loop untuk menghasilkan data dengan jumlah observasi yang berbeda
for (n_obs in n_obs_list) {
# Generate data
x1 <- rnorm(n_obs, mean = 0, sd = 1)
x2 <- rnorm(n_obs, mean = 0, sd = 1)
x3 <- rnorm(n_obs, mean = 0, sd = 1)
x4 <- rnorm(n_obs, mean = 0, sd = 1)
x5 <- rnorm(n_obs, mean = 0, sd = 1)
eror <- rnorm(n_obs, mean = 0, sd = 2)
}Skenario 1: Memperhitungkan X4 kedalam model
Membangun Fungsi Model
# Inisialisasi list untuk menyimpan hasil OLS dan Lasso
ols_res <- list()
lasso_res <- list()
# Loop untuk menghasilkan data dengan jumlah observasi yang berbeda
for (n_obs in n_obs_list) {
y <- 10 + 3*x1 + 5*x2 + 7*x3 + 0*x4 - x5 + eror
databangkitan <- data.frame(x1 = x1, x2 = x2, x3 = x3, x4 = x4, x5 = x5, y = y)
# Matriks prediktor dan vektor respons
x <- as.matrix(databangkitan[, c("x1", "x2", "x3", "x4" ,"x5")])
y <- databangkitan$y
# Estimasi OLS
ols_model <- lm(y ~ x1 + x2 + x3 + x4+ x5, data = databangkitan)
# Estimasi Lasso
lasso_model <- cv.glmnet(x, y, alpha = 1)
best_lambda <- lasso_model$lambda.min
final_lasso_model <- glmnet(x, y, alpha = 1, lambda = best_lambda)
lasso_coefficients <- coef(final_lasso_model)
# Simpan hasil OLS dan Lasso
ols_res[[as.character(n_obs)]] <- coef(ols_model)
lasso_res[[as.character(n_obs)]] <- as.vector(lasso_coefficients)
}# Koefisien sebenarnya
true_coefficients <- c(Intercept = 10, x1 = 3, x2 = 5, x3 = 7, x4 = 0, x5 = -1)
# Cetak hasil per n_obs
relative_bias <- function(estimate, parameter){
(mean(estimate) - parameter) / parameter
}
empirical_variance <- function(estimate) {
var(estimate)
}
for (n_obs in n_obs_list) {
ols_coefficients <- ols_res[[as.character(n_obs)]]
lasso_coefficients <- lasso_res[[as.character(n_obs)]]
ols_bias <- ols_coefficients - true_coefficients
lasso_coefficients_vector <- as.vector(lasso_coefficients)
names(lasso_coefficients_vector) <- c("Intercept", "x1", "x2", "x3", "x5")
lasso_bias <- lasso_coefficients_vector - true_coefficients
ols_rel_bias <- relative_bias(ols_coefficients, true_coefficients)
lasso_rel_bias <- relative_bias(lasso_coefficients_vector, true_coefficients)
ols_emp_var <- empirical_variance(ols_coefficients)
lasso_emp_var <- empirical_variance(lasso_coefficients_vector)
comparison_bias <- data.frame(
Koefisien = names(true_coefficients),
Nilai_Sebenarnya = true_coefficients,
Estimasi_OLS = ols_coefficients,
Bias_OLS = ols_bias,
Relative_Bias_OLS = ols_rel_bias,
Emp_Variance_OLS = ols_emp_var,
Estimasi_Lasso = lasso_coefficients_vector,
Bias_Lasso = lasso_bias,
Relative_Bias_Lasso = lasso_rel_bias,
Emp_Variance_Lasso = lasso_emp_var,
n_obs=n_obs
)
cat("Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs =", n_obs, "):\n")
print(comparison_bias)
}## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 5 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.98893598 -0.01106402 -0.6021513
## x1 x1 3 3.03038889 0.03038889 0.3261623
## x2 x2 5 5.01378131 0.01378131 -0.2043026
## x3 x3 7 6.92904816 -0.07095184 -0.4316447
## x4 x4 0 -0.07242146 -0.07242146 Inf
## x5 x5 -1 -1.01881070 -0.01881070 -4.9784870
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.63698 9.98849064 -0.011509359 -0.6025522
## x1 17.63698 3.00142184 0.001421841 0.3248261
## x2 17.63698 4.98962847 -0.010371526 -0.2051043
## x3 17.63698 6.89837992 -0.101620082 -0.4322174
## x4 17.63698 -0.04049719 -0.040497187 Inf
## x5 17.63698 -0.99055376 0.009446242 -4.9744783
## Emp_Variance_Lasso n_obs
## Intercept 17.4933 5
## x1 17.4933 5
## x2 17.4933 5
## x3 17.4933 5
## x4 17.4933 5
## x5 17.4933 5
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 15 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.98893598 -0.01106402 -0.6021513
## x1 x1 3 3.03038889 0.03038889 0.3261623
## x2 x2 5 5.01378131 0.01378131 -0.2043026
## x3 x3 7 6.92904816 -0.07095184 -0.4316447
## x4 x4 0 -0.07242146 -0.07242146 Inf
## x5 x5 -1 -1.01881070 -0.01881070 -4.9784870
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.63698 9.98849064 -0.011509359 -0.6025522
## x1 17.63698 3.00142184 0.001421841 0.3248261
## x2 17.63698 4.98962847 -0.010371526 -0.2051043
## x3 17.63698 6.89837992 -0.101620082 -0.4322174
## x4 17.63698 -0.04049719 -0.040497187 Inf
## x5 17.63698 -0.99055376 0.009446242 -4.9744783
## Emp_Variance_Lasso n_obs
## Intercept 17.4933 15
## x1 17.4933 15
## x2 17.4933 15
## x3 17.4933 15
## x4 17.4933 15
## x5 17.4933 15
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 30 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.98893598 -0.01106402 -0.6021513
## x1 x1 3 3.03038889 0.03038889 0.3261623
## x2 x2 5 5.01378131 0.01378131 -0.2043026
## x3 x3 7 6.92904816 -0.07095184 -0.4316447
## x4 x4 0 -0.07242146 -0.07242146 Inf
## x5 x5 -1 -1.01881070 -0.01881070 -4.9784870
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.63698 9.98849064 -0.011509359 -0.6025522
## x1 17.63698 3.00142184 0.001421841 0.3248261
## x2 17.63698 4.98962847 -0.010371526 -0.2051043
## x3 17.63698 6.89837992 -0.101620082 -0.4322174
## x4 17.63698 -0.04049719 -0.040497187 Inf
## x5 17.63698 -0.99055376 0.009446242 -4.9744783
## Emp_Variance_Lasso n_obs
## Intercept 17.4933 30
## x1 17.4933 30
## x2 17.4933 30
## x3 17.4933 30
## x4 17.4933 30
## x5 17.4933 30
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 60 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.98893598 -0.01106402 -0.6021513
## x1 x1 3 3.03038889 0.03038889 0.3261623
## x2 x2 5 5.01378131 0.01378131 -0.2043026
## x3 x3 7 6.92904816 -0.07095184 -0.4316447
## x4 x4 0 -0.07242146 -0.07242146 Inf
## x5 x5 -1 -1.01881070 -0.01881070 -4.9784870
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.63698 9.98849064 -0.011509359 -0.6025522
## x1 17.63698 3.00142184 0.001421841 0.3248261
## x2 17.63698 4.98962847 -0.010371526 -0.2051043
## x3 17.63698 6.89837992 -0.101620082 -0.4322174
## x4 17.63698 -0.04049719 -0.040497187 Inf
## x5 17.63698 -0.99055376 0.009446242 -4.9744783
## Emp_Variance_Lasso n_obs
## Intercept 17.4933 60
## x1 17.4933 60
## x2 17.4933 60
## x3 17.4933 60
## x4 17.4933 60
## x5 17.4933 60
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 100 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.98893598 -0.01106402 -0.6021513
## x1 x1 3 3.03038889 0.03038889 0.3261623
## x2 x2 5 5.01378131 0.01378131 -0.2043026
## x3 x3 7 6.92904816 -0.07095184 -0.4316447
## x4 x4 0 -0.07242146 -0.07242146 Inf
## x5 x5 -1 -1.01881070 -0.01881070 -4.9784870
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.63698 9.98849064 -0.011509359 -0.6025522
## x1 17.63698 3.00142184 0.001421841 0.3248261
## x2 17.63698 4.98962847 -0.010371526 -0.2051043
## x3 17.63698 6.89837992 -0.101620082 -0.4322174
## x4 17.63698 -0.04049719 -0.040497187 Inf
## x5 17.63698 -0.99055376 0.009446242 -4.9744783
## Emp_Variance_Lasso n_obs
## Intercept 17.4933 100
## x1 17.4933 100
## x2 17.4933 100
## x3 17.4933 100
## x4 17.4933 100
## x5 17.4933 100
## Perbandingan Bias untuk OLS dan Lasso menggunakan X4 (n_obs = 1000 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.98893598 -0.01106402 -0.6021513
## x1 x1 3 3.03038889 0.03038889 0.3261623
## x2 x2 5 5.01378131 0.01378131 -0.2043026
## x3 x3 7 6.92904816 -0.07095184 -0.4316447
## x4 x4 0 -0.07242146 -0.07242146 Inf
## x5 x5 -1 -1.01881070 -0.01881070 -4.9784870
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.63698 9.98844722 -0.01155278 -0.6025913
## x1 17.63698 2.99859761 -0.00140239 0.3246958
## x2 17.63698 4.98727360 -0.01272640 -0.2051825
## x3 17.63698 6.89538980 -0.10461020 -0.4322732
## x4 17.63698 -0.03738461 -0.03738461 Inf
## x5 17.63698 -0.98779874 0.01220126 -4.9740875
## Emp_Variance_Lasso n_obs
## Intercept 17.47938 1000
## x1 17.47938 1000
## x2 17.47938 1000
## x3 17.47938 1000
## x4 17.47938 1000
## x5 17.47938 1000
Ketika memasukkan X4 dalam model yang mana koefisiennya sama dengan 0 membuat hasil relative bias menjadi Inf. Hal ini terjadi karena rumusan relative bias yang dicari melalui rumusan (mean (estimate)-parameter) / parameter membuat bagian penyebut = 0 sehingga memunculkan hasil Inf.
Skenario 2: Menghilangkan X4 dari model
Membangun Fungsi Model
for (n_obs in n_obs_list) {
y <- 10 + 3*x1 + 5*x2 + 7*x3 - x5 + eror
databangkitan2 <- data.frame(x1 = x1, x2 = x2, x3 = x3, x5 = x5, y = y)
# Matriks prediktor dan vektor respons
x <- as.matrix(databangkitan2[, c("x1", "x2", "x3", "x5")])
y <- databangkitan2$y
# Estimasi OLS
ols_model <- lm(y ~ x1 + x2 + x3 + x5, data = databangkitan2)
# Estimasi Lasso
lasso_model <- cv.glmnet(x, y, alpha = 1)
best_lambda <- lasso_model$lambda.min
final_lasso_model <- glmnet(x, y, alpha = 1, lambda = best_lambda)
lasso_coefficients <- coef(final_lasso_model)
# Simpan hasil OLS dan Lasso
ols_res[[as.character(n_obs)]] <- coef(ols_model)
lasso_res[[as.character(n_obs)]] <- as.vector(lasso_coefficients)
}# Koefisien sebenarnya
true_coefficients <- c(Intercept = 10, x1 = 3, x2 = 5, x3 = 7, x5 = -1)
# Cetak hasil per n_obs
relative_bias <- function(estimate, parameter){
(mean(estimate) - parameter) / parameter
}
empirical_variance <- function(estimate) {
var(estimate)
}
for (n_obs in n_obs_list) {
ols_coefficients <- ols_res[[as.character(n_obs)]]
lasso_coefficients <- lasso_res[[as.character(n_obs)]]
ols_bias <- ols_coefficients - true_coefficients
lasso_coefficients_vector <- as.vector(lasso_coefficients)
names(lasso_coefficients_vector) <- c("Intercept", "x1", "x2", "x3", "x5")
lasso_bias <- lasso_coefficients_vector - true_coefficients
ols_rel_bias <- relative_bias(ols_coefficients, true_coefficients)
lasso_rel_bias <- relative_bias(lasso_coefficients_vector, true_coefficients)
ols_emp_var <- empirical_variance(ols_coefficients)
lasso_emp_var <- empirical_variance(lasso_coefficients_vector)
comparison_bias <- data.frame(
Koefisien = names(true_coefficients),
Nilai_Sebenarnya = true_coefficients,
Estimasi_OLS = ols_coefficients,
Bias_OLS = ols_bias,
Relative_Bias_OLS = ols_rel_bias,
Emp_Variance_OLS = ols_emp_var,
Estimasi_Lasso = lasso_coefficients_vector,
Bias_Lasso = lasso_bias,
Relative_Bias_Lasso = lasso_rel_bias,
Emp_Variance_Lasso = lasso_emp_var,
n_obs=n_obs
)
cat("Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs =", n_obs, "):\n")
print(comparison_bias)
}## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 5 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.989856 -0.01014409 -0.52119506
## x1 x1 3 3.025001 0.02500091 0.59601647
## x2 x2 5 5.015416 0.01541648 -0.04239012
## x3 x3 7 6.927462 -0.07253756 -0.31599294
## x5 x5 -1 -1.017489 -0.01748876 -5.78804940
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.12505 9.9889221 -0.011077903 -0.52239378
## x1 17.12505 2.9958165 -0.004183542 0.59202074
## x2 17.12505 4.9881176 -0.011882368 -0.04478756
## x3 17.12505 6.8945713 -0.105428745 -0.31770540
## x5 17.12505 -0.9871163 0.012883659 -5.77606222
## Emp_Variance_Lasso n_obs
## Intercept 17.02261 5
## x1 17.02261 5
## x2 17.02261 5
## x3 17.02261 5
## x5 17.02261 5
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 15 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.989856 -0.01014409 -0.52119506
## x1 x1 3 3.025001 0.02500091 0.59601647
## x2 x2 5 5.015416 0.01541648 -0.04239012
## x3 x3 7 6.927462 -0.07253756 -0.31599294
## x5 x5 -1 -1.017489 -0.01748876 -5.78804940
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.12505 9.9889221 -0.011077903 -0.52239378
## x1 17.12505 2.9958165 -0.004183542 0.59202074
## x2 17.12505 4.9881176 -0.011882368 -0.04478756
## x3 17.12505 6.8945713 -0.105428745 -0.31770540
## x5 17.12505 -0.9871163 0.012883659 -5.77606222
## Emp_Variance_Lasso n_obs
## Intercept 17.02261 15
## x1 17.02261 15
## x2 17.02261 15
## x3 17.02261 15
## x5 17.02261 15
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 30 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.989856 -0.01014409 -0.52119506
## x1 x1 3 3.025001 0.02500091 0.59601647
## x2 x2 5 5.015416 0.01541648 -0.04239012
## x3 x3 7 6.927462 -0.07253756 -0.31599294
## x5 x5 -1 -1.017489 -0.01748876 -5.78804940
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.12505 9.9889221 -0.011077903 -0.52239378
## x1 17.12505 2.9958165 -0.004183542 0.59202074
## x2 17.12505 4.9881176 -0.011882368 -0.04478756
## x3 17.12505 6.8945713 -0.105428745 -0.31770540
## x5 17.12505 -0.9871163 0.012883659 -5.77606222
## Emp_Variance_Lasso n_obs
## Intercept 17.02261 30
## x1 17.02261 30
## x2 17.02261 30
## x3 17.02261 30
## x5 17.02261 30
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 60 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.989856 -0.01014409 -0.52119506
## x1 x1 3 3.025001 0.02500091 0.59601647
## x2 x2 5 5.015416 0.01541648 -0.04239012
## x3 x3 7 6.927462 -0.07253756 -0.31599294
## x5 x5 -1 -1.017489 -0.01748876 -5.78804940
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.12505 9.9889221 -0.011077903 -0.52239378
## x1 17.12505 2.9958165 -0.004183542 0.59202074
## x2 17.12505 4.9881176 -0.011882368 -0.04478756
## x3 17.12505 6.8945713 -0.105428745 -0.31770540
## x5 17.12505 -0.9871163 0.012883659 -5.77606222
## Emp_Variance_Lasso n_obs
## Intercept 17.02261 60
## x1 17.02261 60
## x2 17.02261 60
## x3 17.02261 60
## x5 17.02261 60
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 100 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.989856 -0.01014409 -0.52119506
## x1 x1 3 3.025001 0.02500091 0.59601647
## x2 x2 5 5.015416 0.01541648 -0.04239012
## x3 x3 7 6.927462 -0.07253756 -0.31599294
## x5 x5 -1 -1.017489 -0.01748876 -5.78804940
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.12505 9.9889221 -0.011077903 -0.52239378
## x1 17.12505 2.9958165 -0.004183542 0.59202074
## x2 17.12505 4.9881176 -0.011882368 -0.04478756
## x3 17.12505 6.8945713 -0.105428745 -0.31770540
## x5 17.12505 -0.9871163 0.012883659 -5.77606222
## Emp_Variance_Lasso n_obs
## Intercept 17.02261 100
## x1 17.02261 100
## x2 17.02261 100
## x3 17.02261 100
## x5 17.02261 100
## Perbandingan Bias untuk OLS dan Lasso tanpa X4 (n_obs = 1000 ):
## Koefisien Nilai_Sebenarnya Estimasi_OLS Bias_OLS Relative_Bias_OLS
## Intercept Intercept 10 9.989856 -0.01014409 -0.52119506
## x1 x1 3 3.025001 0.02500091 0.59601647
## x2 x2 5 5.015416 0.01541648 -0.04239012
## x3 x3 7 6.927462 -0.07253756 -0.31599294
## x5 x5 -1 -1.017489 -0.01748876 -5.78804940
## Emp_Variance_OLS Estimasi_Lasso Bias_Lasso Relative_Bias_Lasso
## Intercept 17.12505 9.9889221 -0.011077903 -0.52239378
## x1 17.12505 2.9958165 -0.004183542 0.59202074
## x2 17.12505 4.9881176 -0.011882368 -0.04478756
## x3 17.12505 6.8945713 -0.105428745 -0.31770540
## x5 17.12505 -0.9871163 0.012883659 -5.77606222
## Emp_Variance_Lasso n_obs
## Intercept 17.02261 1000
## x1 17.02261 1000
## x2 17.02261 1000
## x3 17.02261 1000
## x5 17.02261 1000
Pada model tanpa X4 tidak didapat nilai Inf lagi pada output relative bias.