Yosella Diftasari (G1401201028)
Yogi Nur Hamid (G1401211043)
Adinda Putri Alfira (G1401211060)
Muhammad Farhan Adeva (G1401211062)
Rifqi Rustu Andana (G1401211067)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loaded glmnet 4.1-8
set.seed(123)
n_obs <- c(15,30,60,100,10000) #number of observations
beta <- matrix(c(3,1,10,0,-2,0), nrow=6,ncol=1)
simulate_ols <- function(n, n_sim=1000) {
set.seed(123) # Set seed untuk reproduktibilitas
# Inisialisasi matriks untuk menyimpan hasil
ols_results <- matrix(NA, nrow=n_sim,ncol=6)
colnames(ols_results) <- c("Intercept", "Beta1", "Beta2", "Beta3", "Beta4", "Beta5")
for(i in 1:n_sim){
X1 <- rnorm(n,mean=0,sd=1)
X2 <- rnorm(n,mean=0,sd=1)
X3 <- rnorm(n,mean=0,sd=1)
X4 <- rnorm(n,mean=0,sd=1)
X5 <- rnorm(n,mean=0,sd=1)
epsilon <- rnorm(n,mean=0,sd=2)
Y <- beta[1]+beta[2]*X1+beta[3]*X2+beta[4]*X3+beta[5]*X4+beta[6]*X5+epsilon
# Menyesuaikan model regresi linear
model <- lm(Y~X1+X2+X3+X4+X5)
# Simpan koefisien OLS
ols_results[i, ] <- coef(model)
}
return(ols_results)
}
#Ols untuk setiap jumlah observasi berbeda
ols_results_list <- lapply(n_obs, simulate_ols)
# Menampilkan hasil
names(ols_results_list) <- n_obs
# Menampilkan ringkasan hasil simulasi untuk tiap observasi
n15 <- colMeans(ols_results_list$'15')
n30 <-colMeans(ols_results_list$'30')
n60 <- colMeans(ols_results_list$'60')
n100 <- colMeans(ols_results_list$'100')
n10000 <- colMeans(ols_results_list$'10000')
koefisien_ols <- cbind.data.frame(n15,n30,n60,n100,n10000)
koefisien_ols
## n15 n30 n60 n100 n10000
## Intercept 2.96072770 3.005288316 3.008401004 3.015302917 3.0002401249
## Beta1 0.98529883 1.004065690 0.995033477 0.995458484 0.9992025541
## Beta2 10.01181736 10.003472294 10.006818237 9.993575566 10.0000553886
## Beta3 -0.02977120 -0.011153392 -0.009881428 0.002007609 0.0009088149
## Beta4 -1.98044227 -1.989553925 -1.990882066 -1.994487017 -2.0006538343
## Beta5 -0.01447058 -0.003297621 0.021989488 0.001392291 0.0002044333
simulate_ridge <- function(n, n_sim=1000) {
set.seed(123) # Set seed untuk reproduktibilitas
# Inisialisasi matriks untuk menyimpan hasil
ridge_results <- matrix(NA, nrow=n_sim,ncol=6)
colnames(ridge_results) <- c("Intercept", "Beta1", "Beta2", "Beta3", "Beta4", "Beta5")
for(i in 1:n_sim){
X1 <- rnorm(n,mean=0,sd=1)
X2 <- rnorm(n,mean=0,sd=1)
X3 <- rnorm(n,mean=0,sd=1)
X4 <- rnorm(n,mean=0,sd=1)
X5 <- rnorm(n,mean=0,sd=1)
epsilon <- rnorm(n,mean=0,sd=2)
Y <- beta[1]+beta[2]*X1+beta[3]*X2+beta[4]*X3+beta[5]*X4+beta[6]*X5+epsilon
#Menyusun matriks X
X <- cbind(X1,X2,X3,X4,X5)
# Menyesuaikan model Ridge Regression dengan CV
cv_model <- cv.glmnet(X, Y, alpha = 0, grouped = F)
# Menggunakan lambda yang optimal dari CV
best_lambda <- cv_model$lambda.min
# Menyimpan koefisien Ridge untuk lambda optimal
coef_ridge <- as.vector(coef(cv_model, s = best_lambda))
ridge_results[i, ] <- coef_ridge
}
return(ridge_results)
}
#ridge untuk setiap jumlah observasi berbeda
ridge_results_list <- lapply(n_obs, simulate_ridge)
# Menampilkan hasil
names(ridge_results_list) <- n_obs
# Menampilkan ringkasan hasil simulasi untuk tiap observasi
n15 <- colMeans(ridge_results_list$'15')
n30 <-colMeans(ridge_results_list$'30')
n60 <- colMeans(ridge_results_list$'60')
n100 <- colMeans(ridge_results_list$'100')
n10000 <- colMeans(ridge_results_list$'10000')
koefisien_ridge <- cbind.data.frame(n15,n30,n60,n100,n10000)
koefisien_ridge
## n15 n30 n60 n100 n10000
## Intercept 3.050873957 3.0101885 2.990014771 2.998192669 3.0003106179
## Beta1 0.861482004 0.8967782 0.917133885 0.903452817 0.9133708966
## Beta2 8.826918287 9.0142686 9.076641634 9.093291258 9.1268737586
## Beta3 0.011047060 0.0279180 0.011489023 -0.001956583 -0.0008619910
## Beta4 -1.777115872 -1.7969787 -1.814568506 -1.814887999 -1.8257423180
## Beta5 0.003367171 -0.0169336 0.002754825 0.004876253 0.0008040057
penduga <- c(rep("ols",6),rep("ridge",6))
#Bias
bias_ols <- koefisien_ols-beta
bias_ridge <- koefisien_ridge-beta
performa_bias <- rbind.data.frame(bias_ols,bias_ridge)
performa_bias <- data.frame(penduga,performa_bias)
performa_bias
## penduga n15 n30 n60 n100
## Intercept ols -0.039272301 0.005288316 0.008401004 0.015302917
## Beta1 ols -0.014701169 0.004065690 -0.004966523 -0.004541516
## Beta2 ols 0.011817364 0.003472294 0.006818237 -0.006424434
## Beta3 ols -0.029771203 -0.011153392 -0.009881428 0.002007609
## Beta4 ols 0.019557731 0.010446075 0.009117934 0.005512983
## Beta5 ols -0.014470577 -0.003297621 0.021989488 0.001392291
## Intercept1 ridge 0.050873957 0.010188541 -0.009985229 -0.001807331
## Beta11 ridge -0.138517996 -0.103221847 -0.082866115 -0.096547183
## Beta21 ridge -1.173081713 -0.985731355 -0.923358366 -0.906708742
## Beta31 ridge 0.011047060 0.027918003 0.011489023 -0.001956583
## Beta41 ridge 0.222884128 0.203021349 0.185431494 0.185112001
## Beta51 ridge 0.003367171 -0.016933600 0.002754825 0.004876253
## n10000
## Intercept 0.0002401249
## Beta1 -0.0007974459
## Beta2 0.0000553886
## Beta3 0.0009088149
## Beta4 -0.0006538343
## Beta5 0.0002044333
## Intercept1 0.0003106179
## Beta11 -0.0866291034
## Beta21 -0.8731262414
## Beta31 -0.0008619910
## Beta41 0.1742576820
## Beta51 0.0008040057
#Relative Bias
rel_bias_ols <- bias_ols/beta
rel_bias_ridge <- bias_ridge/beta
performa_rel_bias <- rbind.data.frame(rel_bias_ols,rel_bias_ridge)
performa_rel_bias <- data.frame(penduga,performa_rel_bias)
performa_rel_bias
## penduga n15 n30 n60 n100
## Intercept ols -0.013090767 0.0017627720 0.0028003347 0.0051009722
## Beta1 ols -0.014701169 0.0040656897 -0.0049665233 -0.0045415156
## Beta2 ols 0.001181736 0.0003472294 0.0006818237 -0.0006424434
## Beta3 ols -Inf -Inf -Inf Inf
## Beta4 ols -0.009778866 -0.0052230373 -0.0045589669 -0.0027564914
## Beta5 ols -Inf -Inf Inf Inf
## Intercept1 ridge 0.016957986 0.0033961802 -0.0033284097 -0.0006024438
## Beta11 ridge -0.138517996 -0.1032218470 -0.0828661154 -0.0965471826
## Beta21 ridge -0.117308171 -0.0985731355 -0.0923358366 -0.0906708742
## Beta31 ridge Inf Inf Inf -Inf
## Beta41 ridge -0.111442064 -0.1015106746 -0.0927157472 -0.0925560007
## Beta51 ridge Inf -Inf Inf Inf
## n10000
## Intercept 8.004164e-05
## Beta1 -7.974459e-04
## Beta2 5.538860e-06
## Beta3 Inf
## Beta4 3.269171e-04
## Beta5 Inf
## Intercept1 1.035393e-04
## Beta11 -8.662910e-02
## Beta21 -8.731262e-02
## Beta31 -Inf
## Beta41 -8.712884e-02
## Beta51 Inf
#Empirical Varians
n15 <- (apply(ols_results_list$'15',2,sd))^2
n30 <- (apply(ols_results_list$'30',2,sd))^2
n60 <- (apply(ols_results_list$'60',2,sd))^2
n100 <- (apply(ols_results_list$'100',2,sd))^2
n10000 <- (apply(ols_results_list$'10000',2,sd))^2
emp_var_ols <- data.frame(n15,n30,n60,n100,n10000)
n15 <- (apply(ridge_results_list$'15',2,sd))^2
n30 <- (apply(ridge_results_list$'30',2,sd))^2
n60 <- (apply(ridge_results_list$'60',2,sd))^2
n100 <- (apply(ridge_results_list$'100',2,sd))^2
n10000 <- (apply(ridge_results_list$'10000',2,sd))^2
emp_var_ridge <- data.frame(n15,n30,n60,n100,n10000)
performa_emp_var <- rbind.data.frame(emp_var_ols,emp_var_ridge)
performa_emp_var <- data.frame(penduga,performa_emp_var)
performa_emp_var
## penduga n15 n30 n60 n100 n10000
## Intercept ols 0.4229590 0.1656609 0.06909034 0.04388779 0.0004225674
## Beta1 ols 0.5460114 0.1707893 0.07544747 0.04597913 0.0003897106
## Beta2 ols 0.4534137 0.1700935 0.07499676 0.04541360 0.0004081909
## Beta3 ols 0.5039179 0.1949367 0.07068743 0.03976492 0.0003660073
## Beta4 ols 0.4678521 0.1779954 0.07480723 0.04491478 0.0004056293
## Beta5 ols 0.4683790 0.1816330 0.08013757 0.04674696 0.0003915692
## Intercept1 ridge 0.4829744 0.1867271 0.08177916 0.05246098 0.0004901708
## Beta11 ridge 0.4770640 0.1853582 0.07972037 0.04123871 0.0004218655
## Beta21 ridge 0.4178909 0.1502685 0.05959936 0.03379853 0.0003222192
## Beta31 ridge 0.4956147 0.1678557 0.07408385 0.04709911 0.0003713489
## Beta41 ridge 0.5518465 0.1714189 0.07760871 0.04294191 0.0004178630
## Beta51 ridge 0.4662750 0.1865809 0.07363998 0.04473904 0.0004231594
Apakah OLS dan Ridge tak Bias?
Jawabannya : Dikatakan tak bias jika nilai bias mendekati 0. - beta 0 : penduga OLS dan Ridge tak bias - beta 1 : penduga OLS tak bias, Ridge tak bias asimptotik - beta 2 : penduga OLS tak bias, sedankan Ridge bias - beta 3 : penduga OLS dan Ridge tak bias - beta 4 : penduga OLS tak bias, sedankan Ridge bias - beta 5 : penduga OLS dan Ridge tak bias
Apakah OLS Lebih Efisen dari Ridge?
Jawabannya : Tidak. Penduga OLS dan ridge memiliki nilai emp_var yang hampir sama di setiap estimand, yaitu semakin mendekati 0 atau efisien ketika jumlah observasi semakin besar.
Apakah OLS dan Ridge Konsisten ?
Jawabannya : Iya. Nilai bias dan emp_var pada ols dan ridge semakin mendekati 0 seiring bertambahnya jumlah observasi.
Berdasarkan a-c manakah penduga yang lebih baik? Mengapa demikian?
Jawabannya : Metode yang lebih baik untuk soal ini yaitu metode OLS karena memiliki relative bias yang lebih kecil dibandingkan Ridge, dan OLS lebih tak bias dibandingkan dengan Ridge.