Import Library

Bangkitkan data berdistribusi normal

set.seed(123)
#diambil dari distribusi uniform (0,1)
#fungsi dengan runif
get_normStd <- function(n) {
  u1<-runif(n, 0,1)
  u2<-runif(n, 0,1)
 #using box-muller transformation
  z1<-sqrt(-2*log(u1))*cos(2*pi*u2)
  return(z1)
}

#test using shaprio-wilk test
# where Null Hypothesis = Normal, def alpha = 5%
Z_runif <- get_normStd(100)
data.frame(Z_runif) %>% 
ggplot2::ggplot(aes(Z_runif))+
  geom_density(color="black", fill="lightgreen") +
  labs(title="Distribusi Normal",x="Z-value", y = "Density") +
  theme_minimal()

shapiro.test(Z_runif)
## 
##  Shapiro-Wilk normality test
## 
## data:  Z_runif
## W = 0.99244, p-value = 0.8522

Bangkitkan Data Berdistribusi CHI-SQUARE

get_chiSqr <- function(n, df) {
#penyesuaian normal standard (z) ke chisquare (z^2)
  set.seed(123)
  chi<- rep(0, 100)
  for(i in c(1:df)){
    u1<-runif(n,0,1)
    u2<-runif(n,0,1)
    z1<-sqrt(-2*log(u1))*cos(2*pi*u2)
    z<-z1
    x<-z^2
    chi<-chi+x
  }
  return(chi)
}

#test
df <- 2
xx1 <- get_chiSqr(100,df)
data.frame(xx1) %>% 
ggplot2::ggplot(aes(xx1))+
  geom_density(color="black", fill="pink") +
  labs(title="Distribusi Chi-Square",x="chisqr-value", y = "Density") +
  theme_minimal()

shapiro.test(xx1)
## 
##  Shapiro-Wilk normality test
## 
## data:  xx1
## W = 0.87625, p-value = 1.264e-07

Bangkitkan data regresi dengan sisaan homogen

res_homo <- xx1
x <- runif(100, 0, 1)
b <- c(1,2.7)
independen <- model.matrix(~x)
y <- independen %*% b + res_homo

model <- lm(y~x)
resid <- resid(model)
plot(resid, col = "#009999", lwd=7, main = "Sisaan Homogen ~Chisquare(df=2) 100 amatan")

data_homo <- data.frame(y,x)
summary(model)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0644 -1.2138 -0.3571  0.7769  6.2005 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.4426     0.3239   7.541 2.38e-11 ***
## x             3.6314     0.5742   6.324 7.61e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.65 on 98 degrees of freedom
## Multiple R-squared:  0.2898, Adjusted R-squared:  0.2826 
## F-statistic: 39.99 on 1 and 98 DF,  p-value: 7.609e-09

Bangkitkan data regresi dengan sisaan heterogen

sd <- 1:250
res_hetero <- rnorm(n=100, mean = sd, sd=.2*sd)

x <- runif(100,1,2)
b <- c(1,2.7)
independen <- model.matrix(~x)
y <- independen %*% b + res_hetero

model <- lm(y~x)
residu <- resid(model)

plot(residu,col = "#009999", lwd=7)

data_hetero <- data.frame(y,x)
summary(model)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -50.07 -24.73  -1.14  24.65  72.16 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  55.2817    16.2347   3.405  0.00096 ***
## x             0.7642    10.7312   0.071  0.94337    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.1 on 98 degrees of freedom
## Multiple R-squared:  5.175e-05,  Adjusted R-squared:  -0.01015 
## F-statistic: 0.005072 on 1 and 98 DF,  p-value: 0.9434

Membandingkan Standar Error hasil simulasi yang diulang sebanyak 1000 kali

set.seed(Sys.Date())
compare_se <- function(data_homo, data_hetero){
  stdeHOMO <- NULL
  for (i in 1:1000) {
    tesHo <- lm(y~x , data_homo %>%  dplyr::sample_n(10,replace = FALSE))
    stdeHOMO <- c(stdeHOMO, sqrt(deviance(tesHo)/df.residual(tesHo)))
  }
  SE_homo <- var(stdeHOMO)

  stdeHETERO <- NULL
  for (i in 1:1000) {
    tesHe <- lm(y~x , data = data_hetero %>%  dplyr::sample_n(10,replace = FALSE))
    stdeHETERO <- c(stdeHETERO,sqrt(deviance(tesHe)/df.residual(tesHe)))
  }
  SE_hetero <- var(stdeHETERO)
  
  hasil <- data.frame(SE_homo, SE_hetero)
  return(hasil)
}
compare_se(data_homo, data_hetero)
##     SE_homo SE_hetero
## 1 0.2757349  36.39652