Import Library
Bangkitkan data berdistribusi normal
set.seed(123)
#diambil dari distribusi uniform (0,1)
#fungsi dengan runif
get_normStd <- function(n) {
u1<-runif(n, 0,1)
u2<-runif(n, 0,1)
#using box-muller transformation
z1<-sqrt(-2*log(u1))*cos(2*pi*u2)
return(z1)
}
#test using shaprio-wilk test
# where Null Hypothesis = Normal, def alpha = 5%
Z_runif <- get_normStd(100)
data.frame(Z_runif) %>%
ggplot2::ggplot(aes(Z_runif))+
geom_density(color="black", fill="lightgreen") +
labs(title="Distribusi Normal",x="Z-value", y = "Density") +
theme_minimal()

shapiro.test(Z_runif)
##
## Shapiro-Wilk normality test
##
## data: Z_runif
## W = 0.99244, p-value = 0.8522
Bangkitkan Data Berdistribusi CHI-SQUARE
get_chiSqr <- function(n, df) {
#penyesuaian normal standard (z) ke chisquare (z^2)
set.seed(123)
chi<- rep(0, 100)
for(i in c(1:df)){
u1<-runif(n,0,1)
u2<-runif(n,0,1)
z1<-sqrt(-2*log(u1))*cos(2*pi*u2)
z<-z1
x<-z^2
chi<-chi+x
}
return(chi)
}
#test
df <- 2
xx1 <- get_chiSqr(100,df)
data.frame(xx1) %>%
ggplot2::ggplot(aes(xx1))+
geom_density(color="black", fill="pink") +
labs(title="Distribusi Chi-Square",x="chisqr-value", y = "Density") +
theme_minimal()

shapiro.test(xx1)
##
## Shapiro-Wilk normality test
##
## data: xx1
## W = 0.87625, p-value = 1.264e-07
Bangkitkan data regresi dengan sisaan homogen
res_homo <- xx1
x <- runif(100, 0, 1)
b <- c(1,2.7)
independen <- model.matrix(~x)
y <- independen %*% b + res_homo
model <- lm(y~x)
resid <- resid(model)
plot(resid, col = "#009999", lwd=7, main = "Sisaan Homogen ~Chisquare(df=2) 100 amatan")

data_homo <- data.frame(y,x)
summary(model)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0644 -1.2138 -0.3571 0.7769 6.2005
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.4426 0.3239 7.541 2.38e-11 ***
## x 3.6314 0.5742 6.324 7.61e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.65 on 98 degrees of freedom
## Multiple R-squared: 0.2898, Adjusted R-squared: 0.2826
## F-statistic: 39.99 on 1 and 98 DF, p-value: 7.609e-09
Bangkitkan data regresi dengan sisaan heterogen
sd <- 1:250
res_hetero <- rnorm(n=100, mean = sd, sd=.2*sd)
x <- runif(100,1,2)
b <- c(1,2.7)
independen <- model.matrix(~x)
y <- independen %*% b + res_hetero
model <- lm(y~x)
residu <- resid(model)
plot(residu,col = "#009999", lwd=7)

data_hetero <- data.frame(y,x)
summary(model)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.07 -24.73 -1.14 24.65 72.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.2817 16.2347 3.405 0.00096 ***
## x 0.7642 10.7312 0.071 0.94337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.1 on 98 degrees of freedom
## Multiple R-squared: 5.175e-05, Adjusted R-squared: -0.01015
## F-statistic: 0.005072 on 1 and 98 DF, p-value: 0.9434
Membandingkan Standar Error hasil simulasi yang diulang sebanyak
1000 kali
set.seed(Sys.Date())
compare_se <- function(data_homo, data_hetero){
stdeHOMO <- NULL
for (i in 1:1000) {
tesHo <- lm(y~x , data_homo %>% dplyr::sample_n(10,replace = FALSE))
stdeHOMO <- c(stdeHOMO, sqrt(deviance(tesHo)/df.residual(tesHo)))
}
SE_homo <- var(stdeHOMO)
stdeHETERO <- NULL
for (i in 1:1000) {
tesHe <- lm(y~x , data = data_hetero %>% dplyr::sample_n(10,replace = FALSE))
stdeHETERO <- c(stdeHETERO,sqrt(deviance(tesHe)/df.residual(tesHe)))
}
SE_hetero <- var(stdeHETERO)
hasil <- data.frame(SE_homo, SE_hetero)
return(hasil)
}
compare_se(data_homo, data_hetero)
## SE_homo SE_hetero
## 1 0.2757349 36.39652