# Arrange subsequent plots in a grid of 3 rows and 1 column.
par(mfrow = c(3, 1))
library(probs)
## 
## Attaching package: 'probs'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

Central Limit Theorem

The Central Limit Theorem states that as the sample size increases, the distribution of the sample mean will tend to follow a normal distribution, regardless of the shape of the original population distribution.

# Fixed "population" of 20 Poisson draws (lambda = 3). For each sample size,
# urnsamples() lists the unordered samples taken without replacement (one
# sample per row) and the mean of every row is stored as a one-column matrix.
set.seed(1)
populasi <- rpois(n = 20, lambda = 3)

# Sample size 2.
n1     <- 2
pois_1 <- urnsamples(populasi, size = n1, replace = FALSE, ordered = FALSE)
mean_1 <- matrix(apply(pois_1, MARGIN = 1, FUN = mean))

# Sample size 6 (pois_1 is overwritten with the new set of samples).
n2     <- 6
pois_1 <- urnsamples(populasi, size = n2, replace = FALSE, ordered = FALSE)
mean_2 <- matrix(apply(pois_1, MARGIN = 1, FUN = mean))

# Sample size 10 (pois_1 is overwritten again).
n3     <- 10
pois_1 <- urnsamples(populasi, size = n3, replace = FALSE, ordered = FALSE)
mean_3 <- matrix(apply(pois_1, MARGIN = 1, FUN = mean))

# Histograms of the sample means for the three sample sizes.
# Each title is built from n1 / n2 / n3 so that it always reports the sample
# size that was actually used (the second title used to say n=5 although the
# samples were drawn with n2 = 6).
hist(mean_1,
     main = paste0("Normal approximation compare to poisson (n=", n1, ")"),
     xlab = "xbar")

hist(mean_2,
     main = paste0("Normal approximation compare to poisson (n=", n2, ")"),
     xlab = "xbar")

hist(mean_3,
     main = paste0("Normal approximation compare to poisson (n=", n3, ")"),
     xlab = "xbar")

From the graphs, we can observe that as the sample size becomes larger, the distribution of the sample means becomes more symmetric and approaches a normal distribution. This result is consistent with the Central Limit Theorem.

Unbiased Parameter Estimators

The sample mean and the sample variance are unbiased estimators of the population mean and the population variance, respectively.

Unbiased estimator for mean

# Sample mean vs. sample median as estimators of the population mean,
# using a population of 20 standard-normal draws.
# All unordered samples of size n drawn without replacement are listed by
# urnsamples(); the mean and the median of every sample are then averaged.
library(probs)
set.seed(4)
n                    <- 10
populasi1            <- rnorm(20)
mean_pop1            <- mean(populasi1)
# size now uses n (it was hard-coded to 10 while n was left unused), and
# FALSE is spelled out because F is an ordinary, reassignable variable.
sampel_normal1       <- urnsamples(populasi1, size = n,
                                   replace = FALSE, ordered = FALSE)
mean_normal1         <- matrix(apply(sampel_normal1, 1, mean))
median_normal1       <- matrix(apply(sampel_normal1, 1, median))
# Averages over all listed samples.
harapan_mean_norm1   <- mean(mean_normal1)
harapan_median_norm1 <- mean(median_normal1)

# Sample mean vs. sample median as estimators of the population mean,
# using a population of 20 exponential draws (default rate).
# All unordered samples of size n drawn without replacement are listed by
# urnsamples(); the mean and the median of every sample are then averaged.
library(probs)
set.seed(5)
n                   <- 10
populasi2           <- rexp(20)
mean_pop2           <- mean(populasi2)
# size now uses n (it was hard-coded to 10 while n was left unused), and
# FALSE is spelled out because F is an ordinary, reassignable variable.
sampel_exp1         <- urnsamples(populasi2, size = n,
                                  replace = FALSE, ordered = FALSE)
mean_exp1           <- matrix(apply(sampel_exp1, 1, mean))
median_exp1         <- matrix(apply(sampel_exp1, 1, median))
# Averages over all listed samples.
harapan_mean_exp1   <- mean(mean_exp1)
harapan_median_exp1 <- mean(median_exp1)

# Sample mean vs. sample median as estimators of the population mean,
# using a population of 20 uniform(0, 1) draws.
# All unordered samples of size n drawn without replacement are listed by
# urnsamples(); the mean and the median of every sample are then averaged.
library(probs)
set.seed(5)
n                    <- 10
populasi3            <- runif(20)
mean_pop3            <- mean(populasi3)
# size now uses n (it was hard-coded to 10 while n was left unused), and
# FALSE is spelled out because F is an ordinary, reassignable variable.
sampel_unif1         <- urnsamples(populasi3, size = n,
                                   replace = FALSE, ordered = FALSE)
mean_unif1           <- matrix(apply(sampel_unif1, 1, mean))
median_unif1         <- matrix(apply(sampel_unif1, 1, median))
# Averages over all listed samples.
harapan_mean_unif1   <- mean(mean_unif1)
harapan_median_unif1 <- mean(median_unif1)

# Summary table: one column per distribution; the rows hold the population
# mean, the average of the sample means and the average of the sample medians.
hasil <- data.frame(
  "Hasil" = c(
    "mean_populasi",
    "harapan_mean_contoh",
    "harapan_median_contoh"
  ),
  "Sebaran Normal" = c(
    mean_pop1,
    harapan_mean_norm1,
    harapan_median_norm1
  ),
  "Sebaran Eksponensial" = c(
    mean_pop2,
    harapan_mean_exp1,
    harapan_median_exp1
  ),
  "Sebaran Seragam" = c(
    mean_pop3,
    harapan_mean_unif1,
    harapan_median_unif1
  )
)

hasil
##                   Hasil Sebaran.Normal Sebaran.Eksponensial Sebaran.Seragam
## 1         mean_populasi      0.3762963            0.9007143       0.5036803
## 2   harapan_mean_contoh      0.3762963            0.9007143       0.5036803
## 3 harapan_median_contoh      0.2391035            0.7146673       0.4802357

Unbiased estimator for variance

# Sample variance with divisor (n - 1) vs. divisor n, using a population of
# 20 geometric(0.5) draws.
set.seed(2)
n          <- 10
population <- rgeom(20, 0.5)
# Population variance with divisor N: var() divides by (N - 1), so rescale.
# The population size is taken from the data instead of repeating the 20.
sigmageom  <- var(population) * (length(population) - 1) / length(population)

# All unordered samples of size n drawn without replacement. size now uses n
# (it was hard-coded to 10 while n was left unused); FALSE replaces the
# reassignable shorthand F.
samplegeom <- urnsamples(population, size = n, replace = FALSE, ordered = FALSE)

# Per-sample variance with divisor (n - 1), and its average over all samples.
s2.n1      <- matrix(apply(samplegeom, 1, var))
E.s2.n1    <- mean(s2.n1)

# Same variances rescaled to divisor n, and their average.
s2.n       <- s2.n1 * (n - 1) / n
E.s2.n     <- mean(s2.n)
# NOTE(review): with sampling without replacement from a finite population,
# E.s2.n1 matches var(population) (divisor N - 1) rather than sigmageom
# (divisor N) -- confirm which population variance is the intended target.

# Sample variance with divisor (n - 1) vs. divisor n, using a population of
# 20 standard-normal draws.
set.seed(3)
n            <- 10
population2  <- rnorm(20)
# Population variance with divisor N: var() divides by (N - 1), so rescale.
# The population size is taken from the data instead of repeating the 20.
sigmanorm    <- var(population2) * (length(population2) - 1) / length(population2)

library(probs)
# All unordered samples of size n drawn without replacement. size now uses n
# (it was hard-coded to 10 while n was left unused); FALSE replaces the
# reassignable shorthand F.
samplenorm   <- urnsamples(population2, size = n, replace = FALSE, ordered = FALSE)

# Per-sample variance with divisor (n - 1), and its average over all samples.
s2.n1.norm   <- matrix(apply(samplenorm, 1, var))
E.s2.n1.norm <- mean(s2.n1.norm)

# Same variances rescaled to divisor n, and their average.
s2.n.norm    <- s2.n1.norm * (n - 1) / n
E.s2.n.norm  <- mean(s2.n.norm)
# NOTE(review): with sampling without replacement from a finite population,
# E.s2.n1.norm matches var(population2) (divisor N - 1) rather than sigmanorm
# (divisor N) -- confirm which population variance is the intended target.

# Summary table: population variance next to the averaged sample variances
# (divisor n - 1 and divisor n) for the geometric and the normal population.
result <- data.frame(
  "." = c(
    "ragam populasi",
    "nilai harapan ragam contoh (n-1)",
    "nilai harapan ragam contoh (n)"
  ),
  "Sebaran Geometrik" = c(sigmageom, E.s2.n1, E.s2.n),
  "Sebaran Normal" = c(sigmanorm, E.s2.n1.norm, E.s2.n.norm)
)
result
##                                  . Sebaran.Geometrik Sebaran.Normal
## 1                   ragam populasi         0.9275000      0.5813930
## 2 nilai harapan ragam contoh (n-1)         0.9763158      0.6119927
## 3   nilai harapan ragam contoh (n)         0.8786842      0.5507934

Confidence Interval

A confidence interval is an interval estimate built so that, over repeated sampling, a stated proportion of such intervals contains the true parameter value. For example, with a 95% confidence level we expect about 95% of the intervals constructed this way to contain the true parameter: if we construct 100 such intervals, roughly 95 of them will contain the parameter and about 5 will not. A higher confidence level makes the interval wider, because the interval has to capture the parameter more often. A lower confidence level makes the interval narrower, at the cost of missing the parameter more often.

# Coverage of the 95% z-interval for the mean with sample size 10:
# draw k samples from a normal(mu, std) population, build
# xbar -/+ z * s / sqrt(n) for every sample and record the share of
# intervals that contain mu.
n1    <- 10
k     <- 100
alpha <- 0.05
mu    <- 50
std   <- 10
set.seed(123)
# k rows, one simulated sample of n1 observations per row.
sampel.norm1 <- matrix(rnorm(n1 * k, mean = mu, sd = std), nrow = k)
xbar.norm1   <- apply(sampel.norm1, MARGIN = 1, FUN = mean)
s.norm1      <- apply(sampel.norm1, MARGIN = 1, FUN = sd)
SE.norm1     <- s.norm1 / sqrt(n1)
z.norm1      <- qnorm(1 - alpha / 2)
# TRUE for every interval that strictly contains mu.
SK.norm1     <- (mu > xbar.norm1 - z.norm1 * SE.norm1) &
  (mu < xbar.norm1 + z.norm1 * SE.norm1)
# Observed coverage proportion.
x.norm1      <- sum(SK.norm1) / k

# Coverage of the 95% z-interval for the mean with sample size 30.
# Same simulation set-up and seed as for the other sample sizes.
n2    <- 30
k     <- 100
alpha <- 0.05
mu    <- 50
std   <- 10
set.seed(123)
# k rows, one simulated sample of n2 observations per row.
sampel.norm2 <- matrix(rnorm(n2 * k, mean = mu, sd = std), nrow = k)
xbar.norm2   <- apply(sampel.norm2, MARGIN = 1, FUN = mean)
s.norm2      <- apply(sampel.norm2, MARGIN = 1, FUN = sd)
SE.norm2     <- s.norm2 / sqrt(n2)
z.norm2      <- qnorm(1 - alpha / 2)
# Half-width of each interval, shared by the lower and the upper bound.
margin.norm2 <- z.norm2 * SE.norm2
# TRUE for every interval that strictly contains mu.
SK.norm2     <- (xbar.norm2 - margin.norm2 < mu & mu < xbar.norm2 + margin.norm2)
# Observed coverage proportion.
x.norm2      <- sum(SK.norm2) / k

# Coverage of the 95% z-interval for the mean with sample size 100, followed
# by a plot of all k intervals.
n3    <- 100
k     <- 100
alpha <- 0.05
mu    <- 50
std   <- 10
set.seed(123)
# k rows, one simulated sample of n3 observations per row.
sampel.norm3 <- matrix(rnorm(n3 * k, mu, std), k)
xbar.norm3   <- apply(sampel.norm3, 1, mean)
s.norm3      <- apply(sampel.norm3, 1, sd)
SE.norm3     <- s.norm3 / sqrt(n3)
z.norm3      <- qnorm(1 - alpha / 2)
# Interval bounds, kept so the coverage check and the plot share them.
lower.norm3  <- xbar.norm3 - z.norm3 * SE.norm3
upper.norm3  <- xbar.norm3 + z.norm3 * SE.norm3
# TRUE for every interval that strictly contains mu.
SK.norm3     <- (lower.norm3 < mu & mu < upper.norm3)
# Observed coverage proportion.
x.norm3      <- sum(SK.norm3) / k

# One horizontal segment per replication: blue if the interval contains mu,
# red otherwise. The plot is titled n=100, so it must draw the n3 intervals;
# it previously drew the n2 (n=30) bounds and colours under that title.
matplot(rbind(lower.norm3, upper.norm3),
        rbind(seq_len(k), seq_len(k)),
        col = ifelse(SK.norm3, "blue", "red"), type = "l", lty = 1,
        main = paste0("Selang Kepercayaan 95% (n=", n3, ")"),
        xlab = "SK", ylab = "banyak ulangan")
# Vertical reference line at the true mean.
abline(v = mu)