Code/Syntax : File.rmd
Kelompok 4 Paralel 2
| Nama | NIM |
|---|---|
| Angga Fathan Rofiqy | G1401211006 |
| Gladys Adya Zafira | G1401211014 |
| Kheni Hikmah Lestari | G1401211029 |
Bagian A
💡Pada bagian A, populasi dianggap tidak terbatas. Sejalan dengan fungsi pembangkit bilangan acak dalam R merupakan sampling dari sebaran asli. Sehingga ketika kita melakukan pembangkitan bilangan acak, itu bisa langsung dianggap sampling jika langsung menggunakan fungsi pembangkit bilangan acak seperti
rexp. Ini tidak berlaku jika kita mencampur fungsi nya, seperti pada kasus bagian B.
Poin 1
Bangkitkan sebaran Normal, Seragam, dan Eksponensial dengan ukuran contoh 2, 5, 25
Berikut merupakan hasil pembangkitan masing-masing sebaran dengan ukuran contoh 2, 5, 25:
Normal
set.seed(361)
k <- 1000 # number of replications (no prescribed value; any count works)
sample_size <- c(2, 5, 25)
# One k-row matrix of standard-normal draws per sample size (n columns each).
norm_samples <- lapply(sample_size, function(n) matrix(rnorm(n * k), nrow = k))
par(mfrow = c(1, 3))
# Draw one histogram panel per sample size, left to right.
norm_titles <- sprintf("Sebaran Normal\nUkuran sample (n=%d)", c(2L, 5L, 25L))
norm_fills <- c("#d9eacb", "#b4e69e", "#8DC16D")
for (panel in seq_along(norm_titles)) {
  hist(norm_samples[[panel]], main = norm_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = norm_fills[panel])
}
Sebaran Normal dengan ukuran contoh (n) 2, 5, 25
Seragam
set.seed(361)
k <- 1000 # number of replications (no prescribed value; any count works)
sample_size <- c(2, 5, 25)
# One k-row matrix of Uniform(0, 1) draws per sample size (n columns each).
unif_samples <- lapply(sample_size, function(n) matrix(runif(n * k), nrow = k))
par(mfrow = c(1, 3))
# Draw one histogram panel per sample size, left to right.
unif_titles <- c("Sebaran Seragam\nUkuran sample (n=2)",
                 "Sebaran Seragam\nUkuran sample (n=5)",
                 "Sebaran Seragam\nUkuran sample (n=25)")
unif_fills <- c("#c8daf0", "#a2c6eb", "#4493CA")
for (panel in seq_along(unif_titles)) {
  hist(unif_samples[[panel]], main = unif_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = unif_fills[panel])
}
Sebaran Seragam dengan ukuran contoh (n) 2, 5, 25
Eksponensial
set.seed(361)
k <- 1000 # number of replications (no prescribed value; any count works)
sample_size <- c(2, 5, 25)
# One k-row matrix of Exponential(rate = 1) draws per sample size.
exp_samples <- lapply(sample_size, function(n) matrix(rexp(n * k), nrow = k))
par(mfrow = c(1, 3))
# Draw one histogram panel per sample size, left to right.
exp_titles <- sprintf("Sebaran Eksponensial\nUkuran sample (n=%d)",
                      c(2L, 5L, 25L))
exp_fills <- c("#E4B3BA", "#DE7C8A", "#D44155")
for (panel in seq_along(exp_titles)) {
  hist(exp_samples[[panel]], main = exp_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = exp_fills[panel])
}
Sebaran Eksponensial dengan ukuran contoh (n) 2, 5, 25
Poin 2
Buat histogram dari rataan contohnya
Histogram rata-rata contoh masing-masing sebaran:
Normal
# Row-wise means of every sample matrix, one result per sample size.
#
# n            : vector of sample sizes; only its length is used, to index
#                into dist_samples.
# dist_samples : list of matrices, one per entry of n; every row is averaged.
# Returns a list of numeric vectors of row means, in the same order as n.
dist_mean <- function(n, dist_samples) {
  lapply(seq_along(n), function(idx) apply(dist_samples[[idx]], 1, mean))
}
# Sample means of the normal draws, one vector per sample size.
norm_means <- dist_mean(sample_size, norm_samples)
par(mfrow = c(1, 3))
# One histogram of sample means per sample size.
mean_titles <- sprintf("Sebaran Normal\nRata-rata sample (n=%d)",
                       c(2L, 5L, 25L))
mean_fills <- c("#d9eacb", "#b4e69e", "#8DC16D")
for (panel in seq_along(mean_titles)) {
  hist(norm_means[[panel]], main = mean_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = mean_fills[panel])
}
Seragam
# Sample means of the uniform draws, one vector per sample size.
unif_means <- dist_mean(sample_size, unif_samples)
par(mfrow = c(1, 3))
# One histogram of sample means per sample size.
mean_titles <- sprintf("Sebaran Seragam\nRata-rata sample (n=%d)",
                       c(2L, 5L, 25L))
mean_fills <- c("#c8daf0", "#a2c6eb", "#4493CA")
for (panel in seq_along(mean_titles)) {
  hist(unif_means[[panel]], main = mean_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = mean_fills[panel])
}
Eksponensial
# Sample means of the exponential draws, one vector per sample size.
exp_means <- dist_mean(sample_size, exp_samples)
par(mfrow = c(1, 3))
# One histogram of sample means per sample size.
mean_titles <- sprintf("Sebaran Eksponensial\nRata-rata sample (n=%d)",
                       c(2L, 5L, 25L))
mean_fills <- c("#E4B3BA", "#DE7C8A", "#D44155")
for (panel in seq_along(mean_titles)) {
  hist(exp_means[[panel]], main = mean_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = mean_fills[panel])
}
Poin 3
Buat normal qq-plot dari masing-masing n
Normal
par(mfrow = c(1, 3))
# Normal QQ plot (points plus reference line) of the sample means, one panel
# per sample size.
qq_titles <- sprintf("QQ-Plot Normal\nUkuran Sample (n=%d)", c(2L, 5L, 25L))
qq_cols <- c("#d9eacb", "#b4e69e", "#8DC16D")
for (panel in seq_along(qq_titles)) {
  qqnorm(norm_means[[panel]], main = qq_titles[panel], col = qq_cols[panel],
         lwd = 7, xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
  qqline(norm_means[[panel]], col = "black", lwd = 2)
}
Seragam
par(mfrow = c(1, 3))
# Normal QQ plot (points plus reference line) of the sample means, one panel
# per sample size.
qq_titles <- sprintf("QQ-Plot Seragam\nUkuran Sample (n=%d)", c(2L, 5L, 25L))
qq_cols <- c("#c8daf0", "#a2c6eb", "#4493CA")
for (panel in seq_along(qq_titles)) {
  qqnorm(unif_means[[panel]], main = qq_titles[panel], col = qq_cols[panel],
         lwd = 7, xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
  qqline(unif_means[[panel]], col = "black", lwd = 2)
}
Eksponensial
par(mfrow = c(1, 3))
# Normal QQ plot (points plus reference line) of the sample means, one panel
# per sample size.
qq_titles <- sprintf("QQ-Plot Eksponensial\nUkuran Sample (n=%d)",
                     c(2L, 5L, 25L))
qq_cols <- c("#E4B3BA", "#DE7C8A", "#D44155")
for (panel in seq_along(qq_titles)) {
  qqnorm(exp_means[[panel]], main = qq_titles[panel], col = qq_cols[panel],
         lwd = 7, xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
  qqline(exp_means[[panel]], col = "black", lwd = 2)
}
Hasil Keseluruhan
Menggunakan library ggplot2 & cowplot
untuk menggabungkan nya.
sample_size <- c(2, 5, 25)

# Build the three ggplot panels shown for one distribution / sample size:
# a density histogram of the raw draws, a density histogram of the sample
# means, and a normal QQ plot of the sample means.
#
# data_awal   : raw draws (vector or matrix); flattened with c() before
#               plotting.
# data_rataan : vector of sample means.
# title       : first line of every panel title.
# color       : fill colour of the histograms/densities and the QQ points.
# Returns list(p1, p2, p3) in the order described above.
plot_distribution <- function(data_awal, data_rataan, title, color) {
  df_awal <- data.frame(x = c(data_awal))
  df_rataan <- data.frame(x = data_rataan)

  # Title styling shared by all three panels.
  title_theme <- theme(
    plot.title = element_text(size = 10, face = "bold", hjust = 0.5)
  )

  # Density-scaled histogram with a smoothed density overlay.
  # after_stat(density) and linewidth replace the deprecated `..density..`
  # and line `size`, matching the `linewidth` already used for the QQ line.
  density_histogram <- function(df, subtitle) {
    ggplot(data = df, aes(x = x)) +
      geom_histogram(aes(y = after_stat(density)), bins = 20, fill = color,
                     color = "black", alpha = 1) +
      geom_density(fill = color, adjust = 3, linewidth = 1, alpha = .5) +
      labs(title = paste0(title, subtitle)) +
      theme_minimal() +
      title_theme
  }

  # Histogram of the raw draws
  p1 <- density_histogram(df_awal, "\nSebaran Awal")
  # Histogram of the sample means
  p2 <- density_histogram(df_rataan, "\nRata-rata Sampel")
  # QQ plot of the sample means
  p3 <- ggplot(data = df_rataan, aes(sample = x)) +
    stat_qq(color = color, size = 5) +
    stat_qq_line(color = "black", linewidth = 1.5) +
    labs(title = paste0(title, "\nQQ-Plot")) +
    theme_minimal() +
    title_theme

  list(p1, p2, p3)
}
all_plots <- list()
# Fill colours ordered Normal, Uniform, Exponential. The lightest set is the
# fallback; sizes 5 and 25 get progressively darker shades.
base_palette <- c("#d9eacb", "#c8daf0", "#E4B3BA")
size_palettes <- list(c("#b4e69e", "#a2c6eb", "#DE7C8A"),
                      c("#8DC16D", "#4493CA", "#D44155"))
for (size in sample_size) {
  hit <- match(size, c(5, 25))
  colors <- if (is.na(hit)) base_palette else size_palettes[[hit]]
  pos <- which(sample_size == size) # position of this size in the result lists
  # Nine panels per size: (raw, means, QQ) for each of the three distributions.
  plots_for_size <- c(
    plot_distribution(norm_samples[[pos]], norm_means[[pos]],
                      paste0("Distribusi Normal (n=", size, ")"), colors[1]),
    plot_distribution(unif_samples[[pos]], unif_means[[pos]],
                      paste0("Distribusi Seragam (n=", size, ")"), colors[2]),
    plot_distribution(exp_samples[[pos]], exp_means[[pos]],
                      paste0("Distribusi Eksponensial (n=", size, ")"),
                      colors[3])
  )
  all_plots <- c(all_plots, plots_for_size)
}
plot_grid(plotlist = all_plots, ncol = 9)
Bagian B
Poin 1 & 2
Bangkitkan dua gugus data
Data dari sebaran Normal
Data campuran:
50% dari sebaran normal + 50% dari sebaran chi-square
50% sebaran chi-square dengan parameter a + 50% sebaran chi-square dengan parameter b
25% sebaran chi-square dengan parameter a + 25% sebaran chi-square dengan parameter b + 25% sebaran normal dengan parameter a + 25% sebaran normal parameter b
Normal
set.seed(361)
k <- 1000; n <- 1
# Finite "population" of k standard-normal values stored as a k x 1 matrix.
norm_data <- matrix(rnorm(n * k, mean = 0, sd = 1), nrow = k)
# Title fixed: this panel shows the Normal population, not mixture 1.
hist(norm_data, main = "Sebaran Populasi Normal",
     xlab = "Nilai", ylab = "Frekuensi", col = "#357a87")
Campuran 1
50% dari sebaran normal + 50% dari sebaran chi-square
set.seed(361)
k <- 1000
n <- 1
# Population for mixture 1: the first half of the rows are N(0, 1) draws and
# the second half are chi-square(df = 5) draws, stacked into one column.
normal_half <- matrix(rnorm(n * k / 2, mean = 0, sd = 1), nrow = k / 2)
chisq_half <- matrix(rchisq(n * k / 2, df = 5), nrow = k / 2)
mix_data_1 <- rbind(normal_half, chisq_half)
hist(mix_data_1, main = "Sebaran Populasi Campuran 1",
     xlab = "Nilai", ylab = "Frekuensi", col = "#792b9a")
Campuran 2
50% sebaran chi-square dengan parameter a + 50% sebaran chi-square dengan parameter b
set.seed(361)
k <- 1000
n <- 1
# Population for mixture 2: half chi-square(df = 3) draws followed by half
# chi-square(df = 8) draws, stacked into one column.
chisq_low_df <- matrix(rchisq(n * k / 2, df = 3), nrow = k / 2)
chisq_high_df <- matrix(rchisq(n * k / 2, df = 8), nrow = k / 2)
mix_data_2 <- rbind(chisq_low_df, chisq_high_df)
hist(mix_data_2, main = "Sebaran Populasi Campuran 2",
     xlab = "Nilai", ylab = "Frekuensi", col = "#96336c")
Campuran 3
25% sebaran chi-square dengan parameter a + 25% sebaran chi-square dengan parameter b + 25% sebaran normal dengan parameter a + 25% sebaran normal parameter b
set.seed(361)
k <- 1000
n <- 1
# Population for mixture 3: four equal quarters stacked into one column, in
# this order: chi-square(3), chi-square(8), N(0, 1), N(2, 1.5).
quarter_chisq_a <- matrix(rchisq(n * k / 4, df = 3), nrow = k / 4)
quarter_chisq_b <- matrix(rchisq(n * k / 4, df = 8), nrow = k / 4)
quarter_norm_a <- matrix(rnorm(n * k / 4, mean = 0, sd = 1), nrow = k / 4)
quarter_norm_b <- matrix(rnorm(n * k / 4, mean = 2, sd = 1.5), nrow = k / 4)
mix_data_3 <- rbind(quarter_chisq_a, quarter_chisq_b,
                    quarter_norm_a, quarter_norm_b)
hist(mix_data_3, main = "Sebaran Populasi Campuran 3",
     xlab = "Nilai", ylab = "Frekuensi", col = "#9a3b1a")
Poin 3 & 4
- Ambil sample dengan ukuran n = 4, 12, 20, 60, 100
- Buat histogram (Rata-rata contoh) dan normal qq-plot
Histogram rata-rata contoh masing-masing sebaran:
Normal
set.seed(361)
k <- 1000
sample_sizes <- c(4, 12, 20, 60, 100)

# Means of repeated samples drawn without replacement from a finite population.
#
# dist : population values to sample from (vector or matrix).
# n    : size of each sample; sample() errors if n exceeds length(dist).
# reps : number of samples to draw. Defaults to the variable `k` visible where
#        this function is defined, so existing two-argument calls behave as
#        before; pass it explicitly to avoid depending on that variable.
# Returns a numeric vector of length reps holding one sample mean per draw.
mean_dist <- function(dist, n, reps = k) {
  # seq_len() yields an empty sequence for reps = 0, unlike 1:reps.
  vapply(
    seq_len(reps),
    function(i) mean(sample(dist, size = n, replace = FALSE)),
    numeric(1)
  )
}
# Sample means drawn from the Normal population, one vector per sample size.
normal_data <- lapply(sample_sizes, mean_dist, dist = norm_data)
par(mfrow = c(1, 5))
# One histogram of sample means per sample size, lightest to darkest shade.
panel_titles <- sprintf("Sebaran Normal\nUkuran sample (n=%d)",
                        c(4L, 12L, 20L, 60L, 100L))
panel_fills <- c("#e3f4f1", "#a9ded7", "#6ec1c1", "#357a87", "#0f2c3a")
for (panel in seq_along(panel_titles)) {
  hist(normal_data[[panel]], main = panel_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = panel_fills[panel])
}
Campuran 1
set.seed(361)
k <- 1000
# Sample means drawn from the mixture 1 population, one vector per sample size.
mixed_data1 <- lapply(sample_sizes, mean_dist, dist = mix_data_1)
par(mfrow = c(1, 5))
# One histogram of sample means per sample size, lightest to darkest shade.
panel_titles <- sprintf("Sebaran Campuran 1\nUkuran sample (n=%d)",
                        c(4L, 12L, 20L, 60L, 100L))
panel_fills <- c("#eae1f8", "#d5bbf3", "#9d48c7", "#792b9a", "#48185b")
for (panel in seq_along(panel_titles)) {
  hist(mixed_data1[[panel]], main = panel_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = panel_fills[panel])
}
Campuran 2
set.seed(361)
# Sample means drawn from the mixture 2 population, one vector per sample size.
mixed_data2 <- lapply(sample_sizes, mean_dist, dist = mix_data_2)
par(mfrow = c(1, 5))
# One histogram of sample means per sample size, lightest to darkest shade.
panel_titles <- sprintf("Sebaran Campuran 2\nUkuran sample (n=%d)",
                        c(4L, 12L, 20L, 60L, 100L))
panel_fills <- c("#fae7fd", "#eb91f2", "#e44cb5", "#96336c", "#61283f")
for (panel in seq_along(panel_titles)) {
  hist(mixed_data2[[panel]], main = panel_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = panel_fills[panel])
}
Campuran 3
set.seed(361)
# Sample means drawn from the mixture 3 population, one vector per sample size.
mixed_data3 <- lapply(sample_sizes, mean_dist, dist = mix_data_3)
par(mfrow = c(1, 5))
# One histogram of sample means per sample size, lightest to darkest shade.
panel_titles <- sprintf("Sebaran Campuran 3\nUkuran sample (n=%d)",
                        c(4L, 12L, 20L, 60L, 100L))
panel_fills <- c("#f5d0bf", "#e9865b", "#ce572b", "#9a3b1a", "#59230c")
for (panel in seq_along(panel_titles)) {
  hist(mixed_data3[[panel]], main = panel_titles[panel],
       xlab = "Nilai", ylab = "Frekuensi", col = panel_fills[panel])
}
QQ-Plot rata-rata contoh masing-masing sebaran:
Normal
par(mfrow = c(1, 5))
# Normal QQ plot (points plus reference line) of the sample means, one panel
# per sample size. Each panel indexes its own element of normal_data; the
# n=60 and n=100 panels previously re-plotted the n=20 means (element 3).
qq_sizes <- c(4L, 12L, 20L, 60L, 100L)
qq_cols <- c("#e3f4f1", "#a9ded7", "#6ec1c1", "#357a87", "#0f2c3a")
for (panel in seq_along(qq_sizes)) {
  qqnorm(normal_data[[panel]],
         main = sprintf("QQ-Plot Normal\nUkuran Sample (n=%d)", qq_sizes[panel]),
         col = qq_cols[panel], lwd = 7,
         xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
  qqline(normal_data[[panel]], col = "black", lwd = 2)
}
Campuran 1
par(mfrow = c(1, 5))
# Normal QQ plot (points plus reference line) of the sample means, one panel
# per sample size. Each panel indexes its own element of mixed_data1; the
# n=60 and n=100 panels previously re-plotted the n=20 means (element 3).
qq_sizes <- c(4L, 12L, 20L, 60L, 100L)
qq_cols <- c("#eae1f8", "#d5bbf3", "#9d48c7", "#792b9a", "#48185b")
for (panel in seq_along(qq_sizes)) {
  qqnorm(mixed_data1[[panel]],
         main = sprintf("QQ-Plot Campuran 1\nUkuran Sample (n=%d)",
                        qq_sizes[panel]),
         col = qq_cols[panel], lwd = 7,
         xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
  qqline(mixed_data1[[panel]], col = "black", lwd = 2)
}
Campuran 2
par(mfrow = c(1, 5))
# Normal QQ plot (points plus reference line) of the sample means, one panel
# per sample size. Each panel indexes its own element of mixed_data2; the
# n=60 and n=100 panels previously re-plotted the n=20 means (element 3).
qq_sizes <- c(4L, 12L, 20L, 60L, 100L)
qq_cols <- c("#fae7fd", "#eb91f2", "#e44cb5", "#96336c", "#61283f")
for (panel in seq_along(qq_sizes)) {
  qqnorm(mixed_data2[[panel]],
         main = sprintf("QQ-Plot Campuran 2\nUkuran Sample (n=%d)",
                        qq_sizes[panel]),
         col = qq_cols[panel], lwd = 7,
         xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
  qqline(mixed_data2[[panel]], col = "black", lwd = 2)
}
Campuran 3
par(mfrow = c(1, 5))
# Normal QQ plot (points plus reference line) of the sample means, one panel
# per sample size. Each panel indexes its own element of mixed_data3; the
# n=60 and n=100 panels previously re-plotted the n=20 means (element 3).
qq_sizes <- c(4L, 12L, 20L, 60L, 100L)
qq_cols <- c("#f5d0bf", "#e9865b", "#ce572b", "#9a3b1a", "#59230c")
for (panel in seq_along(qq_sizes)) {
  qqnorm(mixed_data3[[panel]],
         main = sprintf("QQ-Plot Campuran 3\nUkuran Sample (n=%d)",
                        qq_sizes[panel]),
         col = qq_cols[panel], lwd = 7,
         xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
  qqline(mixed_data3[[panel]], col = "black", lwd = 2)
}
Poin 5
- Pada n berapa sebaran rataan dari masing-masing data mulai simetris atau mendekati sebaran normal?
Menggunakan library ggplot2 & cowplot
untuk menggabungkan nya.
# Top row of plots
# Density histogram (with smoothed density overlay) of one population.
#
# dist  : population values; passed to data.frame() as column x.
# title : population name appended to "Sebaran Populasi ".
# color : fill colour of the histogram and the density.
# Returns a one-element list holding the ggplot object, so that several
# calls can be concatenated with c().
histo <- function(dist, title, color) {
  df <- data.frame(x = dist)
  # after_stat(density) and linewidth replace the deprecated `..density..`
  # and line `size`; the rest of the file already uses `linewidth`.
  p1 <- ggplot(data = df, aes(x = x)) +
    geom_histogram(aes(y = after_stat(density)), bins = 20, fill = color,
                   color = "black", alpha = 1) +
    geom_density(fill = color, adjust = 3, linewidth = 1, alpha = .5) +
    labs(title = paste0("Sebaran Populasi ", title)) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 10, face = "bold", hjust = 0.5)
    )
  list(p1)
}
# One population histogram per data set, in display order.
populations <- list(norm_data, mix_data_1, mix_data_2, mix_data_3)
population_names <- c("Normal", "Campuran 1", "Campuran 2", "Campuran 3")
population_fills <- c("#357a87", "#792b9a", "#96336c", "#9a3b1a")
# histo() returns a one-element list; concatenate them into a flat plot list.
atas <- do.call(
  c,
  Map(histo, populations, population_names, color = population_fills)
)
plot_grid_top <- plot_grid(plotlist = atas, ncol = 4)
# Bottom rows of plots
# Build the two panels shown for one data set / sample size: a density
# histogram of the sample means and a normal QQ plot of the same means.
#
# data_rataan : vector of sample means.
# title       : first line of both panel titles.
# color       : fill colour of the histogram/density and the QQ points.
# Returns list(p2, p3): histogram first, QQ plot second.
plot_distribution2 <- function(data_rataan, title, color) {
  df_rataan <- data.frame(x = data_rataan)

  # Title styling shared by both panels.
  title_theme <- theme(
    plot.title = element_text(size = 10, face = "bold", hjust = 0.5)
  )

  # Histogram of the sample means.
  # after_stat(density) and linewidth replace the deprecated `..density..`
  # and line `size`, matching the `linewidth` already used for the QQ line.
  p2 <- ggplot(data = df_rataan, aes(x = x)) +
    geom_histogram(aes(y = after_stat(density)), bins = 20, fill = color,
                   color = "black", alpha = 1) +
    geom_density(fill = color, adjust = 3, linewidth = 1, alpha = .5) +
    labs(title = paste0(title, "\nRata-rata Sampel")) +
    theme_minimal() +
    title_theme

  # QQ plot of the sample means
  p3 <- ggplot(data = df_rataan, aes(sample = x)) +
    stat_qq(color = color, size = 3.5) +
    stat_qq_line(color = "black", linewidth = 1.5) +
    labs(title = paste0(title, "\nQQ-Plot")) +
    theme_minimal() +
    title_theme

  list(p2, p3)
}
all_plots <- list()
# Fill colours ordered Normal, mixture 1, mixture 2, mixture 3. The lightest
# set is the fallback; sizes 12, 20, 60 and 100 get progressively darker ones.
base_palette <- c("#e3f4f1", "#eae1f8", "#fae7fd", "#f5d0bf")
size_palettes <- list(
  c("#a9ded7", "#d5bbf3", "#eb91f2", "#e9865b"),
  c("#6ec1c1", "#9d48c7", "#e44cb5", "#ce572b"),
  c("#357a87", "#792b9a", "#96336c", "#9a3b1a"),
  c("#0f2c3a", "#48185b", "#61283f", "#59230c")
)
for (size in sample_sizes) {
  hit <- match(size, c(12, 20, 60, 100))
  colors <- if (is.na(hit)) base_palette else size_palettes[[hit]]
  pos <- which(sample_sizes == size) # position of this size in the result lists
  # Eight panels per size: (histogram, QQ plot) for each of the four data sets.
  plots_for_size <- c(
    plot_distribution2(normal_data[[pos]],
                       paste0("Distribusi Normal (n=", size, ")"), colors[1]),
    plot_distribution2(mixed_data1[[pos]],
                       paste0("Distribusi Campuran 1 (n=", size, ")"),
                       colors[2]),
    plot_distribution2(mixed_data2[[pos]],
                       paste0("Distribusi Campuran 2 (n=", size, ")"),
                       colors[3]),
    plot_distribution2(mixed_data3[[pos]],
                       paste0("Distribusi Campuran 3 (n=", size, ")"),
                       colors[4])
  )
  all_plots <- c(all_plots, plots_for_size)
}
plot_grid_bottom <- plot_grid(plotlist = all_plots, ncol = 8)
# Combine the population row with the grid of sample-mean panels
plot_grid(plot_grid_top, plot_grid_bottom, ncol = 1, rel_heights = c(0.2, 1))
Dalam analisis ini, tiga jenis data campuran dieksplorasi dengan variasi ukuran sampel (n = 4, 12, 20, 60, 100), menghasilkan total 15 histogram dan 15 QQ plot. Melalui observasi visual ini, ditemukan bahwa titik di mana distribusi mulai menyerupai distribusi normal adalah ketika ukuran sampel melebihi 20. Temuan ini secara substansial mendukung teori statistik yang dikenal sebagai teorema limit pusat. Teorema ini menyatakan bahwa, “dengan pertambahan ukuran sampel, distribusi rata-rata dari sampel yang diambil dari populasi, terlepas dari distribusi aslinya, akan cenderung mendekati distribusi normal”. Dalam literatur statistik, penekanan sering diberikan pada ukuran sampel yang besar, umumnya lebih dari 20, sebagai titik di mana pendekatan ke distribusi normal menjadi semakin akurat.