DISTRIBUSI UNIFORM

#==========================#
#   DISTRIBUSI UNIFORM     #
#==========================#

# Ketinggian bibit sayur organic
set.seed(029)
n <- 400
data_bibit <- runif(n, min = 15, max = 45)

#1. Densitas pada x = 28
(densitas_28 <- dunif(15, min = 15, max = 45))
## [1] 0.03333333
#2. CDF pada q = 28
(cdf_28 <- punif(28, min = 15, max = 45))
## [1] 0.4333333
#3. Probabilitas antara 20 & 35 cm
(prob_20_35 <- punif(35, min = 15, max = 45) -
    punif(20, min = 15, max = 45))
## [1] 0.5
#4. Statistik deskriptif 
(mean_bibit <- mean(data_bibit)) # mean
## [1] 29.66902
(median_bibit <- median(data_bibit)) # median
## [1] 29.55684
(sd_bibit <- sd(data_bibit)) # standar deviasi
## [1] 8.858038
(skewness_bibit <- skewness(data_bibit)) # skewness
## [1] 0.02618955
(kurtosis_bibit <- kurtosis(data_bibit)) # kurtosis
## [1] -1.218696
#5. Plot density & histogram (0verlay theoretical uniform df)
a <- min(data_bibit)
b <- max(data_bibit)

x_vals <- seq(a, b, length.out = 200)
y_vals <- dunif(x_vals, min = a, max = b)
kurva_uniform <- data.frame(x = x_vals, y = y_vals)

# density
ggplot(data.frame(x = data_bibit), aes(x)) +
  geom_density(fill = "maroon", alpha = 0.5) +
  geom_line(data = kurva_uniform, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Densitas Ketinggian Bibit Sayur Organic",
       x = "Tinggi Bibit (cm)", y = "Densitas")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# histogram
ggplot(data.frame(x = data_bibit), aes(x)) +
  geom_histogram(aes(y = ..density..), bins = 30,
                 fill = "brown", alpha = 0.5, color = "black") +
  geom_line(data = kurva_uniform, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Histogram Ketinggian Bibit Sayur Organic",
       x = "Tinggi Bibit (cm)", y = "Frekuensi") 
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#6. Uji Kesesuaian (KS-Test)
(ks_test_bibit <- ks.test(data_bibit, "punif", min = 15, max = 45))
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  data_bibit
## D = 0.048829, p-value = 0.296
## alternative hypothesis: two-sided

DISTRIBUSI EXPONENTIAL

#===========================#
#   DISTRIBUSI EXPONENTIAL  #
#===========================#

# Waktu Respon Server (detik)
set.seed(029)
n <- 600
rate <- 1 / 2 # rata-rata 2 detik
data_respon <- rexp(n, rate = rate)

#1. Densitas pada x = 1.5
(densitas_1.5 <- dexp(1.5, rate = rate))
## [1] 0.2361833
#2. CDF pada q = 1.5
(cdf_1.5 <- pexp(1.5, rate = rate))
## [1] 0.5276334
#3.Kuantil pada p = 0.8
(quantile_0.8 <- qexp(0.8, rate = rate))
## [1] 3.218876
#4. Probabilitas antara 0.5 dan 3 detik
(prob_0.5_3 <- pexp(3, rate = rate) - pexp(0.5, rate = rate))
## [1] 0.5556706
#5. Statistik deskriptif
(mean_respon <- mean(data_respon)) # mean
## [1] 2.196955
(median_respon <- median(data_respon)) # median
## [1] 1.526028
(sd_respon <- sd(data_respon)) # standar deviasi
## [1] 2.232543
(skewness_respon <- skewness(data_respon)) # skewness
## [1] 1.691988
(kurtosis_respon <- kurtosis(data_respon)) # kurtosis
## [1] 2.91655
(min_respon <- min(data_respon)) # minimal
## [1] 0.004317578
(max_respon <- max(data_respon)) # maksimal
## [1] 11.41836
#6. Plot density & histogram (bandingkan histogram dengan teoritis)
x_vals <- seq(min(data_respon), max(data_respon), length.out = 200)
y_vals <- dexp(x_vals, rate = rate)
kurva_exp <- data.frame(x = x_vals, y = y_vals)

# density
ggplot(data.frame(x = data_respon), aes(x)) +
  geom_density(fill = "navy", alpha = 0.5) +
  geom_line(data = kurva_exp, aes(x = x, y = y), color = "red", size = 1) +
  labs(title = "Densitas Waktu Respon Server",
       x = "Waktu (detik)", y = "Densitas")

# histogram
ggplot(data.frame(x = data_respon), aes(x)) +
  geom_histogram(aes(y = ..density..), bins = 30,
                 fill = "blue", alpha = 0.5, color = "black") +
  geom_line(data = kurva_exp, aes(x = x, y = y), color = "red", size = 1) +
  labs(title = "Histogram Waktu Respon Server",
       x = "Waktu (detik)", y = "Frekuensi") 

#7. Uji kesesuaian
observed_respon <- table(cut(data_respon, 
                             breaks = seq(0, max(data_respon), by = 1)))
expected_respon <- n * (pexp(seq(1, max(data_respon), by = 1), rate = rate) - 
                           pexp(seq(0, max(data_respon) - 1, by = 1), rate = rate)) 

(chisq_test_respon <- chisq.test(observed_respon, p = expected_respon / sum(expected_respon)))
## Warning in chisq.test(observed_respon, p =
## expected_respon/sum(expected_respon)): Chi-squared approximation may be
## incorrect
## 
##  Chi-squared test for given probabilities
## 
## data:  observed_respon
## X-squared = 19.639, df = 10, p-value = 0.03286
(ks_test_respon <- ks.test(data_respon, "pexp", rate = rate))
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  data_respon
## D = 0.049034, p-value = 0.1117
## alternative hypothesis: two-sided

DISTRIBUSI GAMMA

#===========================#
#   DISTRIBUSI GAMMA        #
#===========================#

# Jumlah Kesalahan Pada Batch Produksi
set.seed(029)
n <- 800
shape <- 3.5
scale <- 2
data_batch <- rgamma(n, shape = shape, scale = scale)

#1. Densitas pada x = 6
(densitas_6 <- dgamma(6, shape = shape, scale = scale))
## [1] 0.1167652
#2. CDF pada q = 6
(cdf_6 <- pgamma(6, shape = shape, scale = scale))
## [1] 0.4602506
#3. Kuantil pada p = 0.975
(quantile_0.975 <- qgamma(0.975, shape = shape, scale = scale))
## [1] 16.01276
#4. Probabilitas antara 2 dan 10 kesalahan
(prob_2_10 <- pgamma(10, shape = shape, scale = scale) -
  pgamma(2, shape = shape, scale = scale))
## [1] 0.7712669
#5 Statistik deskriptif
(mean_batch <- mean(data_batch)) # mean
## [1] 7.104359
(median_batch <- median(data_batch)) # median
## [1] 6.35528
(sd_batch <- sd(data_batch)) # standar deviasi
## [1] 3.916589
(skewness_batch <- skewness(data_batch)) # skewness
## [1] 0.9058297
(kurtosis_batch <- kurtosis(data_batch)) # kurtosis
## [1] 0.7244169
(min_batch <- min(data_batch)) # minimal
## [1] 0.4589859
(max_batch <- max(data_batch)) # maksimal
## [1] 22.48951
#6. Plot density & histogram + overlay teoritis gamma
x_vals <- seq(min(data_batch), max(data_batch), length.out = 200)
y_vals <- dgamma(x_vals, shape = shape, rate = rate)
kurva_gamma <- data.frame(x = x_vals, y = y_vals)

# density
ggplot(data.frame(x = data_batch), aes(x)) +
  geom_density(fill = "orange", alpha = 0.5) +
  geom_line(data = kurva_gamma, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Densitas Jumlah Kesalahan pada Batch Produksi",
       x = "Jumlah", y = "Densitas")

# histogram
ggplot(data.frame(x = data_batch), aes(x)) +
  geom_histogram(aes(y = ..density..), bins = 30,
                 fill = "yellow", alpha = 0.5, color = "black") +
  geom_line(data = kurva_gamma, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Histogram Jumlah Kesalahan pada Batch Produksi",
       x = "Jumlah", y = "Frekuensi") 

#7. Uji Kesesuaian
observed_batch <- table(cut(data_batch, breaks = seq(0, max(data_batch), by = 2))) 
expected_batch <- n * (pgamma(seq(2, max(data_batch), by = 2), shape = shape, scale = scale) -
                   pgamma(seq(0, max(data_batch) - 2, by = 2), shape = shape, scale = scale)) 

(chisq_test_batch <- chisq.test(observed_batch, p = expected_batch / sum(expected_batch)))
## Warning in chisq.test(observed_batch, p = expected_batch/sum(expected_batch)):
## Chi-squared approximation may be incorrect
## 
##  Chi-squared test for given probabilities
## 
## data:  observed_batch
## X-squared = 9.5355, df = 10, p-value = 0.4821

DISTRIBUSI CHI-SQUARED

#===========================#
#      CHI-SQUARED          #
#===========================#

# Statistik X^2 dari Simulasi (df berbeda)
set.seed(029)
n <- 500
df <- 4
data_chi <- rchisq(n, df = df)

#1. Densitas pada x = 7
(densitas_7 <- dchisq(7, df))
## [1] 0.05284542
#2. CDF pada q = 7
(cdf_7 <- pchisq(7, df))
## [1] 0.8641118
#3. Kuantil pada p = 0.90
(quantile_0.90 <- qchisq(0.90, df))
## [1] 7.77944
#4. Probabilitas antara 1.5 dan 6
(prob_1.5_6 <- pchisq(6, df) - pchisq(1.5, df))
## [1] 0.6274932
#5. Statistik deskriptif
(mean_chi <- mean(data_chi)) # mean
## [1] 4.253899
(median_chi <- median(data_chi)) # median
## [1] 3.724902
(sd_chi <- sd(data_chi)) # standar deviasi
## [1] 2.893705
(skewness_chi <- skewness(data_chi)) # skewness
## [1] 1.160393
(kurtosis_chi <- kurtosis(data_chi)) # kurtosis
## [1] 1.621538
(min_chi <- min(data_chi)) # minimal
## [1] 0.02504956
(max_chi <- max(data_chi)) # maksimal
## [1] 17.35485
#6. Plot density & histogram (bandingkan dengan X^2 (df = 4))
x_vals <- seq(min(data_chi), max(data_chi), length.out = 200)
y_vals <- dchisq(x_vals, df = df)
kurva_chi <- data.frame(x = x_vals, y = y_vals)

# density
ggplot(data.frame(x = data_chi), aes(x)) +
  geom_density(fill = "purple", alpha = 0.5) +
  geom_line(data = kurva_chi, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Densitas Statistik X^2 dari Simulasi",
       x = "df", y = "Densitas")

# histogram
ggplot(data.frame(x = data_chi), aes(x)) +
  geom_histogram(aes(y = ..density..), bins = 30, 
                 fill = "pink", alpha = 0.5, color = "black") +
  geom_line(data = kurva_chi, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Histogram Statistik X^2 dari Simulasi",
       x = "df", y = "Frekuensi") 

#7. Uji Kesesuaian X^2
observed_chi <- table(cut(data_chi, breaks = seq(0, max(data_chi), by = 2))) 
expected_chi <- n * (pchisq(seq(2, max(data_chi), by = 2), df) - 
                       pchisq(seq(0, max(data_chi) - 2, by = 2), df)) 
(chisq_test_chi <- chisq.test(observed_chi, p = expected_chi / sum(expected_chi)))
## Warning in chisq.test(observed_chi, p = expected_chi/sum(expected_chi)):
## Chi-squared approximation may be incorrect
## 
##  Chi-squared test for given probabilities
## 
## data:  observed_chi
## X-squared = 9.01, df = 7, p-value = 0.2519

DISTRIBUSI NORMAL

#===========================#
#         NORMAL            #
#===========================#

# Waktu Belajar Mahasiswa per Minggu (jam)
set.seed(029)
n <- 1200
mu <- 12
sigma <- 3
data_belajar <- rnorm(n, mean = mu, sd = sigma)

#1. Densitas pada x = 10
(densitas_10 <- dnorm(10, mu, sigma))
## [1] 0.1064827
#2. CDF pada q = 10
(cdf_10 <- pnorm(10, mu, sigma))
## [1] 0.2524925
#3.Kuantil pada p = 0.975
(quantile_0.975 <- qnorm(0.975, mu, sigma))
## [1] 17.87989
#4. Probabilitas antara 8 dan 15 jam
(prob_8_15 <- pnorm(15, mu, sigma) - pnorm(8, mu, sigma))
## [1] 0.7501335
#5.Statistik deskriptif + uji normalitas 
(mean_belajar <- mean(data_belajar)) # mean
## [1] 11.86746
(median_belajar <- median(data_belajar)) # median
## [1] 11.83171
(sd_belajar <- sd(data_belajar)) # standar deviasi
## [1] 3.198594
(skewness_belajar <- skewness(data_belajar)) # skewness
## [1] -0.0348522
(kurtosis_belajar <- kurtosis(data_belajar)) # kurtosis
## [1] -0.1669535
(min_belajar <- min(data_belajar)) # minimal
## [1] 2.374704
(max_belajar <- max(data_belajar)) # maksimal
## [1] 21.39125
# uji normalitas (Shapiro-Wilk)
(shapiro_test_belajar <- shapiro.test(data_belajar))
## 
##  Shapiro-Wilk normality test
## 
## data:  data_belajar
## W = 0.99889, p-value = 0.6822
#6. Plot density & histogram + theoretical normal curve
x_vals <- seq(min(data_belajar), max(data_belajar), length.out = 200)
y_vals <- dnorm(x_vals, mean = mu, sd = sigma)
kurva_normal <- data.frame(x = x_vals, y = y_vals)

# density
ggplot(data.frame(x = data_belajar), aes(x)) +
  geom_density(fill = "gray", alpha = 0.5) +
  geom_line(data = kurva_normal, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Densitas Waktu Belajar Mahasiswa per Minggu",
       x = "Jam", y = "Densitas")

# histogram
ggplot(data.frame(x = data_belajar), aes(x)) +
  geom_histogram(aes(y = ..density..), bins = 30,
                 fill = "skyblue", alpha = 0.5, color = "black") +
  geom_line(data = kurva_normal, aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Histogram Waktu Belajar Mahasiswa per Minggu",
       x = "Jam", y = "Frekuensi") 

#7. Uji kesesuaian
(ad_test_belajar <- ad.test(data_belajar))
## 
##  Anderson-Darling normality test
## 
## data:  data_belajar
## A = 0.24586, p-value = 0.7576

DISTRIBUSI F

#===========================#
#         F-DIST            #
#===========================#

#Latihan Perbandingan Tinggi Badan Atlet Basket
set.seed(029)
n1 <- 40 
n2 <- 50
df1 <- n1 - 1
df2 <- n2 - 1
# data tinggi badan dalam cm
data_grup1 <- rnorm(n1, mean = 175, sd = 8)
data_grup2 <- rnorm(n2, mean = 185, sd = 7)


#1. Densitas pada nilai F yang dihitung
(var_grup1 <- var(data_grup1))
## [1] 74.35085
(var_grup2 <- var(data_grup2))
## [1] 43.25206
(f_value <- var_grup1 / var_grup2)
## [1] 1.719013
(densitas_f <- df(f_value, df1, df2))
## [1] 0.1524712
#2.CDF untuk nilai F yang dihitung
(cdf_f <- pf(f_value, df1, df2))
## [1] 0.9635909
#3. Quantile pada p = 0.95
(quantile_0.95 <- qf(0.95, df1, df2))
## [1] 1.642751
#4. Menghitung probabilitas nilai f < 5
(prob_f <- pf(5, df1, df2))
## [1] 0.9999999
#5. Menghitung statistik deskriptif
(stat_grup1 <- data.frame(mean = mean(data_grup1),
                          median = median(data_grup1),
                          sd = sd(data_grup1),
                          skewness = skewness(data_grup1),
                          kurtosis = kurtosis(data_grup1),
                          min = min(data_grup1),
                          max = max(data_grup1)))
##       mean   median       sd  skewness   kurtosis      min      max
## 1 174.8062 175.1451 8.622694 0.1207106 -0.3223487 155.5261 192.5864
(stat_grup2 <- data.frame(mean = mean(data_grup2),
                          median = median(data_grup2),
                          sd = sd(data_grup2),
                          skewness = skewness(data_grup2),
                          kurtosis = kurtosis(data_grup2),
                          min = min(data_grup2),
                          max = max(data_grup2)))
##       mean   median      sd    skewness   kurtosis     min      max
## 1 183.1273 183.0491 6.57663 -0.07312303 -0.5880549 168.733 197.3289
#6. Plot densitas f
x <- seq(0, 5, length.out = 100)
y <- df(x, df1, df2)
data_plot <- data.frame(x = x, y = y)
ggplot(data_plot, aes(x, y)) + geom_line(color = "black") +
         labs(title = "Distribusi F", x = "Nilai F", y = "Densitas") 

#7. Uji kesesuaian distribusi f 
(f_test <- var.test(data_grup1, data_grup2))
## 
##  F test to compare two variances
## 
## data:  data_grup1 and data_grup2
## F = 1.719, num df = 39, denom df = 49, p-value = 0.07282
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9506721 3.1737900
## sample estimates:
## ratio of variances 
##           1.719013

DISTRIBUSI T

#===========================#
#         T-DIST            #
#===========================#

# Nilai Ujian Komputasi Statistika
set.seed(029)
n <- 900
df <- 8
data_t <- rt(n, df = df) * 6 + 75

#1. Densitas pada x = 80
(densitas_t <- dt(80, df))
## [1] 3.319149e-14
#2. CDF pada q = 80
(cdf_t <- pt(80, df))
## [1] 1
#3. Kuantil pada p = 0.95
(quantile_t <- qt(0.95, df))
## [1] 1.859548
#4. Probabilitas antara 70 dan 85
(prob_70_85 <- pt(85, df) - pt(70, df))
## [1] 7.609469e-13
#5. Statistik deskriptif
(stat_t <- data.frame(mean = mean(data_t),
                          median = median(data_t),
                          sd = sd(data_t),
                          skewness = skewness(data_t),
                          kurtosis = kurtosis(data_t),
                          min = min(data_t),
                          max = max(data_t)))
##      mean   median       sd    skewness  kurtosis      min      max
## 1 74.7313 74.91825 6.909966 -0.09820397 0.6590203 47.22424 98.48208
#6. Plot density & histogram (bandingkan dengan t-theorical)
x_vals <- seq(min(data_t), max(data_t), length.out = 200)
y_vals <- dt(x_vals, df)

# density
ggplot(data.frame(x = data_t), aes(x)) +
  geom_density(fill = "gold", alpha = 0.5) +
  geom_line(data = data.frame(x = x_vals, y = y_vals),
            aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Densitas Nilai Ujian Komputasi Statistika",
       x = "Nilai", y = "Densitas")

# histogram
ggplot(data.frame(x = data_t), aes(x)) +
  geom_histogram(aes(y = ..density..),fill = "yellow", alpha = 0.5, color = "black") +
  geom_line(data = data.frame(x = x_vals, y = y_vals),
            aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Histogram Nilai Ujian Komputasi Statistika",
       x = "Nilai", y = "Frekuensi") 
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

#7. Uji kesesuaian distribusi t
(shapiro_test <- shapiro.test(data_t))  # Uji Shapiro-Wilk 
## 
##  Shapiro-Wilk normality test
## 
## data:  data_t
## W = 0.99481, p-value = 0.003605
(ks_test <- ks.test(data_t, "pnorm", mean(data_t), sd(data_t)))  # Uji Kolmogorov-Smirnov 
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  data_t
## D = 0.032088, p-value = 0.3122
## alternative hypothesis: two-sided

DISTRIBUSI WEIBULL

#===========================#
#         WEIBULL           #
#===========================#

# Umur Rata-rata Komponen Elektronik (jam)
set.seed(029)
n <- 100
shape <- 1.8
scale <- 400
data_komponen <- rweibull(n, shape = shape, scale = scale)

#1. Densitas pada x = 350
(densitas <- dweibull(350, shape, scale))
## [1] 0.001842097
#2. CDF pada q = 350
(cdf_350 <- pweibull(350, shape, scale))
## [1] 0.5444945
#3. Kuantil pada p = 0.6
(quantile_0.6 <- qweibull(0.6, shape, scale))
## [1] 381.0372
#4. Probabilitas antara 200 dan 450 jam
(prob_200_450 <- pweibull(450, shape, scale) - 
    pweibull(200, shape, scale))
## [1] 0.459883
#5. Statistik deskriptif
(stat_weibull <- data.frame(mean = mean(data_komponen),
                      median = median(data_komponen),
                      sd = sd(data_komponen),
                      skewness = skewness(data_komponen),
                      kurtosis = kurtosis(data_komponen),
                      min = min(data_komponen),
                      max = max(data_komponen)))
##      mean   median       sd  skewness  kurtosis      min      max
## 1 373.638 337.8169 208.3732 0.7793839 0.6374501 37.42833 1092.799
#6. Plot density & histogram + overlay weibull teoritis
x_vals <- seq(0, max(data_komponen), length.out = 200)
y_vals <- dweibull(x_vals, shape = shape, scale = scale)

# density
ggplot(data.frame(x = data_komponen), aes(x)) +
  geom_density(fill = "maroon", alpha = 0.5) +
  geom_line(data = data.frame(x = x_vals, y = y_vals),
            aes(x = x, y = y), color = "navy", size = 1) + 
  labs(title = "Densitas Distribusi Weibull", x = "Umur Komponen Elektronik (jam)", y = "Densitas") 

# histogram
ggplot(data.frame(x = data_komponen), aes(x)) + 
  geom_histogram(aes(y = ..density..), bins = 30, fill = "brown", alpha = 0.7, color = "black") + 
  geom_line(data = data.frame(x = x_vals, y = y_vals),
            aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Histogram Distribusi Weibull", x = "Umur Komponen Elektronik (jam)", y = "Frekuensi") 

#7. Uji kesesuaian distribusi weibull
(fit <- fitdist(data_komponen, "weibull")) # fit Weibull
## Fitting of the distribution ' weibull ' by maximum likelihood 
## Parameters:
##         estimate Std. Error
## shape   1.870806  0.1450688
## scale 420.696140 23.6739097
(ks_test_weibull <- ks.test(data_komponen, "pweibull", 
                            shape = fit$estimate[1], scale = fit$estimate[2])) # ks test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  data_komponen
## D = 0.052544, p-value = 0.9453
## alternative hypothesis: two-sided

DISTRIBUSI BETA

#===========================#
#        BETA DIST          #
#===========================#

# Proporsi Sukses Kampanye 0-1 
set.seed(029)
n <- 800
alpha <- 3
beta <- 4
data_prop <- rbeta(n, shape1 = alpha, shape2 = beta)

#1. Densitas pada x = 0.4
(densitas_0.4 <- dbeta(0.4, shape1 = alpha, shape2 = beta))
## [1] 2.0736
#2. CDF pada q = 0.4
(cdf_0.4 <- pbeta(0.4, shape1 = alpha, shape2 = beta))
## [1] 0.45568
#3. Kuantil pada p = 0.90
(quantile_0.90 <- qbeta(0.90, shape1 = alpha, shape2 = beta))
## [1] 0.6668056
#4. Probabilitas antara 0.25 dan 0.6
(prob_0.25_06 <- pbeta(0.6, shape1 = alpha, shape2 = beta) -
  pbeta(0.25, shape1 = alpha, shape2 = beta))
## [1] 0.6513664
#5. Statistik Deskriptif
(stat_beta <- data.frame(mean = mean(data_prop),
                            median = median(data_prop),
                            sd = sd(data_prop),
                            skewness = skewness(data_prop),
                            min = min(data_prop),
                            max = max(data_prop)))
##        mean    median       sd  skewness        min       max
## 1 0.4174228 0.4090355 0.181414 0.1930978 0.01481242 0.9380668
#6. Plot density & histogram + overlay beta teoritis
x_vals <- seq(0, 1, length.out = 200)
y_vals <- dbeta(x_vals, alpha, beta)

# density
ggplot(data.frame(x = data_prop), aes(x)) + 
  geom_density(fill = "orange", alpha = 0.5) +
  geom_line(data = data.frame(x = x_vals, y = y_vals),
            aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Densitas Proporsi Sukses Kampanye 0-1", 
       x = "Proporsi", y = "Densitas") 

# histogram
ggplot(data.frame(x = data_prop), aes(x)) + 
  geom_histogram(bins = 30, fill = "red", alpha = 0.7, color = "black") +
  geom_line(data = data.frame(x = x_vals, y = y_vals),
            aes(x = x, y = y), color = "navy", size = 1) +
  labs(title = "Histogram Proporsi Sukses Kampanye 0-1", 
       x = "Proporsi", y = "Frekuensi") 

#7. Uji kesesuaian distribusi beta
observed_beta <- table(cut(data_prop, breaks = seq(0, 1, by = 0.1))) 
expected_beta <- n * (pbeta(seq(0.1, 1, by = 0.1), shape1 = alpha, shape2 = beta) -  
                        pbeta(seq(0, 0.9, by = 0.1), shape1 = alpha, shape2 = beta)) 

(chisq_test2 <- chisq.test(observed_beta, p = expected_beta /sum(expected_beta)))
## Warning in chisq.test(observed_beta, p = expected_beta/sum(expected_beta)):
## Chi-squared approximation may be incorrect
## 
##  Chi-squared test for given probabilities
## 
## data:  observed_beta
## X-squared = 16.589, df = 9, p-value = 0.05556
knitr::opts_chunk$set(echo = TRUE)