MINGGU 1 - “IMPORT DATA”

MINGGU 2 - “STATISTIKA DESKRIPTIF”

# Summary statistics: per-column min, quartiles, median, mean and max.

summary(dataset_lingkungan)
##       hari           suhu_c      kelembapan_persen curah_hujan_mm  
##  Min.   : 1.00   Min.   :25.00   Min.   :65.00     Min.   : 0.600  
##  1st Qu.:15.75   1st Qu.:28.57   1st Qu.:75.00     1st Qu.: 3.675  
##  Median :30.50   Median :29.95   Median :78.00     Median : 7.800  
##  Mean   :30.50   Mean   :29.90   Mean   :78.77     Mean   : 9.083  
##  3rd Qu.:45.25   3rd Qu.:31.20   3rd Qu.:83.00     3rd Qu.:12.275  
##  Max.   :60.00   Max.   :34.90   Max.   :94.00     Max.   :31.800  
##    pm25_ugm3   
##  Min.   :12.0  
##  1st Qu.:35.0  
##  Median :43.0  
##  Mean   :43.1  
##  3rd Qu.:52.0  
##  Max.   :63.0
# One-row summary table: mean, median, standard deviation, minimum and
# maximum of every column, ignoring missing values. across() names each
# result column <column>_<statistic>.
ringkasan <- summarise(
  dataset_lingkungan,
  across(
    everything(),
    list(
      mean = function(v) mean(v, na.rm = TRUE),
      median = function(v) median(v, na.rm = TRUE),
      sd = function(v) sd(v, na.rm = TRUE),
      min = function(v) min(v, na.rm = TRUE),
      max = function(v) max(v, na.rm = TRUE)
    )
  )
)

ringkasan
## # A tibble: 1 × 25
##   hari_mean hari_median hari_sd hari_min hari_max suhu_c_mean suhu_c_median
##       <dbl>       <dbl>   <dbl>    <dbl>    <dbl>       <dbl>         <dbl>
## 1      30.5        30.5    17.5        1       60        29.9          30.0
## # ℹ 18 more variables: suhu_c_sd <dbl>, suhu_c_min <dbl>, suhu_c_max <dbl>,
## #   kelembapan_persen_mean <dbl>, kelembapan_persen_median <dbl>,
## #   kelembapan_persen_sd <dbl>, kelembapan_persen_min <dbl>,
## #   kelembapan_persen_max <dbl>, curah_hujan_mm_mean <dbl>,
## #   curah_hujan_mm_median <dbl>, curah_hujan_mm_sd <dbl>,
## #   curah_hujan_mm_min <dbl>, curah_hujan_mm_max <dbl>, pm25_ugm3_mean <dbl>,
## #   pm25_ugm3_median <dbl>, pm25_ugm3_sd <dbl>, pm25_ugm3_min <dbl>, …

# Histograms of every variable in the dataset.
#
# The data, title and axis label of each plot are all derived from the same
# column name, so a plot can no longer show one variable under another
# variable's label (the first histogram previously drew suhu_c but was
# titled "hari").

# Column name -> fill colour.
warna_hist <- c(
  hari = "light blue",
  suhu_c = "orange",
  kelembapan_persen = "lightgreen",
  curah_hujan_mm = "azure",
  pm25_ugm3 = "pink"
)

for (nama_kolom in names(warna_hist)) {
  hist(
    dataset_lingkungan[[nama_kolom]],
    main = paste("Histogram", nama_kolom),
    xlab = nama_kolom,
    col = warna_hist[[nama_kolom]]
  )
}

MINGGU 3 - “PROBABILITAS EMPIRIS”

# Empirical probability that `hari` exceeds its own mean.
# The column name is kept in a variable because names() of a plain column
# vector is NULL, which previously left the event label without a name.
nama_var <- "hari"
x <- dataset_lingkungan[[nama_var]]

# Threshold: the sample mean.
ambang <- mean(x, na.rm = TRUE)

# Share of observations above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)

prob_empiris <- data.frame(
  kejadian = paste0(nama_var, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
##      kejadian peluang
## 1 hari > 30.5     0.5
# Empirical probability that `suhu_c` exceeds its own mean.
# The column name is kept in a variable because names() of a plain column
# vector is NULL, which previously left the event label without a name.
nama_var <- "suhu_c"
x <- dataset_lingkungan[[nama_var]]

# Threshold: the sample mean.
ambang <- mean(x, na.rm = TRUE)

# Share of observations above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)

prob_empiris <- data.frame(
  kejadian = paste0(nama_var, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
##        kejadian peluang
## 1 suhu_c > 29.9     0.5
# Empirical probability that `kelembapan_persen` exceeds its own mean.
# The column name is kept in a variable because names() of a plain column
# vector is NULL, which previously left the event label without a name.
nama_var <- "kelembapan_persen"
x <- dataset_lingkungan[[nama_var]]

# Threshold: the sample mean.
ambang <- mean(x, na.rm = TRUE)

# Share of observations above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)

prob_empiris <- data.frame(
  kejadian = paste0(nama_var, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
##                    kejadian   peluang
## 1 kelembapan_persen > 78.77 0.4666667
# Empirical probability that `curah_hujan_mm` exceeds its own mean.
# The column name is kept in a variable because names() of a plain column
# vector is NULL, which previously left the event label without a name.
nama_var <- "curah_hujan_mm"
x <- dataset_lingkungan[[nama_var]]

# Threshold: the sample mean.
ambang <- mean(x, na.rm = TRUE)

# Share of observations above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)

prob_empiris <- data.frame(
  kejadian = paste0(nama_var, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
##                kejadian   peluang
## 1 curah_hujan_mm > 9.08 0.4166667
# Empirical probability that `pm25_ugm3` exceeds its own mean.
# The column name is kept in a variable because names() of a plain column
# vector is NULL, which previously left the event label without a name.
nama_var <- "pm25_ugm3"
x <- dataset_lingkungan[[nama_var]]

# Threshold: the sample mean.
ambang <- mean(x, na.rm = TRUE)

# Share of observations above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)

prob_empiris <- data.frame(
  kejadian = paste0(nama_var, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
##           kejadian   peluang
## 1 pm25_ugm3 > 43.1 0.4833333

MINGGU 4 - “DISTRIBUSI DISKRIT”

# Empirical probability mass function of the PM2.5 column.
x_target <- dataset_lingkungan$pm25_ugm3

# Round to whole numbers so the values can be treated as discrete.
x_diskrit <- round(x_target)

# Count each distinct value, then scale the counts so they sum to 1.
freq <- table(x_diskrit)
pmf <- freq / sum(freq)

# Flatten the table into a two-column data frame (value, probability).
pmf_df <- data.frame(
  nilai = as.numeric(names(pmf)),
  probabilitas = as.vector(pmf)
)
print(pmf_df)
##    nilai probabilitas
## 1     12   0.01666667
## 2     17   0.03333333
## 3     22   0.01666667
## 4     26   0.01666667
## 5     27   0.01666667
## 6     31   0.01666667
## 7     33   0.06666667
## 8     34   0.03333333
## 9     35   0.05000000
## 10    36   0.03333333
## 11    37   0.01666667
## 12    38   0.01666667
## 13    39   0.01666667
## 14    40   0.01666667
## 15    41   0.08333333
## 16    42   0.01666667
## 17    43   0.05000000
## 18    44   0.03333333
## 19    45   0.03333333
## 20    47   0.01666667
## 21    48   0.06666667
## 22    50   0.05000000
## 23    51   0.01666667
## 24    52   0.05000000
## 25    54   0.05000000
## 26    56   0.05000000
## 27    57   0.01666667
## 28    58   0.01666667
## 29    61   0.05000000
## 30    62   0.01666667
## 31    63   0.01666667
# Bar chart of the empirical PMF, one bar per distinct rounded value.
barplot(
  pmf,
  main = "PMF Empiris Data Lingkungan",
  xlab = "Nilai",
  ylab = "Probabilitas",
  col = "skyblue"
)

MINGGU 5 - “DISTRIBUSI KONTINU”

# Normal approximation for the PM2.5 series.
#
# `x` and `ambang` are assigned here explicitly. They used to be whatever
# the last empirical-probability chunk had left in the workspace, so this
# section silently depended on the order in which earlier chunks were run.
x <- dataset_lingkungan$pm25_ugm3

# Parameters of the fitted normal distribution.
mu <- mean(x, na.rm = TRUE)
sigma <- sd(x, na.rm = TRUE)

# Threshold: the sample mean, as in the empirical-probability section.
ambang <- mu

# Model probability P(X > ambang). Because the threshold is the mean itself,
# the normal model gives 0.5 by symmetry; choose another threshold for a
# non-trivial comparison with the empirical probability.
p_model <- pnorm(ambang, mean = mu, sd = sigma, lower.tail = FALSE)

model_normal <- data.frame(
  distribusi = "Normal Aproksimasi",
  mean = mu,
  sd = sigma,
  peluang = p_model
)

model_normal
##           distribusi mean       sd peluang
## 1 Normal Aproksimasi 43.1 11.64838     0.5

# Histogram and normal curve: density-scaled histogram of `x` with the
# N(mu, sigma) density drawn on top.

hist(
  x,
  freq = FALSE,
  main = "Histogram Data Lingkungan + Kurva Normal",
  xlab = "Nilai",
  col = "green"
)

# Inside curve(), `x` is the plotting variable of the expression, not the
# data vector of the same name.
curve(
  dnorm(x, mean = mu, sd = sigma),
  col = "yellow",
  lwd = 2,
  add = TRUE
)

MINGGU 6 - “DISTRIBUSI SAMPLING”

# Sampling distribution of the mean for every variable, at two sample sizes.
#
# For each column, B resamples (with replacement) of size n1 and then of
# size n2 are drawn and their means plotted. The draw order is the same as
# when the five variables were written out one by one, so the seed gives
# the same resamples. Each title now carries the variable name and the
# actual sample size; previously all ten plots shared two identical titles.
set.seed(123)

B <- 2000
n1 <- 10
n2 <- 30

kolom_sampling <- c(
  "hari",
  "suhu_c",
  "kelembapan_persen",
  "curah_hujan_mm",
  "pm25_ugm3"
)

for (nama_kolom in kolom_sampling) {
  nilai <- dataset_lingkungan[[nama_kolom]]

  mean_n10 <- replicate(B, mean(sample(nilai, n1, replace = TRUE)))
  mean_n30 <- replicate(B, mean(sample(nilai, n2, replace = TRUE)))

  hist(
    mean_n10,
    main = paste0("Distribusi Sampling Mean ", nama_kolom, " (n=", n1, ")"),
    xlab = "Mean Sampel",
    col = "green"
  )

  hist(
    mean_n30,
    main = paste0("Distribusi Sampling Mean ", nama_kolom, " (n=", n2, ")"),
    xlab = "Mean Sampel",
    col = "green"
  )
}

MINGGU 7 - “ESTIMASI PARAMETER”

# Point estimates: sample mean of each variable.
# NOTE(review): `hari` runs from 1 to 60 in the summary output and looks
# like a day index rather than a measurement, so its mean is presumably
# not a meaningful estimate. Confirm before reporting it.

mean(dataset_lingkungan$hari)
## [1] 30.5
mean(dataset_lingkungan$suhu_c)
## [1] 29.90333
mean(dataset_lingkungan$kelembapan_persen)
## [1] 78.76667
mean(dataset_lingkungan$curah_hujan_mm)
## [1] 9.083333
mean(dataset_lingkungan$pm25_ugm3)
## [1] 43.1

# Confidence intervals: 95% interval for the mean of each variable, taken
# from the `conf.int` component of a one-sample t.test().
# NOTE(review): the interval for `hari` is presumably not meaningful if the
# column is only a day index. Confirm before reporting it.

t.test(dataset_lingkungan$hari, conf.level=0.95)$conf.int
## [1] 25.98851 35.01149
## attr(,"conf.level")
## [1] 0.95
t.test(dataset_lingkungan$suhu_c, conf.level=0.95)$conf.int
## [1] 29.29877 30.50789
## attr(,"conf.level")
## [1] 0.95
t.test(dataset_lingkungan$kelembapan_persen, conf.level=0.95)$conf.int
## [1] 77.08284 80.45050
## attr(,"conf.level")
## [1] 0.95
t.test(dataset_lingkungan$curah_hujan_mm, conf.level=0.95)$conf.int
## [1]  7.23709 10.92958
## attr(,"conf.level")
## [1] 0.95
t.test(dataset_lingkungan$pm25_ugm3, conf.level=0.95)$conf.int
## [1] 40.0909 46.1091
## attr(,"conf.level")
## [1] 0.95