MINGGU 1 - “IMPORT DATA”
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
# Read the environmental dataset from the Excel workbook (path is relative to
# the working directory).
dataset_lingkungan <- read_excel(path = "dataset_lingkungan.xlsx")
# Inspect the data: first rows, then column names, then column types.
dataset_lingkungan %>% head()
## # A tibble: 6 × 5
## hari suhu_c kelembapan_persen curah_hujan_mm pm25_ugm3
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.7 75 5.3 41
## 2 2 29 65 3.2 41
## 3 3 30.1 77 7.1 31
## 4 4 30.9 90 14.2 43
## 5 5 28.3 75 16.1 48
## 6 6 30 72 9.7 33
dataset_lingkungan %>% names()
## [1] "hari" "suhu_c" "kelembapan_persen"
## [4] "curah_hujan_mm" "pm25_ugm3"
dataset_lingkungan %>% str()
## tibble [60 × 5] (S3: tbl_df/tbl/data.frame)
## $ hari : num [1:60] 1 2 3 4 5 6 7 8 9 10 ...
## $ suhu_c : num [1:60] 33.7 29 30.1 30.9 28.3 30 30 26.1 32.2 31.3 ...
## $ kelembapan_persen: num [1:60] 75 65 77 90 75 72 70 71 76 70 ...
## $ curah_hujan_mm : num [1:60] 5.3 3.2 7.1 14.2 16.1 9.7 4.1 5.4 4.6 9.1 ...
## $ pm25_ugm3 : num [1:60] 41 41 31 43 48 33 43 52 43 36 ...
MINGGU 2 - “STATISTIKA DESKRIPTIF”
# Summary statistics
# Quartiles, mean and range of every column as reported by summary().
summary(object = dataset_lingkungan)
## hari suhu_c kelembapan_persen curah_hujan_mm
## Min. : 1.00 Min. :25.00 Min. :65.00 Min. : 0.600
## 1st Qu.:15.75 1st Qu.:28.57 1st Qu.:75.00 1st Qu.: 3.675
## Median :30.50 Median :29.95 Median :78.00 Median : 7.800
## Mean :30.50 Mean :29.90 Mean :78.77 Mean : 9.083
## 3rd Qu.:45.25 3rd Qu.:31.20 3rd Qu.:83.00 3rd Qu.:12.275
## Max. :60.00 Max. :34.90 Max. :94.00 Max. :31.800
## pm25_ugm3
## Min. :12.0
## 1st Qu.:35.0
## Median :43.0
## Mean :43.1
## 3rd Qu.:52.0
## Max. :63.0
# One-row tibble with mean, median, sd, min and max of every column.
# Result columns are named <column>_<statistic>; missing values are ignored.
ringkasan <- summarise(
  dataset_lingkungan,
  across(
    .cols = everything(),
    .fns = list(
      mean = function(nilai) mean(nilai, na.rm = TRUE),
      median = function(nilai) median(nilai, na.rm = TRUE),
      sd = function(nilai) sd(nilai, na.rm = TRUE),
      min = function(nilai) min(nilai, na.rm = TRUE),
      max = function(nilai) max(nilai, na.rm = TRUE)
    )
  )
)
ringkasan
## # A tibble: 1 × 25
## hari_mean hari_median hari_sd hari_min hari_max suhu_c_mean suhu_c_median
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 30.5 30.5 17.5 1 60 29.9 30.0
## # ℹ 18 more variables: suhu_c_sd <dbl>, suhu_c_min <dbl>, suhu_c_max <dbl>,
## # kelembapan_persen_mean <dbl>, kelembapan_persen_median <dbl>,
## # kelembapan_persen_sd <dbl>, kelembapan_persen_min <dbl>,
## # kelembapan_persen_max <dbl>, curah_hujan_mm_mean <dbl>,
## # curah_hujan_mm_median <dbl>, curah_hujan_mm_sd <dbl>,
## # curah_hujan_mm_min <dbl>, curah_hujan_mm_max <dbl>, pm25_ugm3_mean <dbl>,
## # pm25_ugm3_median <dbl>, pm25_ugm3_sd <dbl>, pm25_ugm3_min <dbl>, …
# Histograms of every column in the environmental dataset.
# Each column name is paired with its fill colour. Driving the plots from this
# single mapping keeps the data, title and axis label in sync; the previous
# copy-pasted version drew suhu_c under the "hari" title and never plotted hari.
warna_histogram <- c(
  hari = "light blue",
  suhu_c = "orange",
  kelembapan_persen = "lightgreen",
  curah_hujan_mm = "azure",
  pm25_ugm3 = "pink"
)
for (kolom in names(warna_histogram)) {
  hist(
    dataset_lingkungan[[kolom]],
    main = paste("Histogram", kolom),
    xlab = kolom,
    col = warna_histogram[[kolom]]
  )
}

MINGGU 3 - “PROBABILITAS EMPIRIS”
# Empirical probability that `hari` exceeds its own sample mean.
nama_variabel <- "hari"
x <- dataset_lingkungan[[nama_variabel]]
# Threshold: the sample mean (missing values ignored).
ambang <- mean(x, na.rm = TRUE)
# Share of non-missing observations strictly above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)
prob_empiris <- data.frame(
  # names() of a bare column vector is NULL, which left the event label
  # without a variable name; build the label from the column name instead.
  kejadian = paste0(nama_variabel, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
## kejadian peluang
## 1 hari > 30.5 0.5
# Empirical probability that `suhu_c` exceeds its own sample mean.
nama_variabel <- "suhu_c"
x <- dataset_lingkungan[[nama_variabel]]
# Threshold: the sample mean (missing values ignored).
ambang <- mean(x, na.rm = TRUE)
# Share of non-missing observations strictly above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)
prob_empiris <- data.frame(
  # names() of a bare column vector is NULL, which left the event label
  # without a variable name; build the label from the column name instead.
  kejadian = paste0(nama_variabel, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
## kejadian peluang
## 1 suhu_c > 29.9 0.5
# Empirical probability that `kelembapan_persen` exceeds its own sample mean.
nama_variabel <- "kelembapan_persen"
x <- dataset_lingkungan[[nama_variabel]]
# Threshold: the sample mean (missing values ignored).
ambang <- mean(x, na.rm = TRUE)
# Share of non-missing observations strictly above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)
prob_empiris <- data.frame(
  # names() of a bare column vector is NULL, which left the event label
  # without a variable name; build the label from the column name instead.
  kejadian = paste0(nama_variabel, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
## kejadian peluang
## 1 kelembapan_persen > 78.77 0.4666667
# Empirical probability that `curah_hujan_mm` exceeds its own sample mean.
nama_variabel <- "curah_hujan_mm"
x <- dataset_lingkungan[[nama_variabel]]
# Threshold: the sample mean (missing values ignored).
ambang <- mean(x, na.rm = TRUE)
# Share of non-missing observations strictly above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)
prob_empiris <- data.frame(
  # names() of a bare column vector is NULL, which left the event label
  # without a variable name; build the label from the column name instead.
  kejadian = paste0(nama_variabel, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
## kejadian peluang
## 1 curah_hujan_mm > 9.08 0.4166667
# Empirical probability that `pm25_ugm3` exceeds its own sample mean.
nama_variabel <- "pm25_ugm3"
x <- dataset_lingkungan[[nama_variabel]]
# Threshold: the sample mean (missing values ignored).
ambang <- mean(x, na.rm = TRUE)
# Share of non-missing observations strictly above the threshold.
p_emp <- mean(x > ambang, na.rm = TRUE)
prob_empiris <- data.frame(
  # names() of a bare column vector is NULL, which left the event label
  # without a variable name; build the label from the column name instead.
  kejadian = paste0(nama_variabel, " > ", round(ambang, 2)),
  peluang = p_emp
)
prob_empiris
## kejadian peluang
## 1 pm25_ugm3 > 43.1 0.4833333
MINGGU 4 - “DISTRIBUSI DISKRIT”
# Empirical probability mass function of the pm25_ugm3 column.
x_target <- dataset_lingkungan[["pm25_ugm3"]]
# Round to whole numbers so the readings form a discrete set of values.
x_diskrit <- round(x_target)
# Count each distinct value, then divide by the total to get relative
# frequencies.
freq <- table(x_diskrit)
pmf <- freq / sum(freq)
# Tabulate value / probability pairs; the table's names hold the values.
pmf_df <- data.frame(
  nilai = as.numeric(names(pmf)),
  probabilitas = as.numeric(pmf)
)
print(pmf_df)
## nilai probabilitas
## 1 12 0.01666667
## 2 17 0.03333333
## 3 22 0.01666667
## 4 26 0.01666667
## 5 27 0.01666667
## 6 31 0.01666667
## 7 33 0.06666667
## 8 34 0.03333333
## 9 35 0.05000000
## 10 36 0.03333333
## 11 37 0.01666667
## 12 38 0.01666667
## 13 39 0.01666667
## 14 40 0.01666667
## 15 41 0.08333333
## 16 42 0.01666667
## 17 43 0.05000000
## 18 44 0.03333333
## 19 45 0.03333333
## 20 47 0.01666667
## 21 48 0.06666667
## 22 50 0.05000000
## 23 51 0.01666667
## 24 52 0.05000000
## 25 54 0.05000000
## 26 56 0.05000000
## 27 57 0.01666667
## 28 58 0.01666667
## 29 61 0.05000000
## 30 62 0.01666667
## 31 63 0.01666667
# Bar chart of the empirical PMF, one bar per distinct rounded value.
barplot(
  pmf,
  main = "PMF Empiris Data Lingkungan",
  xlab = "Nilai",
  ylab = "Probabilitas",
  col = "skyblue"
)

MINGGU 5 - “DISTRIBUSI KONTINU”
# Normal approximation for the pm25_ugm3 column.
# Select the variable and the threshold explicitly instead of reusing whatever
# `x` and `ambang` happened to be assigned last in an earlier section; the
# recorded output (mean 43.1, sd 11.64838) corresponds to pm25_ugm3.
x <- dataset_lingkungan$pm25_ugm3
ambang <- mean(x, na.rm = TRUE)
# Parameters of the fitted normal distribution.
mu <- mean(x, na.rm = TRUE)
sigma <- sd(x, na.rm = TRUE)
# Model probability P(X > ambang), taken directly from the upper tail.
# The threshold equals mu here, so the result is 0.5 by symmetry.
p_model <- pnorm(ambang, mean = mu, sd = sigma, lower.tail = FALSE)
model_normal <- data.frame(
  distribusi = "Normal Aproksimasi",
  mean = mu,
  sd = sigma,
  peluang = p_model
)
model_normal
## distribusi mean sd peluang
## 1 Normal Aproksimasi 43.1 11.64838 0.5
# Density-scaled histogram with the fitted normal curve on top.
hist(
  x,
  probability = TRUE,
  main = "Histogram Data Lingkungan + Kurva Normal",
  xlab = "Nilai",
  col = "green"
)
# Inside curve(), `x` is the plotting variable that curve() supplies, not the
# data vector above.
curve(
  dnorm(x, mean = mu, sd = sigma),
  add = TRUE,
  col = "yellow",
  lwd = 2
)

MINGGU 6 - “DISTRIBUSI SAMPLING”
# Sampling distribution of the mean, approximated by resampling with
# replacement from each column.
set.seed(123)
B <- 2000 # number of resamples per sample size
n1 <- 10  # small sample size
n2 <- 30  # larger sample size
# Columns are processed in this fixed order, and for each column the n1 draws
# come before the n2 draws, so the random-number stream is consumed in the same
# sequence as in the original copy-pasted version.
kolom_sampling <- c(
  "hari", "suhu_c", "kelembapan_persen", "curah_hujan_mm", "pm25_ugm3"
)
for (kolom in kolom_sampling) {
  nilai <- dataset_lingkungan[[kolom]]
  mean_n10 <- replicate(B, mean(sample(nilai, n1, replace = TRUE)))
  mean_n30 <- replicate(B, mean(sample(nilai, n2, replace = TRUE)))
  # Titles name the variable and take the sample size from n1 / n2, so the ten
  # plots can be told apart and stay correct if the sizes change.
  hist(
    mean_n10,
    main = paste0("Distribusi Sampling Mean ", kolom, " (n=", n1, ")"),
    xlab = "Mean Sampel",
    col = "green"
  )
  hist(
    mean_n30,
    main = paste0("Distribusi Sampling Mean ", kolom, " (n=", n2, ")"),
    xlab = "Mean Sampel",
    col = "green"
  )
}

MINGGU 7 - “ESTIMASI PARAMETER”
# Point estimates: sample mean of each column.
dataset_lingkungan[["hari"]] %>% mean()
## [1] 30.5
dataset_lingkungan[["suhu_c"]] %>% mean()
## [1] 29.90333
dataset_lingkungan[["kelembapan_persen"]] %>% mean()
## [1] 78.76667
dataset_lingkungan[["curah_hujan_mm"]] %>% mean()
## [1] 9.083333
dataset_lingkungan[["pm25_ugm3"]] %>% mean()
## [1] 43.1
# Confidence intervals: 95% interval for the mean of each column, taken from
# the conf.int element of a one-sample t.test() result.
t.test(x = dataset_lingkungan[["hari"]], conf.level = 0.95)[["conf.int"]]
## [1] 25.98851 35.01149
## attr(,"conf.level")
## [1] 0.95
t.test(x = dataset_lingkungan[["suhu_c"]], conf.level = 0.95)[["conf.int"]]
## [1] 29.29877 30.50789
## attr(,"conf.level")
## [1] 0.95
t.test(
  x = dataset_lingkungan[["kelembapan_persen"]],
  conf.level = 0.95
)[["conf.int"]]
## [1] 77.08284 80.45050
## attr(,"conf.level")
## [1] 0.95
t.test(
  x = dataset_lingkungan[["curah_hujan_mm"]],
  conf.level = 0.95
)[["conf.int"]]
## [1] 7.23709 10.92958
## attr(,"conf.level")
## [1] 0.95
t.test(x = dataset_lingkungan[["pm25_ugm3"]], conf.level = 0.95)[["conf.int"]]
## [1] 40.0909 46.1091
## attr(,"conf.level")
## [1] 0.95