# ==============================================================================
# ANALISIS DATASET LINGKUNGAN - KELOMPOK 3 STATISTIKA DASAR
# Tanggal: 2026-03-22
# ==============================================================================
# --- PERSIAPAN LINGKUNGAN ---
# NOTE: the former `rm(list = ls())` call was removed on purpose. It deleted
# every object in the caller's workspace when this script was sourced, yet it
# did not detach packages or reset options, so it never produced a truly clean
# session. Restart R before running if a fresh session is required.
library(readxl) # read_excel(), used to import the dataset
library(dplyr)  # %>%, summarise(), across(), everything()
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# ==============================================================================
# MINGGU 1: IMPORT & EKSPLORASI DATA
# ==============================================================================
# Load data
# Import the spreadsheet into `dataset_lingkungan`. The path is relative, so
# the file is looked up from the current working directory.
dataset_lingkungan <- read_excel("dataset_lingkungan.xlsx")
# Inspect the data structure
print("--- Struktur Data ---")
## [1] "--- Struktur Data ---"
# Preview the first rows of the imported table.
head(dataset_lingkungan)
## # A tibble: 6 × 5
## hari suhu_c kelembapan_persen curah_hujan_mm pm25_ugm3
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 33.7 75 5.3 41
## 2 2 29 65 3.2 41
## 3 3 30.1 77 7.1 31
## 4 4 30.9 90 14.2 43
## 5 5 28.3 75 16.1 48
## 6 6 30 72 9.7 33
# List the column names.
names(dataset_lingkungan)
## [1] "hari" "suhu_c" "kelembapan_persen"
## [4] "curah_hujan_mm" "pm25_ugm3"
# Show each column's type together with its leading values.
str(dataset_lingkungan)
## tibble [60 × 5] (S3: tbl_df/tbl/data.frame)
## $ hari : num [1:60] 1 2 3 4 5 6 7 8 9 10 ...
## $ suhu_c : num [1:60] 33.7 29 30.1 30.9 28.3 30 30 26.1 32.2 31.3 ...
## $ kelembapan_persen: num [1:60] 75 65 77 90 75 72 70 71 76 70 ...
## $ curah_hujan_mm : num [1:60] 5.3 3.2 7.1 14.2 16.1 9.7 4.1 5.4 4.6 9.1 ...
## $ pm25_ugm3 : num [1:60] 41 41 31 43 48 33 43 52 43 36 ...
# ==============================================================================
# MINGGU 2: STATISTIKA DESKRIPTIF
# ==============================================================================
# Ringkasan Statistik Dasar
# Minimum, quartiles, median, mean and maximum for every column. `hari` is
# summarised as well; judging by the str() output it appears to be a running
# day index (1..60) rather than a measurement.
summary(dataset_lingkungan)
## hari suhu_c kelembapan_persen curah_hujan_mm
## Min. : 1.00 Min. :25.00 Min. :65.00 Min. : 0.600
## 1st Qu.:15.75 1st Qu.:28.57 1st Qu.:75.00 1st Qu.: 3.675
## Median :30.50 Median :29.95 Median :78.00 Median : 7.800
## Mean :30.50 Mean :29.90 Mean :78.77 Mean : 9.083
## 3rd Qu.:45.25 3rd Qu.:31.20 3rd Qu.:83.00 3rd Qu.:12.275
## Max. :60.00 Max. :34.90 Max. :94.00 Max. :31.800
## pm25_ugm3
## Min. :12.0
## 1st Qu.:35.0
## Median :43.0
## Mean :43.1
## 3rd Qu.:52.0
## Max. :63.0
# Custom summary built with dplyr: mean, median, standard deviation, minimum
# and maximum of every column, with missing values ignored. The resulting
# one-row tibble names its columns <column>_<statistic>.
ringkasan <- summarise(
  dataset_lingkungan,
  across(
    everything(),
    list(
      mean = function(nilai) mean(nilai, na.rm = TRUE),
      median = function(nilai) median(nilai, na.rm = TRUE),
      sd = function(nilai) sd(nilai, na.rm = TRUE),
      min = function(nilai) min(nilai, na.rm = TRUE),
      max = function(nilai) max(nilai, na.rm = TRUE)
    )
  )
)
print(ringkasan)
## # A tibble: 1 × 25
## hari_mean hari_median hari_sd hari_min hari_max suhu_c_mean suhu_c_median
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 30.5 30.5 17.5 1 60 29.9 30.0
## # ℹ 18 more variables: suhu_c_sd <dbl>, suhu_c_min <dbl>, suhu_c_max <dbl>,
## # kelembapan_persen_mean <dbl>, kelembapan_persen_median <dbl>,
## # kelembapan_persen_sd <dbl>, kelembapan_persen_min <dbl>,
## # kelembapan_persen_max <dbl>, curah_hujan_mm_mean <dbl>,
## # curah_hujan_mm_median <dbl>, curah_hujan_mm_sd <dbl>,
## # curah_hujan_mm_min <dbl>, curah_hujan_mm_max <dbl>, pm25_ugm3_mean <dbl>,
## # pm25_ugm3_median <dbl>, pm25_ugm3_sd <dbl>, pm25_ugm3_min <dbl>, …
# Histograms for every variable on a 2 x 3 grid (five of the six panels are
# used). par() returns the settings it replaces, so they are captured here and
# restored at the end instead of forcing a 1 x 1 layout that may not have been
# the layout in effect before this section ran.
par_lama <- par(mfrow = c(2, 3))
hist(
  dataset_lingkungan$hari,
  main = "Histogram Hari", xlab = "Hari", col = "lightblue"
)
hist(
  dataset_lingkungan$suhu_c,
  main = "Histogram Suhu", xlab = "Celsius", col = "orange"
)
hist(
  dataset_lingkungan$kelembapan_persen,
  main = "Histogram Kelembapan", xlab = "%", col = "lightgreen"
)
hist(
  dataset_lingkungan$curah_hujan_mm,
  main = "Histogram Curah Hujan", xlab = "mm", col = "azure"
)
hist(
  dataset_lingkungan$pm25_ugm3,
  main = "Histogram PM2.5", xlab = "ug/m3", col = "pink"
)
par(par_lama) # put back the layout that was active before the grid

# ==============================================================================
# MINGGU 3: PROBABILITAS EMPIRIS
# ==============================================================================
# Fungsi untuk menghitung probabilitas empiris terhadap mean
# Empirical probability that an observation exceeds its own sample mean.
#
# kolom:      vector of observations; NA values are ignored.
# nama_kolom: label stored in the `Variabel` column of the result.
#
# Returns a data.frame with the columns `Variabel`, `Ambang_Mean` (the mean
# rounded to 2 decimals) and `Peluang` (share of non-missing observations that
# are strictly greater than the unrounded mean).
hitung_p_empiris <- function(kolom, nama_kolom) {
  rata_rata <- mean(kolom, na.rm = TRUE)
  # Compare against the unrounded mean; rounding is for display only.
  di_atas_rata <- kolom > rata_rata
  data.frame(
    Variabel = nama_kolom,
    Ambang_Mean = round(rata_rata, 2),
    Peluang = mean(di_atas_rata, na.rm = TRUE)
  )
}
# Stack the per-variable results into one table, one row per variable, in the
# same order as the columns of the dataset.
prob_empiris_all <- do.call(
  rbind,
  list(
    hitung_p_empiris(dataset_lingkungan[["hari"]], "hari"),
    hitung_p_empiris(dataset_lingkungan[["suhu_c"]], "suhu_c"),
    hitung_p_empiris(dataset_lingkungan[["kelembapan_persen"]], "kelembapan"),
    hitung_p_empiris(dataset_lingkungan[["curah_hujan_mm"]], "curah_hujan"),
    hitung_p_empiris(dataset_lingkungan[["pm25_ugm3"]], "pm25")
  )
)
print(prob_empiris_all)
## Variabel Ambang_Mean Peluang
## 1 hari 30.50 0.5000000
## 2 suhu_c 29.90 0.5000000
## 3 kelembapan 78.77 0.4666667
## 4 curah_hujan 9.08 0.4166667
## 5 pm25 43.10 0.4833333
# MINGGU 4 - "DISTRIBUSI DISKRIT"
# Pilih satu variabel spesifik, misal pm25_ugm3
# Variable analysed in this section: the PM2.5 column.
x_target <- dataset_lingkungan[["pm25_ugm3"]]
# Make the values discrete by rounding to the nearest whole number
x_diskrit <- round(x_target)
# Count each distinct value, then divide by the total count to obtain the
# relative frequencies (the empirical PMF)
freq <- table(x_diskrit)
pmf <- freq / sum(freq)
# Tabulate as value / probability pairs; the values come from the table names
pmf_df <- data.frame(
  nilai = as.numeric(names(pmf)),
  probabilitas = as.numeric(pmf)
)
print(pmf_df)
## nilai probabilitas
## 1 12 0.01666667
## 2 17 0.03333333
## 3 22 0.01666667
## 4 26 0.01666667
## 5 27 0.01666667
## 6 31 0.01666667
## 7 33 0.06666667
## 8 34 0.03333333
## 9 35 0.05000000
## 10 36 0.03333333
## 11 37 0.01666667
## 12 38 0.01666667
## 13 39 0.01666667
## 14 40 0.01666667
## 15 41 0.08333333
## 16 42 0.01666667
## 17 43 0.05000000
## 18 44 0.03333333
## 19 45 0.03333333
## 20 47 0.01666667
## 21 48 0.06666667
## 22 50 0.05000000
## 23 51 0.01666667
## 24 52 0.05000000
## 25 54 0.05000000
## 26 56 0.05000000
## 27 57 0.01666667
## 28 58 0.01666667
## 29 61 0.05000000
## 30 62 0.01666667
## 31 63 0.01666667
# Bar chart of the empirical PMF: one bar per distinct rounded value.
barplot(
  pmf,
  main = "PMF Empiris Data Lingkungan",
  xlab = "Nilai",
  ylab = "Probabilitas",
  col = "skyblue"
)

# MINGGU 5 - "DISTRIBUSI KONTINU"
# Gunakan variabel yang sama untuk konsistensi
# Parameters of the normal curve: sample mean and standard deviation of
# x_target, with missing values ignored.
mu <- mean(x_target, na.rm = TRUE)
sigma <- sd(x_target, na.rm = TRUE)
# Histogram on the density scale so the fitted density can share its y-axis.
hist(
  x_target,
  probability = TRUE,
  main = "Histogram + Kurva Normal",
  xlab = "Nilai"
)
# `x` is the placeholder variable that curve() evaluates the expression over.
curve(dnorm(x, mean = mu, sd = sigma), add = TRUE, col = "red", lwd = 2)

# ==============================================================================
# MINGGU 6: DISTRIBUSI SAMPLING
# ==============================================================================
# Fix the RNG seed so the resampled means are reproducible.
set.seed(123)
B <- 2000 # number of resamples drawn for each sample size
n1 <- 10  # smaller sample size
n2 <- 30  # larger sample size
# Simulated sampling distribution of the mean of suhu_c: for each sample size,
# draw B samples with replacement from the observed values and keep each
# sample's mean. The n = 10 run must stay first so the random draws match.
mean_n10 <- replicate(
  B,
  mean(sample(dataset_lingkungan$suhu_c, n1, replace = TRUE))
)
mean_n30 <- replicate(
  B,
  mean(sample(dataset_lingkungan$suhu_c, n2, replace = TRUE))
)
# Side-by-side panels. par() returns the settings it replaces, so they are
# captured and restored afterwards instead of forcing a 1 x 1 layout.
par_lama <- par(mfrow = c(1, 2))
hist(mean_n10, main = "Sampling Mean n=10", col = "yellow")
hist(mean_n30, main = "Sampling Mean n=30", col = "gold")

par(par_lama) # put back the layout that was active before the panels
# ==============================================================================
# MINGGU 7: ESTIMASI PARAMETER (CONFIDENCE INTERVAL)
# ==============================================================================
# Menghitung Interval Kepercayaan 95% untuk semua variabel utama
# 95% confidence interval for the mean of each measured column, read from the
# `conf.int` element of a one-sample t.test(). The `hari` column is not
# included.
print("--- Interval Kepercayaan 95% (t-test) ---")
## [1] "--- Interval Kepercayaan 95% (t-test) ---"
# suhu_c
t.test(dataset_lingkungan$suhu_c, conf.level=0.95)$conf.int
## [1] 29.29877 30.50789
## attr(,"conf.level")
## [1] 0.95
# kelembapan_persen
t.test(dataset_lingkungan$kelembapan_persen, conf.level=0.95)$conf.int
## [1] 77.08284 80.45050
## attr(,"conf.level")
## [1] 0.95
# curah_hujan_mm
t.test(dataset_lingkungan$curah_hujan_mm, conf.level=0.95)$conf.int
## [1] 7.23709 10.92958
## attr(,"conf.level")
## [1] 0.95
# pm25_ugm3
t.test(dataset_lingkungan$pm25_ugm3, conf.level=0.95)$conf.int
## [1] 40.0909 46.1091
## attr(,"conf.level")
## [1] 0.95