Visualisasi membantu memahami distribusi, pola, dan hubungan antar variabel. Melalui visualisasi, kita dapat melihat bagaimana data tersebar, apakah data tersebut simetris, miring, atau memiliki pencilan (outlier). Selain itu, visualisasi juga memudahkan dalam mengidentifikasi pola tertentu, seperti tren kenaikan atau penurunan dari waktu ke waktu, pola musiman, maupun fluktuasi yang terjadi dalam data.
| Fungsi | Jenis Plot | Kegunaan |
|---|---|---|
| hist() | Histogram | Distribusi frekuensi |
| boxplot() | Boxplot | Distribusi + outlier |
| barplot() | Bar chart | Frekuensi kategorik |
| plot() | Scatter/Line | Hubungan 2 variabel |
# Histogram of a small numeric sample, drawn with hist().
x <- c(23, 21, 24, 12, 15, 24, 34, 45, 9, 13)
hist(
  x,
  breaks = "Sturges",  # bin count chosen by Sturges' rule
  col = "steelblue", border = "white",
  main = "Distribusi Data",
  xlab = "Nilai", ylab = "Frekuensi"
)
# Boxplot of the sample `x`, drawn with boxplot().
boxplot(
  x,
  col = "lightblue", border = "navy",
  main = "Boxplot Data",
  ylab = "Nilai",
  notch = FALSE,       # plain box, no notch drawn
  horizontal = FALSE   # vertical orientation
)
Data nilai ujian 20 mahasiswa:
nilai = 72,85,91,63,78,88,70,95,82,69,77,84,90,65,73,87,92,68,79,86
Lakukan visualisasi!
# Worked example, Chapter 6: three views of the 20 exam scores.
nilai <- c(72, 85, 91, 63, 78, 88, 70, 95, 82, 69,
           77, 84, 90, 65, 73, 87, 92, 68, 79, 86)

# Switch to a 2x2 panel grid with tighter margins. par() returns the previous
# values of the settings it changes; keeping them lets this block restore
# both mfrow AND mar when it is done (resetting mfrow alone would leave the
# modified margins active for every later plot).
par_lama <- par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))

# a) Histogram. A single number for `breaks` is only a suggested number of
#    classes; hist() rounds to "pretty" breakpoints, so the class count may
#    differ from 5.
hist(nilai,
     breaks = 5,
     main = "a) Histogram Nilai Ujian",
     xlab = "Nilai", ylab = "Frekuensi",
     col = "steelblue", border = "white")
# Dashed vertical reference line at the sample mean.
abline(v = mean(nilai), col = "red", lwd = 2, lty = 2)
legend("topright", legend = "Mean", col = "red", lty = 2, bty = "n")

# b) Boxplot
boxplot(nilai,
        main = "b) Boxplot Nilai Ujian",
        ylab = "Nilai",
        col = "lightblue", border = "navy")

# c) Scatter plot of score against observation order, with a fitted
#    linear trend line. The index is derived from the data length rather
#    than hard-coded.
urut <- seq_along(nilai)
plot(urut, nilai,
     main = "c) Scatter plot Nilai Ujian",
     xlab = "No. Urut", ylab = "Nilai",
     pch = 16, col = "steelblue", cex = 1.2)
abline(lm(nilai ~ urut), col = "red", lwd = 2)
legend("topright", legend = "Tren", col = "red", lty = 1, bty = "n")

# Restore the graphical parameters that were in effect before this block.
par(par_lama)
hujan = 312,245,198,87,45,22,18,30,68,142,228,290 (dalam mm)
bulan = Januari - Desember
# Exercise, Chapter 6: monthly rainfall (mm) for twelve months.
hujan <- c(312, 245, 198, 87, 45, 22, 18, 30, 68, 142, 228, 290)
bulan <- month.abb  # built-in English month abbreviations ("Jan" ... "Dec")

# Colour by intensity: > 200 mm, 100-200 mm, < 100 mm.
warna <- ifelse(hujan > 200, "firebrick",
                ifelse(hujan >= 100, "goldenrod", "forestgreen"))

# The mean is used by several panels; compute it once.
rata_hujan <- mean(hujan)

# Switch to a 2x2 panel grid with tighter margins. par() returns the previous
# values of the settings it changes, so they can be restored at the end.
par_lama <- par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))

# a) Horizontal bar plot, one bar per month, coloured by intensity class.
barplot(hujan,
        names.arg = bulan,
        col = warna,
        main = "a) Barplot Curah Hujan",
        xlab = "Curah hujan (mm)",
        horiz = TRUE,
        las = 1,
        cex.names = 0.8)
legend("bottomright",
       legend = c(">200mm", "100-200mm", "<100mm"),
       fill = c("firebrick", "goldenrod", "forestgreen"),
       bty = "n", cex = 0.8)

# b) Histogram of the rainfall values.
hist(hujan,
     main = "b) Histogram Curah Hujan",
     xlab = "Curah hujan (mm)", ylab = "Frekuensi",
     col = "steelblue", border = "white")
# Dashed vertical line at the mean, with a text label placed just right of it.
abline(v = rata_hujan, col = "red", lwd = 2, lty = 2)
text(rata_hujan + 15, 3, paste("Mean =", round(rata_hujan, 1)),
     col = "red", cex = 0.8)

# c) Time series: rainfall by month. The default x axis is suppressed
#    (xaxt = "n") and redrawn with month labels.
plot(1:12, hujan,
     type = "b",
     main = "c) Time Series Curah Hujan",
     xlab = "Bulan", ylab = "Curah hujan (mm)",
     xaxt = "n",
     pch = 16, col = "steelblue", lwd = 2)
axis(1, at = 1:12, labels = bulan, cex.axis = 0.8)
abline(h = rata_hujan, col = "red", lty = 2, lwd = 1.5)

# Restore the graphical parameters that were in effect before this block.
par(par_lama)
Uji statistik digunakan untuk membuat keputusan berbasis data dengan tingkat kepercayaan tertentu.
| Uji | Fungsi R | Kegunaan |
|---|---|---|
| Uji t satu sampel | t.test(x, mu=μ₀) | Bandingkan rata-rata sampel vs nilai tertentu |
| Uji t dua sampel independen | t.test(x, y) | Bandingkan 2 kelompok berbeda |
| Uji t berpasangan | t.test(x, y, paired=TRUE) | Bandingkan sebelum-sesudah |
| ANOVA | aov() + summary() | Bandingkan ≥ 3 kelompok |
| Post-hoc | TukeyHSD() | Pasangan kelompok mana yang berbeda |
# One-sample t-test: is the mean of `x` different from 20?
x <- c(23, 21, 24, 12, 15, 24, 34, 45, 9, 13)
hasil <- t.test(x, mu = 20, alternative = "two.sided")
print(hasil)
##
## One Sample t-test
##
## data: x
## t = 0.57682, df = 9, p-value = 0.5782
## alternative hypothesis: true mean is not equal to 20
## 95 percent confidence interval:
## 14.15641 29.84359
## sample estimates:
## mean of x
## 22
# Decision at the 5% significance level: reject H0 when the p-value stored in
# `hasil` is below 0.05. The condition is a single value, so a plain if/else
# expression is used instead of the vectorised ifelse(). The trailing "\n"
# terminates the output line.
cat("Keputusan:",
    if (hasil$p.value < 0.05) "TOLAK H0" else "GAGAL TOLAK H0",
    "\n")
## Keputusan: GAGAL TOLAK H0
# Two-sample t-test on independent groups A and B. Variances are not assumed
# equal, so the Welch version of the test is run.
A <- c(72, 75, 68, 80, 71)
B <- c(85, 88, 82, 90, 87)
hasil <- t.test(A, B, var.equal = FALSE)
print(hasil)
##
## Welch Two Sample t-test
##
## data: A and B
## t = -5.3889, df = 6.9906, p-value = 0.001025
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -18.993708 -7.406292
## sample estimates:
## mean of x mean of y
## 73.2 86.4
# Decision at the 5% significance level: reject H0 when the p-value stored in
# `hasil` is below 0.05. The condition is a single value, so a plain if/else
# expression is used instead of the vectorised ifelse(). The trailing "\n"
# terminates the output line.
cat("Keputusan:",
    if (hasil$p.value < 0.05) "TOLAK H0" else "GAGAL TOLAK H0",
    "\n")
## Keputusan: TOLAK H0
Data tekanan darah sebelum dan sesudah pemberian obat pada 10 pasien.
sebelum = 145,138,152,141,149,136,155,143,148,140
sesudah = 138,130,145,135,141,129,147,136,140,133
Lakukan Uji t berpasangan!
# Paired t-test on before/after measurements of the same 10 patients.
# One-sided alternative: the mean of (sebelum - sesudah) is greater than 0.
sebelum <- c(145, 138, 152, 141, 149, 136, 155, 143, 148, 140)
sesudah <- c(138, 130, 145, 135, 141, 129, 147, 136, 140, 133)
hasil <- t.test(sebelum, sesudah, alternative = "greater", paired = TRUE)
print(hasil)
##
## Paired t-test
##
## data: sebelum and sesudah
## t = 34.202, df = 9, p-value = 3.853e-11
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
## 6.908745 Inf
## sample estimates:
## mean difference
## 7.3
# Decision at the 5% significance level: reject H0 when the p-value stored in
# `hasil` is below 0.05. The condition is a single value, so a plain if/else
# expression is used instead of the vectorised ifelse(). The trailing "\n"
# terminates the output line.
cat("Keputusan:",
    if (hasil$p.value < 0.05) {
      "TOLAK H0 — ada penurunan signifikan"
    } else {
      "GAGAL TOLAK H0"
    },
    "\n")
## Keputusan: TOLAK H0 — ada penurunan signifikan
# One-way ANOVA: scores for three teaching methods, five observations each.
nilai <- c(
  72, 75, 68, 80, 71,  # Konvensional
  85, 88, 82, 90, 87,  # Online
  78, 82, 79, 85, 81   # Hybrid
)
metode <- rep(c("Konvensional", "Online", "Hybrid"), each = 5)
df <- data.frame(nilai = nilai, metode = factor(metode))

# Fit the model and print the ANOVA table.
hasil_aov <- aov(nilai ~ metode, data = df)
print(summary(hasil_aov))
## Df Sum Sq Mean Sq F value Pr(>F)
## metode 2 440.4 220.2 17.62 0.000269 ***
## Residuals 12 150.0 12.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pull the p-value of the first term out of the ANOVA table: summary() of an
# aov fit is a list whose first element is the table, and "Pr(>F)" is its
# p-value column.
p_val <- summary(hasil_aov)[[1]][["Pr(>F)"]][1]

# Decision at the 5% significance level. p_val is a single value, so a plain
# if/else expression is used instead of the vectorised ifelse(). The trailing
# "\n" terminates the output line.
cat("Keputusan:",
    if (p_val < 0.05) {
      "TOLAK H0 - terdapat perbedaan signifikan"
    } else {
      "GAGAL TOLAK H0 - tidak terdapat perbedaan signifikan"
    },
    "\n")
## Keputusan: TOLAK H0 - terdapat perbedaan signifikan
# Post-hoc test (Tukey HSD): pairwise comparisons between the group means of
# the fitted ANOVA model, at the 95% family-wise confidence level.
tukey <- TukeyHSD(hasil_aov, conf.level = 0.95)
print(tukey)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = nilai ~ metode, data = df)
##
## $metode
## diff lwr upr p adj
## Konvensional-Hybrid -7.8 -13.7655245 -1.834476 0.0115376
## Online-Hybrid 5.4 -0.5655245 11.365524 0.0775970
## Online-Konvensional 13.2 7.2344755 19.165524 0.0001972
# Heading line for the interpretation of the Tukey table.
writeLines("Interpretasi:")
## Interpretasi:
# Reading rule for the Tukey table. The trailing "\n" terminates the line so
# that later output does not run on from it.
cat("p adj < 0.05 = pasangan berbeda signifikan\n")
## p adj < 0.05 = pasangan berbeda signifikan
Data Nilai Kalkulus 3 prodi (10 mahasiswa/prodi):
Statistika : 78,82,75,88,71,85,79,83,77,80
Matematika : 88,92,85,95,79,90,87,93,84,91
Ilmu Komputer : 70,74,68,80,65,76,72,78,69,73
# Calculus scores for three study programmes, 10 students each.
stat <- c(78, 82, 75, 88, 71, 85, 79, 83, 77, 80)
mat <- c(88, 92, 85, 95, 79, 90, 87, 93, 84, 91)
ik <- c(70, 74, 68, 80, 65, 76, 72, 78, 69, 73)

# Long format: one score vector plus a parallel group label per observation.
nilai_all <- c(stat, mat, ik)
prodi <- rep(c("Statistika", "Matematika", "IlmuKomputer"), each = 10)
df_l72 <- data.frame(nilai = nilai_all, prodi = factor(prodi))

# a) Normality check: Shapiro-Wilk test per programme.
# The group names come from unique(prodi), which keeps first-appearance order,
# so they are not repeated by hand. The p-value is a single number, so a plain
# if/else expression picks the label.
for (p in unique(prodi)) {
  d <- nilai_all[prodi == p]
  sw <- shapiro.test(d)
  cat(p, ": W =", round(sw$statistic, 4), ", p =", round(sw$p.value, 4),
      "=", if (sw$p.value >= 0.05) "Normal " else "Tidak Normal ✗", "\n")
}
## Statistika : W = 0.9965 , p = 1 = Normal
## Matematika : W = 0.9692 , p = 0.8832 = Normal
## IlmuKomputer : W = 0.9871 , p = 0.9919 = Normal
# b) Homogeneity of variance: Bartlett test of score by programme.
bart <- bartlett.test(nilai ~ prodi, data = df_l72)

# The p-value is a single number, so a plain if/else expression picks the
# label (ifelse() is meant for vectors).
cat("p-value:", round(bart$p.value, 4), "=",
    if (bart$p.value >= 0.05) {
      "Variance homogen "
    } else {
      "Variance tidak homogen ✗"
    },
    "\n")
## p-value: 0.9846 = Variance homogen
# c) One-way ANOVA of score by study programme; print the ANOVA table.
aov_l72 <- aov(nilai ~ prodi, data = df_l72)
print(summary(aov_l72))
## Df Sum Sq Mean Sq F value Pr(>F)
## prodi 2 1266.9 633.4 27.3 3.28e-07 ***
## Residuals 27 626.5 23.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pull the p-value of the first term out of the ANOVA table: summary() of an
# aov fit is a list whose first element is the table, and "Pr(>F)" is its
# p-value column.
p_aov <- summary(aov_l72)[[1]][["Pr(>F)"]][1]

# Decision at the 5% significance level. p_aov is a single value, so a plain
# if/else expression is used instead of the vectorised ifelse().
cat("\nKeputusan:",
    if (p_aov < 0.05) {
      "TOLAK H0 = ada perbedaan nilai signifikan antar prodi"
    } else {
      "GAGAL TOLAK H0"
    },
    "\n")
##
## Keputusan: TOLAK H0 = ada perbedaan nilai signifikan antar prodi
# d) Tukey HSD post-hoc test: pairwise comparisons between the programme
#    means, at the 95% family-wise confidence level.
print(TukeyHSD(aov_l72, conf.level = 0.95))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = nilai ~ prodi, data = df_l72)
##
## $prodi
## diff lwr upr p adj
## Matematika-IlmuKomputer 15.9 10.558745 21.241255 0.0000002
## Statistika-IlmuKomputer 7.3 1.958745 12.641255 0.0059567
## Statistika-Matematika -8.6 -13.941255 -3.258745 0.0012733
# e) Visual summary: boxplot and mean +/- SD bar chart, side by side.
# par() returns the previous values of the settings it changes; they are kept
# so the layout and margins can be restored once both panels are drawn.
par_lama <- par(mfrow = c(1, 2), mar = c(5, 4, 4, 2))

# Left panel: one box per programme, in factor-level order.
boxplot(nilai ~ prodi, data = df_l72,
        col = c("steelblue", "coral", "gold"),
        main = "Distribusi Nilai per Prodi",
        xlab = "Program Studi", ylab = "Nilai",
        cex.axis = 0.8)

# Right panel: group means with +/- 1 SD error bars.
means <- tapply(df_l72$nilai, df_l72$prodi, mean)
sds <- tapply(df_l72$nilai, df_l72$prodi, sd)

# Take the bar order from the factor levels (alphabetical for this factor)
# instead of a hand-maintained list, and look the short axis labels up by
# name so each label stays attached to its own group.
prodi_unik <- levels(df_l72$prodi)
label_pendek <- c(IlmuKomputer = "IK", Matematika = "Mat", Statistika = "Stat")

# barplot() returns the x positions of the bar midpoints; they anchor the
# error bars drawn below.
mp <- barplot(means[prodi_unik],
              col = c("steelblue", "coral", "gold"),
              main = "Mean ± SD per Prodi",
              ylab = "Nilai", ylim = c(0, 110),
              names.arg = unname(label_pendek[prodi_unik]))

# angle = 90 with code = 3 draws flat caps at both ends of each segment.
arrows(mp, means[prodi_unik] - sds[prodi_unik],
       mp, means[prodi_unik] + sds[prodi_unik],
       angle = 90, code = 3, length = 0.1, lwd = 2)

# Restore the graphical parameters that were in effect before this block.
par(par_lama)