BAB 6: Visualisasi Statistik

Visualisasi membantu memahami distribusi, pola, dan hubungan antar variabel. Melalui visualisasi, kita dapat melihat bagaimana data tersebar, apakah data tersebut simetris, miring, atau memiliki pencilan (outlier). Selain itu, visualisasi juga memudahkan dalam mengidentifikasi pola tertentu, seperti tren kenaikan atau penurunan dari waktu ke waktu, pola musiman, maupun fluktuasi yang terjadi dalam data.

| Fungsi | Jenis Plot | Kegunaan |
|---|---|---|
| hist() | Histogram | Distribusi frekuensi |
| boxplot() | Boxplot | Distribusi + outlier |
| barplot() | Bar chart | Frekuensi kategorik |
| plot() | Scatter/Line | Hubungan 2 variabel |
# Sample data shared by the histogram and boxplot demos below.
x <- c(23, 21, 24, 12, 15, 24, 34, 45, 9, 13)

# HISTOGRAM: hist() -- frequency distribution of x.
# The bin count is chosen by the "Sturges" rule; labels and colours follow.
hist(
  x,
  breaks = "Sturges",
  col    = "steelblue",
  border = "white",
  main   = "Distribusi Data",
  xlab   = "Nilai",
  ylab   = "Frekuensi"
)

# BOXPLOT: boxplot() -- vertical box-and-whisker plot of x without notches.
boxplot(
  x,
  notch      = FALSE,
  horizontal = FALSE,
  col        = "lightblue",
  border     = "navy",
  main       = "Boxplot Data",
  ylab       = "Nilai"
)

Contoh Soal

Data nilai ujian 20 mahasiswa:

nilai = 72,85,91,63,78,88,70,95,82,69,77,84,90,65,73,87,92,68,79,86

Lakukan visualisasi!

  a) Histogram 5 kelas | b) Boxplot | c) Scatter plot
# Worked example for Chapter 6: exam scores of 20 students.
nilai <- c(72,85,91,63,78,88,70,95,82,69,77,84,90,65,73,87,92,68,79,86)

# Setup: 2x2 panel grid with tighter margins. par() returns the previous
# values of the parameters it changes, so keep them for the restore below.
op <- par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))

# a) Histogram with exactly 5 classes.
# A single number given to `breaks` is only a suggestion: hist() hands it to
# pretty(), which yields 7 bins (60, 65, ..., 95) for this data. Supplying the
# 6 equally spaced class boundaries explicitly guarantees 5 classes.
batas_kelas <- seq(min(nilai), max(nilai), length.out = 6)
hist(nilai,
     breaks = batas_kelas,
     main   = "a) Histogram Nilai Ujian",
     xlab   = "Nilai", ylab = "Frekuensi",
     col    = "steelblue", border = "white")
# Mark the sample mean with a dashed vertical line.
abline(v = mean(nilai), col = "red", lwd = 2, lty = 2)
legend("topright", legend = "Mean", col = "red", lty = 2, bty = "n")

# b) Boxplot
boxplot(nilai,
        main   = "b) Boxplot Nilai Ujian",
        ylab   = "Nilai",
        col    = "lightblue", border = "navy")

# c) Scatter plot of score against observation order, plus a linear trend line
urut <- seq_along(nilai)
plot(urut, nilai,
     main = "c) Scatter plot Nilai Ujian",
     xlab = "No. Urut", ylab = "Nilai",
     pch  = 16, col = "steelblue", cex = 1.2)
abline(lm(nilai ~ urut), col = "red", lwd = 2)
legend("topright", legend = "Tren", col = "red", lty = 1, bty = "n")

# Restore the settings saved above (both mfrow and mar); resetting only mfrow
# would leave the custom margins active for later plots.
par(op)

Latihan Soal

hujan = 312,245,198,87,45,22,18,30,68,142,228,290 (dalam mm)

bulan = Januari - Desember

  a) Barplot dengan warna berdasarkan intensitas | b) Histogram + Garis mean | c) Time series plot + Garis mean
# Exercise for Chapter 6: monthly rainfall (mm), one value per month.
hujan <- c(312,245,198,87,45,22,18,30,68,142,228,290)
bulan <- month.abb

# The mean is drawn on two of the plots below, so compute it once.
rata_hujan <- mean(hujan)

# Colour by intensity: above 200 mm, 100 to 200 mm, below 100 mm
warna <- ifelse(hujan > 200, "firebrick",
         ifelse(hujan >= 100, "goldenrod", "forestgreen"))

# 2x2 panel grid; keep the previous settings so they can be restored at the
# end instead of leaking into later plots.
op <- par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))

# a) Horizontal barplot, one bar per month, coloured by intensity
barplot(hujan,
        names.arg = bulan,
        col       = warna,
        main      = "a) Barplot Curah Hujan",
        xlab      = "Curah hujan (mm)",
        horiz     = TRUE,
        las       = 1,
        cex.names = 0.8)
legend("bottomright",
       legend = c(">200mm","100-200mm","<100mm"),
       fill   = c("firebrick","goldenrod","forestgreen"),
       bty    = "n", cex = 0.8)

# b) Histogram with a dashed vertical line at the mean
hist(hujan,
     main = "b) Histogram Curah Hujan",
     xlab = "Curah hujan (mm)", ylab = "Frekuensi",
     col  = "steelblue", border = "white")
abline(v = rata_hujan, col = "red", lwd = 2, lty = 2)
# pos = 4 puts the label to the right of the line so the two do not overlap.
text(rata_hujan, 3, paste("Mean =", round(rata_hujan, 1)),
     pos = 4, col = "red", cex = 0.8)

# c) Time series with a dashed horizontal line at the mean
plot(seq_along(hujan), hujan,
     type = "b",
     main = "c) Time Series Curah Hujan",
     xlab = "Bulan", ylab = "Curah hujan (mm)",
     xaxt = "n",  # suppress the numeric axis; month labels are added below
     pch  = 16, col = "steelblue", lwd = 2)
axis(1, at = seq_along(hujan), labels = bulan, cex.axis = 0.8)
abline(h = rata_hujan, col = "red", lty = 2, lwd = 1.5)

# Restore the graphics settings saved above.
par(op)

BAB 7: Uji Statistik Parametrik

Uji statistik digunakan untuk membuat keputusan berbasis data dengan tingkat kepercayaan tertentu.

| Uji | Fungsi R | Kegunaan |
|---|---|---|
| Uji t satu sampel | t.test(x, mu=μ₀) | Bandingkan rata-rata sampel vs nilai tertentu |
| Uji t dua sampel independen | t.test(x, y) | Bandingkan 2 kelompok berbeda |
| Uji t berpasangan | t.test(x, y, paired=TRUE) | Bandingkan sebelum-sesudah |
| ANOVA | aov() + summary() | Bandingkan ≥ 3 kelompok |
| Post-hoc | TukeyHSD() | Pasangan kelompok mana yang berbeda |

7.1 Uji t (t-test)

7.1.1 Uji t satu sampel (One sample t-test)

# One-sample t-test: does the mean of x differ from the reference value 20?
x <- c(23, 21, 24, 12, 15, 24, 34, 45, 9, 13)

hasil <- t.test(x, mu = 20)
print(hasil)
## 
##  One Sample t-test
## 
## data:  x
## t = 0.57682, df = 9, p-value = 0.5782
## alternative hypothesis: true mean is not equal to 20
## 95 percent confidence interval:
##  14.15641 29.84359
## sample estimates:
## mean of x 
##        22
# Decision at the 5% significance level (the p-value is a single number,
# so a plain if/else is enough).
cat("Keputusan:",
    if (hasil$p.value < 0.05) "TOLAK H0" else "GAGAL TOLAK H0")
## Keputusan: GAGAL TOLAK H0

7.1.2 Uji t dua sampel independen (Independent t-test)

# Two independent samples: compare the mean of group A with that of group B.
A <- c(72, 75, 68, 80, 71)
B <- c(85, 88, 82, 90, 87)

# With default arguments t.test() runs the Welch variant, as the header of
# the printed result below shows.
hasil <- t.test(A, B)
print(hasil)
## 
##  Welch Two Sample t-test
## 
## data:  A and B
## t = -5.3889, df = 6.9906, p-value = 0.001025
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -18.993708  -7.406292
## sample estimates:
## mean of x mean of y 
##      73.2      86.4
# Decision at the 5% significance level.
cat("Keputusan:",
    if (hasil$p.value < 0.05) "TOLAK H0" else "GAGAL TOLAK H0")
## Keputusan: TOLAK H0

7.1.3 Uji t berpasangan (Paired t-test)

Data tekanan darah sebelum dan sesudah pemberian obat pada 10 pasien.

sebelum = 145,138,152,141,149,136,155,143,148,140

sesudah = 138,130,145,135,141,129,147,136,140,133

Lakukan Uji t berpasangan!

# Paired measurements: blood pressure of the same 10 patients before and
# after the treatment.
sebelum <- c(145,138,152,141,149,136,155,143,148,140)
sesudah <- c(138,130,145,135,141,129,147,136,140,133)

# One-sided paired test: is the mean of (sebelum - sesudah) greater than 0?
hasil <- t.test(sebelum, sesudah,
                alternative = "greater",
                paired = TRUE)
print(hasil)
## 
##  Paired t-test
## 
## data:  sebelum and sesudah
## t = 34.202, df = 9, p-value = 3.853e-11
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
##  6.908745      Inf
## sample estimates:
## mean difference 
##             7.3
# Decision at the 5% significance level.
cat("Keputusan:",
    if (hasil$p.value < 0.05) {
      "TOLAK H0 — ada penurunan signifikan"
    } else {
      "GAGAL TOLAK H0"
    })
## Keputusan: TOLAK H0 — ada penurunan signifikan

7.2 ANOVA (Analysis of Variance)

# One-way ANOVA: scores of three teaching methods, 5 observations each.
nilai <- c(72,75,68,80,71, 85,88,82,90,87, 78,82,79,85,81)
metode <- rep(c("Konvensional","Online","Hybrid"), each = 5)
df <- data.frame(nilai = nilai, metode = factor(metode))

hasil_aov <- aov(nilai ~ metode, data = df)
# Keep the summary table so the p-value can be read from the same object
# that is printed.
tabel_aov <- summary(hasil_aov)
print(tabel_aov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## metode       2  440.4   220.2   17.62 0.000269 ***
## Residuals   12  150.0    12.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# First row of the table is the `metode` term; take its p-value.
p_val <- tabel_aov[[1]][1, "Pr(>F)"]

# Decision at the 5% significance level.
cat("Keputusan:",
    if (p_val < 0.05) {
      "TOLAK H0 - terdapat perbedaan signifikan"
    } else {
      "GAGAL TOLAK H0 - tidak terdapat perbedaan signifikan"
    })
## Keputusan: TOLAK H0 - terdapat perbedaan signifikan

7.3 Uji Post-hoc

# POST-HOC TEST (Tukey HSD): pairwise comparison of group means, run on the
# ANOVA fit `hasil_aov` created in the previous section.

tukey <- TukeyHSD(hasil_aov)
print(tukey)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = nilai ~ metode, data = df)
## 
## $metode
##                     diff         lwr       upr     p adj
## Konvensional-Hybrid -7.8 -13.7655245 -1.834476 0.0115376
## Online-Hybrid        5.4  -0.5655245 11.365524 0.0775970
## Online-Konvensional 13.2   7.2344755 19.165524 0.0001972
# Print a short reading guide for the "p adj" column.
cat("Interpretasi:\n",
    "p adj < 0.05 = pasangan berbeda signifikan",
    sep = "")
## Interpretasi:
## p adj < 0.05 = pasangan berbeda signifikan

7.4 Latihan Soal

Data Nilai Kalkulus 3 prodi (10 mahasiswa/prodi):

Statistika : 78,82,75,88,71,85,79,83,77,80

Matematika : 88,92,85,95,79,90,87,93,84,91

Ilmu Komputer : 70,74,68,80,65,76,72,78,69,73

  a) Uji normalitas | b) Homogenitas variance | c) ANOVA | d) Tukey HSD | e) Visualisasi
# Calculus scores for three study programmes, 10 students each.
stat <- c(78,82,75,88,71,85,79,83,77,80)
mat  <- c(88,92,85,95,79,90,87,93,84,91)
ik   <- c(70,74,68,80,65,76,72,78,69,73)

# Long format: one score per row plus its programme as a factor.
nilai_all <- c(stat, mat, ik)
prodi     <- rep(c("Statistika","Matematika","IlmuKomputer"), each = 10)
df_l72    <- data.frame(nilai = nilai_all, prodi = factor(prodi))

# a) NORMALITY TEST: Shapiro-Wilk, run separately for each programme
#    (in the order the programmes first appear in `prodi`).
for (p in unique(prodi)) {
  d <- nilai_all[prodi == p]
  sw <- shapiro.test(d)
  cat(p, ": W =", round(sw$statistic, 4), ", p =", round(sw$p.value, 4),
      "=", if (sw$p.value >= 0.05) "Normal  " else "Tidak Normal ✗", "\n")
}
## Statistika : W = 0.9965 , p = 1 = Normal   
## Matematika : W = 0.9692 , p = 0.8832 = Normal   
## IlmuKomputer : W = 0.9871 , p = 0.9919 = Normal
# b) HOMOGENEITY OF VARIANCE TEST: Bartlett
bart <- bartlett.test(nilai ~ prodi, data = df_l72)
cat("p-value:", round(bart$p.value, 4), "=",
    if (bart$p.value >= 0.05) {
      "Variance homogen  "
    } else {
      "Variance tidak homogen ✗"
    },
    "\n")
## p-value: 0.9846 = Variance homogen
# c) One-way ANOVA of score by programme
aov_l72 <- aov(nilai ~ prodi, data = df_l72)
print(summary(aov_l72))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## prodi        2 1266.9   633.4    27.3 3.28e-07 ***
## Residuals   27  626.5    23.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# First row of the summary table is the `prodi` term; take its p-value.
p_aov <- summary(aov_l72)[[1]][1, "Pr(>F)"]
cat("\nKeputusan:",
    if (p_aov < 0.05) {
      "TOLAK H0 = ada perbedaan nilai signifikan antar prodi"
    } else {
      "GAGAL TOLAK H0"
    },
    "\n")
## 
## Keputusan: TOLAK H0 = ada perbedaan nilai signifikan antar prodi
# d) Tukey HSD: pairwise comparison of the programme means
print(TukeyHSD(aov_l72))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = nilai ~ prodi, data = df_l72)
## 
## $prodi
##                         diff        lwr       upr     p adj
## Matematika-IlmuKomputer 15.9  10.558745 21.241255 0.0000002
## Statistika-IlmuKomputer  7.3   1.958745 12.641255 0.0059567
## Statistika-Matematika   -8.6 -13.941255 -3.258745 0.0012733
# e) Full visualisation: two panels side by side. Keep the previous graphics
# settings so they can be restored at the end instead of leaking into any
# later plot.
op <- par(mfrow = c(1, 2), mar = c(5, 4, 4, 2))

# Boxplot of scores per programme (boxes follow the factor level order)
boxplot(nilai ~ prodi, data = df_l72,
        col    = c("steelblue","coral","gold"),
        main   = "Distribusi Nilai per Prodi",
        xlab   = "Program Studi", ylab = "Nilai",
        cex.axis = 0.8)

# Mean ± SD per programme
means <- tapply(df_l72$nilai, df_l72$prodi, mean)
sds   <- tapply(df_l72$nilai, df_l72$prodi, sd)
# Take the order from the factor itself rather than retyping the names, so
# bars, colours and error bars stay aligned with the boxplot.
# The short labels in names.arg below assume the alphabetical level order
# IlmuKomputer, Matematika, Statistika.
prodi_unik <- levels(df_l72$prodi)

# barplot() returns the bar midpoints, used below to position the error bars.
mp <- barplot(means[prodi_unik],
              col  = c("steelblue","coral","gold"),
              main = "Mean ± SD per Prodi",
              ylab = "Nilai", ylim = c(0, 110),
              names.arg = c("IK","Mat","Stat"))
# Error bars from mean - SD to mean + SD, with flat caps on both ends
# (angle = 90, code = 3).
arrows(mp, means[prodi_unik] - sds[prodi_unik],
       mp, means[prodi_unik] + sds[prodi_unik],
       angle = 90, code = 3, length = 0.1, lwd = 2)

# Restore the graphics settings saved above.
par(op)