# MEMUAT PACKAGE DAN DATA
library(ppcor)
## Warning: package 'ppcor' was built under R version 4.5.2
## Loading required package: MASS
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# -------------------------------------------
# ANALISIS KORELASI DATA STUDENTS PERFORMANCE
# Prodi Matematika - Statistik Dasar
# -------------------------------------------
# Read the Students Performance CSV.
# NOTE(review): the path is machine-specific; adjust it when running elsewhere.
data_siswa <- read.csv("C:/Users/Msi user/Downloads/StudentsPerformance.csv")
# Fail early with a clear message when an expected column is absent: extracting
# a missing column silently yields NULL and only breaks later in the analysis.
if (!all(c("reading.score", "writing.score") %in% names(data_siswa))) {
  stop(
    "Kolom reading.score dan writing.score harus ada di data_siswa",
    call. = FALSE
  )
}
# Extract the variables to analyse ([[ ]] matches names exactly, unlike `$`)
reading_score <- data_siswa[["reading.score"]]
writing_score <- data_siswa[["writing.score"]]
# Combine both variables into one data frame
data_mahasiswa <- data.frame(reading_score, writing_score)
# Show the first rows
print(head(data_mahasiswa))
## reading_score writing_score
## 1 72 74
## 2 90 88
## 3 95 93
## 4 57 44
## 5 78 75
## 6 83 78
# Simple descriptive statistics. Printed explicitly, like the other results in
# this script, so the table also appears when the file is run via source(),
# where top-level values are not auto-printed by default.
print(summary(data_mahasiswa))
## reading_score writing_score
## Min. : 17.00 Min. : 10.00
## 1st Qu.: 59.00 1st Qu.: 57.75
## Median : 70.00 Median : 69.00
## Mean : 69.17 Mean : 68.05
## 3rd Qu.: 79.00 3rd Qu.: 79.00
## Max. :100.00 Max. :100.00
# Standard deviation of each score. Printed explicitly so the values also
# appear when the file is run via source(), where top-level values are not
# auto-printed by default.
print(sd(reading_score))
## [1] 14.60019
print(sd(writing_score))
## [1] 15.19566
# Pearson product-moment correlation test between the two scores
hasil_korelasi <- cor.test(
  x = reading_score,
  y = writing_score,
  method = "pearson"
)
# Show the test result
print(hasil_korelasi)
##
## Pearson's product-moment correlation
##
## data: reading_score and writing_score
## t = 101.23, df = 998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9487506 0.9597921
## sample estimates:
## cor
## 0.9545981
# Significance level; also used by the later hypothesis decisions
alpha <- 0.05
# Report the Pearson decision: H0 is rejected when the p-value is below alpha
cat(
  if (hasil_korelasi$p.value < alpha) {
    c(
      "Keputusan Pearson: Tolak H0\n",
      "Artinya terdapat hubungan signifikan antara Reading Score dan Writing Score.\n\n"
    )
  } else {
    c(
      "Keputusan Pearson: Gagal Tolak H0\n",
      "Artinya tidak terdapat hubungan signifikan antara Reading Score dan Writing Score.\n\n"
    )
  },
  sep = ""
)
## Keputusan Pearson: Tolak H0
## Artinya terdapat hubungan signifikan antara Reading Score dan Writing Score.
# Scatter plot of writing score against reading score
plot(
  x = reading_score,
  y = writing_score,
  pch = 19,
  col = "blue",
  xlab = "Reading Score",
  ylab = "Writing Score",
  main = "Scatter Plot Reading Score vs Writing Score"
)
# Overlay the fitted linear regression line
abline(reg = lm(writing_score ~ reading_score), lwd = 2, col = "red")

# -------------------------------------------
# Spearman correlation test (for non-normal data)
# Students Performance data
# -------------------------------------------
# Spearman rank correlation; exact = FALSE means no exact p-value is computed
hasil_spearman <- cor.test(
  x = reading_score,
  y = writing_score,
  exact = FALSE,
  method = "spearman"
)
print(hasil_spearman)
##
## Spearman's rank correlation rho
##
## data: reading_score and writing_score
## S = 8507905, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9489525
# Report the Spearman decision: H0 is rejected when the p-value is below alpha
cat(
  if (hasil_spearman$p.value < alpha) {
    c(
      "Keputusan Spearman: Tolak H0\n",
      "Artinya terdapat hubungan signifikan (non-parametrik).\n\n"
    )
  } else {
    c(
      "Keputusan Spearman: Gagal Tolak H0\n",
      "Artinya tidak terdapat hubungan signifikan.\n\n"
    )
  },
  sep = ""
)
## Keputusan Spearman: Tolak H0
## Artinya terdapat hubungan signifikan (non-parametrik).
# -------------------------------------------
# THREE-VARIABLE DATA (adds Math Score from the same CSV; nothing is simulated)
# -------------------------------------------
# NOTE(review): no random numbers are drawn in the visible code that follows,
# so this seed looks unnecessary; kept in case later code relies on it --
# TODO confirm.
set.seed(123)
# Fail early with a clear message when an expected column is absent: extracting
# a missing column silently yields NULL and would drop it from the data frame.
if (!all(c("math.score", "reading.score", "writing.score") %in%
  names(data_siswa))) {
  stop(
    "Kolom math.score, reading.score, dan writing.score harus ada di data_siswa",
    call. = FALSE
  )
}
# [[ ]] matches column names exactly, unlike `$`
math_score <- data_siswa[["math.score"]]
reading_score <- data_siswa[["reading.score"]]
writing_score <- data_siswa[["writing.score"]]
# Rebuild the data frame, now with all three scores
data_mahasiswa <- data.frame(math_score, reading_score, writing_score)
# Scatter plot of reading score against math score
plot(
  x = math_score,
  y = reading_score,
  pch = 19,
  xlab = "Math Score",
  ylab = "Reading Score",
  main = "Scatter Plot Math Score vs Reading Score"
)
# Overlay the fitted linear regression line
abline(reg = lm(reading_score ~ math_score), lwd = 2)

library(ggplot2)
# Scatter plot of writing vs. reading score with a fitted linear trend line and
# its standard-error band (se = TRUE). The plot is printed explicitly: a ggplot
# object is only drawn when printed, which does not happen automatically when
# the script is run via source() or the call sits inside a function or loop.
print(
  ggplot(data_mahasiswa, aes(x = reading_score, y = writing_score)) +
    geom_point(size = 3) +
    geom_smooth(method = "lm", se = TRUE) +
    labs(
      title = "Hubungan Reading Score dan Writing Score",
      x = "Reading Score",
      y = "Writing Score"
    ) +
    theme_minimal()
)
## `geom_smooth()` using formula = 'y ~ x'

# Pairwise correlation matrix of the columns of data_mahasiswa
matriks_korelasi <- cor(data_mahasiswa)
# Heatmap of the correlations. heatmap() standardises each row by default
# (scale = "row"), which would make the colours stop reflecting the actual
# correlation values, so scaling is switched off. symm = TRUE treats the
# square matrix symmetrically (rows and columns are ordered the same way).
heatmap(matriks_korelasi, scale = "none", symm = TRUE)

# Kendall rank correlation test between the two scores
hasil_kendall <- cor.test(
  x = reading_score,
  y = writing_score,
  method = "kendall"
)
print(hasil_kendall)
##
## Kendall's rank correlation tau
##
## data: reading_score and writing_score
## z = 38.114, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.8200575
# Report the Kendall decision: H0 is rejected when the p-value is below alpha
cat(
  if (hasil_kendall$p.value < alpha) {
    c(
      "Keputusan Kendall: Tolak H0\n",
      "Artinya terdapat hubungan signifikan.\n\n"
    )
  } else {
    c(
      "Keputusan Kendall: Gagal Tolak H0\n",
      "Artinya tidak terdapat hubungan signifikan.\n\n"
    )
  },
  sep = ""
)
## Keputusan Kendall: Tolak H0
## Artinya terdapat hubungan signifikan.
library(ppcor)
# Partial correlation of reading and writing score, controlling for math score
hasil_parsial <- with(
  data_mahasiswa,
  pcor.test(x = reading_score, y = writing_score, z = math_score)
)
print(hasil_parsial)
## estimate p.value statistic n gp Method
## 1 0.8687522 1.587758e-306 55.38877 1000 1 pearson
# Report the partial-correlation decision: H0 is rejected when the p-value is
# below alpha
cat(
  if (hasil_parsial$p.value < alpha) {
    c(
      "Keputusan Parsial: Tolak H0\n",
      "Artinya terdapat hubungan signifikan antara Reading dan Writing setelah dikontrol oleh Math Score.\n"
    )
  } else {
    c(
      "Keputusan Parsial: Gagal Tolak H0\n",
      "Artinya tidak terdapat hubungan signifikan setelah dikontrol oleh Math Score.\n"
    )
  },
  sep = ""
)
## Keputusan Parsial: Tolak H0
## Artinya terdapat hubungan signifikan antara Reading dan Writing setelah dikontrol oleh Math Score.