# MEMUAT PACKAGE DAN DATA
library(ppcor)
## Warning: package 'ppcor' was built under R version 4.5.2
## Loading required package: MASS
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# ------------------------------------------- 
# ANALISIS KORELASI DATA STUDENTS PERFORMANCE
# Prodi Matematika - Statistik Dasar 
# ------------------------------------------- 

# Read the Students Performance CSV into a data frame.
# NOTE(review): this is an absolute, machine-specific path; adjust it when the
# script is run on another computer.
data_siswa <- read.csv("C:/Users/Msi user/Downloads/StudentsPerformance.csv")

# Fail early with a clear message if the expected columns are absent.
# `data_siswa$kolom` silently yields NULL for a missing column, which would
# otherwise only surface later as an unrelated error in sd()/cor.test().
kolom_wajib <- c("reading.score", "writing.score")
kolom_hilang <- setdiff(kolom_wajib, names(data_siswa))
if (length(kolom_hilang) > 0) {
  stop(
    "Kolom berikut tidak ditemukan pada data: ",
    paste(kolom_hilang, collapse = ", "),
    call. = FALSE
  )
}

# Extract the two variables to be analysed as plain vectors
reading_score <- data_siswa$reading.score
writing_score <- data_siswa$writing.score

# Combine them into a two-column data frame
data_mahasiswa <- data.frame(reading_score, writing_score)
# Show the first rows as a quick sanity check
print(head(data_mahasiswa))
##   reading_score writing_score
## 1            72            74
## 2            90            88
## 3            95            93
## 4            57            44
## 5            78            75
## 6            83            78
# Per-column summary (min, quartiles, median, mean, max)
summary(object = data_mahasiswa)
##  reading_score    writing_score   
##  Min.   : 17.00   Min.   : 10.00  
##  1st Qu.: 59.00   1st Qu.: 57.75  
##  Median : 70.00   Median : 69.00  
##  Mean   : 69.17   Mean   : 68.05  
##  3rd Qu.: 79.00   3rd Qu.: 79.00  
##  Max.   :100.00   Max.   :100.00
# Standard deviation of each score
sd(x = reading_score)
## [1] 14.60019
sd(x = writing_score)
## [1] 15.19566
# Pearson correlation test between reading and writing scores
hasil_korelasi <- cor.test(
  x = reading_score,
  y = writing_score,
  method = "pearson"
)
# Print the full test report
print(hasil_korelasi)
## 
##  Pearson's product-moment correlation
## 
## data:  reading_score and writing_score
## t = 101.23, df = 998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9487506 0.9597921
## sample estimates:
##       cor 
## 0.9545981
# Significance level shared by all hypothesis decisions in this script
alpha <- 0.05

# Decision rule: H0 is rejected only when the p-value is below alpha
if (!(hasil_korelasi$p.value < alpha)) {
  cat("Keputusan Pearson: Gagal Tolak H0\n")
  cat("Artinya tidak terdapat hubungan signifikan antara Reading Score dan Writing Score.\n\n")
} else {
  cat("Keputusan Pearson: Tolak H0\n")
  cat("Artinya terdapat hubungan signifikan antara Reading Score dan Writing Score.\n\n")
}
## Keputusan Pearson: Tolak H0
## Artinya terdapat hubungan signifikan antara Reading Score dan Writing Score.
# Scatter plot of writing score against reading score (solid blue points)
plot(
  x = reading_score,
  y = writing_score,
  pch = 19,
  col = "blue",
  main = "Scatter Plot Reading Score vs Writing Score",
  xlab = "Reading Score",
  ylab = "Writing Score"
)

# Overlay the least-squares line of writing score on reading score
abline(
  reg = lm(writing_score ~ reading_score),
  lwd = 2,
  col = "red"
)

# ------------------------------------------- 
# Uji Korelasi Spearman (Data Tidak Normal)
# Data Students Performance
# ------------------------------------------- 

# Spearman rank correlation test; exact = FALSE asks cor.test not to compute
# an exact p-value
hasil_spearman <- cor.test(
  x = reading_score,
  y = writing_score,
  method = "spearman",
  exact = FALSE
)

print(hasil_spearman)
## 
##  Spearman's rank correlation rho
## 
## data:  reading_score and writing_score
## S = 8507905, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9489525
# Hypothesis decision: reject H0 when the p-value is below alpha
if (hasil_spearman$p.value < alpha) {
  cat(
    "Keputusan Spearman: Tolak H0\n",
    "Artinya terdapat hubungan signifikan (non-parametrik).\n\n",
    sep = ""
  )
} else {
  cat(
    "Keputusan Spearman: Gagal Tolak H0\n",
    "Artinya tidak terdapat hubungan signifikan.\n\n",
    sep = ""
  )
}
## Keputusan Spearman: Tolak H0
## Artinya terdapat hubungan signifikan (non-parametrik).
# -------------------------------------------
# ADDITIONAL VARIABLE: MATH SCORE
# (columns are taken from the loaded dataset; nothing is simulated here)
# -------------------------------------------

# Seed kept as in the original script; none of the statements in this
# section draw random numbers.
set.seed(123)

# Pull the three score columns out of the raw data as plain vectors
writing_score <- data_siswa$writing.score
reading_score <- data_siswa$reading.score
math_score    <- data_siswa$math.score

# Rebuild the analysis data frame, now with math score as the first column
data_mahasiswa <- data.frame(
  math_score    = math_score,
  reading_score = reading_score,
  writing_score = writing_score
)

# Scatter plot of reading score against math score
plot(
  x = math_score,
  y = reading_score,
  pch = 19,
  main = "Scatter Plot Math Score vs Reading Score",
  xlab = "Math Score",
  ylab = "Reading Score"
)

# Overlay the least-squares line of reading score on math score
abline(reg = lm(reading_score ~ math_score), lwd = 2)

library(ggplot2)

# ggplot2 scatter of writing vs reading score with a linear fit and its
# confidence band (se = TRUE); left as a bare expression so it auto-prints
ggplot(
  data = data_mahasiswa,
  mapping = aes(x = reading_score, y = writing_score)
) +
  theme_minimal() +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE) +
  ggtitle("Hubungan Reading Score dan Writing Score") +
  xlab("Reading Score") +
  ylab("Writing Score")
## `geom_smooth()` using formula = 'y ~ x'

# Pairwise correlation matrix of all columns in data_mahasiswa
matriks_korelasi <- cor(data_mahasiswa)
# Heatmap of the correlation matrix.
# heatmap() defaults to scale = "row" for a non-symmetric call, which centres
# and scales every row, so the colours would no longer reflect the actual
# correlation coefficients. Treat the matrix as symmetric and disable scaling
# so each cell is coloured by its raw correlation value.
heatmap(matriks_korelasi, symm = TRUE, scale = "none")

# Kendall rank correlation (tau) test between reading and writing scores
hasil_kendall <- cor.test(
  x = reading_score,
  y = writing_score,
  method = "kendall"
)

print(hasil_kendall)
## 
##  Kendall's rank correlation tau
## 
## data:  reading_score and writing_score
## z = 38.114, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##       tau 
## 0.8200575
# Hypothesis decision: H0 is retained when the p-value is at least alpha
if (hasil_kendall$p.value >= alpha) {
  cat("Keputusan Kendall: Gagal Tolak H0\n")
  cat("Artinya tidak terdapat hubungan signifikan.\n\n")
} else {
  cat("Keputusan Kendall: Tolak H0\n")
  cat("Artinya terdapat hubungan signifikan.\n\n")
}
## Keputusan Kendall: Tolak H0
## Artinya terdapat hubungan signifikan.
library(ppcor)
# Partial correlation of reading and writing scores, controlling for math
# score (pcor.test from the ppcor package)
hasil_parsial <- with(
  data_mahasiswa,
  pcor.test(x = reading_score, y = writing_score, z = math_score)
)

print(hasil_parsial)
##    estimate       p.value statistic    n gp  Method
## 1 0.8687522 1.587758e-306  55.38877 1000  1 pearson
# Hypothesis decision: pick the message pair by comparing the p-value to alpha
cat(
  if (hasil_parsial$p.value < alpha) {
    c(
      "Keputusan Parsial: Tolak H0\n",
      "Artinya terdapat hubungan signifikan antara Reading dan Writing setelah dikontrol oleh Math Score.\n"
    )
  } else {
    c(
      "Keputusan Parsial: Gagal Tolak H0\n",
      "Artinya tidak terdapat hubungan signifikan setelah dikontrol oleh Math Score.\n"
    )
  },
  sep = ""
)
## Keputusan Parsial: Tolak H0
## Artinya terdapat hubungan signifikan antara Reading dan Writing setelah dikontrol oleh Math Score.