# 1. MEMUAT PACKAGE DAN DATA
library(ggplot2)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Membaca data
data <- read.csv("~/Kuliah/sem/analisis regresi/university_student_retention_dataset_2134.csv")
# Melihat apakah attendance_rate (X) berpengaruh terhadap gpa_current (Y)
# 2. DATA CLEANING
data_clean <- data %>% filter(!is.na(gpa_current), !is.na(attendance_rate))
# 3. ANALISIS DESKRIPTIF
cat("\n=== Statistik Deskriptif ===\n")
##
## === Statistik Deskriptif ===
summary(data_clean[, c("gpa_current", "attendance_rate")])
## gpa_current attendance_rate
## Min. :1.500 Min. :0.5000
## 1st Qu.:2.140 1st Qu.:0.6400
## Median :2.770 Median :0.7500
## Mean :2.757 Mean :0.7528
## 3rd Qu.:3.370 3rd Qu.:0.8700
## Max. :4.000 Max. :1.0000
# Scatter plot
ggplot(data_clean, aes(x = attendance_rate, y = gpa_current)) +
geom_point(color = "blue", size = 3) +
labs(title = "Hubungan Attendance Rate dan GPA Current",
x = "Attendance Rate",
y = "GPA Current") +
theme_minimal()

# Korelasi Pearson
cor_test <- cor.test(data_clean$attendance_rate, data_clean$gpa_current)
cat("\n=== Korelasi Pearson ===\n")
##
## === Korelasi Pearson ===
cat("Korelasi =", round(cor_test$estimate, 4), "\n")
## Korelasi = 0.0051
cat("p-value =", round(cor_test$p.value, 6), "\n")
## p-value = 0.815401
# 4. MEMBANGUN MODEL REGRESI
model <- lm(gpa_current ~ attendance_rate, data = data_clean)
cat("\n=== Ringkasan Model Regresi ===\n")
##
## === Ringkasan Model Regresi ===
summary(model)
##
## Call:
## lm(formula = gpa_current ~ attendance_rate, data = data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26256 -0.61984 0.01491 0.61409 1.24327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.73798 0.08318 32.916 <2e-16 ***
## attendance_rate 0.02534 0.10855 0.233 0.815
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7201 on 2132 degrees of freedom
## Multiple R-squared: 2.557e-05, Adjusted R-squared: -0.0004435
## F-statistic: 0.05452 on 1 and 2132 DF, p-value: 0.8154
# 5. UJI ASUMSI REGRESI LINEAR
cat("\n=== UJI ASUMSI REGRESI LINEAR ===\n")
##
## === UJI ASUMSI REGRESI LINEAR ===
# 5.1 Normalitas residual
shapiro_test <- shapiro.test(residuals(model))
cat("\n1. UJI NORMALITAS (Shapiro-Wilk):\n")
##
## 1. UJI NORMALITAS (Shapiro-Wilk):
cat(" W =", round(shapiro_test$statistic, 4), "\n")
## W = 0.9549
cat(" p-value =", round(shapiro_test$p.value, 6), "\n")
## p-value = 0
if(shapiro_test$p.value > 0.05) {
cat(" Keputusan: Residual berdistribusi normal\n")
} else {
cat(" Keputusan: Residual tidak normal\n")
}
## Keputusan: Residual tidak normal
# Q-Q Plot
qqnorm(residuals(model), main = "Q-Q Plot Residual")
qqline(residuals(model), col = "red")

# 5.2 Homoskedastisitas
bp_test <- bptest(model)
cat("\n2. UJI HOMOSKEDASTISITAS (Breusch-Pagan):\n")
##
## 2. UJI HOMOSKEDASTISITAS (Breusch-Pagan):
cat(" LM =", round(bp_test$statistic, 4), "\n")
## LM = 0.0546
cat(" p-value =", round(bp_test$p.value, 6), "\n")
## p-value = 0.815318
if(bp_test$p.value > 0.05) {
cat(" Keputusan: Varian residual homogen\n")
} else {
cat(" Keputusan: Ada heteroskedastisitas\n")
}
## Keputusan: Varian residual homogen
# Plot residual vs fitted
plot(fitted(model), residuals(model),
main = "Residual vs Fitted Values",
xlab = "Fitted Values",
ylab = "Residuals",
pch = 19, col = "blue")
abline(h = 0, col = "red", lty = 2)

# 5.3 Autokorelasi
dw_test <- dwtest(model)
cat("\n3. UJI AUTOKORELASI (Durbin-Watson):\n")
##
## 3. UJI AUTOKORELASI (Durbin-Watson):
cat(" DW =", round(dw_test$statistic, 4), "\n")
## DW = 1.9511
cat(" p-value =", round(dw_test$p.value, 6), "\n")
## p-value = 0.129258
if(dw_test$p.value > 0.05) {
cat(" Keputusan: Tidak ada autokorelasi\n")
} else {
cat(" Keputusan: Ada autokorelasi\n")
}
## Keputusan: Tidak ada autokorelasi
# 6. INTERPRETASI KOEFISIEN
cat("\n=== INTERPRETASI KOEFISIEN ===\n")
##
## === INTERPRETASI KOEFISIEN ===
intercept <- coef(model)[1]
slope <- coef(model)[2]
cat("Persamaan Regresi:\n")
## Persamaan Regresi:
cat("gpa_current =", round(intercept, 3), "+", round(slope, 3), "* attendance_rate\n")
## gpa_current = 2.738 + 0.025 * attendance_rate
cat("\nInterpretasi:\n")
##
## Interpretasi:
cat("1. Intercept (β0 =", round(intercept, 3), "):\n")
## 1. Intercept (β0 = 2.738 ):
cat(" GPA saat attendance_rate = 0 adalah", round(intercept, 3), "\n")
## GPA saat attendance_rate = 0 adalah 2.738
cat("2. Slope (β1 =", round(slope, 3), "):\n")
## 2. Slope (β1 = 0.025 ):
cat(" Setiap kenaikan 1 unit attendance_rate, GPA berubah sebesar",
round(slope, 3), "poin\n")
## Setiap kenaikan 1 unit attendance_rate, GPA berubah sebesar 0.025 poin
# 7. INTERVAL KEPERCAYAAN
cat("\n=== INTERVAL KEPERCAYAAN 95% ===\n")
##
## === INTERVAL KEPERCAYAAN 95% ===
conf_int <- confint(model, level = 0.95)
cat("Intercept: [", round(conf_int[1,1], 3), ", ", round(conf_int[1,2], 3), "]\n", sep="")
## Intercept: [2.575, 2.901]
cat("Slope: [", round(conf_int[2,1], 3), ", ", round(conf_int[2,2], 3), "]\n", sep="")
## Slope: [-0.188, 0.238]
# 8. UJI HIPOTESIS UNTUK SLOPE
summary_model <- summary(model)
r_squared <- summary_model$r.squared
slope_pvalue <- summary_model$coefficients[2, 4]
cat("\n=== UJI HIPOTESIS (β1) ===\n")
##
## === UJI HIPOTESIS (β1) ===
cat("H0: β1 = 0 (tidak ada hubungan linear)\n")
## H0: β1 = 0 (tidak ada hubungan linear)
cat("H1: β1 ≠ 0 (ada hubungan linear)\n")
## H1: β1 ≠ 0 (ada hubungan linear)
cat("p-value =", round(slope_pvalue, 6), "\n")
## p-value = 0.815401
if(slope_pvalue < 0.05) {
cat("Keputusan: Tolak H0, hubungan signifikan\n")
} else {
cat("Keputusan: Gagal tolak H0, hubungan tidak signifikan\n")
}
## Keputusan: Gagal tolak H0, hubungan tidak signifikan
# 9. KOEFISIEN DETERMINASI
new_data <- data.frame(attendance_rate = c(0.6, 0.8, 1.0))
prediction <- predict(model, newdata = new_data, interval = "confidence", level = 0.95)
cat("\n=== PREDIKSI + INTERVAL KEPERCAYAAN 95% ===\n")
##
## === PREDIKSI + INTERVAL KEPERCAYAAN 95% ===
for(i in 1:nrow(new_data)) {
cat("Attendance_rate =", new_data$attendance_rate[i], "\n")
cat(" Prediksi GPA =", round(prediction[i, "fit"], 3), "\n")
cat(" CI 95%: [", round(prediction[i, "lwr"], 3), ", ", round(prediction[i, "upr"], 3), "]\n\n", sep = "")
}
## Attendance_rate = 0.6
## Prediksi GPA = 2.753
## CI 95%: [2.709, 2.798]
##
## Attendance_rate = 0.8
## Prediksi GPA = 2.758
## CI 95%: [2.726, 2.79]
##
## Attendance_rate = 1
## Prediksi GPA = 2.763
## CI 95%: [2.702, 2.824]
# Tampilkan tabel prediksi
hasil_prediksi <- data.frame(new_data, prediction)
print(hasil_prediksi)
## attendance_rate fit lwr upr
## 1 0.6 2.753186 2.708556 2.797815
## 2 0.8 2.758254 2.726075 2.790434
## 3 1.0 2.763323 2.702461 2.824186
# 10. VISUALISASI MODEL
ggplot(data_clean, aes(x = attendance_rate, y = gpa_current)) +
geom_point(color = "blue", size = 3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "pink") +
labs(title = "Garis Regresi Linear",
subtitle = paste("Y =", round(intercept, 2), "+", round(slope, 2), "X"),
x = "Attendance Rate",
y = "GPA Current") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# 9. PREDIKSI
new_data <- data.frame(attendance_rate = c(0.6, 0.8))
prediction <- predict(model, newdata = new_data, interval = "confidence")
cat("\n=== PREDIKSI ===\n")
##
## === PREDIKSI ===
cat("Untuk attendance_rate = 0.6, prediksi GPA =", round(prediction[1, "fit"], 2), "\n")
## Untuk attendance_rate = 0.6, prediksi GPA = 2.75
cat("Untuk attendance_rate = 0.8, prediksi GPA =", round(prediction[2, "fit"], 2), "\n")
## Untuk attendance_rate = 0.8, prediksi GPA = 2.76
# 10. DIAGNOSTIC PLOTS
par(mfrow = c(2, 2))
plot(model, which = 1:4)

par(mfrow = c(1, 1))
# 11. RINGKASAN LENGKAP
cat("\n=== RINGKASAN ANALISIS ===\n")
##
## === RINGKASAN ANALISIS ===
cat("1. Model: gpa_current = β0 + β1*attendance_rate + ε\n")
## 1. Model: gpa_current = β0 + β1*attendance_rate + ε
cat("2. Estimasi: Y =", round(intercept, 3), "+", round(slope, 3), "* X\n")
## 2. Estimasi: Y = 2.738 + 0.025 * X
cat("3. R² =", round(r_squared, 4), "(", round(r_squared*100, 1), "%)\n")
## 3. R² = 0 ( 0 %)
# Uji F (model) -> yang benar ambil p-value dari F-test
f_pvalue <- pf(summary_model$fstatistic[1],
summary_model$fstatistic[2],
summary_model$fstatistic[3],
lower.tail = FALSE)
cat("4. Uji F (model): p-value =", round(f_pvalue, 6), "\n")
## 4. Uji F (model): p-value = 0.815401
cat("5. Asumsi:\n")
## 5. Asumsi:
cat(" - Normalitas: p =", round(shapiro_test$p.value, 4), "\n")
## - Normalitas: p = 0
cat(" - Homoskedastisitas: p =", round(bp_test$p.value, 4), "\n")
## - Homoskedastisitas: p = 0.8153
cat(" - Autokorelasi: p =", round(dw_test$p.value, 4), "\n")
## - Autokorelasi: p = 0.1293
# Simpan hasil
hasil <- list(
model = model,
coefficients = coef(model),
r_squared = r_squared,
assumptions = list(
normality = shapiro_test$p.value,
homoscedasticity = bp_test$p.value,
autocorrelation = dw_test$p.value
),
confidence_intervals = conf_int
)
print("Analisis regresi linear sederhana selesai!")
## [1] "Analisis regresi linear sederhana selesai!"