mandiri

# 1. MEMUAT PACKAGE DAN DATA
library(ggplot2)
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(car)

## Loading required package: carData

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Membaca data
data <- read.csv("~/Kuliah/sem/analisis regresi/university_student_retention_dataset_2134.csv")

# Melihat apakah attendance_rate (X) berpengaruh terhadap gpa_current (Y)

# 2. DATA CLEANING 
data_clean <- data %>% filter(!is.na(gpa_current), !is.na(attendance_rate)) 

# 3. ANALISIS DESKRIPTIF
cat("\n=== Statistik Deskriptif ===\n")

## 
## === Statistik Deskriptif ===

summary(data_clean[, c("gpa_current", "attendance_rate")])

##   gpa_current    attendance_rate 
##  Min.   :1.500   Min.   :0.5000  
##  1st Qu.:2.140   1st Qu.:0.6400  
##  Median :2.770   Median :0.7500  
##  Mean   :2.757   Mean   :0.7528  
##  3rd Qu.:3.370   3rd Qu.:0.8700  
##  Max.   :4.000   Max.   :1.0000

# Scatter plot
ggplot(data_clean, aes(x = attendance_rate, y = gpa_current)) +
  geom_point(color = "blue", size = 3) +
  labs(title = "Hubungan Attendance Rate dan GPA Current",
       x = "Attendance Rate",
       y = "GPA Current") +
  theme_minimal()

# Korelasi Pearson
cor_test <- cor.test(data_clean$attendance_rate, data_clean$gpa_current)
cat("\n=== Korelasi Pearson ===\n")

## 
## === Korelasi Pearson ===

cat("Korelasi =", round(cor_test$estimate, 4), "\n")

## Korelasi = 0.0051

cat("p-value =", round(cor_test$p.value, 6), "\n")

## p-value = 0.815401

# 4. MEMBANGUN MODEL REGRESI
model <- lm(gpa_current ~ attendance_rate, data = data_clean)

cat("\n=== Ringkasan Model Regresi ===\n")

## 
## === Ringkasan Model Regresi ===

summary(model)

## 
## Call:
## lm(formula = gpa_current ~ attendance_rate, data = data_clean)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26256 -0.61984  0.01491  0.61409  1.24327 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.73798    0.08318  32.916   <2e-16 ***
## attendance_rate  0.02534    0.10855   0.233    0.815    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7201 on 2132 degrees of freedom
## Multiple R-squared:  2.557e-05,  Adjusted R-squared:  -0.0004435 
## F-statistic: 0.05452 on 1 and 2132 DF,  p-value: 0.8154

# 5. UJI ASUMSI REGRESI LINEAR
cat("\n=== UJI ASUMSI REGRESI LINEAR ===\n")

## 
## === UJI ASUMSI REGRESI LINEAR ===

# 5.1 Normalitas residual
shapiro_test <- shapiro.test(residuals(model))
cat("\n1. UJI NORMALITAS (Shapiro-Wilk):\n")

## 
## 1. UJI NORMALITAS (Shapiro-Wilk):

cat("   W =", round(shapiro_test$statistic, 4), "\n")

##    W = 0.9549

cat("   p-value =", round(shapiro_test$p.value, 6), "\n")

##    p-value = 0

if(shapiro_test$p.value > 0.05) {
  cat("   Keputusan: Residual berdistribusi normal\n")
} else {
  cat("   Keputusan: Residual tidak normal\n")
}

##    Keputusan: Residual tidak normal

# Q-Q Plot
qqnorm(residuals(model), main = "Q-Q Plot Residual")
qqline(residuals(model), col = "red")

# 5.2 Homoskedastisitas
bp_test <- bptest(model)
cat("\n2. UJI HOMOSKEDASTISITAS (Breusch-Pagan):\n")

## 
## 2. UJI HOMOSKEDASTISITAS (Breusch-Pagan):

cat("   LM =", round(bp_test$statistic, 4), "\n")

##    LM = 0.0546

cat("   p-value =", round(bp_test$p.value, 6), "\n")

##    p-value = 0.815318

if(bp_test$p.value > 0.05) {
  cat("   Keputusan: Varian residual homogen\n")
} else {
  cat("   Keputusan: Ada heteroskedastisitas\n")
}

##    Keputusan: Varian residual homogen

# Plot residual vs fitted
plot(fitted(model), residuals(model),
     main = "Residual vs Fitted Values",
     xlab = "Fitted Values",
     ylab = "Residuals",
     pch = 19, col = "blue")
abline(h = 0, col = "red", lty = 2)

# 5.3 Autokorelasi
dw_test <- dwtest(model)
cat("\n3. UJI AUTOKORELASI (Durbin-Watson):\n")

## 
## 3. UJI AUTOKORELASI (Durbin-Watson):

cat("   DW =", round(dw_test$statistic, 4), "\n")

##    DW = 1.9511

cat("   p-value =", round(dw_test$p.value, 6), "\n")

##    p-value = 0.129258

if(dw_test$p.value > 0.05) {
  cat("   Keputusan: Tidak ada autokorelasi\n")
} else {
  cat("   Keputusan: Ada autokorelasi\n")
}

##    Keputusan: Tidak ada autokorelasi

# 6. INTERPRETASI KOEFISIEN
cat("\n=== INTERPRETASI KOEFISIEN ===\n")

## 
## === INTERPRETASI KOEFISIEN ===

intercept <- coef(model)[1]
slope <- coef(model)[2]

cat("Persamaan Regresi:\n")

## Persamaan Regresi:

cat("gpa_current =", round(intercept, 3), "+", round(slope, 3), "* attendance_rate\n")

## gpa_current = 2.738 + 0.025 * attendance_rate

cat("\nInterpretasi:\n")

## 
## Interpretasi:

cat("1. Intercept (β0 =", round(intercept, 3), "):\n")

## 1. Intercept (β0 = 2.738 ):

cat("   GPA saat attendance_rate = 0 adalah", round(intercept, 3), "\n")

##    GPA saat attendance_rate = 0 adalah 2.738

cat("2. Slope (β1 =", round(slope, 3), "):\n")

## 2. Slope (β1 = 0.025 ):

cat("   Setiap kenaikan 1 unit attendance_rate, GPA berubah sebesar",
    round(slope, 3), "poin\n")

##    Setiap kenaikan 1 unit attendance_rate, GPA berubah sebesar 0.025 poin

# 7. INTERVAL KEPERCAYAAN
cat("\n=== INTERVAL KEPERCAYAAN 95% ===\n")

## 
## === INTERVAL KEPERCAYAAN 95% ===

conf_int <- confint(model, level = 0.95)
cat("Intercept: [", round(conf_int[1,1], 3), ", ", round(conf_int[1,2], 3), "]\n", sep="")

## Intercept: [2.575, 2.901]

cat("Slope:     [", round(conf_int[2,1], 3), ", ", round(conf_int[2,2], 3), "]\n", sep="")

## Slope:     [-0.188, 0.238]

# 8. UJI HIPOTESIS UNTUK SLOPE
summary_model <- summary(model)
r_squared <- summary_model$r.squared
slope_pvalue <- summary_model$coefficients[2, 4]


cat("\n=== UJI HIPOTESIS (β1) ===\n")

## 
## === UJI HIPOTESIS (β1) ===

cat("H0: β1 = 0 (tidak ada hubungan linear)\n")

## H0: β1 = 0 (tidak ada hubungan linear)

cat("H1: β1 ≠ 0 (ada hubungan linear)\n")

## H1: β1 ≠ 0 (ada hubungan linear)

cat("p-value =", round(slope_pvalue, 6), "\n")

## p-value = 0.815401

if(slope_pvalue < 0.05) {
  cat("Keputusan: Tolak H0, hubungan signifikan\n")
} else {
  cat("Keputusan: Gagal tolak H0, hubungan tidak signifikan\n")
}

## Keputusan: Gagal tolak H0, hubungan tidak signifikan

# 9. KOEFISIEN DETERMINASI
new_data <- data.frame(attendance_rate = c(0.6, 0.8, 1.0))

prediction <- predict(model, newdata = new_data, interval = "confidence", level = 0.95)

cat("\n=== PREDIKSI + INTERVAL KEPERCAYAAN 95% ===\n")

## 
## === PREDIKSI + INTERVAL KEPERCAYAAN 95% ===

for(i in 1:nrow(new_data)) {
  cat("Attendance_rate =", new_data$attendance_rate[i], "\n")
  cat("  Prediksi GPA =", round(prediction[i, "fit"], 3), "\n")
  cat("  CI 95%: [", round(prediction[i, "lwr"], 3), ", ", round(prediction[i, "upr"], 3), "]\n\n", sep = "")
}

## Attendance_rate = 0.6 
##   Prediksi GPA = 2.753 
##   CI 95%: [2.709, 2.798]
## 
## Attendance_rate = 0.8 
##   Prediksi GPA = 2.758 
##   CI 95%: [2.726, 2.79]
## 
## Attendance_rate = 1 
##   Prediksi GPA = 2.763 
##   CI 95%: [2.702, 2.824]

# Tampilkan tabel prediksi
hasil_prediksi <- data.frame(new_data, prediction)
print(hasil_prediksi)

##   attendance_rate      fit      lwr      upr
## 1             0.6 2.753186 2.708556 2.797815
## 2             0.8 2.758254 2.726075 2.790434
## 3             1.0 2.763323 2.702461 2.824186

# 10. VISUALISASI MODEL
ggplot(data_clean, aes(x = attendance_rate, y = gpa_current)) +
  geom_point(color = "blue", size = 3) +
  geom_smooth(method = "lm", se = TRUE, color = "red", fill = "pink") +
  labs(title = "Garis Regresi Linear",
       subtitle = paste("Y =", round(intercept, 2), "+", round(slope, 2), "X"),
       x = "Attendance Rate",
       y = "GPA Current") +
  theme_minimal()

## `geom_smooth()` using formula = 'y ~ x'

# 9. PREDIKSI
new_data <- data.frame(attendance_rate = c(0.6, 0.8))
prediction <- predict(model, newdata = new_data, interval = "confidence")

cat("\n=== PREDIKSI ===\n")

## 
## === PREDIKSI ===

cat("Untuk attendance_rate = 0.6, prediksi GPA =", round(prediction[1, "fit"], 2), "\n")

## Untuk attendance_rate = 0.6, prediksi GPA = 2.75

cat("Untuk attendance_rate = 0.8, prediksi GPA =", round(prediction[2, "fit"], 2), "\n")

## Untuk attendance_rate = 0.8, prediksi GPA = 2.76

# 10. DIAGNOSTIC PLOTS
par(mfrow = c(2, 2))
plot(model, which = 1:4)

par(mfrow = c(1, 1))

# 11. RINGKASAN LENGKAP
cat("\n=== RINGKASAN ANALISIS ===\n")

## 
## === RINGKASAN ANALISIS ===

cat("1. Model: gpa_current = β0 + β1*attendance_rate + ε\n")

## 1. Model: gpa_current = β0 + β1*attendance_rate + ε

cat("2. Estimasi: Y =", round(intercept, 3), "+", round(slope, 3), "* X\n")

## 2. Estimasi: Y = 2.738 + 0.025 * X

cat("3. R² =", round(r_squared, 4), "(", round(r_squared*100, 1), "%)\n")

## 3. R² = 0 ( 0 %)

# Uji F (model) -> yang benar ambil p-value dari F-test
f_pvalue <- pf(summary_model$fstatistic[1],
               summary_model$fstatistic[2],
               summary_model$fstatistic[3],
               lower.tail = FALSE)

cat("4. Uji F (model): p-value =", round(f_pvalue, 6), "\n")

## 4. Uji F (model): p-value = 0.815401

cat("5. Asumsi:\n")

## 5. Asumsi:

cat("   - Normalitas: p =", round(shapiro_test$p.value, 4), "\n")

##    - Normalitas: p = 0

cat("   - Homoskedastisitas: p =", round(bp_test$p.value, 4), "\n")

##    - Homoskedastisitas: p = 0.8153

cat("   - Autokorelasi: p =", round(dw_test$p.value, 4), "\n")

##    - Autokorelasi: p = 0.1293

# Simpan hasil
hasil <- list(
  model = model,
  coefficients = coef(model),
  r_squared = r_squared,
  assumptions = list(
    normality = shapiro_test$p.value,
    homoscedasticity = bp_test$p.value,
    autocorrelation = dw_test$p.value
  ),
  confidence_intervals = conf_int
)

print("Analisis regresi linear sederhana selesai!")

## [1] "Analisis regresi linear sederhana selesai!"

mandiri

dinda

2026-02-11