#1. LOAD DATA
df <- read.csv2(file.choose())

# Cek data
names(df)
##  [1] "school"     "sex"        "age"        "address"    "famsize"   
##  [6] "Pstatus"    "Medu"       "Fedu"       "Mjob"       "Fjob"      
## [11] "reason"     "guardian"   "traveltime" "studytime"  "failures"  
## [16] "schoolsup"  "famsup"     "paid"       "activities" "nursery"   
## [21] "higher"     "internet"   "romantic"   "famrel"     "freetime"  
## [26] "goout"      "Dalc"       "Walc"       "health"     "absences"  
## [31] "G1"         "G2"         "G3"
str(df)
## 'data.frame':    395 obs. of  33 variables:
##  $ school    : chr  "GP" "GP" "GP" "GP" ...
##  $ sex       : chr  "F" "F" "F" "F" ...
##  $ age       : int  18 17 15 15 16 16 16 17 15 15 ...
##  $ address   : chr  "U" "U" "U" "U" ...
##  $ famsize   : chr  "GT3" "GT3" "LE3" "GT3" ...
##  $ Pstatus   : chr  "A" "T" "T" "T" ...
##  $ Medu      : int  4 1 1 4 3 4 2 4 3 3 ...
##  $ Fedu      : int  4 1 1 2 3 3 2 4 2 4 ...
##  $ Mjob      : chr  "at_home" "at_home" "at_home" "health" ...
##  $ Fjob      : chr  "teacher" "other" "other" "services" ...
##  $ reason    : chr  "course" "course" "other" "home" ...
##  $ guardian  : chr  "mother" "father" "mother" "mother" ...
##  $ traveltime: int  2 1 1 1 1 1 1 2 1 1 ...
##  $ studytime : int  2 2 2 3 2 2 2 2 2 2 ...
##  $ failures  : int  0 0 3 0 0 0 0 0 0 0 ...
##  $ schoolsup : chr  "yes" "no" "yes" "no" ...
##  $ famsup    : chr  "no" "yes" "no" "yes" ...
##  $ paid      : chr  "no" "no" "yes" "yes" ...
##  $ activities: chr  "no" "no" "no" "yes" ...
##  $ nursery   : chr  "yes" "no" "yes" "yes" ...
##  $ higher    : chr  "yes" "yes" "yes" "yes" ...
##  $ internet  : chr  "no" "yes" "yes" "yes" ...
##  $ romantic  : chr  "no" "no" "no" "yes" ...
##  $ famrel    : int  4 5 4 3 4 5 4 4 4 5 ...
##  $ freetime  : int  3 3 3 2 3 4 4 1 2 5 ...
##  $ goout     : int  4 3 2 2 2 2 4 4 2 1 ...
##  $ Dalc      : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ Walc      : int  1 1 3 1 2 2 1 1 1 1 ...
##  $ health    : int  3 3 3 5 5 5 3 1 1 5 ...
##  $ absences  : int  6 4 10 2 4 10 0 6 0 0 ...
##  $ G1        : int  5 5 7 15 6 15 12 6 16 14 ...
##  $ G2        : int  6 5 8 14 10 15 12 5 18 15 ...
##  $ G3        : int  6 6 10 15 10 15 11 6 19 15 ...
#2. PREPROCESSING
# Factor
df$sex <- as.factor(df$sex)
df$schoolsup <- as.factor(df$schoolsup)
df$internet <- as.factor(df$internet)
df$famsup <- as.factor(df$famsup)

# Numeric
df$Medu <- as.numeric(df$Medu)
df$studytime <- as.numeric(df$studytime)
df$absences <- as.numeric(df$absences)
df$G3 <- as.numeric(df$G3)
#3. MANOVA
manova_model <- manova(cbind(G1, G2, G3) ~ sex + schoolsup, data = df)
summary(manova_model, test = "Pillai")
##            Df   Pillai approx F num Df den Df    Pr(>F)    
## sex         1 0.011296   1.4853      3    390    0.2181    
## schoolsup   1 0.065415   9.0992      3    390 7.796e-06 ***
## Residuals 392                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(reshape2)

# Ubah ke long format
df_long <- melt(df, 
                id.vars = c("sex", "schoolsup"),
                measure.vars = c("G1", "G2", "G3"),
                variable.name = "Ujian",
                value.name = "Nilai")

# Plot
ggplot(df_long, aes(x = Ujian, y = Nilai, 
                    color = schoolsup, group = schoolsup)) +
  stat_summary(fun = mean, geom = "line", size = 1.2) +
  stat_summary(fun = mean, geom = "point", size = 3) +
  labs(
    title = "Profil Rata-rata Nilai Akademik Berdasarkan Dukungan Sekolah",
    x = "Komponen Nilai",
    y = "Rata-rata Nilai",
    color = "School Support"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#4. TWO-WAY MANOVA
manova_interaksi <- manova(cbind(G1, G2, G3) ~ sex * schoolsup, data = df)
summary(manova_interaksi, test = "Pillai")
##                Df   Pillai approx F num Df den Df    Pr(>F)    
## sex             1 0.011315   1.4839      3    389    0.2184    
## schoolsup       1 0.065454   9.0817      3    389 7.991e-06 ***
## sex:schoolsup   1 0.005713   0.7450      3    389    0.5258    
## Residuals     391                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(reshape2)

# Ubah ke long format
df_long <- melt(df,
                id.vars = c("sex", "schoolsup"),
                measure.vars = c("G1", "G2", "G3"),
                variable.name = "Ujian",
                value.name = "Nilai")

# Interaction plot
ggplot(df_long, aes(x = Ujian, y = Nilai,
                    color = sex, linetype = schoolsup,
                    group = interaction(sex, schoolsup))) +
  stat_summary(fun = mean, geom = "line", size = 1.1) +
  stat_summary(fun = mean, geom = "point", size = 3) +
  labs(
    title = "Interaksi Jenis Kelamin dan Dukungan Sekolah terhadap Nilai Akademik",
    x = "Komponen Nilai",
    y = "Rata-rata Nilai",
    color = "Sex",
    linetype = "School Support"
  ) +
  theme_minimal()

#5. MANCOVA
mancova_model <- manova(
  cbind(G1, G2, G3) ~ sex + schoolsup + internet + famsup + Medu + studytime + absences,
  data = df
)
summary(mancova_model, test = "Pillai")
##            Df   Pillai approx F num Df den Df    Pr(>F)    
## sex         1 0.012155   1.5791      3    385 0.1939358    
## schoolsup   1 0.069135   9.5313      3    385 4.369e-06 ***
## internet    1 0.017832   2.3300      3    385 0.0739564 .  
## famsup      1 0.006776   0.8755      3    385 0.4538239    
## Medu        1 0.047993   6.4697      3    385 0.0002787 ***
## studytime   1 0.044005   5.9073      3    385 0.0005992 ***
## absences    1 0.023534   3.0930      3    385 0.0269793 *  
## Residuals 387                                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(ggplot2)
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.5.3
# Plot 1: studytime
p1 <- ggplot(df, aes(x = studytime, y = G3, color = schoolsup)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "(a) Study Time vs G3",
       x = "Study Time", y = "G3") +
  theme_minimal()

# Plot 2: absences
p2 <- ggplot(df, aes(x = absences, y = G3, color = schoolsup)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "(b) Absences vs G3",
       x = "Absences", y = "G3") +
  theme_minimal()

# Plot 3: Medu
p3 <- ggplot(df, aes(x = factor(Medu), y = G3)) +
  geom_boxplot(fill = "skyblue") +
  labs(title = "(c) Medu vs G3",
       x = "Pendidikan Ibu", y = "G3") +
  theme_minimal()

# Gabungan
p1 + p2 + p3
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#6. ANCOVA
# ANCOVA
ancova_model <- aov(G3 ~ sex + schoolsup + internet + famsup + Medu + studytime + absences, data = df)
summary(ancova_model)
##              Df Sum Sq Mean Sq F value  Pr(>F)    
## sex           1     89    88.5   4.507  0.0344 *  
## schoolsup     1     40    39.5   2.013  0.1567    
## internet      1     73    72.7   3.702  0.0551 .  
## famsup        1      7     6.8   0.345  0.5574    
## Medu          1    337   337.2  17.169 4.2e-05 ***
## studytime     1    119   118.6   6.039  0.0144 *  
## absences      1      6     6.1   0.311  0.5775    
## Residuals   387   7600    19.6                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.5.3
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(ggplot2)
library(gridExtra)

emm <- emmeans(ancova_model, ~ sex)
emm_df <- as.data.frame(emm)

# (a) Studytime vs G3 + sex
p1 <- ggplot(df, aes(x = studytime, y = G3, color = sex)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "(a) Study Time vs G3",
    x = "Study Time",
    y = "Nilai Akhir (G3)"
  ) +
  theme_minimal()

# (b) Medu vs G3
p2 <- ggplot(df, aes(x = as.factor(Medu), y = G3)) +
  geom_boxplot(fill = "skyblue") +
  labs(
    title = "(b) Pendidikan Ibu (Medu) vs G3",
    x = "Tingkat Pendidikan Ibu",
    y = "Nilai Akhir (G3)"
  ) +
  theme_minimal()

# (c) Adjusted Mean (sex)
p3 <- ggplot(emm_df, aes(x = sex, y = emmean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  labs(
    title = "(c) Adjusted Mean G3 (by sex)",
    x = "Jenis Kelamin",
    y = "Adjusted Mean G3"
  ) +
  theme_minimal()

# Gabungan
grid.arrange(p1, p2, p3, ncol = 3)
## `geom_smooth()` using formula = 'y ~ x'

# 8. UJI ASUMSI
# Normalitas
shapiro.test(residuals(ancova_model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(ancova_model)
## W = 0.94438, p-value = 5.158e-11
# Homogenitas varians
library(car)
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
leveneTest(G3 ~ sex * schoolsup, data = df)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   3  3.4578 0.01655 *
##       391                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogenitas kovarians (MANOVA)
library(biotools)
## Warning: package 'biotools' was built under R version 4.5.3
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:patchwork':
## 
##     area
## ---
## biotools version 4.3
boxM(df[, c("G1","G2","G3")], df$sex)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  df[, c("G1", "G2", "G3")]
## Chi-Sq (approx.) = 18.232, df = 6, p-value = 0.005677
#9. DESKRIPSI DATA
summary(df)
##     school          sex          age         address         
##  Length:395         F:208   Min.   :15.0   Length:395        
##  Class :character   M:187   1st Qu.:16.0   Class :character  
##  Mode  :character           Median :17.0   Mode  :character  
##                             Mean   :16.7                     
##                             3rd Qu.:18.0                     
##                             Max.   :22.0                     
##    famsize            Pstatus               Medu            Fedu      
##  Length:395         Length:395         Min.   :0.000   Min.   :0.000  
##  Class :character   Class :character   1st Qu.:2.000   1st Qu.:2.000  
##  Mode  :character   Mode  :character   Median :3.000   Median :2.000  
##                                        Mean   :2.749   Mean   :2.522  
##                                        3rd Qu.:4.000   3rd Qu.:3.000  
##                                        Max.   :4.000   Max.   :4.000  
##      Mjob               Fjob              reason            guardian        
##  Length:395         Length:395         Length:395         Length:395        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    traveltime      studytime        failures      schoolsup famsup   
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000   no :344   no :153  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.0000   yes: 51   yes:242  
##  Median :1.000   Median :2.000   Median :0.0000                      
##  Mean   :1.448   Mean   :2.035   Mean   :0.3342                      
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.0000                      
##  Max.   :4.000   Max.   :4.000   Max.   :3.0000                      
##      paid            activities          nursery             higher         
##  Length:395         Length:395         Length:395         Length:395        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  internet    romantic             famrel         freetime         goout      
##  no : 66   Length:395         Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  yes:329   Class :character   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:2.000  
##            Mode  :character   Median :4.000   Median :3.000   Median :3.000  
##                               Mean   :3.944   Mean   :3.235   Mean   :3.109  
##                               3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:4.000  
##                               Max.   :5.000   Max.   :5.000   Max.   :5.000  
##       Dalc            Walc           health         absences     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   : 0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:3.000   1st Qu.: 0.000  
##  Median :1.000   Median :2.000   Median :4.000   Median : 4.000  
##  Mean   :1.481   Mean   :2.291   Mean   :3.554   Mean   : 5.709  
##  3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:5.000   3rd Qu.: 8.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :75.000  
##        G1              G2              G3       
##  Min.   : 3.00   Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 8.00   1st Qu.: 9.00   1st Qu.: 8.00  
##  Median :11.00   Median :11.00   Median :11.00  
##  Mean   :10.91   Mean   :10.71   Mean   :10.42  
##  3rd Qu.:13.00   3rd Qu.:13.00   3rd Qu.:14.00  
##  Max.   :19.00   Max.   :19.00   Max.   :20.00
# Karakteristik Data
library(ggplot2)

ggplot(df, aes(x = absences, y = G3)) +
  geom_point(alpha = 0.5, color = "steelblue") +   # transparansi biar ga numpuk
  geom_smooth(method = "lm", color = "red", se = TRUE) +  # garis tren
  labs(
    title = "Hubungan Absensi dengan Nilai Akhir (G3)",
    x = "Jumlah Absensi",
    y = "Nilai Akhir (G3)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'