# 設定工作目錄
setwd("C:/Users/Liz/Desktop/NTNU/Contemporary EduPsychology/sel test")

# 讀取資料
dat <- read_dta("SES/SES.dta")
datc <- read_dta("SES/cleaned_SES_data_v13.dta")

# 計算 STA_ASS01 到 STA_TRU08 變項的總分
datc <- datc %>%
  mutate(STA_Total_Score = rowSums(.[, grep("STA_ASS|STA_TRU", names(datc))]))

Cohort 世代群體分析

# 比較不同 Cohort 群體的平均總分
cohort_summary <- datc %>%
  group_by(CohortID) %>%
  summarise(
    n_C_score = n(),
    mean_C_score = mean(STA_Total_Score, na.rm = TRUE),
    sd_C_score = sd(STA_Total_Score, na.rm = TRUE)
  )

# 進行 t 檢驗以檢查群體間總分的差異
t.test(STA_Total_Score ~ CohortID, data = datc)
## 
##  Welch Two Sample t-test
## 
## data:  STA_Total_Score by CohortID
## t = 6.1214, df = 6047.1, p-value = 9.857e-10
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  0.8500209 1.6509437
## sample estimates:
## mean in group 1 mean in group 2 
##        34.74050        33.49002
# 兩群體的平均總分有顯著差異,群體 1(Younger cohort)和群體 2(Older cohort)在 STA_Total_Score 總分上的差異具有顯著性。
# 根據結果,年輕群體的平均總分顯著高於年長群體。

t_test_table <- cohort_summary %>%
  rename(Group = CohortID, N = n_C_score, Mean = mean_C_score, SD = sd_C_score) %>%
  mutate(Mean = round(Mean, 2), SD = round(SD, 2))

t_test_table %>%
  kable() %>%
  kable_styling(full_width = FALSE, position = "center")
Group N Mean SD
1 3106 34.74 8.51
2 3006 33.49 7.44

LANG 語言(族群)群體分析

# 比較不同 LANG 群體的平均總分
lang_summary <-datc %>%
  group_by(LANG) %>%
  summarise(
    n_L_score = n(),
    mean_L_score = mean(STA_Total_Score, na.rm = TRUE),
    sd_L_score = sd(STA_Total_Score, na.rm = TRUE))


# 進行 t 檢驗以檢查群體間總分的差異
lang_t_test <- t.test(STA_Total_Score ~ LANG, data = datc)
lang_t_test
## 
##  Welch Two Sample t-test
## 
## data:  STA_Total_Score by LANG
## t = 0.20475, df = 655.92, p-value = 0.8378
## alternative hypothesis: true difference in means between group ENG and group SPA is not equal to 0
## 95 percent confidence interval:
##  -0.6821797  0.8410037
## sample estimates:
## mean in group ENG mean in group SPA 
##          34.13279          34.05338
# 兩群未有差異

#
t_test_table_lang <- lang_summary %>%
  rename(Group = LANG, N = n_L_score, Mean = mean_L_score, SD = sd_L_score) %>%
  mutate(Mean = round(Mean, 2), SD = round(SD, 2))

t_test_table_lang %>%
  kable() %>%
  kable_styling(full_width = FALSE, position = "center")
Group N Mean SD
ENG 5550 34.13 7.94
SPA 562 34.05 8.84

Cohort 和 LANG 的交互作用分析

# 描述性統計
interaction_summary <- datc %>%
  group_by(CohortID, LANG) %>%
  summarise(
    n_score = n(),
    mean_score = mean(STA_Total_Score, na.rm = TRUE),
    sd_score = sd(STA_Total_Score, na.rm = TRUE))

# 進行二因素方差分析 (two-way ANOVA)
anova_res <- aov(STA_Total_Score ~ CohortID * LANG, data = datc)
summary(anova_res)
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## CohortID         1   2389  2388.7  37.355 1.05e-09 ***
## LANG             1     70    70.0   1.095  0.29545    
## CohortID:LANG    1    559   558.8   8.739  0.00313 ** 
## Residuals     6108 390581    63.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 進行 Tukey's HSD 測試
tukey_res <- TukeyHSD(anova_res, "CohortID:LANG")
tukey_res
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = STA_Total_Score ~ CohortID * LANG, data = datc)
## 
## $`CohortID:LANG`
##                   diff        lwr        upr     p adj
## 2:ENG-1:ENG -1.1042108 -1.6560657 -0.5523559 0.0000017
## 1:SPA-1:ENG  0.3181466 -0.7791407  1.4154340 0.8787948
## 2:SPA-1:ENG -3.0891602 -4.7660650 -1.4122554 0.0000134
## 1:SPA-2:ENG  1.4223574  0.3286766  2.5160383 0.0046440
## 2:SPA-2:ENG -1.9849494 -3.6594965 -0.3104023 0.0124613
## 2:SPA-1:SPA -3.4073068 -5.3317818 -1.4828319 0.0000324
# 
interaction_summary <- interaction_summary %>%
  rename(N = n_score, Mean = mean_score, SD = sd_score) %>%
  mutate(Mean = round(Mean, 2), SD = round(SD, 2))

interaction_summary %>%
  kable() %>%
  kable_styling(full_width = FALSE, position = "center")
CohortID LANG N Mean SD
1 ENG 2703 34.70 8.39
1 SPA 403 35.02 9.32
2 ENG 2847 33.60 7.45
2 SPA 159 31.61 6.95

群體 2 (ENG) 的平均總分顯著低於群體 1 (ENG)。
群體 1 (SPA) 和群體 1 (ENG) 之間沒有顯著差異。
群體 2 (SPA) 的平均總分顯著低於群體 1 (ENG)。
群體 1 (SPA) 的平均總分顯著高於群體 2 (ENG)。
群體 2 (SPA) 的平均總分顯著低於群體 2 (ENG)。
群體 2 (SPA) 的平均總分顯著低於群體 1 (SPA 。

Plot

# 繪製交互作用圖
interaction_boxplot <- ggplot(datc, aes(x = as.factor(CohortID), y = STA_Total_Score, fill = LANG)) +
  geom_boxplot() +
  labs(title = "Interaction between CohortID and LANG on Total Score",
       x = "CohortID",
       y = "Total Score",
       fill = "LANG") +
  theme_minimal()

print(interaction_boxplot)

interaction <- ggplot(interaction_summary, aes(x = as.factor(CohortID), y = Mean, group = LANG, color = LANG)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.2) +
  labs(title = "Interaction between CohortID and LANG on Total Score",
       x = "CohortID",
       y = "Mean Total Score") +
  theme_minimal()

print(interaction)