# 設定工作目錄
setwd("C:/Users/Liz/Desktop/NTNU/Contemporary EduPsychology/sel test")
# 讀取資料
dat <- read_dta("SES/SES.dta")
datc <- read_dta("SES/cleaned_SES_data_v13.dta")
# 計算 STA_ASS01 到 STA_TRU08 變項的總分
datc <- datc %>%
mutate(STA_Total_Score = rowSums(.[, grep("STA_ASS|STA_TRU", names(datc))]))
Cohort 世代群體分析
# 比較不同 Cohort 群體的平均總分
cohort_summary <- datc %>%
group_by(CohortID) %>%
summarise(
n_C_score = n(),
mean_C_score = mean(STA_Total_Score, na.rm = TRUE),
sd_C_score = sd(STA_Total_Score, na.rm = TRUE)
)
# 進行 t 檢驗以檢查群體間總分的差異
t.test(STA_Total_Score ~ CohortID, data = datc)
##
## Welch Two Sample t-test
##
## data: STA_Total_Score by CohortID
## t = 6.1214, df = 6047.1, p-value = 9.857e-10
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## 0.8500209 1.6509437
## sample estimates:
## mean in group 1 mean in group 2
## 34.74050 33.49002
# 兩群體的平均總分有顯著差異,群體 1(Younger cohort)和群體 2(Older cohort)在 STA_Total_Score 總分上的差異具有顯著性。
# 根據結果,年輕群體的平均總分顯著高於年長群體。
t_test_table <- cohort_summary %>%
rename(Group = CohortID, N = n_C_score, Mean = mean_C_score, SD = sd_C_score) %>%
mutate(Mean = round(Mean, 2), SD = round(SD, 2))
t_test_table %>%
kable() %>%
kable_styling(full_width = FALSE, position = "center")
|
Group
|
N
|
Mean
|
SD
|
|
1
|
3106
|
34.74
|
8.51
|
|
2
|
3006
|
33.49
|
7.44
|
LANG 語言(族群)群體分析
# 比較不同 LANG 群體的平均總分
lang_summary <-datc %>%
group_by(LANG) %>%
summarise(
n_L_score = n(),
mean_L_score = mean(STA_Total_Score, na.rm = TRUE),
sd_L_score = sd(STA_Total_Score, na.rm = TRUE))
# 進行 t 檢驗以檢查群體間總分的差異
lang_t_test <- t.test(STA_Total_Score ~ LANG, data = datc)
lang_t_test
##
## Welch Two Sample t-test
##
## data: STA_Total_Score by LANG
## t = 0.20475, df = 655.92, p-value = 0.8378
## alternative hypothesis: true difference in means between group ENG and group SPA is not equal to 0
## 95 percent confidence interval:
## -0.6821797 0.8410037
## sample estimates:
## mean in group ENG mean in group SPA
## 34.13279 34.05338
# 兩群未有差異
#
t_test_table_lang <- lang_summary %>%
rename(Group = LANG, N = n_L_score, Mean = mean_L_score, SD = sd_L_score) %>%
mutate(Mean = round(Mean, 2), SD = round(SD, 2))
t_test_table_lang %>%
kable() %>%
kable_styling(full_width = FALSE, position = "center")
|
Group
|
N
|
Mean
|
SD
|
|
ENG
|
5550
|
34.13
|
7.94
|
|
SPA
|
562
|
34.05
|
8.84
|
Cohort 和 LANG 的交互作用分析
# 描述性統計
interaction_summary <- datc %>%
group_by(CohortID, LANG) %>%
summarise(
n_score = n(),
mean_score = mean(STA_Total_Score, na.rm = TRUE),
sd_score = sd(STA_Total_Score, na.rm = TRUE))
# 進行二因素方差分析 (two-way ANOVA)
anova_res <- aov(STA_Total_Score ~ CohortID * LANG, data = datc)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## CohortID 1 2389 2388.7 37.355 1.05e-09 ***
## LANG 1 70 70.0 1.095 0.29545
## CohortID:LANG 1 559 558.8 8.739 0.00313 **
## Residuals 6108 390581 63.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 進行 Tukey's HSD 測試
tukey_res <- TukeyHSD(anova_res, "CohortID:LANG")
tukey_res
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = STA_Total_Score ~ CohortID * LANG, data = datc)
##
## $`CohortID:LANG`
## diff lwr upr p adj
## 2:ENG-1:ENG -1.1042108 -1.6560657 -0.5523559 0.0000017
## 1:SPA-1:ENG 0.3181466 -0.7791407 1.4154340 0.8787948
## 2:SPA-1:ENG -3.0891602 -4.7660650 -1.4122554 0.0000134
## 1:SPA-2:ENG 1.4223574 0.3286766 2.5160383 0.0046440
## 2:SPA-2:ENG -1.9849494 -3.6594965 -0.3104023 0.0124613
## 2:SPA-1:SPA -3.4073068 -5.3317818 -1.4828319 0.0000324
#
interaction_summary <- interaction_summary %>%
rename(N = n_score, Mean = mean_score, SD = sd_score) %>%
mutate(Mean = round(Mean, 2), SD = round(SD, 2))
interaction_summary %>%
kable() %>%
kable_styling(full_width = FALSE, position = "center")
|
CohortID
|
LANG
|
N
|
Mean
|
SD
|
|
1
|
ENG
|
2703
|
34.70
|
8.39
|
|
1
|
SPA
|
403
|
35.02
|
9.32
|
|
2
|
ENG
|
2847
|
33.60
|
7.45
|
|
2
|
SPA
|
159
|
31.61
|
6.95
|
群體 2 (ENG) 的平均總分顯著低於群體 1 (ENG)。
群體 1 (SPA) 和群體 1 (ENG) 之間沒有顯著差異。
群體 2 (SPA) 的平均總分顯著低於群體 1 (ENG)。
群體 1 (SPA) 的平均總分顯著高於群體 2 (ENG)。
群體 2 (SPA) 的平均總分顯著低於群體 2 (ENG)。
群體 2 (SPA) 的平均總分顯著低於群體 1 (SPA 。
Plot
# 繪製交互作用圖
interaction_boxplot <- ggplot(datc, aes(x = as.factor(CohortID), y = STA_Total_Score, fill = LANG)) +
geom_boxplot() +
labs(title = "Interaction between CohortID and LANG on Total Score",
x = "CohortID",
y = "Total Score",
fill = "LANG") +
theme_minimal()
print(interaction_boxplot)

interaction <- ggplot(interaction_summary, aes(x = as.factor(CohortID), y = Mean, group = LANG, color = LANG)) +
geom_line() +
geom_point() +
geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.2) +
labs(title = "Interaction between CohortID and LANG on Total Score",
x = "CohortID",
y = "Mean Total Score") +
theme_minimal()
print(interaction)
