データの準備

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(ggpubr)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
ankeito <- read_excel("/Users/riku/Dropbox/zemizemi/data/honjiken/ankeitodata_with_correct_ans_2022.xlsx", sheet = "Sheet1")

#ankeito %>% filter(TASK == "MAT", COMPLEXITY == "L")  %>%
#        select("Q1") %>% rename(Q1L = Q1)


Q1L <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "L")  %>%
  select("Q1") %>% rename(Q1L = Q1)

Q1M <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "M")  %>%
  select("Q1") %>% rename(Q1M = Q1)

Q1H <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "H")  %>%
  select("Q1") %>% rename(Q1H = Q1)

Q2L <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "L")  %>%
  select("Q2") %>% rename(Q2L = Q2)

Q2M <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "M")  %>%
  select("Q2") %>% rename(Q2M = Q2)

Q2H <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "H")  %>%
  select("Q2") %>% rename(Q2H = Q2)

Q3L <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "L")  %>%
  select("Q3") %>% rename(Q3L = Q3)

Q3M <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "M")  %>%
  select("Q3") %>% rename(Q3M = Q3)

Q3H <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "H")  %>%
  select("Q3") %>% rename(Q3H = Q3)

Q4L <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "L")  %>%
  select("Q4") %>% rename(Q4L = Q4)

Q4M <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "M")  %>%
  select("Q4") %>% rename(Q4M = Q4)

Q4H <- ankeito %>% filter(TASK == "MAT", COMPLEXITY == "H")  %>%
  select("Q4") %>% rename(Q4H = Q4)

#ankeito.frame <- data.frame(Q1L, Q1M, Q1H, Q2L, Q2M, Q2H, Q3L, Q3M, Q3H, Q4L, Q4M, Q4H)
ankeito.frame <- data.frame(Q1L, Q1M, Q1H, Q2L, Q2M, Q2H, Q3L, Q3M, Q3H, Q4L, Q4M, Q4H)

ankeito.frame

クラスタリング(Q1 and Q4)

とりあえずQ1(難しさ)のHとMの差分とQ4(面白さ)のHとMの差分だけを分析します.

clin <- data.frame(CL = (ankeito.frame$Q1H - ankeito.frame$Q1M), IN = (ankeito.frame$Q4H - ankeito.frame$Q4M))

######
set.seed(123)
#clin_clu <- kmeans(clin, 2, nstart = 50)
#saveRDS(clin_clu, file = "kmeans0430_n50.RData")
#
clin_clu <- readRDS("kmeans0430_n50.RData")

clin_clu$size
## [1] 24 12
library(fpc)

clin.c2 <- kmeans(clin, centers = 2, nstart = 50)

clin.c3 <- kmeans(clin, centers = 3, nstart = 50)

calinhara(clin, clin.c2$cluster)
## [1] 20.25779
calinhara(clin, clin.c3$cluster)
## [1] 22.13087
#clin.c2$size


#clin_new <- ankeito.frame %>% select("Q1M", "Q1H", "Q4M", "Q4H")
#clin_new$cluster <- clin.c2$cluster

2クラスタが3クラスタより適切です.

クラスタ1では24名,クラスタ2では12名の参加者がいます.

2クラスタ散布図

ggscatter(clin, x = "CL", y = "IN",
          color = clin_clu$cluster, alpha = 0.6)
## Warning in if (color %in% names(data) & is.null(add.params$color))
## add.params$color <- color: the condition has length > 1 and only the first
## element will be used

2クラスタのそれぞれのQ1とQ4の箱ひげ図

clin_new1 <- ankeito.frame %>% select("Q1M", "Q4M")
clin_new1$complexity <- "M"

clin_new2 <- ankeito.frame %>% select("Q1H", "Q4H")
clin_new2$complexity <- "H"

clin_new1 <- clin_new1 %>% rename(Q1 = Q1M, Q4 = Q4M)
clin_new2 <- clin_new2 %>% rename(Q1 = Q1H, Q4 = Q4H)

clin_new1$cluster <- clin_clu$cluster
clin_new2$cluster <- clin_clu$cluster

clin_new <- rbind(clin_new1, clin_new2)

clin_new$cluster <- as.factor(clin_new$cluster)

clin_new
clin_new$id <- c(1:36)

clin_new$id <- as.factor(clin_new$id)

clin_new$complexity <- as.factor(clin_new$complexity)

clin_new <- clin_new %>% 
  reorder_levels("complexity", order = c("M", "H"))

clin_new %>%
  group_by(cluster, complexity) %>%
  get_summary_stats(Q1, type = "mean_sd")
clin_new %>%
  group_by(cluster, complexity) %>%
  get_summary_stats(Q4, type = "mean_sd")
ggboxplot(
  clin_new, x = "complexity", y = "Q4", color = "cluster", palette = "jco", add = "jitter", title = "Q4 between cluster 1&2"
)

ggboxplot(
  clin_new, x = "complexity", y = "Q1", color = "cluster", palette = "jco", add = "jitter", title = "Q1 between cluster 1&2"
)

bxp <- ggboxplot(
  clin_new, x = "complexity", y = "Q1",
  color = "cluster", palette = "jco"
  )
#bxp

clin_new %>%
  group_by(cluster, complexity) %>%
  identify_outliers(Q1)
clin_new %>%
  group_by(cluster, complexity) %>%
  shapiro_test(Q1)
ggqqplot(clin_new, "Q1", ggtheme = theme_bw()) +
  facet_grid(complexity ~ cluster, labeller = "label_both")

Q1の分析

Q1 of cluster 1 between M・H (Wilcoxon rank sum test)

クラスタ1のQ1のM・H 間の比較です. 有意差は見られました.

stat.test <- clin_new_c1 %>% 
  wilcox_test(Q1 ~ complexity, paired = TRUE) %>%
  add_significance()
stat.test
clin_new_c1 %>% wilcox_effsize(Q1 ~ complexity, paired = TRUE)
stat.test <- stat.test %>% add_xy_position(x = "complexity")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

Q1 of cluster 2 between M・H (Wilcoxon rank sum test)

クラスタ2のQ1のM・H 間の比較です. 有意差は見られませんでした.

stat.test <- clin_new_c2 %>% 
  wilcox_test(Q1 ~ complexity, paired = TRUE) %>%
  add_significance()
stat.test
clin_new_c2 %>% wilcox_effsize(Q1 ~ complexity, paired = TRUE)
stat.test <- stat.test %>% add_xy_position(x = "complexity")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

Q1 of M between cluster 1・2

M条件でのクラスタ1と2のQ1の比較です. 有意差は見られませんでした.

stat.test <- clin_new_m %>% 
  wilcox_test(Q1 ~ cluster) %>%
  add_significance()
stat.test
clin_new_m %>% wilcox_effsize(Q1 ~ cluster)
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

Q1 of H between cluster 1・2

H条件でのクラスタ1と2のQ1の比較です. 有意差は見られました.

stat.test <- clin_new_h %>% 
  wilcox_test(Q1 ~ cluster) %>%
  add_significance()
stat.test
clin_new_h %>% wilcox_effsize(Q1 ~ cluster)
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

Q4の分析

Q4 of cluster 1 between M・H (Wilcoxon rank sum test)

クラスタ1のQ4のM・H 間の比較です. 有意差は見られました.

stat.test <- clin_new_c1 %>% 
  wilcox_test(Q4 ~ complexity, paired = TRUE) %>%
  add_significance()
stat.test
clin_new_c1 %>% wilcox_effsize(Q4 ~ complexity, paired = TRUE)
stat.test <- stat.test %>% add_xy_position(x = "complexity")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

Q4 of cluster 2 between M・H (Wilcoxon rank sum test)

クラスタ2のQ4のM・H 間の比較です. 有意差は見られませんでした.

stat.test <- clin_new_c2 %>% 
  wilcox_test(Q4 ~ complexity, paired = TRUE) %>%
  add_significance()
stat.test
clin_new_c2 %>% wilcox_effsize(Q4 ~ complexity, paired = TRUE)
stat.test <- stat.test %>% add_xy_position(x = "complexity")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

Q4 of M between cluster 1・2

M条件でのクラスタ1と2のQ4の比較です. 有意差は見られませんでした.

stat.test <- clin_new_m %>% 
  wilcox_test(Q4 ~ cluster) %>%
  add_significance()
stat.test
clin_new_m %>% wilcox_effsize(Q4 ~ cluster)
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

Q4 of H between cluster 1・2

H条件でのクラスタ1と2のQ4の比較です. 有意差は見られませんでした.

stat.test <- clin_new_h %>% 
  wilcox_test(Q4 ~ cluster) %>%
  add_significance()
stat.test
clin_new_h %>% wilcox_effsize(Q4 ~ cluster)
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

アンケートの分析のまとめ

LISASの分析

ankeito_lisas <- read_xlsx("/Users/riku/Dropbox/zemizemi/data/honjiken/ankeitodata_with_correct_ans_202010version.xlsx", sheet = "lisas")

mod1_cluster <- data.frame(id = c(1:36), cluster = clin_clu$cluster)

ankeito_lisas_with_cluster <- left_join(ankeito_lisas, mod1_cluster)
## Joining, by = "id"
ankeito_lisas_with_cluster$cluster <- as.factor(ankeito_lisas_with_cluster$cluster)


LISAS_mat_m <- ankeito_lisas_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "M") %>% 
  select(id, lisas, cluster) 

LISAS_mat_h <- ankeito_lisas_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "H") %>% 
  select(id, lisas, cluster) 

LISAS_mat_m$task <- "MAT_M"
LISAS_mat_h$task <- "MAT_H"

lisas_mat_p <- rbind(LISAS_mat_m, LISAS_mat_h)
lisas_mat_p$task <- as.factor(lisas_mat_p$task)

lisas_mat_p1 <- filter(lisas_mat_p, lisas_mat_p$cluster == "1")
lisas_mat_p2 <- filter(lisas_mat_p, lisas_mat_p$cluster == "2")

LISAS of cluster 1 between MAT M&H

有意差は見られました.

mat_cluster_1 <- inner_join(
ankeito_lisas_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "M", cluster == "1") %>% 
  select(id, lisas, cluster) %>% rename(MAT_M = lisas),
ankeito_lisas_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "H", cluster == "1") %>% 
  select(id, lisas, cluster) %>% rename(MAT_H = lisas))
## Joining, by = c("id", "cluster")
mat_cluster_1_long <- mat_cluster_1 %>%
  gather(key = "task", value = "lisas", MAT_M, MAT_H)


mat_cluster_1_long %>% 
  group_by(task) %>% 
  get_summary_stats(lisas, type = "mean_sd")
bxp <- ggpaired(mat_cluster_1_long, x = "task", y = "lisas", 
         order = c("MAT_M", "MAT_H"),
         ylab = "LISAS MAT cluster 1", xlab = "TASK", title = "LISAS of cluster 1 between task")
bxp

mat_cluster_1 <- mat_cluster_1 %>% mutate(differences = MAT_M - MAT_H)

mat_cluster_1 %>% identify_outliers(differences)
mat_cluster_1 %>% shapiro_test(differences) 
stat.test <- mat_cluster_1_long  %>% 
  t_test(lisas ~ task, paired = TRUE) %>%
  add_significance()
stat.test
stat.test <- stat.test %>% add_xy_position(x = "task")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed= TRUE))

LISAS of cluster 2 between MAT M&H

有意差は見られませんでした.

mat_cluster_2 <- inner_join(
ankeito_lisas_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "M", cluster == "2") %>% 
  select(id, lisas, cluster) %>% rename(MAT_M = lisas),
ankeito_lisas_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "H", cluster == "2") %>% 
  select(id, lisas, cluster) %>% rename(MAT_H = lisas))
## Joining, by = c("id", "cluster")
mat_cluster_2_long <- mat_cluster_2 %>%
  gather(key = "task", value = "lisas", MAT_M, MAT_H)


mat_cluster_2_long %>% 
  group_by(task) %>% 
  get_summary_stats(lisas, type = "mean_sd")
bxp <- ggpaired(mat_cluster_2_long, x = "task", y = "lisas", 
         order = c("MAT_M", "MAT_H"),
         ylab = "LISAS MAT cluster 2", xlab = "TASK", title = "LISAS of cluster 2 between task")
bxp

mat_cluster_2 <- mat_cluster_2 %>% mutate(differences = MAT_M - MAT_H)

mat_cluster_2 %>% identify_outliers(differences)
mat_cluster_2 %>% shapiro_test(differences) 
stat.test <- mat_cluster_2_long  %>% 
  t_test(lisas ~ task, paired = TRUE) %>%
  add_significance()
stat.test
mat_cluster_2_long  %>% cohens_d(lisas ~ task, paired = TRUE)
stat.test <- stat.test %>% add_xy_position(x = "task")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed= TRUE))

LISAS of MAT M between cluster 1&2

有意差は見られませんでした.

lisas_mat_p_m <- lisas_mat_p %>% filter(task == "MAT_M")

lisas_mat_p_m %>%
  group_by(cluster) %>%
  get_summary_stats(lisas, type = "mean_sd")
bxp <- ggboxplot(
  lisas_mat_p_m, x = "cluster", y = "lisas", 
  ylab = "LISAS", xlab = "cluster", add = "jitter", title = "LISAS of MAT M between cluster"
  )
bxp

lisas_mat_p_m %>%
  group_by(cluster) %>%
  identify_outliers(lisas)
lisas_mat_p_m %>%
  group_by(cluster) %>%
  shapiro_test(lisas)
lisas_mat_p_m %>% levene_test(lisas ~ cluster)
stat.test <- lisas_mat_p_m %>% 
  t_test(lisas ~ cluster, var.equal = TRUE) %>%
  add_significance()
stat.test
lisas_mat_p_m %>% 
  cohens_d(lisas ~ cluster, var.equal = TRUE) 
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

LISAS of MAT H between cluster 1&2

有意差は見られませんでした.

lisas_mat_p_h <- lisas_mat_p %>% filter(task == "MAT_H")

lisas_mat_p_h %>%
  group_by(cluster) %>%
  get_summary_stats(lisas, type = "mean_sd")
bxp <- ggboxplot(
  lisas_mat_p_h, x = "cluster", y = "lisas", 
  ylab = "LISAS", xlab = "cluster", add = "jitter", title = "LISAS of MAT H between cluster"
  )
bxp

lisas_mat_p_h %>%
  group_by(cluster) %>%
  identify_outliers(lisas)
lisas_mat_p_h %>%
  group_by(cluster) %>%
  shapiro_test(lisas)
lisas_mat_p_h %>% levene_test(lisas ~ cluster)
stat.test <- lisas_mat_p_h %>% 
  t_test(lisas ~ cluster, var.equal = TRUE) %>%
  add_significance()
stat.test
lisas_mat_p_h %>% 
  cohens_d(lisas ~ cluster, var.equal = TRUE) 
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

まとめ

Q1

Q1 of cluster 1 between M&H : M < H

Q1 of cluster 2 between M&H : ns

Q1 in M : ns

Q1 in H : 1 > 2

Q4

Q4 of cluster 1 between M&H : M < H

Q4 of cluster 2 between M&H : ns

Q4 in M : ns

Q4 in H : ns

LISAS

LISAS of cluster 1 between M&H : M < H

LISAS of cluster 2 between M&H : ns

LISAS in M between cluster 1&2 : ns

LISAS in H between cluster 1&2 : ns

クラスタ1は課題の認知負荷の増加を主観的に感じていて,課題の認知負荷の増加に伴い課題の面白さも増加すると思うタイプ.タスクの複雑性が上がると反応時間は遅くなる.(Q1 in H : 1 > 2)??

クラスタ2は認知負荷の増加に無感で,面白さへの認知の変動もないタイプ. タスクの複雑性が上がっても反応時間は変わらない.

Demographic data

SPOT90

ankeito_with_cluster <- inner_join(ankeito, mod1_cluster)
## Joining, by = "id"
SPOT90_p <- ankeito_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "L") %>% 
  select(SPOT90, cluster) 

SPOT90_p$cluster <- as.factor(SPOT90_p$cluster)

#ggplot(SPOT90_p, aes(x = cluster, group = cluster, y = SPOT90)) + geom_boxplot()
#t.test(select(filter(SPOT90_p, cluster == "1"), SPOT90), select(filter(SPOT90_p, cluster == "2"), SPOT90))


SPOT90_p %>%
  group_by(cluster) %>%
  get_summary_stats(SPOT90, type = "mean_sd")
bxp <- ggboxplot(
  SPOT90_p, x = "cluster", y = "SPOT90", 
  ylab = "SPOT90", xlab = "cluster", add = "jitter"
)
bxp

SPOT90_p %>%
  group_by(cluster) %>%
  identify_outliers(SPOT90)
#SPOT90_p <- SPOT90_p[c(-36),] #id 36 excluded
                    
SPOT90_p %>%
  group_by(cluster) %>%
  get_summary_stats(SPOT90, type = "mean_sd")
bxp <- ggboxplot(
  SPOT90_p, x = "cluster", y = "SPOT90", 
  ylab = "SPOT90", xlab = "cluster", add = "jitter"
)

bxp

SPOT90_p %>%
  group_by(cluster) %>%
  shapiro_test(SPOT90)
ggqqplot(SPOT90_p, x = "SPOT90", facet.by = "cluster")

SPOT90_p %>% levene_test(SPOT90 ~ cluster)
stat.test <- SPOT90_p %>% 
  t_test(SPOT90 ~ cluster, var.equal = TRUE) %>%
  add_significance()
stat.test
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

length_of_stay_in_Japan

ns

filter(ankeito_lisas_with_cluster, TASK == "MAT", COMPLEXITY =="H")
ankeito_lisas_with_cluster$Gender <- as.factor(ankeito_lisas_with_cluster$Gender)
summary(select(ankeito_lisas_with_cluster, Gender, cluster))
##     Gender   cluster
##  Female:96   1:96   
##  Male  :48   2:48
ankeito_lisas_with_cluster %>% 
  filter(TASK == "MAT", COMPLEXITY =="H", cluster == 1) %>% 
  select(Gender) %>% 
  summary()
##     Gender  
##  Female:16  
##  Male  : 8
ankeito_lisas_with_cluster %>% 
  filter(TASK == "MAT", COMPLEXITY =="H", cluster == 2) %>% 
  select(Gender) %>% 
  summary()
##     Gender 
##  Female:8  
##  Male  :4
ankeito_lisas_with_cluster %>% 
  filter(TASK == "MAT", COMPLEXITY =="H", cluster == 1) %>% 
  select(length_of_stay_in_Japan) %>% 
  summary()
##  length_of_stay_in_Japan
##  Min.   :0.500          
##  1st Qu.:1.375          
##  Median :2.500          
##  Mean   :2.233          
##  3rd Qu.:3.000          
##  Max.   :4.500
ankeito_lisas_with_cluster %>% 
  filter(TASK == "MAT", COMPLEXITY =="H", cluster == 2) %>% 
  select(length_of_stay_in_Japan) %>% 
  summary()
##  length_of_stay_in_Japan
##  Min.   :0.500          
##  1st Qu.:1.000          
##  Median :1.500          
##  Mean   :2.333          
##  3rd Qu.:4.000          
##  Max.   :5.000
lengthjapan_c <- ankeito_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "H") %>% 
  select(length_of_stay_in_Japan, cluster) 

lengthjapan_c$cluster <- as.factor(lengthjapan_c$cluster)

#ggplot(lengthjapan_c, aes(x = cluster, group = cluster, y = length_of_stay_in_Japan)) + geom_boxplot()
#t.test(select(filter(lengthjapan_c, cluster == "1"), length_of_stay_in_Japan), select(filter(lengthjapan_c, cluster == "2"), length_of_stay_in_Japan))


lengthjapan_c %>%
  group_by(cluster) %>%
  get_summary_stats(length_of_stay_in_Japan, type = "mean_sd")
bxp <- ggboxplot(
  lengthjapan_c, x = "cluster", y = "length_of_stay_in_Japan", 
  ylab = "length_of_stay_in_Japan", xlab = "cluster", add = "jitter"
)
bxp

lengthjapan_c %>%
  group_by(cluster) %>%
  identify_outliers(length_of_stay_in_Japan)
bxp <- ggboxplot(
  lengthjapan_c, x = "cluster", y = "length_of_stay_in_Japan", 
  ylab = "length_of_stay_in_Japan", xlab = "cluster", add = "jitter"
)

bxp

lengthjapan_c %>%
  group_by(cluster) %>%
  shapiro_test(length_of_stay_in_Japan)
ggqqplot(lengthjapan_c, x = "length_of_stay_in_Japan", facet.by = "cluster")

#lengthjapan_c %>% levene_test(length_of_stay_in_Japan ~ cluster)

stat.test <- lengthjapan_c %>% 
  wilcox_test(length_of_stay_in_Japan ~ cluster) %>%
  add_significance()
stat.test
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))

age leanring

ns

age_c <- ankeito_with_cluster %>% filter(TASK == "MAT", COMPLEXITY == "H") %>% 
  select(birth_year, Age_of_Japanese_learning, cluster) 

age_c$cluster <- as.factor(age_c$cluster)

#ggplot(age_c, aes(x = cluster, group = cluster, y = japaneseage)) + geom_boxplot()
#t.test(select(filter(age_c, cluster == "1"), japaneseage), select(filter(age_c, cluster == "2"), japaneseage))

age_c$japaneseage <- 2020 - age_c$birth_year - age_c$Age_of_Japanese_learning

age_c %>%
  group_by(cluster) %>%
  get_summary_stats(japaneseage, type = "mean_sd")
bxp <- ggboxplot(
  age_c, x = "cluster", y = "japaneseage", 
  ylab = "japaneseage", xlab = "cluster", add = "jitter"
)
bxp

age_c %>%
  group_by(cluster) %>%
  identify_outliers(japaneseage)
#age_c <- age_c[c(-36),] #id 36 excluded
                    



age_c %>%
  group_by(cluster) %>%
  shapiro_test(japaneseage)
ggqqplot(age_c, x = "japaneseage", facet.by = "cluster")

age_c %>% levene_test(japaneseage ~ cluster)
stat.test <- age_c %>% 
  t_test(japaneseage ~ cluster, var.equal = TRUE) %>%
  add_significance()
stat.test
stat.test <- stat.test %>% add_xy_position(x = "cluster")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))