## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-4
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
PERMANOVA (Permutational Multivariate Analysis of Variance) PERMANOVA is a non-parametric method used to compare groups of multivariate samples. It’s often applied in ecological studies, especially when dealing with species abundance data, but its application is not restricted to that field.
How it works: PERMANOVA tests the null hypothesis that the centroids of two or more groups are equivalent in multivariate space. It operates by partitioning distance matrices among sources of variation, relying on permutations to determine the statistical significance. Assumptions: Homogeneity of group dispersions: The variability within each group should be approximately the same. This assumption is equivalent to the homogeneity of variances (homoscedasticity) in ANOVA. The distances among the observations are directly interpretable and meaningful. MANOVA (Multivariate Analysis of Variance) MANOVA is a parametric method that extends the capabilities of analysis of variance (ANOVA) to allow for the analysis of multiple dependent variables.
Assumptions: Multivariate Normality: Each combination of levels from the independent variables results in a multivariate normal distribution of the dependent variables. Homogeneity of Covariance: The covariance matrices of the dependent variables are equal across groups (often tested using Box’s M test). Linearity: Linear relationships should exist between all pairs of dependent variables for each group of the independent variable. Homogeneity of Variances: For each dependent variable, the variances should be equal across groups (homoscedasticity). Differences in Assumptions: Parametric vs. Non-parametric: MANOVA is parametric and assumes multivariate normality, linearity, and homogeneity of variances and covariances. PERMANOVA is non-parametric and does not make these assumptions about the distribution of the data. Data Type and Distance: MANOVA works directly on raw data, whereas PERMANOVA works on a distance or dissimilarity matrix representing pairwise distances between samples. Dispersion: Both methods assume homogeneity of dispersion, but in different contexts. MANOVA assumes homogeneity of variances and covariances, whereas PERMANOVA assumes homogeneity of group dispersions in multivariate space. In essence, PERMANOVA offers a more flexible approach when the assumptions of MANOVA are not met, especially in cases where data are not multivariate normally distributed or when working with complex data types like species abundance data.
次元削減の結果、
流暢性ではspeech rate, mid clause duration、
複雑性では(内容語/総語数), contents asunit、
正確性ではe_asunit
が選出された。
library(readxl)
data <- read_xlsx("/Users/riku/Library/CloudStorage/Dropbox/zemizemi/paper/caf/all_230717_noname.xlsx")
data$jyoshi_as <- (data$kakari + data$fukujyoshi + data$setsuzokujyoshi + data$kakujyoshi)/ data$asunit
data <- data %>% filter(!(trueid == "17" | trueid == "24"))speech_rate for PC1, Mid_clause_pause_duration for PC2
data_standardized <- scale(data[, c("speech_rate", "articulation_rate", "mean_run_length", "effective_speech_rate", "effective_articulation_rate", "Mid_clause_pause_duration", "Mid_clause_pause_ratio", "DYSF_ratio")])
pca_result <- prcomp(data_standardized, center = TRUE, scale. = TRUE)
summary(pca_result)## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.1974 1.0752 1.0108 0.69057 0.54547 0.39570 0.2482
## Proportion of Variance 0.6036 0.1445 0.1277 0.05961 0.03719 0.01957 0.0077
## Cumulative Proportion 0.6036 0.7481 0.8758 0.93537 0.97257 0.99214 0.9998
## PC8
## Standard deviation 0.03604
## Proportion of Variance 0.00016
## Cumulative Proportion 1.00000
## PC1 PC2 PC3 PC4
## speech_rate -0.44167096 0.02209629 -0.05326305 -0.15611635
## articulation_rate -0.39997066 0.28765769 -0.13422130 0.36895027
## mean_run_length -0.37546513 0.21289090 0.07797681 -0.66668635
## effective_speech_rate -0.43716000 0.08009078 0.11877818 -0.13213372
## effective_articulation_rate -0.37999623 0.21052445 -0.19797287 0.54675497
## Mid_clause_pause_duration 0.21986399 0.73499991 -0.22630681 -0.19686007
## Mid_clause_pause_ratio 0.34497002 0.42815175 -0.15169498 0.04361295
## DYSF_ratio -0.03355109 -0.31171847 -0.91953230 -0.19487534
## PC5 PC6 PC7 PC8
## speech_rate 0.28979027 -0.2720744 -0.30567383 0.725113699
## articulation_rate 0.07875903 -0.3257988 0.69931909 -0.041735558
## mean_run_length -0.08731988 0.5088821 0.31032179 -0.016339515
## effective_speech_rate 0.28823577 -0.2796275 -0.39344999 -0.674414371
## effective_articulation_rate -0.21248046 0.5540951 -0.34762723 0.011580191
## Mid_clause_pause_duration -0.42344449 -0.3110876 -0.21247806 0.015448660
## Mid_clause_pause_ratio 0.77075786 0.2802407 0.01268478 -0.002210384
## DYSF_ratio 0.02435432 -0.0186156 0.01616474 -0.130341458
contents_asunit for PC1, ld(内容語/総語数) for PC2.
data_standardized <- scale(data[, c("clauseperasunit", "ld" , "kotonarucontents_asunit" , "contents_asunit", "jyoshi_as" )])
pca_result <- prcomp(data_standardized, center = TRUE, scale. = TRUE)
summary(pca_result)## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.883 1.0094 0.51239 0.35307 0.22143
## Proportion of Variance 0.709 0.2038 0.05251 0.02493 0.00981
## Cumulative Proportion 0.709 0.9127 0.96526 0.99019 1.00000
## PC1 PC2 PC3 PC4 PC5
## clauseperasunit -0.4693786 0.1552543 -0.8527489 0.16786750 -0.0148206
## ld 0.1478575 0.9453361 0.1372005 0.22531446 -0.1220130
## kotonarucontents_asunit -0.5034067 0.1540269 0.1791096 -0.68210990 -0.4748718
## contents_asunit -0.5100956 0.1572307 0.2824399 -0.06682073 0.7942567
## jyoshi_as -0.4941721 -0.1838203 0.3770183 0.67179980 -0.3585335
e_asunit
data_standardized <- scale(data[, c("e_asunit" , "e_nashi_as_count_wariai" )])
pca_result <- prcomp(data_standardized, center = TRUE, scale. = TRUE)
summary(pca_result)## Importance of components:
## PC1 PC2
## Standard deviation 1.3536 0.40955
## Proportion of Variance 0.9161 0.08386
## Cumulative Proportion 0.9161 1.00000
## PC1 PC2
## e_asunit 0.7071068 0.7071068
## e_nashi_as_count_wariai -0.7071068 0.7071068
交互作用は有意ではなかった。 認知負荷は有意ではなかった。 習熟度は有意であった。
data <- read_xlsx("/Users/riku/Library/CloudStorage/Dropbox/zemizemi/paper/caf/all_230717_noname_LM.xlsx")
data$load <- factor(data$load, levels = c("L", "M"))
data$load <- as.factor(data$load)
dep_vars <- data[, c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')]
dist_matrix <- vegdist(dep_vars, method = "euclidean")id 1 を除外した。
# Convert the "dist" object into a full matrix
full_dist_matrix <- as.matrix(dist_matrix)
# Calculate the average distance of each sample to all other samples
avg_distances <- rowMeans(full_dist_matrix)
# Plot to visualize potential outliers
boxplot(avg_distances, main="Average Distances of Samples", ylab="Distance")# Identify potential outliers
threshold <- quantile(avg_distances, 0.95) # for example, the 95th percentile
potential_outliers <- which(avg_distances > threshold)# Removc id 1
data <- data %>% filter(!(trueid == "1" ))
dep_vars <- data[, c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')]
dist_matrix <- vegdist(dep_vars, method = "euclidean")
full_dist_matrix <- as.matrix(dist_matrix)
avg_distances <- rowMeans(full_dist_matrix)
boxplot(avg_distances, main="Average Distances of Samples", ylab="Distance")# Subset the dependent variables
dep_vars <- data[, c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')]
# 2. Check the assumption of homogeneity of group dispersions
# Create a distance matrix
dist_matrix <- vegdist(dep_vars, method = "euclidean")
# Test for homogeneity of dispersions
grouping <- interaction(data$load, data$proficiency)
dispersion_test <- betadisper(dist_matrix, grouping)
anova(dispersion_test)## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 5 3337 667.33 0.9462 0.4577
## Residuals 62 43726 705.26
# 3. Run the PERMANOVA
set.seed(123) # Set a random seed for reproducibility
permanova_result <- adonis2(dist_matrix ~ data$load * data$proficiency, permutations = 999)
print(permanova_result)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix ~ data$load * data$proficiency, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## data$load 1 2312 0.01560 1.5355 0.204
## data$proficiency 2 52266 0.35252 17.3535 0.001 ***
## data$load:data$proficiency 2 319 0.00215 0.1058 0.921
## Residual 62 93367 0.62974
## Total 67 148263 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 5 × 6
## rowname speech_rate Mid_clause_pause_dur…¹ contents_asunit e_asunit ld
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 speech_rate 1 -0.52 0.3 0.18 -0.35
## 2 Mid_clause… -0.52 1 0.037 -0.089 0.064
## 3 contents_a… 0.3 0.037 1 0.57 -0.12
## 4 e_asunit 0.18 -0.089 0.57 1 -0.19
## 5 ld -0.35 0.064 -0.12 -0.19 1
## # ℹ abbreviated name: ¹Mid_clause_pause_duration
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# 1. Scale the values of each dependent variable
data_scaled <- as.data.frame(lapply(data[c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')], scale))
data_scaled$load <- data$load
# 2. Create an empty matrix and populate with average scaled values
num_vars <- length(c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld'))
num_load_levels <- length(unique(data_scaled$load))
heatmap_matrix <- matrix(0, nrow=num_vars, ncol=num_load_levels)
rownames(heatmap_matrix) <- c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')
colnames(heatmap_matrix) <- unique(data_scaled$load)
for (var in rownames(heatmap_matrix)) {
heatmap_matrix[var, ] <- with(data_scaled, tapply(get(var), load, mean, na.rm=TRUE))
}
# 3. Generate the heatmap
heatmap.2(heatmap_matrix, main="Heatmap for Scaled Dependent Variables",
xlab="Load", ylab="Variables",
col=colorRampPalette(c("blue", "white", "red"))(256), # Colors adjusted for scaled values
trace="none", density.info="none",
margins=c(8,8), srtRow=0, srtCol=45, key.title="Mean Value",
key.xlab="Average scaled values of variables", dendrogram="row")
in separate ANOVA,
L > M 減少: speech rate, ld, contents asunit
L < M 増加: mid clause pause duration
ns: e asunit
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Create individual plots
plot1 <- ggboxplot(
data, x = "load", y = "speech_rate", add = "jitter", palette = "lancet"
)
plot2 <- ggboxplot(
data, x = "load", y = "Mid_clause_pause_duration", add = "jitter", palette = "lancet"
)
plot3 <- ggboxplot(
data, x = "load", y = "contents_asunit", add = "jitter", palette = "lancet"
)
plot4 <- ggboxplot(
data, x = "load", y = "ld", add = "jitter", palette = "lancet"
)
plot5 <- ggboxplot(
data, x = "load", y = "e_asunit", add = "jitter", palette = "lancet"
)
# Combine the plots into a single plot
grid.arrange(plot1, plot2, plot3, plot4, plot5, ncol = 2, nrow = 3)交互作用は有意ではなかった。 認知負荷は有意であった。 習熟度は有意であった。
data <- read_xlsx("/Users/riku/Library/CloudStorage/Dropbox/zemizemi/paper/caf/all_230717_noname_MH.xlsx")
data$load <- factor(data$load, levels = c("M", "H"))
data$load <- as.factor(data$load)
dep_vars <- data[, c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')]
dist_matrix <- vegdist(dep_vars, method = "euclidean")id 1 を除外した。
# Convert the "dist" object into a full matrix
full_dist_matrix <- as.matrix(dist_matrix)
# Calculate the average distance of each sample to all other samples
avg_distances <- rowMeans(full_dist_matrix)
# Plot to visualize potential outliers
boxplot(avg_distances, main="Average Distances of Samples", ylab="Distance")# Identify potential outliers
threshold <- quantile(avg_distances, 0.95) # for example, the 95th percentile
potential_outliers <- which(avg_distances > threshold)# Removc id 1
data <- data %>% filter(!(trueid == "1" ))
dep_vars <- data[, c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')]
dist_matrix <- vegdist(dep_vars, method = "euclidean")
full_dist_matrix <- as.matrix(dist_matrix)
avg_distances <- rowMeans(full_dist_matrix)
boxplot(avg_distances, main="Average Distances of Samples", ylab="Distance")# Subset the dependent variables
dep_vars <- data[, c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')]
# 2. Check the assumption of homogeneity of group dispersions
# Create a distance matrix
dist_matrix <- vegdist(dep_vars, method = "euclidean")
# Test for homogeneity of dispersions
grouping <- interaction(data$load, data$proficiency)
dispersion_test <- betadisper(dist_matrix, grouping)
anova(dispersion_test)## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 5 2429 485.89 0.8704 0.5062
## Residuals 62 34612 558.26
# 3. Run the PERMANOVA
set.seed(123) # Set a random seed for reproducibility
permanova_result <- adonis2(dist_matrix ~ data$load * data$proficiency, permutations = 999)
print(permanova_result)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix ~ data$load * data$proficiency, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## data$load 1 6443 0.04946 4.8098 0.029 *
## data$proficiency 2 40190 0.30857 15.0022 0.001 ***
## data$load:data$proficiency 2 567 0.00435 0.2116 0.802
## Residual 62 83047 0.63761
## Total 67 130247 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 1 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 speech… M H 34 34 4.42 33 1.01e-4 1.01e-4 ***
data %>%
pairwise_t_test(
Mid_clause_pause_duration ~ load, paired = TRUE,
p.adjust.method = "bonferroni"
)## # A tibble: 1 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Mid_clause… M H 34 34 -0.721 33 0.476 0.476 ns
## # A tibble: 1 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 contents_a… M H 34 34 2.44 33 0.02 0.02 *
## # A tibble: 1 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 e_asunit M H 34 34 2.25 33 0.031 0.031 *
## # A tibble: 1 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 ld M H 34 34 -2.88 33 0.007 0.007 **
## # A tibble: 5 × 6
## rowname speech_rate Mid_clause_pause_dur…¹ contents_asunit e_asunit ld
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 speech_rate 1 -0.48 0.37 0.2 -0.45
## 2 Mid_clause_… -0.48 1 0.051 -0.026 0.29
## 3 contents_as… 0.37 0.051 1 0.45 -0.13
## 4 e_asunit 0.2 -0.026 0.45 1 -0.23
## 5 ld -0.45 0.29 -0.13 -0.23 1
## # ℹ abbreviated name: ¹Mid_clause_pause_duration
library(gplots)
library(scales)
# 1. Scale the values of each dependent variable
data_scaled <- as.data.frame(lapply(data[c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')], scale))
data_scaled$load <- data$load
# 2. Create an empty matrix and populate with average scaled values
num_vars <- length(c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld'))
num_load_levels <- length(unique(data_scaled$load))
heatmap_matrix <- matrix(0, nrow=num_vars, ncol=num_load_levels)
rownames(heatmap_matrix) <- c('speech_rate', 'Mid_clause_pause_duration', 'contents_asunit', 'e_asunit', 'ld')
colnames(heatmap_matrix) <- unique(data_scaled$load)
for (var in rownames(heatmap_matrix)) {
heatmap_matrix[var, ] <- with(data_scaled, tapply(get(var), load, mean, na.rm=TRUE))
}
# 3. Generate the heatmap
heatmap.2(heatmap_matrix, main="Heatmap for Scaled Dependent Variables",
xlab="Load", ylab="Variables",
col=colorRampPalette(c("blue", "white", "red"))(256), # Colors adjusted for scaled values
trace="none", density.info="none",
margins=c(8,8), srtRow=0, srtCol=45, key.title="Mean Value",
key.xlab="Average scaled values of variables", dendrogram="row")
in separate ANOVA,
M > H減少: speech rate, contents asunit, e asunit M < H増加: ld ns: mid clause pause duration
library(ggplot2)
library(gridExtra)
# Create individual plots
plot1 <- ggboxplot(
data, x = "load", y = "speech_rate", add = "jitter", palette = "lancet"
)
plot2 <- ggboxplot(
data, x = "load", y = "Mid_clause_pause_duration", add = "jitter", palette = "lancet"
)
plot3 <- ggboxplot(
data, x = "load", y = "contents_asunit", add = "jitter", palette = "lancet"
)
plot4 <- ggboxplot(
data, x = "load", y = "ld", add = "jitter", palette = "lancet"
)
plot5 <- ggboxplot(
data, x = "load", y = "e_asunit", add = "jitter", palette = "lancet"
)
# Combine the plots into a single plot
grid.arrange(plot1, plot2, plot3, plot4, plot5, ncol = 2, nrow = 3)