# If needed, uncomment to install packages:
# pkgs <- c("tidyverse","janitor","GGally","factoextra","ggfortify","pheatmap",
# "cluster","NbClust","randomForest","ranger","caret","e1071",
# "lme4","lmerTest","broom.mixed","performance","patchwork")
#install.packages(setdiff(pkgs, rownames(installed.packages())), dependencies = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(GGally)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggfortify)
library(pheatmap)
library(cluster)
library(NbClust)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(ranger)
##
## Attaching package: 'ranger'
##
## The following object is masked from 'package:randomForest':
##
## importance
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(e1071)
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:ggplot2':
##
## element
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lmerTest)
##
## Attaching package: 'lmerTest'
##
## The following object is masked from 'package:lme4':
##
## lmer
##
## The following object is masked from 'package:stats':
##
## step
library(broom.mixed)
library(performance)
library(patchwork)
raw <- read.csv("Trait.Absolute_MMJ.csv")
raw
let’s reshape it first too
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
raw2 <- dcast(Genotype + Temperature + CO2 + Condition ~ Trait, data = raw)
raw2
Coerce factors; trim white spaces
dat <- raw2 |>
mutate(
Genotype = factor(str_squish(as.character(Genotype))),
Condition = factor(str_squish(as.character(Condition))),
Temperature= factor(str_squish(as.character(Temperature))),
CO2 = factor(str_squish(as.character(CO2)))
)
dat
Identify trait columns = all numeric columns not in metadata
meta_cols <- c("Genotype","Condition","Temperature","CO2")
trait_cols <- names(dat)[sapply(dat, is.numeric) & !names(dat) %in% meta_cols]
dat
Composite environment factor
dat <- dat |>
mutate(environment = paste(dat$Condition, ".T.",dat$Temperature,".CO2.", dat$CO2, sep = "")) |>
mutate(environment = factor(environment))
dat
Make scaled trait matrix
X <- scale(dat[, trait_cols, drop = FALSE])
X_df <- as.data.frame(X)
X_df
Pairwise plot for traits (keeps file small)
pdf("Correlation_CvsD.pdf", width= 10, height = 10)
ggpairs(dat, columns = 5:13, aes(color = Condition))
dev.off()
## quartz_off_screen
## 2
pdf("Correlation_CO2.pdf", width= 10, height = 10)
ggpairs(dat, columns = 5:13, aes(color = CO2))
dev.off()
## quartz_off_screen
## 2
pdf("Correlation_Temp.pdf", width= 10, height = 10)
ggpairs(dat, columns = 5:13, aes(color = Temperature))
dev.off()
## quartz_off_screen
## 2
pca <- prcomp(X_df, scale = TRUE)
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.0063 1.6813 1.0338 0.75574 0.53402 0.36070 0.27105
## Proportion of Variance 0.4473 0.3141 0.1188 0.06346 0.03169 0.01446 0.00816
## Cumulative Proportion 0.4473 0.7614 0.8801 0.94357 0.97526 0.98972 0.99788
## PC8 PC9
## Standard deviation 0.13662 0.02070
## Proportion of Variance 0.00207 0.00005
## Cumulative Proportion 0.99995 1.00000
pca
## Standard deviations (1, .., p=9):
## [1] 2.00631513 1.68133788 1.03383118 0.75574180 0.53402354 0.36070496 0.27105073
## [8] 0.13661537 0.02069903
##
## Rotation (n x k) = (9 x 9):
## PC1 PC2 PC3 PC4 PC5 PC6
## AriIdx -0.3005591 -0.179666192 0.57174937 -0.519507378 0.30275678 0.19022160
## ChlIdx 0.4304852 0.191361585 0.23329407 -0.105840306 -0.04685558 -0.76669375
## Fq.Fm 0.4598663 0.007295212 0.35596262 -0.003994782 0.02331185 0.20612269
## Fv.Fm 0.2696364 -0.413971205 -0.24980928 0.259419456 0.61196625 0.02678297
## GR -0.2945924 -0.003241439 0.52641188 0.785434031 -0.04897422 -0.05926574
## NPQ -0.3013932 0.458956854 0.07076547 -0.131106497 0.16070366 -0.15180967
## npq.t 0.2133399 0.507899956 -0.09298904 0.077490731 -0.23269077 0.49302599
## phiNO -0.3738561 -0.323450000 -0.25952132 -0.058045657 -0.43181256 -0.21125264
## phiNPQ -0.2779586 0.431788440 -0.26729565 0.090143543 0.51211867 -0.13613095
## PC7 PC8 PC9
## AriIdx 0.37952729 -0.090551762 -0.0226383013
## ChlIdx 0.34615026 -0.048597429 0.0154419724
## Fq.Fm -0.30719929 0.131055623 -0.7121568787
## Fv.Fm 0.33579126 0.371778411 -0.0050928807
## GR 0.11517599 -0.003452863 -0.0006229628
## NPQ -0.17383073 0.772417026 0.0246383465
## npq.t 0.61379926 0.087137209 -0.0176036854
## phiNO 0.32732487 0.199885073 -0.5538101559
## phiNPQ 0.04047378 -0.435725814 -0.4294562618
fviz_pca_biplot(pca, label = "var", habillage = dat$Temperature,
addEllipses=TRUE, ellipse.level=0.95)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fviz_pca_biplot(pca, label = "var", habillage = dat$CO2,
addEllipses=TRUE, ellipse.level=0.95)
fviz_pca_biplot(pca, label = "var", habillage = dat$Condition,
addEllipses=TRUE, ellipse.level=0.95)
fviz_pca_ind(pca, label = "var", habillage = dat$Temperature,
addEllipses=TRUE, ellipse.level=0.95)
dat$Temp_and_CO2 <- paste("Temp.", dat$Temperature, ".CO2.", dat$CO2, sep="")
dat$environment
## [1] Control.T.ambient.CO2.ambient Drought.T.ambient.CO2.ambient
## [3] Control.T.ambient.CO2.high Drought.T.ambient.CO2.high
## [5] Control.T.high.CO2.ambient Drought.T.high.CO2.ambient
## [7] Control.T.high.CO2.high Drought.T.high.CO2.high
## [9] Control.T.ambient.CO2.ambient Drought.T.ambient.CO2.ambient
## [11] Control.T.ambient.CO2.high Drought.T.ambient.CO2.high
## [13] Control.T.high.CO2.ambient Drought.T.high.CO2.ambient
## [15] Control.T.high.CO2.high Drought.T.high.CO2.high
## [17] Control.T.ambient.CO2.ambient Drought.T.ambient.CO2.ambient
## [19] Control.T.ambient.CO2.high Drought.T.ambient.CO2.high
## [21] Control.T.high.CO2.ambient Drought.T.high.CO2.ambient
## [23] Control.T.high.CO2.high Drought.T.high.CO2.high
## [25] Control.T.ambient.CO2.ambient Drought.T.ambient.CO2.ambient
## [27] Control.T.ambient.CO2.high Drought.T.ambient.CO2.high
## [29] Control.T.high.CO2.ambient Drought.T.high.CO2.ambient
## [31] Control.T.high.CO2.high Drought.T.high.CO2.high
## [33] Control.T.ambient.CO2.ambient Drought.T.ambient.CO2.ambient
## [35] Control.T.ambient.CO2.high Drought.T.ambient.CO2.high
## [37] Control.T.high.CO2.ambient Drought.T.high.CO2.ambient
## [39] Control.T.high.CO2.high Drought.T.high.CO2.high
## [41] Control.T.ambient.CO2.ambient Drought.T.ambient.CO2.ambient
## [43] Control.T.ambient.CO2.high Drought.T.ambient.CO2.high
## [45] Control.T.high.CO2.ambient Drought.T.high.CO2.ambient
## [47] Control.T.high.CO2.high Drought.T.high.CO2.high
## 8 Levels: Control.T.ambient.CO2.ambient ... Drought.T.high.CO2.high
fviz_pca_ind(pca, label = "var", habillage = dat$Temp_and_CO2,
addEllipses=TRUE, ellipse.level=0.95)
fviz_pca_ind(pca, label = "var", habillage = dat$environment,
addEllipses=TRUE, ellipse.level=0.8)
pdf("PCA_all_env.pdf")
fviz_pca_ind(pca, label = "var", habillage = dat$environment,
addEllipses=TRUE, ellipse.level=0.8)
dev.off()
## quartz_off_screen
## 2
pdf("PCA_Temp_and_CO2.pdf")
fviz_pca_ind(pca, label = "var", habillage = dat$Temp_and_CO2,
addEllipses=TRUE, ellipse.level=0.8)
dev.off()
## quartz_off_screen
## 2
pdf("PCA_CO2.pdf")
fviz_pca_biplot(pca, label = "var", habillage = dat$CO2,
addEllipses=TRUE, ellipse.level=0.8)
dev.off()
## quartz_off_screen
## 2
pdf("PCA_Temp.pdf")
fviz_pca_biplot(pca, label = "var", habillage = dat$Temperature,
addEllipses=TRUE, ellipse.level=0.8)
dev.off()
## quartz_off_screen
## 2
pdf("PCA_Drought.pdf")
fviz_pca_biplot(pca, label = "var", habillage = dat$Condition,
addEllipses=TRUE, ellipse.level=0.8)
dev.off()
## quartz_off_screen
## 2
fviz_pca_var(pca, col.var="steelblue")+
theme_minimal()
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fviz_pca_var(pca, col.var="contrib")+
scale_color_gradient2(low="white",
high="red", midpoint=8) +
theme_minimal()
# ANOVA
anova_res <- aov(as.matrix(X_df$AriIdx) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.076 0.076 0.601 0.443
## Temperature 1 13.594 13.594 107.490 6.75e-13 ***
## CO2 1 19.844 19.844 156.906 2.00e-15 ***
## Condition:Temperature 1 0.027 0.027 0.216 0.645
## Condition:CO2 1 0.016 0.016 0.128 0.722
## Temperature:CO2 1 8.330 8.330 65.869 5.51e-10 ***
## Condition:Temperature:CO2 1 0.053 0.053 0.420 0.521
## Residuals 40 5.059 0.126
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$ChlIdx) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.45 0.45 4.660 0.0369 *
## Temperature 1 40.32 40.32 418.869 < 2e-16 ***
## CO2 1 0.37 0.37 3.807 0.0581 .
## Condition:Temperature 1 0.01 0.01 0.113 0.7385
## Condition:CO2 1 0.10 0.10 1.041 0.3137
## Temperature:CO2 1 1.90 1.90 19.744 6.84e-05 ***
## Condition:Temperature:CO2 1 0.00 0.00 0.041 0.8401
## Residuals 40 3.85 0.10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$Fq.Fm) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.00 0.00 0.006 0.93623
## Temperature 1 41.61 41.61 735.173 < 2e-16 ***
## CO2 1 0.52 0.52 9.108 0.00441 **
## Condition:Temperature 1 0.09 0.09 1.672 0.20346
## Condition:CO2 1 0.45 0.45 7.882 0.00768 **
## Temperature:CO2 1 1.66 1.66 29.296 3.15e-06 ***
## Condition:Temperature:CO2 1 0.41 0.41 7.286 0.01013 *
## Residuals 40 2.26 0.06
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$Fv.Fm) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.019 0.019 0.322 0.573
## Temperature 1 5.933 5.933 99.861 1.97e-12 ***
## CO2 1 17.094 17.094 287.688 < 2e-16 ***
## Condition:Temperature 1 0.000 0.000 0.003 0.959
## Condition:CO2 1 0.020 0.020 0.331 0.568
## Temperature:CO2 1 21.520 21.520 362.179 < 2e-16 ***
## Condition:Temperature:CO2 1 0.038 0.038 0.634 0.431
## Residuals 40 2.377 0.059
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$GR) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 5.648 5.648 9.165 0.004303 **
## Temperature 1 8.380 8.380 13.596 0.000673 ***
## CO2 1 5.270 5.270 8.551 0.005663 **
## Condition:Temperature 1 0.762 0.762 1.237 0.272703
## Condition:CO2 1 2.119 2.119 3.438 0.071081 .
## Temperature:CO2 1 0.001 0.001 0.002 0.969196
## Condition:Temperature:CO2 1 0.167 0.167 0.270 0.605997
## Residuals 40 24.653 0.616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$NPQ) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.095 0.095 0.520 0.475
## Temperature 1 9.936 9.936 54.144 5.97e-09 ***
## CO2 1 7.323 7.323 39.906 1.69e-07 ***
## Condition:Temperature 1 0.397 0.397 2.166 0.149
## Condition:CO2 1 0.003 0.003 0.015 0.902
## Temperature:CO2 1 21.578 21.578 117.581 1.78e-13 ***
## Condition:Temperature:CO2 1 0.326 0.326 1.777 0.190
## Residuals 40 7.341 0.184
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$npq.t) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.000 0.000 0.000 0.998
## Temperature 1 13.251 13.251 45.815 3.94e-08 ***
## CO2 1 0.019 0.019 0.066 0.799
## Condition:Temperature 1 0.058 0.058 0.202 0.655
## Condition:CO2 1 0.002 0.002 0.006 0.940
## Temperature:CO2 1 22.095 22.095 76.390 8.08e-11 ***
## Condition:Temperature:CO2 1 0.005 0.005 0.018 0.893
## Residuals 40 11.569 0.289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$phiNO) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.145 0.145 0.605 0.4413
## Temperature 1 31.515 31.515 131.542 3.21e-14 ***
## CO2 1 0.725 0.725 3.024 0.0897 .
## Condition:Temperature 1 0.991 0.991 4.135 0.0487 *
## Condition:CO2 1 0.386 0.386 1.610 0.2119
## Temperature:CO2 1 2.866 2.866 11.961 0.0013 **
## Condition:Temperature:CO2 1 0.790 0.790 3.297 0.0769 .
## Residuals 40 9.583 0.240
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_res <- aov(as.matrix(X_df$phiNPQ) ~ Condition * Temperature * CO2, data = dat)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 1 0.309 0.309 0.970 0.331
## Temperature 1 11.569 11.569 36.326 4.33e-07 ***
## CO2 1 0.017 0.017 0.053 0.819
## Condition:Temperature 1 0.724 0.724 2.273 0.139
## Condition:CO2 1 0.103 0.103 0.325 0.572
## Temperature:CO2 1 21.507 21.507 67.534 4.01e-10 ***
## Condition:Temperature:CO2 1 0.032 0.032 0.102 0.751
## Residuals 40 12.739 0.318
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dat
manova_res <- manova(as.matrix(X_df) ~ Condition * Temperature * CO2, data = dat)
summary(manova_res)
## Df Pillai approx F num Df den Df Pr(>F)
## Condition 1 0.30681 1.57 9 32 0.16549
## Temperature 1 0.99131 405.55 9 32 < 2e-16 ***
## CO2 1 0.96805 107.71 9 32 < 2e-16 ***
## Condition:Temperature 1 0.15800 0.67 9 32 0.73178
## Condition:CO2 1 0.33556 1.80 9 32 0.10793
## Temperature:CO2 1 0.96053 86.52 9 32 < 2e-16 ***
## Condition:Temperature:CO2 1 0.34761 1.89 9 32 0.08902 .
## Residuals 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dist_matrix <- dist(X_df)
cluster <- hclust(dist_matrix)
plot(cluster)
data <- cbind(dat$Condition, X_df)
colnames(data)[1] <- "Condition"
data
rf <- randomForest(Condition ~ ., data, importance = TRUE)
varImpPlot(rf)
for (g in unique(dat$Genotype)) {
cat("\n### PCA for genotype:", g, "\n")
pca_g <- prcomp(subset(X_df, dat$Genotype == g), scale = TRUE)
print(fviz_pca_biplot(pca_g))
}
##
## ### PCA for genotype: Australia
##
## ### PCA for genotype: Mexico
##
## ### PCA for genotype: Pakistan
##
## ### PCA for genotype: S.Africa
##
## ### PCA for genotype: Texas
##
## ### PCA for genotype: Vietnam