# knitr::purl("morph_PCA.Rmd", output = "morph_PCA.R") # convert Rmd to R script

PCA Functions

Round <- function(number){
  # for plotting
  x <- round(number, 1)
  if(x%%1 == 0){
    return(paste(as.character(x), ".0", sep = ""))
  }
  else{
    return(x)
  }
}
PCA_graphs <- function(dataset, PCA_title){
    # cos2 and the alpha.var: alpha.var colours variables by cos2 
    # (importance of most important PC to variable), 
    # see https://personal.utdallas.edu/~herve/abdi-awPCA2010.pdf
  
    GFpca <- PCA(dataset, scale.unit = TRUE, graph = TRUE, ncp = 10)
    
    eig.val <- get_eigenvalue(GFpca)
    var.val <- GFpca$var
    print(eig.val) #will only show in in console
    print(var.val)
    
    scree <- fviz_eig(GFpca, addlabels = TRUE, ylim = c(0, 100))
    print(scree)
    
    labX <- paste("PC1 (", Round(eig.val[1, 2]), "%)", sep = "")
    labY <- paste("PC1 (", Round(eig.val[2, 2]), "%)", sep = "")
    leplot <- fviz_pca_biplot(GFpca, geom.id = c("point"), 
                              geom.var = c("arrow", "text"), 
                              alpha.var = "cos2",
                              label = "var", repel = T, 
                              col.ind = "gray", col.var = "black")
    print(leplot)
    
    ggpubr::ggpar(leplot, title = PCA_title, xlab = labX, ylab = labY, 
                  ggtheme = theme_classic(), font.main = c(20, "bold"), 
                  font.x = 14, font.y = 14, font.tickslab = 12
                  )
    
    D = cor(dataset)
    test <- cor.mtest(dataset)$p
    par(mfrow=c(1,2))
    corrplot.mixed(D,lower.col = "black", number.cex = .7, p.mat=test, sig.level=0.05)
    corrplot.mixed(D,lower.col = "black", number.cex = .7)
    
    return(GFpca)
}

Reading and Standardizaing Data

source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat/Rsrc/"

script_names = c("clean_morph_data.R",
                 "remove_torn_wings.R"
                  )

for (script in script_names) { 
  path = paste0(source_path, script)
  source(path) 
}
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
data_list <- read_morph_data("data/allmorphology04.27.21-coors.csv")
## morph types: L S  NA LS L/S SL 
##    recoding missing morph types...
##    S if wing2thorax <=2.2, L if wing2thorax >=2.5
## 
## ambiguous wing morph bug count:  32
raw_data = data_list[[1]]
all_bugs = nrow(raw_data)
data_long = data_list[[2]] 

# Remove individuals with torn wings first
raw_data = remove_torn_wings(raw_data) # **don't need to remove torn wings for wing morph analysis
## 
## number of bugs with torn wings: 147
data_long = remove_torn_wings(data_long) # **don't need to remove torn wings for wing morph analysis
## 
## number of bugs with torn wings: 144
clean_bugs = nrow(raw_data)

cat("number of bugs with torn wings:", all_bugs - clean_bugs, "\n\n")
## number of bugs with torn wings: 147

Morphology

Thorax, body, wing, and beak lengths only considered.

d <- raw_data %>%
  select(thorax, body, wing, beak) %>%
  filter(!is.na(body))
colnames(d) <- c("thorax", "body", "wing", "beak")

MorphPCA = PCA_graphs(d, "(a) ")
## Warning in PCA(dataset, scale.unit = TRUE, graph = TRUE, ncp = 10): Missing
## values are imputed by the mean of the variable: you should use the imputePCA
## function of the missMDA package

##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.30562422       82.6406056                    82.64061
## Dim.2 0.56129568       14.0323919                    96.67300
## Dim.3 0.12110532        3.0276329                    99.70063
## Dim.4 0.01197479        0.2993697                   100.00000
## $coord
##            Dim.1      Dim.2       Dim.3        Dim.4
## thorax 0.9492103  0.1218156 -0.28996599  0.008972550
## body   0.9610791 -0.2551954  0.06631575 -0.082489423
## wing   0.9223435 -0.3664841  0.09962757  0.071036425
## beak   0.7938724  0.5890852  0.15067053  0.006603123
## 
## $cor
##            Dim.1      Dim.2       Dim.3        Dim.4
## thorax 0.9492103  0.1218156 -0.28996599  0.008972550
## body   0.9610791 -0.2551954  0.06631575 -0.082489423
## wing   0.9223435 -0.3664841  0.09962757  0.071036425
## beak   0.7938724  0.5890852  0.15067053  0.006603123
## 
## $cos2
##            Dim.1      Dim.2       Dim.3        Dim.4
## thorax 0.9010002 0.01483905 0.084080276 8.050665e-05
## body   0.9236730 0.06512468 0.004397779 6.804505e-03
## wing   0.8507176 0.13431057 0.009925652 5.046174e-03
## beak   0.6302334 0.34702138 0.022701609 4.360123e-05
## 
## $contrib
##           Dim.1     Dim.2     Dim.3      Dim.4
## thorax 27.25658  2.643713 69.427403  0.6723013
## body   27.94247 11.602562  3.631367 56.8236016
## wing   25.73546 23.928667  8.195885 42.1399885
## beak   19.06549 61.825058 18.745345  0.3641086

vars <- MorphPCA$ind
par(mfrow=c(2,3))

#Correlations
m <- lm(d$thorax ~ vars$coord[,1])
plot(vars$coord[,1], d$thorax, xlab="PC1", ylab="thorax (mm)")
cor_val <- paste("R = ", round(cor(vars$coord[,1], d$thorax), 2))
text(-2,2, cor_val)
abline(m, lty=2)

m <- lm(d$body ~ vars$coord[,1])
plot(vars$coord[,1], d$body, xlab="PC1", ylab="body (mm)")
cor_val <- paste("R = ", round(cor(vars$coord[,1], d$body), 2))
text(-2,2, cor_val)
abline(m, lty=2)

m <- lm(d$wing ~ vars$coord[,1])
plot(vars$coord[,1], d$wing, xlab="PC1", ylab="wing (mm)")
cor_val <- paste("R = ", round(cor(vars$coord[,1], d$wing), 2))
text(-2,2, cor_val)
abline(m, lty=2)

m <- lm(d$beak ~ vars$coord[,1])
plot(vars$coord[,1], d$beak, xlab="PC1", ylab="beak (mm)")
cor_val <- paste("R = ", round(cor(vars$coord[,1], d$beak), 2))
text(-2,2, cor_val)
abline(m, lty=2)

m <- lm(d$beak ~ vars$coord[,2])
plot(vars$coord[,2], d$beak, xlab="PC2", ylab="beak (mm)")
cor_val <- paste("R = ", round(cor(vars$coord[,2], d$beak), 2))
text(-0.5,3, cor_val)
abline(m, lty=2)

#Plot by host
host <- raw_data %>%
  filter(!is.na(body)) %>%
  select(pophost)
  

d$pophost <- host$pophost
fviz_pca_ind(MorphPCA,
             geom.ind = "point",
             col.ind = d$pophost,
             legend.title = "Host Plant",
             addEllipses=TRUE)

# Plot by sex
sex <- raw_data %>%
  filter(!is.na(body)) %>%
  select(sex) 

d$sex <- sex$sex
fviz_pca_ind(MorphPCA,
             geom.ind = "point",
             col.ind = d$sex,
             legend.title = "Sex",
             addEllipses=TRUE)

# Plot by distance from the sympatric zone
# sym_dist <- raw_data %>%
#   filter(!is.na(body)) %>%
#   select(sym_dist)
# 
# d$sym_dist <- sym_dist$sym_dist
# fviz_pca_ind(MorphPCA,
#              geom.ind = "point",
#              col.ind = d$sym_dist,
#              legend.title = "Sympatric Zone Distance")

# Plot by population
pop <- raw_data %>%
  filter(!is.na(body)) %>%
  select(population)

d$population <- pop$pop
fviz_pca_ind(MorphPCA,
             geom.ind = "point",
             col.ind = d$population,
             legend.title = "Population",
             addEllipses=TRUE)

Morphology w/ Wing2Body

raw_data$wing2body = raw_data$wing / raw_data$body 
# No mass
d <- raw_data %>%
  select(thorax, wing2body, beak) %>%
  filter(!is.na(beak)) %>%
  filter(!is.na(wing2body))
colnames(d) <- c("thorax", "wing2body", "beak")

MorphPCA = PCA_graphs(d, "(c) ")
## Warning in PCA(dataset, scale.unit = TRUE, graph = TRUE, ncp = 10): Missing
## values are imputed by the mean of the variable: you should use the imputePCA
## function of the missMDA package

##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.0153752        67.179174                    67.17917
## Dim.2  0.8319997        27.733323                    94.91250
## Dim.3  0.1526251         5.087503                   100.00000
## $coord
##               Dim.1       Dim.2      Dim.3
## thorax    0.9541162 -0.08555843 -0.2869530
## wing2body 0.6195092  0.77673931  0.1135097
## beak      0.8492620 -0.47048431  0.2395801
## 
## $cor
##               Dim.1       Dim.2      Dim.3
## thorax    0.9541162 -0.08555843 -0.2869530
## wing2body 0.6195092  0.77673931  0.1135097
## beak      0.8492620 -0.47048431  0.2395801
## 
## $cos2
##               Dim.1       Dim.2      Dim.3
## thorax    0.9103377 0.007320245 0.08234203
## wing2body 0.3837916 0.603323948 0.01288444
## beak      0.7212459 0.221355485 0.05739861
## 
## $contrib
##              Dim.1      Dim.2     Dim.3
## thorax    45.16964  0.8798375 53.950523
## wing2body 19.04318 72.5149256  8.441891
## beak      35.78718 26.6052369 37.607586

# long wing only

data_long$wing2body = data_long$wing / data_long$body 
# No mass
d <- data_long %>%
  select(thorax, wing2body, beak) %>%
  filter(!is.na(beak)) %>%
  filter(!is.na(wing2body))
colnames(d) <- c("thorax", "wing2body", "beak")

MorphPCA = PCA_graphs(d, "(d) ")

##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1   1.790973        59.699102                    59.69910
## Dim.2   1.017371        33.912365                    93.61147
## Dim.3   0.191656         6.388533                   100.00000
## $coord
##                Dim.1       Dim.2      Dim.3
## thorax    0.94858140  0.07712449 -0.3069937
## wing2body 0.05707906  0.99618441  0.0660197
## beak      0.94228890 -0.13798329  0.3050447
## 
## $cor
##                Dim.1       Dim.2      Dim.3
## thorax    0.94858140  0.07712449 -0.3069937
## wing2body 0.05707906  0.99618441  0.0660197
## beak      0.94228890 -0.13798329  0.3050447
## 
## $cos2
##                 Dim.1       Dim.2       Dim.3
## thorax    0.899806664 0.005948186 0.094245149
## wing2body 0.003258019 0.992383379 0.004358601
## beak      0.887908366 0.019039389 0.093052245
## 
## $contrib
##                Dim.1      Dim.2    Dim.3
## thorax    50.2412174  0.5846625 49.17412
## wing2body  0.1819134 97.5439071  2.27418
## beak      49.5768692  1.8714304 48.55170