We will be going through
library(tidyverse)
library(readxl)
library(reshape2)
library(Hmisc)
TAS_original_data <- read_excel("C:/ZZ_SherMay/BHP/TAS_original_data.xlsx")
view(TAS_original_data)
head(TAS_original_data)
## # A tibble: 6 × 75
## TAS TAS05 TAS15 ER30001 ER30002 ER32000 ER32006 ER33801 ER33802 ER33803
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 NA 1 4 39 2 2 289 3 60
## 2 1 NA 1 4 41 2 2 1157 3 30
## 3 1 1 NA 4 180 2 3 771 2 22
## 4 1 1 NA 5 32 2 2 624 3 30
## 5 1 NA 1 5 33 1 2 1504 3 30
## 6 1 1 NA 6 34 1 2 1202 51 30
## # ℹ 65 more variables: TA050001 <dbl>, TA050002 <dbl>, TA050003 <dbl>,
## # TA050004 <dbl>, TA050044 <dbl>, TA050047 <dbl>, TA050050 <dbl>,
## # TA050065 <dbl>, TA050066 <dbl>, TA050067 <dbl>, TA050070 <dbl>,
## # TA050071 <dbl>, TA050127 <dbl>, TA050128 <dbl>, TA050129 <dbl>,
## # TA050130 <dbl>, TA050573 <dbl>, TA050574 <dbl>, TA050575 <dbl>,
## # TA050594 <dbl>, TA050595 <dbl>, TA050639 <dbl>, TA050663 <dbl>,
## # TA050664 <dbl>, TA050665 <dbl>, TA050670 <dbl>, TA050675 <dbl>, …
Separating the data
correlation_data_2005 <- TAS_original_data[, c(15, 16, 17, 18, 19, 20, 32, 33, 34, 35, 37, 38)] %>% drop_na()
head(correlation_data_2005)
## # A tibble: 6 × 12
## TA050044 TA050047 TA050050 TA050065 TA050066 TA050067 TA050639 TA050663
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 5 7 3 1 1 7 7
## 2 5 5 5 7 7 7 7 7
## 3 2 2 6 1 1 1 0 7
## 4 4 4 4 2 1 1 6 5
## 5 3 4 3 4 2 2 6 5
## 6 4 5 6 4 5 2 6 3
## # ℹ 4 more variables: TA050664 <dbl>, TA050665 <dbl>, TA050675 <dbl>,
## # TA050676 <dbl>
Change variable names
colnames(correlation_data_2005) =c("B5A", "B5D", "B6C", "C2D", "C2E", "C2F", "G30A", "G41A", "G41B", "G41C", "G41P", "H1")
head(correlation_data_2005)
## # A tibble: 6 × 12
## B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C G41P H1
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 5 7 3 1 1 7 7 7 7 6 3
## 2 5 5 5 7 7 7 7 7 6 6 5 2
## 3 2 2 6 1 1 1 0 7 5 7 3 1
## 4 4 4 4 2 1 1 6 5 6 6 5 2
## 5 3 4 3 4 2 2 6 5 5 5 5 2
## 6 4 5 6 4 5 2 6 3 6 4 4 1
Correlation for 2005
correlation_2005 <- round(cor(correlation_data_2005),4)
view(correlation_2005)
knitr::kable(correlation_2005)
| B5A | B5D | B6C | C2D | C2E | C2F | G30A | G41A | G41B | G41C | G41P | H1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| B5A | 1.0000 | 0.3012 | 0.1091 | 0.0758 | 0.0066 | 0.0032 | 0.0251 | 0.1945 | 0.1956 | 0.1088 | 0.0512 | 0.0270 |
| B5D | 0.3012 | 1.0000 | 0.2784 | -0.0056 | -0.0159 | 0.0043 | 0.0174 | 0.0114 | 0.0537 | 0.0084 | -0.0021 | 0.0149 |
| B6C | 0.1091 | 0.2784 | 1.0000 | -0.1798 | -0.1090 | -0.0633 | 0.1073 | 0.0948 | 0.1359 | 0.1164 | 0.0592 | -0.0525 |
| C2D | 0.0758 | -0.0056 | -0.1798 | 1.0000 | 0.3973 | 0.3441 | -0.0647 | -0.0525 | -0.0420 | -0.1032 | 0.0367 | 0.0938 |
| C2E | 0.0066 | -0.0159 | -0.1090 | 0.3973 | 1.0000 | 0.6457 | -0.0805 | -0.0736 | -0.1292 | -0.1421 | -0.0158 | 0.1374 |
| C2F | 0.0032 | 0.0043 | -0.0633 | 0.3441 | 0.6457 | 1.0000 | -0.0796 | -0.0608 | -0.0835 | -0.1332 | 0.0115 | 0.1762 |
| G30A | 0.0251 | 0.0174 | 0.1073 | -0.0647 | -0.0805 | -0.0796 | 1.0000 | 0.1241 | 0.0929 | 0.1100 | 0.0645 | -0.0532 |
| G41A | 0.1945 | 0.0114 | 0.0948 | -0.0525 | -0.0736 | -0.0608 | 0.1241 | 1.0000 | 0.5046 | 0.4236 | 0.2560 | 0.0149 |
| G41B | 0.1956 | 0.0537 | 0.1359 | -0.0420 | -0.1292 | -0.0835 | 0.0929 | 0.5046 | 1.0000 | 0.5877 | 0.2764 | -0.0764 |
| G41C | 0.1088 | 0.0084 | 0.1164 | -0.1032 | -0.1421 | -0.1332 | 0.1100 | 0.4236 | 0.5877 | 1.0000 | 0.3008 | -0.0893 |
| G41P | 0.0512 | -0.0021 | 0.0592 | 0.0367 | -0.0158 | 0.0115 | 0.0645 | 0.2560 | 0.2764 | 0.3008 | 1.0000 | -0.0220 |
| H1 | 0.0270 | 0.0149 | -0.0525 | 0.0938 | 0.1374 | 0.1762 | -0.0532 | 0.0149 | -0.0764 | -0.0893 | -0.0220 | 1.0000 |
melted_correlation_2005 <- melt(correlation_2005)
head(melted_correlation_2005)
## Var1 Var2 value
## 1 B5A B5A 1.0000
## 2 B5D B5A 0.3012
## 3 B6C B5A 0.1091
## 4 C2D B5A 0.0758
## 5 C2E B5A 0.0066
## 6 C2F B5A 0.0032
get_upper_tri <- function(correlation_2005){
correlation_2005[lower.tri(correlation_2005)]<- NA
return(correlation_2005)
}
upper_tri <- get_upper_tri(correlation_2005)
upper_tri
## B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C
## B5A 1 0.3012 0.1091 0.0758 0.0066 0.0032 0.0251 0.1945 0.1956 0.1088
## B5D NA 1.0000 0.2784 -0.0056 -0.0159 0.0043 0.0174 0.0114 0.0537 0.0084
## B6C NA NA 1.0000 -0.1798 -0.1090 -0.0633 0.1073 0.0948 0.1359 0.1164
## C2D NA NA NA 1.0000 0.3973 0.3441 -0.0647 -0.0525 -0.0420 -0.1032
## C2E NA NA NA NA 1.0000 0.6457 -0.0805 -0.0736 -0.1292 -0.1421
## C2F NA NA NA NA NA 1.0000 -0.0796 -0.0608 -0.0835 -0.1332
## G30A NA NA NA NA NA NA 1.0000 0.1241 0.0929 0.1100
## G41A NA NA NA NA NA NA NA 1.0000 0.5046 0.4236
## G41B NA NA NA NA NA NA NA NA 1.0000 0.5877
## G41C NA NA NA NA NA NA NA NA NA 1.0000
## G41P NA NA NA NA NA NA NA NA NA NA
## H1 NA NA NA NA NA NA NA NA NA NA
## G41P H1
## B5A 0.0512 0.0270
## B5D -0.0021 0.0149
## B6C 0.0592 -0.0525
## C2D 0.0367 0.0938
## C2E -0.0158 0.1374
## C2F 0.0115 0.1762
## G30A 0.0645 -0.0532
## G41A 0.2560 0.0149
## G41B 0.2764 -0.0764
## G41C 0.3008 -0.0893
## G41P 1.0000 -0.0220
## H1 NA 1.0000
melted_correlation_2005 <- melt(upper_tri, na.rm = TRUE)
ggheatmap <- ggplot(melted_correlation_2005, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 10, hjust = 1))+
coord_fixed()
print(ggheatmap)
pearson_correlation_2005 <- ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 2) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.7),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(pearson_correlation_2005)
Separating the data
correlation_data_2015 <- TAS_original_data[, c(49,50,51,52,53,54,66,67,68,69,71,72)] %>% drop_na()
head(correlation_data_2015)
## # A tibble: 6 × 12
## TA150045 TA150048 TA150051 TA150066 TA150067 TA150068 TA150784 TA150808
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 5 4 7 7 5 5 1
## 2 4 5 7 7 7 7 1 1
## 3 4 3 5 5 6 6 5 5
## 4 3 3 5 1 1 3 7 1
## 5 5 5 6 4 5 5 7 5
## 6 5 5 7 7 1 7 7 7
## # ℹ 4 more variables: TA150809 <dbl>, TA150810 <dbl>, TA150820 <dbl>,
## # TA150821 <dbl>
Change variable names
colnames(correlation_data_2015) =c("B5A", "B5D", "B6C", "C2D", "C2E", "C2F", "G30A", "G41A", "G41B", "G41C", "G41P", "H1")
head(correlation_data_2015)
## # A tibble: 6 × 12
## B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C G41P H1
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 5 4 7 7 5 5 1 2 0 2 3
## 2 4 5 7 7 7 7 1 1 5 3 1 4
## 3 4 3 5 5 6 6 5 5 5 5 5 3
## 4 3 3 5 1 1 3 7 1 1 0 1 2
## 5 5 5 6 4 5 5 7 5 5 0 4 2
## 6 5 5 7 7 1 7 7 7 7 7 7 4
Correlation for 2015
correlation_2015 <- round(cor(correlation_data_2015),2)
view(correlation_2015)
knitr::kable(correlation_2015)
| B5A | B5D | B6C | C2D | C2E | C2F | G30A | G41A | G41B | G41C | G41P | H1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| B5A | 1.00 | 0.39 | 0.14 | -0.08 | -0.16 | -0.13 | 0.17 | 0.11 | 0.12 | -0.18 | 0.05 | -0.04 |
| B5D | 0.39 | 1.00 | 0.23 | -0.06 | -0.08 | -0.07 | 0.13 | -0.02 | 0.04 | -0.05 | 0.01 | -0.01 |
| B6C | 0.14 | 0.23 | 1.00 | -0.22 | -0.12 | -0.13 | 0.17 | 0.13 | 0.10 | 0.00 | 0.06 | -0.18 |
| C2D | -0.08 | -0.06 | -0.22 | 1.00 | 0.51 | 0.47 | -0.19 | -0.07 | -0.04 | 0.02 | -0.02 | 0.21 |
| C2E | -0.16 | -0.08 | -0.12 | 0.51 | 1.00 | 0.67 | -0.30 | -0.03 | -0.05 | 0.06 | -0.05 | 0.22 |
| C2F | -0.13 | -0.07 | -0.13 | 0.47 | 0.67 | 1.00 | -0.26 | -0.01 | -0.06 | 0.05 | 0.00 | 0.23 |
| G30A | 0.17 | 0.13 | 0.17 | -0.19 | -0.30 | -0.26 | 1.00 | 0.33 | 0.30 | 0.12 | 0.17 | -0.16 |
| G41A | 0.11 | -0.02 | 0.13 | -0.07 | -0.03 | -0.01 | 0.33 | 1.00 | 0.51 | 0.14 | 0.39 | -0.04 |
| G41B | 0.12 | 0.04 | 0.10 | -0.04 | -0.05 | -0.06 | 0.30 | 0.51 | 1.00 | 0.23 | 0.36 | -0.06 |
| G41C | -0.18 | -0.05 | 0.00 | 0.02 | 0.06 | 0.05 | 0.12 | 0.14 | 0.23 | 1.00 | 0.05 | -0.03 |
| G41P | 0.05 | 0.01 | 0.06 | -0.02 | -0.05 | 0.00 | 0.17 | 0.39 | 0.36 | 0.05 | 1.00 | -0.04 |
| H1 | -0.04 | -0.01 | -0.18 | 0.21 | 0.22 | 0.23 | -0.16 | -0.04 | -0.06 | -0.03 | -0.04 | 1.00 |
melted_correlation_2015 <- melt(correlation_2015)
head(melted_correlation_2015)
## Var1 Var2 value
## 1 B5A B5A 1.00
## 2 B5D B5A 0.39
## 3 B6C B5A 0.14
## 4 C2D B5A -0.08
## 5 C2E B5A -0.16
## 6 C2F B5A -0.13
get_upper_tri <- function(correlation_2015){
correlation_2015[lower.tri(correlation_2015)]<- NA
return(correlation_2015)
}
upper_tri <- get_upper_tri(correlation_2015)
upper_tri
## B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C G41P H1
## B5A 1 0.39 0.14 -0.08 -0.16 -0.13 0.17 0.11 0.12 -0.18 0.05 -0.04
## B5D NA 1.00 0.23 -0.06 -0.08 -0.07 0.13 -0.02 0.04 -0.05 0.01 -0.01
## B6C NA NA 1.00 -0.22 -0.12 -0.13 0.17 0.13 0.10 0.00 0.06 -0.18
## C2D NA NA NA 1.00 0.51 0.47 -0.19 -0.07 -0.04 0.02 -0.02 0.21
## C2E NA NA NA NA 1.00 0.67 -0.30 -0.03 -0.05 0.06 -0.05 0.22
## C2F NA NA NA NA NA 1.00 -0.26 -0.01 -0.06 0.05 0.00 0.23
## G30A NA NA NA NA NA NA 1.00 0.33 0.30 0.12 0.17 -0.16
## G41A NA NA NA NA NA NA NA 1.00 0.51 0.14 0.39 -0.04
## G41B NA NA NA NA NA NA NA NA 1.00 0.23 0.36 -0.06
## G41C NA NA NA NA NA NA NA NA NA 1.00 0.05 -0.03
## G41P NA NA NA NA NA NA NA NA NA NA 1.00 -0.04
## H1 NA NA NA NA NA NA NA NA NA NA NA 1.00
melted_correlation_2015 <- melt(upper_tri, na.rm = TRUE)
ggheatmap_2015 <- ggplot(melted_correlation_2015, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 10, hjust = 1))+
coord_fixed()
print(ggheatmap_2015)
pearson_correlation_2015 <- ggheatmap_2015 +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 2) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.7),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
print(pearson_correlation_2015)
print(pearson_correlation_2005)
print(pearson_correlation_2015)
p_value_2005 <- rcorr(as.matrix(correlation_data_2005))
knitr::kable(p_value_2005$P)
| B5A | B5D | B6C | C2D | C2E | C2F | G30A | G41A | G41B | G41C | G41P | H1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| B5A | NA | 0.0000000 | 0.0028693 | 0.0384865 | 0.8571389 | 0.9310819 | 0.4931822 | 0.0000001 | 0.0000001 | 0.0029558 | 0.1626253 | 0.4616701 |
| B5D | 0.0000000 | NA | 0.0000000 | 0.8783747 | 0.6642617 | 0.9067837 | 0.6353592 | 0.7567269 | 0.1429760 | 0.8188169 | 0.9536546 | 0.6841759 |
| B6C | 0.0028693 | 0.0000000 | NA | 0.0000008 | 0.0029004 | 0.0844835 | 0.0033686 | 0.0096196 | 0.0001983 | 0.0014600 | 0.1061612 | 0.1522174 |
| C2D | 0.0384865 | 0.8783747 | 0.0000008 | NA | 0.0000000 | 0.0000000 | 0.0777253 | 0.1524678 | 0.2521399 | 0.0048037 | 0.3172421 | 0.0103830 |
| C2E | 0.8571389 | 0.6642617 | 0.0029004 | 0.0000000 | NA | 0.0000000 | 0.0279327 | 0.0447402 | 0.0004079 | 0.0000993 | 0.6670988 | 0.0001681 |
| C2F | 0.9310819 | 0.9067837 | 0.0844835 | 0.0000000 | 0.0000000 | NA | 0.0298391 | 0.0973969 | 0.0227085 | 0.0002656 | 0.7543083 | 0.0000013 |
| G30A | 0.4931822 | 0.6353592 | 0.0033686 | 0.0777253 | 0.0279327 | 0.0298391 | NA | 0.0006860 | 0.0112191 | 0.0026507 | 0.0783370 | 0.1465516 |
| G41A | 0.0000001 | 0.7567269 | 0.0096196 | 0.1524678 | 0.0447402 | 0.0973969 | 0.0006860 | NA | 0.0000000 | 0.0000000 | 0.0000000 | 0.6852876 |
| G41B | 0.0000001 | 0.1429760 | 0.0001983 | 0.2521399 | 0.0004079 | 0.0227085 | 0.0112191 | 0.0000000 | NA | 0.0000000 | 0.0000000 | 0.0371811 |
| G41C | 0.0029558 | 0.8188169 | 0.0014600 | 0.0048037 | 0.0000993 | 0.0002656 | 0.0026507 | 0.0000000 | 0.0000000 | NA | 0.0000000 | 0.0147231 |
| G41P | 0.1626253 | 0.9536546 | 0.1061612 | 0.3172421 | 0.6670988 | 0.7543083 | 0.0783370 | 0.0000000 | 0.0000000 | 0.0000000 | NA | 0.5479169 |
| H1 | 0.4616701 | 0.6841759 | 0.1522174 | 0.0103830 | 0.0001681 | 0.0000013 | 0.1465516 | 0.6852876 | 0.0371811 | 0.0147231 | 0.5479169 | NA |
p_value_2015 <- rcorr(as.matrix(correlation_data_2015))
knitr::kable(p_value_2015$P)
| B5A | B5D | B6C | C2D | C2E | C2F | G30A | G41A | G41B | G41C | G41P | H1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| B5A | NA | 0.0000000 | 0.0000000 | 0.0006018 | 0.0000000 | 0.0000001 | 0.0e+00 | 0.0000177 | 0.0000023 | 0.0000000 | 0.0406298 | 0.1440034 |
| B5D | 0.0000000 | NA | 0.0000000 | 0.0207941 | 0.0009097 | 0.0025806 | 2.0e-07 | 0.4419628 | 0.0995555 | 0.0341004 | 0.5674047 | 0.7166642 |
| B6C | 0.0000000 | 0.0000000 | NA | 0.0000000 | 0.0000005 | 0.0000003 | 0.0e+00 | 0.0000003 | 0.0001159 | 0.9945213 | 0.0107783 | 0.0000000 |
| C2D | 0.0006018 | 0.0207941 | 0.0000000 | NA | 0.0000000 | 0.0000000 | 0.0e+00 | 0.0057776 | 0.1202582 | 0.3819376 | 0.4142202 | 0.0000000 |
| C2E | 0.0000000 | 0.0009097 | 0.0000005 | 0.0000000 | NA | 0.0000000 | 0.0e+00 | 0.2501209 | 0.0568460 | 0.0205153 | 0.0471224 | 0.0000000 |
| C2F | 0.0000001 | 0.0025806 | 0.0000003 | 0.0000000 | 0.0000000 | NA | 0.0e+00 | 0.5699416 | 0.0142241 | 0.0402883 | 0.9839207 | 0.0000000 |
| G30A | 0.0000000 | 0.0000002 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | NA | 0.0000000 | 0.0000000 | 0.0000012 | 0.0000000 | 0.0000000 |
| G41A | 0.0000177 | 0.4419628 | 0.0000003 | 0.0057776 | 0.2501209 | 0.5699416 | 0.0e+00 | NA | 0.0000000 | 0.0000000 | 0.0000000 | 0.1498989 |
| G41B | 0.0000023 | 0.0995555 | 0.0001159 | 0.1202582 | 0.0568460 | 0.0142241 | 0.0e+00 | 0.0000000 | NA | 0.0000000 | 0.0000000 | 0.0242226 |
| G41C | 0.0000000 | 0.0341004 | 0.9945213 | 0.3819376 | 0.0205153 | 0.0402883 | 1.2e-06 | 0.0000000 | 0.0000000 | NA | 0.0490344 | 0.1648620 |
| G41P | 0.0406298 | 0.5674047 | 0.0107783 | 0.4142202 | 0.0471224 | 0.9839207 | 0.0e+00 | 0.0000000 | 0.0000000 | 0.0490344 | NA | 0.0746875 |
| H1 | 0.1440034 | 0.7166642 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0e+00 | 0.1498989 | 0.0242226 | 0.1648620 | 0.0746875 | NA |