TAS Descriptive statistics

We will be going through

Step 1: Loading Packages

library(tidyverse)
library(readxl)
library(reshape2)
library(Hmisc)

Step 2: Import the data

TAS_original_data <- read_excel("C:/ZZ_SherMay/BHP/TAS_original_data.xlsx")

Step 3: Preview the data

view(TAS_original_data)
head(TAS_original_data)
## # A tibble: 6 × 75
##     TAS TAS05 TAS15 ER30001 ER30002 ER32000 ER32006 ER33801 ER33802 ER33803
##   <dbl> <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1     1    NA     1       4      39       2       2     289       3      60
## 2     1    NA     1       4      41       2       2    1157       3      30
## 3     1     1    NA       4     180       2       3     771       2      22
## 4     1     1    NA       5      32       2       2     624       3      30
## 5     1    NA     1       5      33       1       2    1504       3      30
## 6     1     1    NA       6      34       1       2    1202      51      30
## # ℹ 65 more variables: TA050001 <dbl>, TA050002 <dbl>, TA050003 <dbl>,
## #   TA050004 <dbl>, TA050044 <dbl>, TA050047 <dbl>, TA050050 <dbl>,
## #   TA050065 <dbl>, TA050066 <dbl>, TA050067 <dbl>, TA050070 <dbl>,
## #   TA050071 <dbl>, TA050127 <dbl>, TA050128 <dbl>, TA050129 <dbl>,
## #   TA050130 <dbl>, TA050573 <dbl>, TA050574 <dbl>, TA050575 <dbl>,
## #   TA050594 <dbl>, TA050595 <dbl>, TA050639 <dbl>, TA050663 <dbl>,
## #   TA050664 <dbl>, TA050665 <dbl>, TA050670 <dbl>, TA050675 <dbl>, …

Step 4: Correlation 2005

Separating the data

correlation_data_2005 <- TAS_original_data[, c(15, 16, 17, 18, 19, 20, 32, 33, 34, 35, 37, 38)] %>% drop_na()
head(correlation_data_2005)
## # A tibble: 6 × 12
##   TA050044 TA050047 TA050050 TA050065 TA050066 TA050067 TA050639 TA050663
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1        3        5        7        3        1        1        7        7
## 2        5        5        5        7        7        7        7        7
## 3        2        2        6        1        1        1        0        7
## 4        4        4        4        2        1        1        6        5
## 5        3        4        3        4        2        2        6        5
## 6        4        5        6        4        5        2        6        3
## # ℹ 4 more variables: TA050664 <dbl>, TA050665 <dbl>, TA050675 <dbl>,
## #   TA050676 <dbl>

Change variable names

colnames(correlation_data_2005) =c("B5A", "B5D", "B6C", "C2D", "C2E", "C2F", "G30A", "G41A", "G41B", "G41C", "G41P", "H1")
head(correlation_data_2005)
## # A tibble: 6 × 12
##     B5A   B5D   B6C   C2D   C2E   C2F  G30A  G41A  G41B  G41C  G41P    H1
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     3     5     7     3     1     1     7     7     7     7     6     3
## 2     5     5     5     7     7     7     7     7     6     6     5     2
## 3     2     2     6     1     1     1     0     7     5     7     3     1
## 4     4     4     4     2     1     1     6     5     6     6     5     2
## 5     3     4     3     4     2     2     6     5     5     5     5     2
## 6     4     5     6     4     5     2     6     3     6     4     4     1

Correlation for 2005

correlation_2005 <- round(cor(correlation_data_2005),4)
view(correlation_2005)
knitr::kable(correlation_2005)
B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C G41P H1
B5A 1.0000 0.3012 0.1091 0.0758 0.0066 0.0032 0.0251 0.1945 0.1956 0.1088 0.0512 0.0270
B5D 0.3012 1.0000 0.2784 -0.0056 -0.0159 0.0043 0.0174 0.0114 0.0537 0.0084 -0.0021 0.0149
B6C 0.1091 0.2784 1.0000 -0.1798 -0.1090 -0.0633 0.1073 0.0948 0.1359 0.1164 0.0592 -0.0525
C2D 0.0758 -0.0056 -0.1798 1.0000 0.3973 0.3441 -0.0647 -0.0525 -0.0420 -0.1032 0.0367 0.0938
C2E 0.0066 -0.0159 -0.1090 0.3973 1.0000 0.6457 -0.0805 -0.0736 -0.1292 -0.1421 -0.0158 0.1374
C2F 0.0032 0.0043 -0.0633 0.3441 0.6457 1.0000 -0.0796 -0.0608 -0.0835 -0.1332 0.0115 0.1762
G30A 0.0251 0.0174 0.1073 -0.0647 -0.0805 -0.0796 1.0000 0.1241 0.0929 0.1100 0.0645 -0.0532
G41A 0.1945 0.0114 0.0948 -0.0525 -0.0736 -0.0608 0.1241 1.0000 0.5046 0.4236 0.2560 0.0149
G41B 0.1956 0.0537 0.1359 -0.0420 -0.1292 -0.0835 0.0929 0.5046 1.0000 0.5877 0.2764 -0.0764
G41C 0.1088 0.0084 0.1164 -0.1032 -0.1421 -0.1332 0.1100 0.4236 0.5877 1.0000 0.3008 -0.0893
G41P 0.0512 -0.0021 0.0592 0.0367 -0.0158 0.0115 0.0645 0.2560 0.2764 0.3008 1.0000 -0.0220
H1 0.0270 0.0149 -0.0525 0.0938 0.1374 0.1762 -0.0532 0.0149 -0.0764 -0.0893 -0.0220 1.0000

Step 5: Generate heat map

melted_correlation_2005 <- melt(correlation_2005)
head(melted_correlation_2005)
##   Var1 Var2  value
## 1  B5A  B5A 1.0000
## 2  B5D  B5A 0.3012
## 3  B6C  B5A 0.1091
## 4  C2D  B5A 0.0758
## 5  C2E  B5A 0.0066
## 6  C2F  B5A 0.0032
  get_upper_tri <- function(correlation_2005){
    correlation_2005[lower.tri(correlation_2005)]<- NA
    return(correlation_2005)
  }
upper_tri <- get_upper_tri(correlation_2005)
upper_tri
##      B5A    B5D    B6C     C2D     C2E     C2F    G30A    G41A    G41B    G41C
## B5A    1 0.3012 0.1091  0.0758  0.0066  0.0032  0.0251  0.1945  0.1956  0.1088
## B5D   NA 1.0000 0.2784 -0.0056 -0.0159  0.0043  0.0174  0.0114  0.0537  0.0084
## B6C   NA     NA 1.0000 -0.1798 -0.1090 -0.0633  0.1073  0.0948  0.1359  0.1164
## C2D   NA     NA     NA  1.0000  0.3973  0.3441 -0.0647 -0.0525 -0.0420 -0.1032
## C2E   NA     NA     NA      NA  1.0000  0.6457 -0.0805 -0.0736 -0.1292 -0.1421
## C2F   NA     NA     NA      NA      NA  1.0000 -0.0796 -0.0608 -0.0835 -0.1332
## G30A  NA     NA     NA      NA      NA      NA  1.0000  0.1241  0.0929  0.1100
## G41A  NA     NA     NA      NA      NA      NA      NA  1.0000  0.5046  0.4236
## G41B  NA     NA     NA      NA      NA      NA      NA      NA  1.0000  0.5877
## G41C  NA     NA     NA      NA      NA      NA      NA      NA      NA  1.0000
## G41P  NA     NA     NA      NA      NA      NA      NA      NA      NA      NA
## H1    NA     NA     NA      NA      NA      NA      NA      NA      NA      NA
##         G41P      H1
## B5A   0.0512  0.0270
## B5D  -0.0021  0.0149
## B6C   0.0592 -0.0525
## C2D   0.0367  0.0938
## C2E  -0.0158  0.1374
## C2F   0.0115  0.1762
## G30A  0.0645 -0.0532
## G41A  0.2560  0.0149
## G41B  0.2764 -0.0764
## G41C  0.3008 -0.0893
## G41P  1.0000 -0.0220
## H1        NA  1.0000
melted_correlation_2005 <- melt(upper_tri, na.rm = TRUE)

ggheatmap <- ggplot(melted_correlation_2005, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
    name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 10, hjust = 1))+
 coord_fixed()

print(ggheatmap)

pearson_correlation_2005 <- ggheatmap + 
geom_text(aes(Var2, Var1, label = value), color = "black", size = 2) +
theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  legend.justification = c(1, 0),
  legend.position = c(0.6, 0.7),
  legend.direction = "horizontal")+
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                title.position = "top", title.hjust = 0.5))
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(pearson_correlation_2005)

Step 6: Correlation 2015

Separating the data

correlation_data_2015 <- TAS_original_data[, c(49,50,51,52,53,54,66,67,68,69,71,72)] %>% drop_na()
head(correlation_data_2015)
## # A tibble: 6 × 12
##   TA150045 TA150048 TA150051 TA150066 TA150067 TA150068 TA150784 TA150808
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1        3        5        4        7        7        5        5        1
## 2        4        5        7        7        7        7        1        1
## 3        4        3        5        5        6        6        5        5
## 4        3        3        5        1        1        3        7        1
## 5        5        5        6        4        5        5        7        5
## 6        5        5        7        7        1        7        7        7
## # ℹ 4 more variables: TA150809 <dbl>, TA150810 <dbl>, TA150820 <dbl>,
## #   TA150821 <dbl>

Change variable names

colnames(correlation_data_2015) =c("B5A", "B5D", "B6C", "C2D", "C2E", "C2F", "G30A", "G41A", "G41B", "G41C", "G41P", "H1")
head(correlation_data_2015)
## # A tibble: 6 × 12
##     B5A   B5D   B6C   C2D   C2E   C2F  G30A  G41A  G41B  G41C  G41P    H1
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     3     5     4     7     7     5     5     1     2     0     2     3
## 2     4     5     7     7     7     7     1     1     5     3     1     4
## 3     4     3     5     5     6     6     5     5     5     5     5     3
## 4     3     3     5     1     1     3     7     1     1     0     1     2
## 5     5     5     6     4     5     5     7     5     5     0     4     2
## 6     5     5     7     7     1     7     7     7     7     7     7     4

Correlation for 2015

correlation_2015 <- round(cor(correlation_data_2015),2)
view(correlation_2015)
knitr::kable(correlation_2015)
B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C G41P H1
B5A 1.00 0.39 0.14 -0.08 -0.16 -0.13 0.17 0.11 0.12 -0.18 0.05 -0.04
B5D 0.39 1.00 0.23 -0.06 -0.08 -0.07 0.13 -0.02 0.04 -0.05 0.01 -0.01
B6C 0.14 0.23 1.00 -0.22 -0.12 -0.13 0.17 0.13 0.10 0.00 0.06 -0.18
C2D -0.08 -0.06 -0.22 1.00 0.51 0.47 -0.19 -0.07 -0.04 0.02 -0.02 0.21
C2E -0.16 -0.08 -0.12 0.51 1.00 0.67 -0.30 -0.03 -0.05 0.06 -0.05 0.22
C2F -0.13 -0.07 -0.13 0.47 0.67 1.00 -0.26 -0.01 -0.06 0.05 0.00 0.23
G30A 0.17 0.13 0.17 -0.19 -0.30 -0.26 1.00 0.33 0.30 0.12 0.17 -0.16
G41A 0.11 -0.02 0.13 -0.07 -0.03 -0.01 0.33 1.00 0.51 0.14 0.39 -0.04
G41B 0.12 0.04 0.10 -0.04 -0.05 -0.06 0.30 0.51 1.00 0.23 0.36 -0.06
G41C -0.18 -0.05 0.00 0.02 0.06 0.05 0.12 0.14 0.23 1.00 0.05 -0.03
G41P 0.05 0.01 0.06 -0.02 -0.05 0.00 0.17 0.39 0.36 0.05 1.00 -0.04
H1 -0.04 -0.01 -0.18 0.21 0.22 0.23 -0.16 -0.04 -0.06 -0.03 -0.04 1.00

Step 7: Generate heat map

melted_correlation_2015 <- melt(correlation_2015)
head(melted_correlation_2015)
##   Var1 Var2 value
## 1  B5A  B5A  1.00
## 2  B5D  B5A  0.39
## 3  B6C  B5A  0.14
## 4  C2D  B5A -0.08
## 5  C2E  B5A -0.16
## 6  C2F  B5A -0.13
  get_upper_tri <- function(correlation_2015){
    correlation_2015[lower.tri(correlation_2015)]<- NA
    return(correlation_2015)
  }
upper_tri <- get_upper_tri(correlation_2015)
upper_tri
##      B5A  B5D  B6C   C2D   C2E   C2F  G30A  G41A  G41B  G41C  G41P    H1
## B5A    1 0.39 0.14 -0.08 -0.16 -0.13  0.17  0.11  0.12 -0.18  0.05 -0.04
## B5D   NA 1.00 0.23 -0.06 -0.08 -0.07  0.13 -0.02  0.04 -0.05  0.01 -0.01
## B6C   NA   NA 1.00 -0.22 -0.12 -0.13  0.17  0.13  0.10  0.00  0.06 -0.18
## C2D   NA   NA   NA  1.00  0.51  0.47 -0.19 -0.07 -0.04  0.02 -0.02  0.21
## C2E   NA   NA   NA    NA  1.00  0.67 -0.30 -0.03 -0.05  0.06 -0.05  0.22
## C2F   NA   NA   NA    NA    NA  1.00 -0.26 -0.01 -0.06  0.05  0.00  0.23
## G30A  NA   NA   NA    NA    NA    NA  1.00  0.33  0.30  0.12  0.17 -0.16
## G41A  NA   NA   NA    NA    NA    NA    NA  1.00  0.51  0.14  0.39 -0.04
## G41B  NA   NA   NA    NA    NA    NA    NA    NA  1.00  0.23  0.36 -0.06
## G41C  NA   NA   NA    NA    NA    NA    NA    NA    NA  1.00  0.05 -0.03
## G41P  NA   NA   NA    NA    NA    NA    NA    NA    NA    NA  1.00 -0.04
## H1    NA   NA   NA    NA    NA    NA    NA    NA    NA    NA    NA  1.00
melted_correlation_2015 <- melt(upper_tri, na.rm = TRUE)

ggheatmap_2015 <- ggplot(melted_correlation_2015, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
    name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 10, hjust = 1))+
 coord_fixed()

print(ggheatmap_2015)

pearson_correlation_2015 <- ggheatmap_2015 + 
geom_text(aes(Var2, Var1, label = value), color = "black", size = 2) +
theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  legend.justification = c(1, 0),
  legend.position = c(0.6, 0.7),
  legend.direction = "horizontal")+
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                title.position = "top", title.hjust = 0.5))

print(pearson_correlation_2015)

Correlation Heatmap

2005

print(pearson_correlation_2005)

2015

print(pearson_correlation_2015)

P-value

2005

p_value_2005 <- rcorr(as.matrix(correlation_data_2005))
knitr::kable(p_value_2005$P)
B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C G41P H1
B5A NA 0.0000000 0.0028693 0.0384865 0.8571389 0.9310819 0.4931822 0.0000001 0.0000001 0.0029558 0.1626253 0.4616701
B5D 0.0000000 NA 0.0000000 0.8783747 0.6642617 0.9067837 0.6353592 0.7567269 0.1429760 0.8188169 0.9536546 0.6841759
B6C 0.0028693 0.0000000 NA 0.0000008 0.0029004 0.0844835 0.0033686 0.0096196 0.0001983 0.0014600 0.1061612 0.1522174
C2D 0.0384865 0.8783747 0.0000008 NA 0.0000000 0.0000000 0.0777253 0.1524678 0.2521399 0.0048037 0.3172421 0.0103830
C2E 0.8571389 0.6642617 0.0029004 0.0000000 NA 0.0000000 0.0279327 0.0447402 0.0004079 0.0000993 0.6670988 0.0001681
C2F 0.9310819 0.9067837 0.0844835 0.0000000 0.0000000 NA 0.0298391 0.0973969 0.0227085 0.0002656 0.7543083 0.0000013
G30A 0.4931822 0.6353592 0.0033686 0.0777253 0.0279327 0.0298391 NA 0.0006860 0.0112191 0.0026507 0.0783370 0.1465516
G41A 0.0000001 0.7567269 0.0096196 0.1524678 0.0447402 0.0973969 0.0006860 NA 0.0000000 0.0000000 0.0000000 0.6852876
G41B 0.0000001 0.1429760 0.0001983 0.2521399 0.0004079 0.0227085 0.0112191 0.0000000 NA 0.0000000 0.0000000 0.0371811
G41C 0.0029558 0.8188169 0.0014600 0.0048037 0.0000993 0.0002656 0.0026507 0.0000000 0.0000000 NA 0.0000000 0.0147231
G41P 0.1626253 0.9536546 0.1061612 0.3172421 0.6670988 0.7543083 0.0783370 0.0000000 0.0000000 0.0000000 NA 0.5479169
H1 0.4616701 0.6841759 0.1522174 0.0103830 0.0001681 0.0000013 0.1465516 0.6852876 0.0371811 0.0147231 0.5479169 NA

2015

p_value_2015 <- rcorr(as.matrix(correlation_data_2015))
knitr::kable(p_value_2015$P)
B5A B5D B6C C2D C2E C2F G30A G41A G41B G41C G41P H1
B5A NA 0.0000000 0.0000000 0.0006018 0.0000000 0.0000001 0.0e+00 0.0000177 0.0000023 0.0000000 0.0406298 0.1440034
B5D 0.0000000 NA 0.0000000 0.0207941 0.0009097 0.0025806 2.0e-07 0.4419628 0.0995555 0.0341004 0.5674047 0.7166642
B6C 0.0000000 0.0000000 NA 0.0000000 0.0000005 0.0000003 0.0e+00 0.0000003 0.0001159 0.9945213 0.0107783 0.0000000
C2D 0.0006018 0.0207941 0.0000000 NA 0.0000000 0.0000000 0.0e+00 0.0057776 0.1202582 0.3819376 0.4142202 0.0000000
C2E 0.0000000 0.0009097 0.0000005 0.0000000 NA 0.0000000 0.0e+00 0.2501209 0.0568460 0.0205153 0.0471224 0.0000000
C2F 0.0000001 0.0025806 0.0000003 0.0000000 0.0000000 NA 0.0e+00 0.5699416 0.0142241 0.0402883 0.9839207 0.0000000
G30A 0.0000000 0.0000002 0.0000000 0.0000000 0.0000000 0.0000000 NA 0.0000000 0.0000000 0.0000012 0.0000000 0.0000000
G41A 0.0000177 0.4419628 0.0000003 0.0057776 0.2501209 0.5699416 0.0e+00 NA 0.0000000 0.0000000 0.0000000 0.1498989
G41B 0.0000023 0.0995555 0.0001159 0.1202582 0.0568460 0.0142241 0.0e+00 0.0000000 NA 0.0000000 0.0000000 0.0242226
G41C 0.0000000 0.0341004 0.9945213 0.3819376 0.0205153 0.0402883 1.2e-06 0.0000000 0.0000000 NA 0.0490344 0.1648620
G41P 0.0406298 0.5674047 0.0107783 0.4142202 0.0471224 0.9839207 0.0e+00 0.0000000 0.0000000 0.0490344 NA 0.0746875
H1 0.1440034 0.7166642 0.0000000 0.0000000 0.0000000 0.0000000 0.0e+00 0.1498989 0.0242226 0.1648620 0.0746875 NA