HyNet overview

Published

2024-05-06

1 Packages and functions

Load required packages and functions.

Code

library(dplyr)
library(ggplot2)
library(patchwork)
theme_set(theme_light())

library(maptools)

library(igraph)
source("../code/fun_create_adj.R")

2 Data

Code

# Load Ches Bay shape file
shoreline <- maptools::readShapePoly("../CBnet_shoreline/final_shoreline.shp")
shoreline <- maptools::readShapePoly("../CBnet_shoreline/CBnet_Shoreline_simplify.shp")
rca_cells <- data.table::fread("../data_rca/rca_cells_2.csv") %>%
    filter(FSM == 1) %>%
    mutate(CellID = paste0("x", CellID)) %>%
    mutate(Atlantic = (LAT < 37.22 & LON > -75.96) |
               (LAT < 37.5 & LON > -75.8) |
               (LAT < 38.0 & LON > -75.6) |
               (LAT < 36.96 & LON > -75.99)
    ) 
AtlanticCells <- rca_cells %>%
    filter(Atlantic) %>%
    pull(CellID)
rca_cells <- rca_cells %>% 
    filter(!Atlantic)
#Use info from the shape file to find projections for the stations
###http://www.prj2epsg.org/search
###26918 - NAD_1983_UTM_Zone_18N
###http://spatialreference.org/ref/epsg/26918/
tmp <- rgdal::project(cbind(360 + rca_cells$LON, rca_cells$LAT),
                      proj = "+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")
rca_cells <- rca_cells %>% 
    mutate(lon = tmp[, 1],
           lat = tmp[, 2])

Code

lagmax = 30

Load results for the years 2002 and 2011 with maximum lag of 30 days.

Code

# Load model coefficients
# Mcoef2002 <- as.matrix(readRDS(paste0("../dataderived/BigVAR_2002_coef_lagmax",
#                                       lagmax, ".rds")))
# Mcoef2011 <- as.matrix(readRDS(paste0("../dataderived/BigVAR_2011_coef_lagmax",
#                                       lagmax, ".rds")))
# Mcoef2002 <- readRDS("../dataderived/VARpairs_2002.rds")
# Mcoef2011 <- readRDS("../dataderived/VARpairs_2011.rds")
Mcoef2002 <- readRDS("../dataderived/CAUSpairs_2002.rds")
Mcoef2011 <- readRDS("../dataderived/CAUSpairs_2011.rds")

3 Network

3.1 Network construction

Process results into adjacency matrices.

Code

proc2002 <- create_adj_pairs(Mcoef2002)
proc2011 <- create_adj_pairs(Mcoef2011)

Explore coefficients.

Code

summary(as.vector(proc2002$Acoef[!is.na(proc2002$Acoef)]))

#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   0.000   0.000   0.059   0.090   0.141   0.863

Code

summary(as.vector(proc2011$Acoef[!is.na(proc2011$Acoef)]))

#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   0.000   0.001   0.066   0.085   0.139   0.922

Additionally threshold coefficients.

Code

proc2002$Acoef[proc2002$Acoef < 0.01] <- 0
proc2011$Acoef[proc2011$Acoef < 0.01] <- 0

Explore lags.

Code

summary(as.vector(proc2002$Alag))

#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>       0       2      18      15      27      30    3064

Code

summary(as.vector(proc2011$Alag))

#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>       0      12      26      20      29      30    3064

Additionally adjust the Granger test \(p\)-values for the number of tests. Here by the family of tests we understand the number of relationships tested between each \(i\)th spatial cell (\(i = 1, \dots,\) 3064) and a cell \(j\), \(j \neq i\). To control the false discovery rate in each family of tests, we use Benjamini–Hochberg (BH) or Benjamini–Yekutieli (BY) adjustment. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerful than other methods such as the Bonferroni correction. The BY procedure is more conservative than the BH procedure.

Code

tmp_BH <- apply(proc2002$Gp, 2, p.adjust, method = "BH")
tmp_BY <- apply(proc2002$Gp, 2, p.adjust, method = "BY")
summary(as.vector(tmp_BH[!is.na(tmp_BH)]))

#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   0.000   0.011   0.137   0.287   0.512   1.000

Code

summary(as.vector(tmp_BY[!is.na(tmp_BY)]))

#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   0.000   0.091   1.000   0.626   1.000   1.000

Code

proc2002$Gp <- tmp_BY #apply(proc2002$Gp, 2, p.adjust, method = "BY")
proc2011$Gp <- apply(proc2011$Gp, 2, p.adjust, method = "BY")

# Fill in the diagonal for easier assignment of the edge attributes
diag(proc2002$Gp) <- diag(proc2011$Gp) <- 1

Create weighted directed networks, where edge weight is the average weighted coefficient.

Code

# If subsample
# set.seed(123)
# ss <- sort(sample(3300, 500))
# If not subsample
ss <- 1:nrow(rca_cells)

Calculate network stats.

Closeness centrality captures the notion that a vertex is ‘central’ if it is ‘close’ to many other vertices. \[ C_{clos}(v) = \frac{1}{\sum_{u \in V} dist(v, u)}, \] where \(dist(v, u)\) is the geodesic distance (distance over the network) between the vertices \(u, v \in V\). Often the normalization factor \(N_v - 1\) is used so \(C_{clos}(v) \in [0, 1]\) (Section 4.2.2 in Kolaczyk and Csárdi 2014).

Eigenvector centrality measures a node’s importance by considering the importance of its neighbors. This makes the eigenvector centrality suitable for measuring importance of a node. It is represented by the principal eigenvector of the adjacency matrix, and is similar to the core of Google’s PageRank algorithm, which they use to rank web pages.

Code

# Create networks
# - based on penalized pairwise VAR coefficients
g2002 <- igraph::graph_from_adjacency_matrix(proc2002$Acoef[ss, ss]
                                             ,weighted = TRUE
                                             ,mode = "directed"
                                             ,diag = FALSE)
# - based on Granger p-values
g2002G <- igraph::graph_from_adjacency_matrix(proc2002$Gp[ss, ss] < alpha
                                             ,weighted = NULL
                                             ,mode = "directed"
                                             ,diag = FALSE) %>% 
    set_edge_attr("Coef", value = proc2002$Acoef[ss, ss][proc2002$Gp[ss, ss] < alpha]) %>% 
    set_edge_attr("Lag", value = proc2002$Alag[ss, ss][proc2002$Gp[ss, ss] < alpha])

# - based on penalized pairwise VAR coefficients
g2011 <- igraph::graph_from_adjacency_matrix(proc2011$Acoef[ss, ss]
                                             ,weighted = TRUE
                                             ,mode = "directed"
                                             ,diag = FALSE)
# - based on Granger p-values
g2011G <- igraph::graph_from_adjacency_matrix(proc2011$Gp[ss, ss] < alpha
                                             ,weighted = NULL
                                             ,mode = "directed"
                                             ,diag = FALSE) %>% 
    set_edge_attr("Coef", value = proc2011$Acoef[ss, ss][proc2011$Gp[ss, ss] < alpha]) %>% 
    set_edge_attr("Lag", value = proc2011$Alag[ss, ss][proc2011$Gp[ss, ss] < alpha])

# Network stats
D <- rca_cells[ss, ] %>%
    mutate(deg_in_2002 = degree(g2002, mode = "in"),
           deg_in_2011 = degree(g2011, mode = "in"),
           deg_in_2002G = degree(g2002G, mode = "in"),
           deg_in_2011G = degree(g2011G, mode = "in"),
           deg_out_2002 = degree(g2002, mode = "out"),
           deg_out_2011 = degree(g2011, mode = "out"),
           deg_out_2002G = degree(g2002G, mode = "out"),
           deg_out_2011G = degree(g2011G, mode = "out"),
           clos_out_2002 = closeness(g2002, mode = "out"),
           clos_out_2011 = closeness(g2011, mode = "out"),
           clos_out_2002G = closeness(g2002G, mode = "out"),
           clos_out_2011G = closeness(g2011G, mode = "out"),
           cent_2002 = eigen_centrality(g2002, directed = TRUE)$vector,
           cent_2011 = eigen_centrality(g2011, directed = TRUE)$vector,
           cent_2002G = eigen_centrality(g2002G, directed = TRUE)$vector,
           cent_2011G = eigen_centrality(g2011G, directed = TRUE)$vector
           )

Calculate correlations between node stats and O2.

Code

# Load O2 data
o2_2002 <- data.table::fread(paste0("../data_rca/rca_ts_2002_2.csv"),
                       select = c("DOAVEG_avg", "CellID", "Date")) %>%
    mutate(CellID = paste0("x", CellID)) %>%
    filter(!is.element(CellID, AtlanticCells))
o2_2002_avg <- o2_2002 %>% 
    group_by(CellID) %>% 
    summarise(O2_2002g = mean(DOAVEG_avg))
Dtmp <- merge(D, o2_2002_avg, by = "CellID")

# Load deseasonalized O2 data
o2_2002_ds <- readRDS("../dataderived/Dwide_mat_deseas_2002.rds")
o2_2002_ds_avg <- apply(o2_2002_ds, 2, mean)
o2_2002_ds_avg <- tibble(CellID = names(o2_2002_ds_avg), O2_2002g_ds = o2_2002_ds_avg)
Dtmp <- merge(Dtmp, o2_2002_ds_avg, by = "CellID")
Dtmp %>% 
    select(contains("2002G")) %>% 
    GGally::ggpairs()

3.2 Compare 2002 and 2011

Numeric summaries.

Code

D %>% 
    select(!c(CellID, H, DX, DY, LAT, LON, FSM, lat, lon)) %>% 
    psych::describe(quant = c(0.1, 0.25, 0.5, 0.75, 0.90)) #%>%

#>                vars    n    mean     sd  median trimmed     mad min  max range
#> Atlantic          1 3064     NaN     NA      NA     NaN      NA Inf -Inf  -Inf
#> deg_in_2002       2 3064 1631.07 957.23 1629.50 1638.82 1260.95   0 3063  3063
#> deg_in_2011       3 3064 1666.67 954.90 1751.00 1686.21 1269.11   0 3063  3063
#> deg_in_2002G      4 3064  659.80 460.45  584.00  621.00  465.54   0 2189  2189
#> deg_in_2011G      5 3064  501.66 387.54  435.50  467.56  448.49   0 1788  1788
#> deg_out_2002      6 3064 1631.07 789.05 1593.50 1620.06 1010.39 274 3063  2789
#> deg_out_2011      7 3064 1666.67 767.17 1640.00 1658.96  956.28 252 3063  2811
#> deg_out_2002G     8 3064  659.80 334.54  629.00  642.91  363.98  25 1681  1656
#> deg_out_2011G     9 3064  501.66 281.76  473.00  481.95  302.45  17 1588  1571
#> clos_out_2002    10 3064    0.00   0.00    0.00    0.00    0.00   0    0     0
#> clos_out_2011    11 3064    0.00   0.00    0.00    0.00    0.00   0    0     0
#> clos_out_2002G   12 3064    0.00   0.00    0.00    0.00    0.00   0    0     0
#> clos_out_2011G   13 3064    0.00   0.00    0.00    0.00    0.00   0    0     0
#> cent_2002        14 3064    0.09   0.16    0.02    0.05    0.03   0    1     1
#> cent_2011        15 3064    0.12   0.18    0.04    0.08    0.06   0    1     1
#> cent_2002G       16 3064    0.30   0.23    0.25    0.28    0.25   0    1     1
#> cent_2011G       17 3064    0.23   0.21    0.17    0.20    0.22   0    1     1
#>                 skew kurtosis    se   Q0.1   Q0.25    Q0.5   Q0.75    Q0.9
#> Atlantic          NA       NA    NA     NA      NA      NA      NA      NA
#> deg_in_2002    -0.02    -1.30 17.29 314.60  786.75 1629.50 2490.25 2976.70
#> deg_in_2011    -0.13    -1.30 17.25 311.30  804.75 1751.00 2550.00 2939.00
#> deg_in_2002G    0.70    -0.06  8.32 102.30  299.00  584.00  939.25 1316.70
#> deg_in_2011G    0.62    -0.48  7.00  51.00  159.00  435.50  781.00 1053.40
#> deg_out_2002    0.10    -1.17 14.25 579.30  940.75 1593.50 2319.25 2756.70
#> deg_out_2011    0.07    -1.14 13.86 625.50 1014.00 1640.00 2312.25 2756.70
#> deg_out_2002G   0.39    -0.52  6.04 239.00  394.75  629.00  890.25 1124.00
#> deg_out_2011G   0.64     0.16  5.09 167.00  279.00  473.00  685.25  877.00
#> clos_out_2002   0.23    -1.24  0.00   0.00    0.00    0.00    0.00    0.00
#> clos_out_2011  -0.03    -1.02  0.00   0.00    0.00    0.00    0.00    0.00
#> clos_out_2002G  0.31    -0.13  0.00   0.00    0.00    0.00    0.00    0.00
#> clos_out_2011G  0.16     0.87  0.00   0.00    0.00    0.00    0.00    0.00
#> cent_2002       2.90     9.71  0.00   0.00    0.00    0.02    0.11    0.26
#> cent_2011       2.01     4.12  0.00   0.00    0.00    0.04    0.17    0.37
#> cent_2002G      0.72    -0.28  0.00   0.02    0.10    0.25    0.45    0.65
#> cent_2011G      0.94     0.16  0.00   0.01    0.04    0.17    0.35    0.54

Code

    # t()

3.2.1 VAR-based networks

Code

p1 <- ggplot(D, aes(x = deg_in_2002)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = deg_in_2011)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p3 <- ggplot(D, aes(x = deg_out_2002)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p4 <- ggplot(D, aes(x = deg_out_2011)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & theme_light()

Figure 1: Degree distributions of the two networks *based on penalized pairwise VAR coefficients.*

Code

# Size of points on the map
sz = 1
# Aspect ratio for lat and lon
# https://stackoverflow.com/questions/66018518/aspect-ratio-for-lat-lon-plots
lat_mean <- mean(rca_cells$LAT)
rto <- 1 / cos(lat_mean * pi / 180)

p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p3 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p4 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 2: Maps of the node degrees in the two networks *based on penalized pairwise VAR coefficients.*

Code

p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011 - deg_in_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011 - deg_out_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
(p1 + p2) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 3: Maps of the node degree differences.*

Code

p1 <- ggplot(D, aes(x = clos_out_2002)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = clos_out_2011)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 4: Distributions of the closeness centrality of the networks *based on penalized pairwise VAR coefficients.*

Code

p1 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 5: Maps of the node closeness centrality in the two networks *based on penalized pairwise VAR coefficients.*

Code

p1 <- ggplot(D, aes(x = cent_2002)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = cent_2011)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 6: Distributions of the eigenvector centrality of the two networks *based on penalized pairwise VAR coefficients.*

Code

p1 <- ggplot(D, aes(x = lon, y = lat, color = cent_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = cent_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 7: Maps of the node eigenvector centrality in the two networks *based on penalized pairwise VAR coefficients.*

3.2.2 Granger causality-based networks

Code

p1 <- ggplot(D, aes(x = deg_in_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = deg_in_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p3 <- ggplot(D, aes(x = deg_out_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p4 <- ggplot(D, aes(x = deg_out_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & theme_light()

Figure 8: Degree distributions of the two networks *based on pairwise Granger causality tests.*

Code

p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p3 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p4 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 9: Maps of the node degrees in the two networks *based on pairwise Granger causality tests.*

Code

p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011G - deg_in_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011G - deg_out_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
(p1 + p2) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 10: Maps of the node degree differences.

Code

p1 <- ggplot(D, aes(x = clos_out_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = clos_out_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 11: Distributions of the closeness centrality of the networks *based on pairwise Granger causality tests.*

Code

p1 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 12: Maps of the node closeness centrality in the two networks *based on pairwise Granger causality tests.*

Code

p1 <- ggplot(D, aes(x = cent_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = cent_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 13: Distributions of the eigenvector centrality of the two networks *based on pairwise Granger causality tests.*

Code

p1 <- ggplot(D, aes(x = lon, y = lat, color = cent_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = cent_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 14: Maps of the node eigenvector centrality in the two networks *based on pairwise Granger causality tests.*

3.3 Visualize all networks

Code

YEARS = 1986L:2015L
alpha = 0.05
# ndims = 5

# edims <- numeric()
# E_ASE <- E_LSE <- numeric()
# Full loop on weighted net is about 5 minutes.
for (year in YEARS) { # year = 2002

    # Load data obtained on cluster
    Mcoef <- readRDS(paste0("../dataderived/CAUSpairs_", year, ".rds"))
    proc <- create_adj_pairs(Mcoef)

    # Thresholding / adjusting
    # proc$Acoef[proc$Acoef < 0.01] <- 0
    proc$Gp <- apply(proc$Gp, 2, p.adjust, method = "BY")
    diag(proc$Gp) <- 1

    # Unweighted directed adjacency matrix
    AM <- proc$Gp < alpha

    # Weighted directed adjacency matrix
    AMw <- proc$Acoef
    AMw[AM == 0] <- 0

    # Create graph, add attributes
    graphGranger <- igraph::graph_from_adjacency_matrix(AMw, weighted = TRUE
                                                        #AM, weighted = NULL
                                                        ,mode = "directed"
                                                        ,diag = FALSE)
    # ?cluster_walktrap
    # ?cluster_infomap 
    clusters <- cluster_walktrap(graphGranger)
    # print(clusters)
    # table(clusters$membership)
    Cluster <- as.factor(clusters$membership)
    # plot(graphGranger, vertex.color = membership(clusters))
    
    
    
    # igraph::is_weighted(graphGranger)
    deg_in = degree(graphGranger, mode = "in") %>% as_tibble()
    deg_out = degree(graphGranger, mode = "out") %>% as_tibble()
    
    p1 <- ggplot(deg_in, aes(x = value)) + 
        geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
        ylab("Density") + xlab("In-degree") + 
        ggtitle(year)
    p2 <- ggplot(deg_out, aes(x = value)) + 
        geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
        ylab("Density") + xlab("Out-degree")
    
    p3 <- ggplot(D, aes(x = lon, y = lat, color = deg_in$value)) + 
        geom_polygon(data = shoreline, 
                     aes(x = long, y = lat, group = group), 
                     color = "black", fill = NA) + 
        geom_point(size = sz)
    p4 <- ggplot(D, aes(x = lon, y = lat, color = deg_out$value)) + 
        geom_polygon(data = shoreline, 
                     aes(x = long, y = lat, group = group), 
                     color = "black", fill = NA) + 
        geom_point(size = sz)
    
    
    p_cl <- ggplot(D, aes(x = lon, y = lat, color = Cluster)) + 
        geom_polygon(data = shoreline, 
                     aes(x = long, y = lat, group = group), 
                     color = "black", fill = NA) + 
        geom_point(size = sz) + 
        scale_color_viridis_d()
    
    print(
    p1 + p2 + p3 + p4 + p_cl + 
        plot_layout(nrow = 1) + 
        plot_annotation(tag_levels = 'A') & theme_light())
}

References

Kolaczyk ED, Csárdi G (2014) Statistical analysis of network data with R. Springer, New York