HyNet overview

Published

2023-05-09

1 Packages and functions

Load required packages and functions.

Code
library(dplyr)
library(ggplot2)
library(patchwork)

library(maptools)

library(igraph)
source("../code/fun_create_adj.R")

2 Data

Code
# Load Ches Bay shape file
shoreline <- maptools::readShapePoly("../CBnet_shoreline/final_shoreline.shp")
shoreline <- maptools::readShapePoly("../CBnet_shoreline/CBnet_Shoreline_simplify.shp")
rca_cells <- data.table::fread("../data_rca/rca_cells_2.csv") %>%
    filter(FSM == 1) %>%
    mutate(CellID = paste0("x", CellID))
#Use info from the shape file to find projections for the stations
###http://www.prj2epsg.org/search
###26918 - NAD_1983_UTM_Zone_18N
###http://spatialreference.org/ref/epsg/26918/
tmp <- rgdal::project(cbind(360 + rca_cells$LON, rca_cells$LAT),
                      proj = "+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")
rca_cells <- rca_cells %>% 
    mutate(lon = tmp[, 1],
           lat = tmp[, 2])
Code
lagmax = 30

Load results for the years 2002 and 2011 with maximum lag of 30 days.

Code
# Load model coefficients
# Mcoef2002 <- as.matrix(readRDS(paste0("../dataderived/BigVAR_2002_coef_lagmax",
#                                       lagmax, ".rds")))
# Mcoef2011 <- as.matrix(readRDS(paste0("../dataderived/BigVAR_2011_coef_lagmax",
#                                       lagmax, ".rds")))
# Mcoef2002 <- readRDS("../dataderived/VARpairs_2002.rds")
# Mcoef2011 <- readRDS("../dataderived/VARpairs_2011.rds")
Mcoef2002 <- readRDS("../dataderived/CAUSpairs_2002.rds")
Mcoef2011 <- readRDS("../dataderived/CAUSpairs_2011.rds")

3 Network

3.1 Network construction

Process results into adjacency matrices.

Code
proc2002 <- create_adj_pairs(Mcoef2002)
proc2011 <- create_adj_pairs(Mcoef2011)

Explore coefficients.

Code
summary(as.vector(proc2002$Acoef[!is.na(proc2002$Acoef)]))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   0.000   0.000   0.057   0.091   0.142   0.863
Code
summary(as.vector(proc2011$Acoef[!is.na(proc2011$Acoef)]))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>    0.00    0.00    0.09    0.09    0.14   12.28

Additionally threshold coefficients.

Code
proc2002$Acoef[proc2002$Acoef < 0.01] <- 0
proc2011$Acoef[proc2011$Acoef < 0.01] <- 0

Explore lags.

Code
summary(as.vector(proc2002$Alag))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>       0       2      16      15      27      30    3386
Code
summary(as.vector(proc2011$Alag))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>       0      13      26      21      30      30    3386

Additionally adjust the Granger test \(p\)-values for the number of tests. Here by the family of tests we understand the number of relationships tested between each \(i\)th spatial cell (\(i = 1, \dots,\) 3386) and a cell \(j\), \(j \neq i\). To control the false discovery rate in each family of tests, we use Benjamini–Hochberg (BH) or Benjamini–Yekutieli (BY) adjustment. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerful than other methods such as the Bonferroni correction. The BY procedure is more conservative than the BH procedure.

Code
tmp_BH <- apply(proc2002$Gp, 2, p.adjust, method = "BH")
tmp_BY <- apply(proc2002$Gp, 2, p.adjust, method = "BY")
summary(as.vector(tmp_BH[!is.na(tmp_BH)]))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   0.000   0.017   0.167   0.302   0.538   1.000
Code
summary(as.vector(tmp_BY[!is.na(tmp_BY)]))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   0.000   0.145   1.000   0.659   1.000   1.000
Code
proc2002$Gp <- tmp_BY #apply(proc2002$Gp, 2, p.adjust, method = "BY")
proc2011$Gp <- apply(proc2011$Gp, 2, p.adjust, method = "BY")

Create weighted directed networks, where edge weight is the average weighted coefficient.

Code
# If subsample
# set.seed(123)
# ss <- sort(sample(3300, 500))
# If not subsample
ss <- 1:nrow(rca_cells)

Calculate network stats.

Closeness centrality captures the notion that a vertex is ‘central’ if it is ‘close’ to many other vertices. \[ C_{clos}(v) = \frac{1}{\sum_{u \in V} dist(v, u)}, \] where \(dist(v, u)\) is the geodesic distance (distance over the network) between the vertices \(u, v \in V\). Often the normalization factor \(N_v - 1\) is used so \(C_{clos}(v) \in [0, 1]\) (Section 4.2.2 in Kolaczyk and Csárdi 2014).

Eigenvector centrality measures a node’s importance by considering the importance of its neighbors. This makes the eigenvector centrality suitable for measuring importance of a node. It is represented by the principal eigenvector of the adjacency matrix, and is similar to the core of Google’s PageRank algorithm, which they use to rank web pages.

Code
# Create networks
# - based on penalized pairwise VAR coefficients
g2002 <- igraph::graph_from_adjacency_matrix(proc2002$Acoef[ss, ss]
                                             ,weighted = TRUE
                                             ,mode = "directed"
                                             ,diag = FALSE)
# - based on Granger p-values
g2002G <- igraph::graph_from_adjacency_matrix(proc2002$Gp[ss, ss] < alpha
                                             ,weighted = NULL
                                             ,mode = "directed"
                                             ,diag = FALSE)
# - based on penalized pairwise VAR coefficients
g2011 <- igraph::graph_from_adjacency_matrix(proc2011$Acoef[ss, ss]
                                             ,weighted = TRUE
                                             ,mode = "directed"
                                             ,diag = FALSE)
# - based on Granger p-values
g2011G <- igraph::graph_from_adjacency_matrix(proc2011$Gp[ss, ss] < alpha
                                             ,weighted = NULL
                                             ,mode = "directed"
                                             ,diag = FALSE)

# Network stats
D <- rca_cells[ss, ] %>%
    mutate(deg_in_2002 = degree(g2002, mode = "in"),
           deg_in_2011 = degree(g2011, mode = "in"),
           deg_in_2002G = degree(g2002G, mode = "in"),
           deg_in_2011G = degree(g2011G, mode = "in"),
           deg_out_2002 = degree(g2002, mode = "out"),
           deg_out_2011 = degree(g2011, mode = "out"),
           deg_out_2002G = degree(g2002G, mode = "out"),
           deg_out_2011G = degree(g2011G, mode = "out"),
           clos_out_2002 = closeness(g2002, mode = "out"),
           clos_out_2011 = closeness(g2011, mode = "out"),
           clos_out_2002G = closeness(g2002G, mode = "out"),
           clos_out_2011G = closeness(g2011G, mode = "out"),
           cent_2002 = eigen_centrality(g2002, directed = TRUE)$vector,
           cent_2011 = eigen_centrality(g2011, directed = TRUE)$vector,
           cent_2002G = eigen_centrality(g2002G, directed = TRUE)$vector,
           cent_2011G = eigen_centrality(g2011G, directed = TRUE)$vector
           )

3.2 Compare 2002 and 2011

Numeric summaries.

Code
D %>% 
    select(!c(CellID, H, DX, DY, LAT, LON, FSM, lat, lon)) %>% 
    psych::describe(quant = c(0.1, 0.25, 0.5, 0.75, 0.90)) #%>%
#>                vars    n    mean      sd  median trimmed     mad min  max range
#> deg_in_2002       1 3386 1787.74 1052.19 1766.00 1793.32 1378.82   0 3385  3385
#> deg_in_2011       2 3386 1849.02 1043.73 1930.50 1867.76 1416.62   1 3385  3384
#> deg_in_2002G      3 3386  636.81  478.67  550.00  590.43  490.74   0 2238  2238
#> deg_in_2011G      4 3386  593.23  501.72  504.00  530.30  523.36   0 1928  1928
#> deg_out_2002      5 3386 1787.74  882.50 1736.50 1775.47 1123.81 279 3385  3106
#> deg_out_2011      6 3386 1849.02  861.34 1818.00 1840.24 1082.30 293 3385  3092
#> deg_out_2002G     7 3386  636.81  351.73  589.00  614.56  389.92  16 1737  1721
#> deg_out_2011G     8 3386  593.23  354.76  542.00  571.52  386.96  11 1762  1751
#> clos_out_2002     9 3386    0.00    0.00    0.00    0.00    0.00   0    0     0
#> clos_out_2011    10 3386    0.00    0.00    0.00    0.00    0.00   0    0     0
#> clos_out_2002G   11 3386    0.00    0.00    0.00    0.00    0.00   0    0     0
#> clos_out_2011G   12 3386    0.00    0.00    0.00    0.00    0.00   0    0     0
#> cent_2002        13 3386    0.09    0.16    0.01    0.05    0.02   0    1     1
#> cent_2011        14 3386    0.03    0.07    0.01    0.02    0.01   0    1     1
#> cent_2002G       15 3386    0.27    0.23    0.22    0.25    0.23   0    1     1
#> cent_2011G       16 3386    0.27    0.25    0.22    0.25    0.29   0    1     1
#>                 skew kurtosis    se   Q0.1   Q0.25    Q0.5   Q0.75    Q0.9
#> deg_in_2002     0.00    -1.29 18.08 350.50  871.00 1766.00 2730.50 3278.00
#> deg_in_2011    -0.11    -1.31 17.94 377.50  893.00 1930.50 2812.75 3248.00
#> deg_in_2002G    0.77     0.03  8.23  65.00  253.25  550.00  924.00 1335.00
#> deg_in_2011G    0.87    -0.03  8.62  38.00  160.25  504.00  875.00 1344.50
#> deg_out_2002    0.10    -1.18 15.17 619.00 1010.25 1736.50 2539.75 3046.50
#> deg_out_2011    0.08    -1.15 14.80 684.50 1106.25 1818.00 2573.50 3078.50
#> deg_out_2002G   0.49    -0.45  6.04 208.00  350.00  589.00  880.75 1125.50
#> deg_out_2011G   0.49    -0.55  6.10 166.00  303.00  542.00  850.00 1134.50
#> clos_out_2002   0.18    -1.32  0.00   0.00    0.00    0.00    0.00    0.00
#> clos_out_2011   0.52    -0.76  0.00   0.00    0.00    0.00    0.00    0.00
#> clos_out_2002G  0.28    -0.08  0.00   0.00    0.00    0.00    0.00    0.00
#> clos_out_2011G  0.18     0.06  0.00   0.00    0.00    0.00    0.00    0.00
#> cent_2002       2.96    10.13  0.00   0.00    0.00    0.01    0.10    0.25
#> cent_2011       8.00    78.60  0.00   0.00    0.00    0.01    0.03    0.05
#> cent_2002G      0.83    -0.10  0.00   0.02    0.08    0.22    0.42    0.63
#> cent_2011G      0.68    -0.65  0.00   0.01    0.04    0.22    0.45    0.69
Code
    # t()

3.2.1 VAR-based networks

Code
p1 <- ggplot(D, aes(x = deg_in_2002)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = deg_in_2011)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p3 <- ggplot(D, aes(x = deg_out_2002)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p4 <- ggplot(D, aes(x = deg_out_2011)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & theme_light()

Figure 1: Degree distributions of the two networks based on penalized pairwise VAR coefficients.

Code
# Size of points on the map
sz = 1
# Aspect ratio for lat and lon
# https://stackoverflow.com/questions/66018518/aspect-ratio-for-lat-lon-plots
lat_mean <- mean(rca_cells$LAT)
rto <- 1 / cos(lat_mean * pi / 180)

p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p3 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p4 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 2: Maps of the node degrees in the two networks based on penalized pairwise VAR coefficients.

Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011 - deg_in_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011 - deg_out_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
(p1 + p2) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 3: Maps of the node degree differences.*

Code
p1 <- ggplot(D, aes(x = clos_out_2002)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = clos_out_2011)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 4: Distributions of the closeness centrality of the networks based on penalized pairwise VAR coefficients.

Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 5: Maps of the node closeness centrality in the two networks based on penalized pairwise VAR coefficients.

Code
p1 <- ggplot(D, aes(x = cent_2002)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = cent_2011)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 6: Distributions of the eigenvector centrality of the two networks based on penalized pairwise VAR coefficients.

Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = cent_2002)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = cent_2011)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 7: Maps of the node eigenvector centrality in the two networks based on penalized pairwise VAR coefficients.

3.2.2 Granger causality-based networks

Code
p1 <- ggplot(D, aes(x = deg_in_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = deg_in_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p3 <- ggplot(D, aes(x = deg_out_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
p4 <- ggplot(D, aes(x = deg_out_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
    ylab("Density")
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & theme_light()

Figure 8: Degree distributions of the two networks based on pairwise Granger causality tests.

Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p3 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p4 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
(p1 + p2) / (p3 + p4) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 9: Maps of the node degrees in the two networks based on pairwise Granger causality tests.

Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011G - deg_in_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011G - deg_out_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz) + 
    scale_color_continuous(type = "viridis")
(p1 + p2) +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 10: Maps of the node degree differences.

Code
p1 <- ggplot(D, aes(x = clos_out_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = clos_out_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 11: Distributions of the closeness centrality of the networks based on pairwise Granger causality tests.

Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 12: Maps of the node closeness centrality in the two networks based on pairwise Granger causality tests.

Code
p1 <- ggplot(D, aes(x = cent_2002G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p2 <- ggplot(D, aes(x = cent_2011G)) + 
    geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
    ylab("Density")
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light()

Figure 13: Distributions of the eigenvector centrality of the two networks based on pairwise Granger causality tests.

Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = cent_2002G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = cent_2011G)) + 
    geom_polygon(data = shoreline, 
                 aes(x = long, y = lat, group = group), 
                 color = "black", fill = NA) + 
    geom_point(size = sz)
p1 + p2 +
    plot_annotation(tag_levels = 'A') & 
    theme_light() & 
    coord_fixed(ratio = rto)

Figure 14: Maps of the node eigenvector centrality in the two networks based on pairwise Granger causality tests.

References

Kolaczyk ED, Csárdi G (2014) Statistical analysis of network data with R. Springer, New York