HyNet overview
1 Packages and functions
Load required packages and functions.
Code
library(dplyr)
library(ggplot2)
library(patchwork)
library(maptools)
library(igraph)
source("../code/fun_create_adj.R")2 Data
Code
# Load Ches Bay shape file
shoreline <- maptools::readShapePoly("../CBnet_shoreline/final_shoreline.shp")
shoreline <- maptools::readShapePoly("../CBnet_shoreline/CBnet_Shoreline_simplify.shp")
rca_cells <- data.table::fread("../data_rca/rca_cells_2.csv") %>%
filter(FSM == 1) %>%
mutate(CellID = paste0("x", CellID))
#Use info from the shape file to find projections for the stations
###http://www.prj2epsg.org/search
###26918 - NAD_1983_UTM_Zone_18N
###http://spatialreference.org/ref/epsg/26918/
tmp <- rgdal::project(cbind(360 + rca_cells$LON, rca_cells$LAT),
proj = "+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")
rca_cells <- rca_cells %>%
mutate(lon = tmp[, 1],
lat = tmp[, 2])Code
lagmax = 30Load results for the years 2002 and 2011 with maximum lag of 30 days.
Code
# Load model coefficients
# Mcoef2002 <- as.matrix(readRDS(paste0("../dataderived/BigVAR_2002_coef_lagmax",
# lagmax, ".rds")))
# Mcoef2011 <- as.matrix(readRDS(paste0("../dataderived/BigVAR_2011_coef_lagmax",
# lagmax, ".rds")))
# Mcoef2002 <- readRDS("../dataderived/VARpairs_2002.rds")
# Mcoef2011 <- readRDS("../dataderived/VARpairs_2011.rds")
Mcoef2002 <- readRDS("../dataderived/CAUSpairs_2002.rds")
Mcoef2011 <- readRDS("../dataderived/CAUSpairs_2011.rds")3 Network
3.1 Network construction
Process results into adjacency matrices.
Code
proc2002 <- create_adj_pairs(Mcoef2002)
proc2011 <- create_adj_pairs(Mcoef2011)Explore coefficients.
Code
summary(as.vector(proc2002$Acoef[!is.na(proc2002$Acoef)]))#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.000 0.000 0.057 0.091 0.142 0.863
Code
summary(as.vector(proc2011$Acoef[!is.na(proc2011$Acoef)]))#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.00 0.00 0.09 0.09 0.14 12.28
Additionally threshold coefficients.
Code
proc2002$Acoef[proc2002$Acoef < 0.01] <- 0
proc2011$Acoef[proc2011$Acoef < 0.01] <- 0Explore lags.
Code
summary(as.vector(proc2002$Alag))#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0 2 16 15 27 30 3386
Code
summary(as.vector(proc2011$Alag))#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0 13 26 21 30 30 3386
Additionally adjust the Granger test \(p\)-values for the number of tests. Here by the family of tests we understand the number of relationships tested between each \(i\)th spatial cell (\(i = 1, \dots,\) 3386) and a cell \(j\), \(j \neq i\). To control the false discovery rate in each family of tests, we use Benjamini–Hochberg (BH) or Benjamini–Yekutieli (BY) adjustment. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerful than other methods such as the Bonferroni correction. The BY procedure is more conservative than the BH procedure.
Code
tmp_BH <- apply(proc2002$Gp, 2, p.adjust, method = "BH")
tmp_BY <- apply(proc2002$Gp, 2, p.adjust, method = "BY")
summary(as.vector(tmp_BH[!is.na(tmp_BH)]))#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.000 0.017 0.167 0.302 0.538 1.000
Code
summary(as.vector(tmp_BY[!is.na(tmp_BY)]))#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.000 0.145 1.000 0.659 1.000 1.000
Code
proc2002$Gp <- tmp_BY #apply(proc2002$Gp, 2, p.adjust, method = "BY")
proc2011$Gp <- apply(proc2011$Gp, 2, p.adjust, method = "BY")Create weighted directed networks, where edge weight is the average weighted coefficient.
Code
# If subsample
# set.seed(123)
# ss <- sort(sample(3300, 500))
# If not subsample
ss <- 1:nrow(rca_cells)Calculate network stats.
Closeness centrality captures the notion that a vertex is ‘central’ if it is ‘close’ to many other vertices. \[ C_{clos}(v) = \frac{1}{\sum_{u \in V} dist(v, u)}, \] where \(dist(v, u)\) is the geodesic distance (distance over the network) between the vertices \(u, v \in V\). Often the normalization factor \(N_v - 1\) is used so \(C_{clos}(v) \in [0, 1]\) (Section 4.2.2 in Kolaczyk and Csárdi 2014).
Eigenvector centrality measures a node’s importance by considering the importance of its neighbors. This makes the eigenvector centrality suitable for measuring importance of a node. It is represented by the principal eigenvector of the adjacency matrix, and is similar to the core of Google’s PageRank algorithm, which they use to rank web pages.
Code
# Create networks
# - based on penalized pairwise VAR coefficients
g2002 <- igraph::graph_from_adjacency_matrix(proc2002$Acoef[ss, ss]
,weighted = TRUE
,mode = "directed"
,diag = FALSE)
# - based on Granger p-values
g2002G <- igraph::graph_from_adjacency_matrix(proc2002$Gp[ss, ss] < alpha
,weighted = NULL
,mode = "directed"
,diag = FALSE)
# - based on penalized pairwise VAR coefficients
g2011 <- igraph::graph_from_adjacency_matrix(proc2011$Acoef[ss, ss]
,weighted = TRUE
,mode = "directed"
,diag = FALSE)
# - based on Granger p-values
g2011G <- igraph::graph_from_adjacency_matrix(proc2011$Gp[ss, ss] < alpha
,weighted = NULL
,mode = "directed"
,diag = FALSE)
# Network stats
D <- rca_cells[ss, ] %>%
mutate(deg_in_2002 = degree(g2002, mode = "in"),
deg_in_2011 = degree(g2011, mode = "in"),
deg_in_2002G = degree(g2002G, mode = "in"),
deg_in_2011G = degree(g2011G, mode = "in"),
deg_out_2002 = degree(g2002, mode = "out"),
deg_out_2011 = degree(g2011, mode = "out"),
deg_out_2002G = degree(g2002G, mode = "out"),
deg_out_2011G = degree(g2011G, mode = "out"),
clos_out_2002 = closeness(g2002, mode = "out"),
clos_out_2011 = closeness(g2011, mode = "out"),
clos_out_2002G = closeness(g2002G, mode = "out"),
clos_out_2011G = closeness(g2011G, mode = "out"),
cent_2002 = eigen_centrality(g2002, directed = TRUE)$vector,
cent_2011 = eigen_centrality(g2011, directed = TRUE)$vector,
cent_2002G = eigen_centrality(g2002G, directed = TRUE)$vector,
cent_2011G = eigen_centrality(g2011G, directed = TRUE)$vector
)3.2 Compare 2002 and 2011
Numeric summaries.
Code
D %>%
select(!c(CellID, H, DX, DY, LAT, LON, FSM, lat, lon)) %>%
psych::describe(quant = c(0.1, 0.25, 0.5, 0.75, 0.90)) #%>%#> vars n mean sd median trimmed mad min max range
#> deg_in_2002 1 3386 1787.74 1052.19 1766.00 1793.32 1378.82 0 3385 3385
#> deg_in_2011 2 3386 1849.02 1043.73 1930.50 1867.76 1416.62 1 3385 3384
#> deg_in_2002G 3 3386 636.81 478.67 550.00 590.43 490.74 0 2238 2238
#> deg_in_2011G 4 3386 593.23 501.72 504.00 530.30 523.36 0 1928 1928
#> deg_out_2002 5 3386 1787.74 882.50 1736.50 1775.47 1123.81 279 3385 3106
#> deg_out_2011 6 3386 1849.02 861.34 1818.00 1840.24 1082.30 293 3385 3092
#> deg_out_2002G 7 3386 636.81 351.73 589.00 614.56 389.92 16 1737 1721
#> deg_out_2011G 8 3386 593.23 354.76 542.00 571.52 386.96 11 1762 1751
#> clos_out_2002 9 3386 0.00 0.00 0.00 0.00 0.00 0 0 0
#> clos_out_2011 10 3386 0.00 0.00 0.00 0.00 0.00 0 0 0
#> clos_out_2002G 11 3386 0.00 0.00 0.00 0.00 0.00 0 0 0
#> clos_out_2011G 12 3386 0.00 0.00 0.00 0.00 0.00 0 0 0
#> cent_2002 13 3386 0.09 0.16 0.01 0.05 0.02 0 1 1
#> cent_2011 14 3386 0.03 0.07 0.01 0.02 0.01 0 1 1
#> cent_2002G 15 3386 0.27 0.23 0.22 0.25 0.23 0 1 1
#> cent_2011G 16 3386 0.27 0.25 0.22 0.25 0.29 0 1 1
#> skew kurtosis se Q0.1 Q0.25 Q0.5 Q0.75 Q0.9
#> deg_in_2002 0.00 -1.29 18.08 350.50 871.00 1766.00 2730.50 3278.00
#> deg_in_2011 -0.11 -1.31 17.94 377.50 893.00 1930.50 2812.75 3248.00
#> deg_in_2002G 0.77 0.03 8.23 65.00 253.25 550.00 924.00 1335.00
#> deg_in_2011G 0.87 -0.03 8.62 38.00 160.25 504.00 875.00 1344.50
#> deg_out_2002 0.10 -1.18 15.17 619.00 1010.25 1736.50 2539.75 3046.50
#> deg_out_2011 0.08 -1.15 14.80 684.50 1106.25 1818.00 2573.50 3078.50
#> deg_out_2002G 0.49 -0.45 6.04 208.00 350.00 589.00 880.75 1125.50
#> deg_out_2011G 0.49 -0.55 6.10 166.00 303.00 542.00 850.00 1134.50
#> clos_out_2002 0.18 -1.32 0.00 0.00 0.00 0.00 0.00 0.00
#> clos_out_2011 0.52 -0.76 0.00 0.00 0.00 0.00 0.00 0.00
#> clos_out_2002G 0.28 -0.08 0.00 0.00 0.00 0.00 0.00 0.00
#> clos_out_2011G 0.18 0.06 0.00 0.00 0.00 0.00 0.00 0.00
#> cent_2002 2.96 10.13 0.00 0.00 0.00 0.01 0.10 0.25
#> cent_2011 8.00 78.60 0.00 0.00 0.00 0.01 0.03 0.05
#> cent_2002G 0.83 -0.10 0.00 0.02 0.08 0.22 0.42 0.63
#> cent_2011G 0.68 -0.65 0.00 0.01 0.04 0.22 0.45 0.69
Code
# t()3.2.1 VAR-based networks
Code
p1 <- ggplot(D, aes(x = deg_in_2002)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
p2 <- ggplot(D, aes(x = deg_in_2011)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
p3 <- ggplot(D, aes(x = deg_out_2002)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
p4 <- ggplot(D, aes(x = deg_out_2011)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
(p1 + p2) / (p3 + p4) +
plot_annotation(tag_levels = 'A') & theme_light()Code
# Size of points on the map
sz = 1
# Aspect ratio for lat and lon
# https://stackoverflow.com/questions/66018518/aspect-ratio-for-lat-lon-plots
lat_mean <- mean(rca_cells$LAT)
rto <- 1 / cos(lat_mean * pi / 180)
p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2002)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p3 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2002)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p4 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
(p1 + p2) / (p3 + p4) +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011 - deg_in_2002)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz) +
scale_color_continuous(type = "viridis")
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011 - deg_out_2002)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz) +
scale_color_continuous(type = "viridis")
(p1 + p2) +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)Code
p1 <- ggplot(D, aes(x = clos_out_2002)) +
geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
ylab("Density")
p2 <- ggplot(D, aes(x = clos_out_2011)) +
geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
ylab("Density")
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light()Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2002)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2011)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)Code
p1 <- ggplot(D, aes(x = cent_2002)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
ylab("Density")
p2 <- ggplot(D, aes(x = cent_2011)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
ylab("Density")
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light()Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = cent_2002)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = cent_2011)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)3.2.2 Granger causality-based networks
Code
p1 <- ggplot(D, aes(x = deg_in_2002G)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
p2 <- ggplot(D, aes(x = deg_in_2011G)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
p3 <- ggplot(D, aes(x = deg_out_2002G)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
p4 <- ggplot(D, aes(x = deg_out_2011G)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 10, fill = "grey50") +
ylab("Density")
(p1 + p2) / (p3 + p4) +
plot_annotation(tag_levels = 'A') & theme_light()Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2002G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p3 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2002G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p4 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
(p1 + p2) / (p3 + p4) +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = deg_in_2011G - deg_in_2002G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz) +
scale_color_continuous(type = "viridis")
p2 <- ggplot(D, aes(x = lon, y = lat, color = deg_out_2011G - deg_out_2002G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz) +
scale_color_continuous(type = "viridis")
(p1 + p2) +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)Code
p1 <- ggplot(D, aes(x = clos_out_2002G)) +
geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
ylab("Density")
p2 <- ggplot(D, aes(x = clos_out_2011G)) +
geom_histogram(aes(y = after_stat(density)), bins = 50, fill = "grey50") +
ylab("Density")
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light()Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2002G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = clos_out_2011G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)Code
p1 <- ggplot(D, aes(x = cent_2002G)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
ylab("Density")
p2 <- ggplot(D, aes(x = cent_2011G)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 0.01, fill = "grey50") +
ylab("Density")
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light()Code
p1 <- ggplot(D, aes(x = lon, y = lat, color = cent_2002G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p2 <- ggplot(D, aes(x = lon, y = lat, color = cent_2011G)) +
geom_polygon(data = shoreline,
aes(x = long, y = lat, group = group),
color = "black", fill = NA) +
geom_point(size = sz)
p1 + p2 +
plot_annotation(tag_levels = 'A') &
theme_light() &
coord_fixed(ratio = rto)