Dependencies

If you don’t have these installed…

pacakge_list <- c("igraph", "tidyverse", "ggmap", "purrr", "sp")
new_packages <- pacakge_list[!(pacakge_list %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
                 # notes:
library(dplyr)   # tidyverse: dataframe manipulation
library(tidyr)   # tidyverse: data tidying
library(stringr) # tidyverse: improves base R's string manipulation
library(igraph)  # graph tools
library(purrr)   # improves base R's functional programming
library(ggmap)   # spatial tools to interact with ggplot2
library(ggplot2) # tidyverse: actually more basic than base R plotting

Graph Data Prep

Clean Edgelist, Create Initial igraph Object

  • load edgelist
  • clean country names with stringr::str_replace_all
    • to normalize names for geocode()ing later
  • create igraph object
el <- readr::read_csv("~/Master Edgelist.csv") %>%
  mutate_all(str_replace_all, "US", "USA") %>%
  mutate_all(str_replace_all, "DPRK", "North Korea") %>%
  mutate_all(str_replace_all, "UAE", "United Arab Emirates") %>%
  mutate_all(str_replace_all, "UAE", "United Arab Emirates") %>%
  mutate_all(str_replace_all, "BVI", "Virgin Islands")
## Parsed with column specification:
## cols(
##   node_1 = col_character(),
##   node_2 = col_character()
## )
g <- graph_from_data_frame(el, directed = FALSE)

Convert Parallel Edges to Weights

  • add weight attribute to Edges of graph
  • simplify the graph by
    • remove.multiple() edges
    • combining them into an Edge attribute
      • which is calculated by "summing" the weights of redundant Edges
  • assign the results to g_2
E(g)$weight <- 1

g_2 <- igraph::simplify(g, remove.multiple = TRUE,
                        edge.attr.comb = list(weight = "sum"))

Create new_el by as_edgelist()ing g_2.

new_el <- g_2 %>% as_edgelist()

Spatial Data Prep

Geocoded Node Coordinates

  • write a custom function to
    • get_geocoded_coords()
      • by passing the country names that are g_2’s nodes to
      • ggmap::geocode()
      • which queries the Google API
        • which we paste0() into a character object
          • and collapse the results with "," so they are easily separable
get_geocoded_coords <- function(country){
  coords <- geocode(country) %>%
    as.list() %>%
    paste0(collapse = ",")
  }
  • take g_2’s Vertex $names
    • make them as_tibble() (data.frame on steroids)
      • mutate() the tibble by
      • purrr::mapping our custom get_geocoded_coords() function
        • over the values of our V(g_2)’s $names (the country names)
    • separate() the character object on the "," separator
      • creating 2 new columns called "long" and "lat"
    • mutate_at() the "long" and "lat" columns to make them as.numeric()
    • assign the results to node_coords
node_coords <- V(g_2)$name %>%
  as_tibble() %>%
  mutate(coords = purrr::map(value, get_geocoded_coords)) %>%
  separate(coords, c("long", "lat"), sep = ",") %>%
  mutate_at(c("long", "lat"), as.numeric)

Bounding Box

  • take node_coords
    • remove the the value column by select()ing it with a subtracting -
    • drop_na() all values that are NA
    • convert it into a dedicated sp::SpatialPointsDataFrame()
    • created a bounding bbox()
      • and expand it slightly by multiplying it with * 1.1
    • assign the results to node_bounds
node_bounds <- node_coords %>%
  select(-value) %>%
  drop_na %>%
  sp::SpatialPointsDataFrame(coords = .[,1:2],
                             data = .) %>%
  sp::bbox() * 1.1

Assign Coordinates to Edges Pairs

  • make g_2 as_edgelist()
    • make it as_tibble() (data.frame on steroids)
    • full_join() the node_coords twice
      • matching "V1" to the country name "value"s the first time
      • matching "V2" to the country name "value"s the second time
    • drop_na() on V1, V2 to remove rows with NA values in those columns
    • assign the results to el_coords
el_coords <-
  as_edgelist(g_2) %>%
    as_tibble() %>%
    full_join(node_coords, by = c("V1" = "value")) %>%
    full_join(node_coords, by = c("V2" = "value")) %>%
    drop_na(V1, V2)

Base Map Data

  • create a base map by
    • using ggplot2’s map_data() function and specifying "world"
      • filter() to only keep regions (countries) that are not "Antarctica"
    • assign the results to map_world
map_world <- map_data(map = "world") %>%
  filter(region != "Antarctica")
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map

Plots

Defining the Base Plot

  • build the base plot using ggplot2::ggplot()
    • add geom_polygon()s for our base map
    • add geom_segment()s for our edges
    • add geom_point()s for our nodes
    • specify theme parameters

ggplot2 is a monster, but you can start exploring here

base_gg <- ggplot() +
  geom_polygon(data = map_world,
               aes(long, lat, group = group, fill = region), 
               show.legend = FALSE,
               alpha = 0.25,
               color = "white") +
  geom_segment(data = el_coords, 
               aes(x = long.x, xend = long.y,
                   y = lat.x, yend = lat.y),
               # arrow = arrow(length = unit(0.1, "inches"), # optional arrows
                             # type = "closed"),
               size = 0.25,
               alpha = 0.5) +
  geom_point(data = node_coords,
             aes(long, lat), color = "red") +
  theme_bw() +
  theme(axis.line = element_line(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        axis.text = element_blank()) +
  labs(title = "Geospatial Network",
       subtitle = "Plot Example",
       caption = "Brendan Knapp")

Various Coordinate Projections

base_gg +
  coord_cartesian()

base_gg +
  coord_fixed()

base_gg +
  coord_map("fisheye", 50)

base_gg +
  coord_map("ortho", orientation=c(30, 10, 0))

base_gg +
  coord_map("azequidistant")

base_gg +
  coord_map("azequalarea")

base_gg +
  coord_map("orthographic")

base_gg +
  coord_map("newyorker", 5)

base_gg +
  coord_polar()

base_gg +
  coord_trans()

base_gg +
  coord_map("fisheye", 50)

base_gg +
  coord_map("polyconic",
            xlim = c(node_bounds["long", "min"],
                     node_bounds["long", "max"]),
            ylim = c(node_bounds["lat", "min"],
                     node_bounds["lat", "max"]))

sessionInfo()

sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 15063)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] maps_3.2.0         bindrcpp_0.2       ggmap_2.6.1       
## [4] ggplot2_2.2.1.9000 purrr_0.2.4        igraph_1.1.2      
## [7] stringr_1.2.0      tidyr_0.7.2        dplyr_0.7.4       
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.14      compiler_3.4.3    plyr_1.8.4       
##  [4] bindr_0.1         tools_3.4.3       digest_0.6.12    
##  [7] evaluate_0.10.1   tibble_1.3.4      gtable_0.2.0     
## [10] lattice_0.20-35   pkgconfig_2.0.1   png_0.1-7        
## [13] rlang_0.1.4       mapproj_1.2-5     yaml_2.1.15      
## [16] proto_1.0.0       knitr_1.17.20     hms_0.4.0        
## [19] RgoogleMaps_1.4.1 tidyselect_0.2.3  rprojroot_1.2    
## [22] grid_3.4.3        glue_1.2.0        R6_2.2.2         
## [25] jpeg_0.1-8        rmarkdown_1.8     sp_1.2-5         
## [28] readr_1.1.1       reshape2_1.4.2    magrittr_1.5     
## [31] backports_1.1.1   scales_0.5.0.9000 htmltools_0.3.6  
## [34] assertthat_0.2.0  colorspace_1.3-2  geosphere_1.5-7  
## [37] labeling_0.3      stringi_1.1.6     lazyeval_0.2.1   
## [40] munsell_0.4.3     rjson_0.2.15