ERGMs to predict movement across divisions in space

load data

# sample areas -----------------------------------------------------------------

library(geox)
arrs <- geox::rpops %>%
  filter(pop > 50e3
         ,rt == 'cz') %>%
  add.rns()

arrs %>%
  filter(pop > 500e3) %>%
  arrange(pop) %>%
  head(100) %>%
  pull(rn)
##   [1] "Saginaw"            "Biloxi"             "Hagerstown"        
##   [4] "Roanoke"            "Wilmington"         "Beaumont"          
##   [7] "Shreveport"         "Tyler"              "Peoria"            
##  [10] "Kalamazoo"          "Springfield"        "Corpus Christi"    
##  [13] "Anniston"           "Chattanooga"        "Winston-Salem"     
##  [16] "Fayetteville"       "Dover"              "Reno"              
##  [19] "Savannah"           "Jackson"            "Appleton"          
##  [22] "Fort Wayne"         "Lafayette"          "Johnson City"      
##  [25] "Newport News"       "South Augusta"      "Wichita"           
##  [28] "Erie"               "Racine"             "Provo"             
##  [31] "Lexington-Fayette"  "Huntsville"         "Rockford"          
##  [34] "Greenville"         "Santa Rosa"         "South Bend"        
##  [37] "Wilmington"         "Mobile"             "Fort Collins"      
##  [40] "Florence"           "Springfield"        "Gary"              
##  [43] "Canton"             "Madison"            "Daytona Beach"     
##  [46] "Fayetteville"       "Santa Barbara"      "Portland"          
##  [49] "Allentown"          "Youngstown"         "Boise City"        
##  [52] "Palm Bay"           "Spokane"            "Little Rock"       
##  [55] "Pensacola"          "Des Moines"         "Colorado Springs"  
##  [58] "Charleston"         "Lakeland"           "Toledo"            
##  [61] "Knoxville"          "Columbia"           "Modesto"           
##  [64] "Bakersfield"        "Scranton"           "Poughkeepsie"      
##  [67] "Albuquerque"        "Baton Rouge"        "Omaha"             
##  [70] "Honolulu"           "Sarasota"           "Tulsa"             
##  [73] "El Paso"            "Syracuse"           "Greenville"        
##  [76] "Cape Coral"         "Albany"             "Eugene"            
##  [79] "Birmingham"         "Dayton"             "Harrisburg"        
##  [82] "Tucson"             "Greensboro"         "Brick Township"    
##  [85] "Virginia Beach"     "Reading"            "Louisville"        
##  [88] "Richmond"           "Memphis"            "Manchester"        
##  [91] "New Orleans"        "Brownsville"        "Grand Rapids"      
##  [94] "Oklahoma City"      "Jacksonville"       "Providence"        
##  [97] "Nashville-Davidson" "Milwaukee"          "Fresno"            
## [100] "Indianapolis"
arrs %>%
  filter(grepl('Portland', rn))
## # A tibble: 2 x 4
##   rt    rid       pop rn      
##   <chr> <chr>   <dbl> <chr>   
## 1 cz    20100  729367 Portland
## 2 cz    38801 2340930 Portland
czsf <- build.CZs('20100')
plcsf <- geox::places.wrapper(x = czsf)

library(mapview)
czsf %>% mapview() + mapview(plcsf, color = 'red')
# for analysis, i want probably cz/cbsa (or maybe sometimes Place)
# for visuals, i want region network cropped to bbox
# for sample, i'll use Place
smplc <- plcsf %>%
  filter(grepl('^Portland', name)) %>%
  st_transform(4326)
smplc %>% mapview()
# get block groups for place ---------------------------------------------------

bgs <- geox::nbhds.from.sf(x = st_bbox(smplc)
                            ,query.fcn = tigris::block_groups)
bgs[3] %>% plot()

# remove water areas (per census naming conventions)

sbgs <- bgs %>% filter(substr(tractce,1,2) != '99')

# subset to polygon, not bboxx
sbgs <- st_make_valid(sbgs)
sbgs <- geox::get.spatial.overlap(sbgs, smplc
                                  ,'geoid', 'placefp')
# filter original census query to keep all columns
sbgs <- bgs %>%
  filter(geoid %in% sbgs$geoid)

sbgs['awater'] %>% plot()

# just b/c for visuals, remove islands in ad hoc way
sbgs <- sbgs %>% filter(tractce != '002400')

bounds <- st_sf(geometry = st_union(sbgs))

# load safegraph for area ------------------------------------------------------

# function loads all CZs intersecting with area
sfg <- sfx2sfg(bounds
               ,min.flows = 10
               ,tracts.or.groups = 'bg'
               ,trim.loops = T
               ,year=2019)

#trim to just place
sfgt <- sfg %>%
  filter(across(c(origin, dest)
                , ~.x %in% sbgs$geoid))

# turn to (undirected) graph
ugh <- sfg2gh(sfgt, directed = F)

ugh
## # A tbl_graph: 51 nodes and 734 edges
## #
## # An undirected simple graph with 1 component
## #
## # Edge Data: 734 x 5 (active)
##    from    to     n  year   tstr
##   <int> <int> <dbl> <dbl>  <dbl>
## 1     1     2  19.1  2019 0.0236
## 2     1     3  14.5  2019 0.0247
## 3     2     3  40.8  4038 0.0425
## 4     1     4  12.9  2019 0.0265
## 5     2     4  25.9  2019 0.0297
## 6     2     5  19.0  2019 0.0231
## # ... with 728 more rows
## #
## # Node Data: 51 x 1
##   name        
##   <chr>       
## 1 230050001001
## 2 230050001002
## 3 230050001003
## # ... with 48 more rows
ghsf <- spatialize.graph(ugh,
                         frame.sf = NULL
                         ,tracts.or.groups = 'bg'
                         ,directed = F
                         ,year = 2019)
# transform
ghsf <- st_transform(ghsf, 4326)

Look at graph

Summarise edges (trips and tie strength); visualize

ghsf
## # A sfnetwork with 51 nodes and 734 edges
## #
## # CRS:  EPSG:4326 
## #
## # An undirected simple graph with 1 component with spatially explicit edges
## #
## # Edge Data:     734 x 7 (active)
## # Geometry type: LINESTRING
## # Dimension:     XY
## # Bounding box:  xmin: -70.3228 ymin: 43.64758 xmax: -70.24002 ymax: 43.7189
##    from    to     n  year   tstr                                 geometry   dst
##   <int> <int> <dbl> <dbl>  <dbl>                         <LINESTRING [°]>   [m]
## 1     1     2  19.1  2019 0.0236 (-70.24891 43.67195, -70.25038 43.66637)  629.
## 2     1     3  14.5  2019 0.0247 (-70.24891 43.67195, -70.25169 43.66997)  313.
## 3     2     3  40.8  4038 0.0425 (-70.25038 43.66637, -70.25169 43.66997)  412.
## 4     1     4  12.9  2019 0.0265 (-70.24891 43.67195, -70.24002 43.66662)  928.
## 5     2     4  25.9  2019 0.0297 (-70.25038 43.66637, -70.24002 43.66662)  834.
## 6     2     5  19.0  2019 0.0231  (-70.25038 43.66637, -70.2444 43.66602)  483.
## # ... with 728 more rows
## #
## # Node Data:     51 x 2
## # Geometry type: POINT
## # Dimension:     XY
## # Bounding box:  xmin: -70.3228 ymin: 43.64758 xmax: -70.24002 ymax: 43.7189
##   name                     geometry
##   <chr>                 <POINT [°]>
## 1 230050001001 (-70.24891 43.67195)
## 2 230050001002 (-70.25038 43.66637)
## 3 230050001003 (-70.25169 43.66997)
## # ... with 48 more rows
sbgs
## Simple feature collection with 51 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -70.3474 ymin: 43.63897 xmax: -70.23619 ymax: 43.72769
## Geodetic CRS:  WGS 84
## First 10 features:
##    statefp countyfp tractce blkgrpce        geoid      namelsad mtfcc funcstat
## 1       23      005  002300        1 230050023001 Block Group 1 G5030        S
## 2       23      005  002300        2 230050023002 Block Group 2 G5030        S
## 3       23      005  002200        1 230050022001 Block Group 1 G5030        S
## 4       23      005  002200        2 230050022002 Block Group 2 G5030        S
## 5       23      005  002101        3 230050021013 Block Group 3 G5030        S
## 6       23      005  002101        4 230050021014 Block Group 4 G5030        S
## 7       23      005  002102        1 230050021021 Block Group 1 G5030        S
## 8       23      005  002101        2 230050021012 Block Group 2 G5030        S
## 9       23      005  002102        2 230050021022 Block Group 2 G5030        S
## 10      23      005  001300        3 230050013003 Block Group 3 G5030        S
##      aland awater    intptlat     intptlon                       geometry
## 1   861074 945502 +43.6952196 -070.2557216 POLYGON ((-70.27093 43.6841...
## 2  1537440  19107 +43.6931846 -070.2608912 POLYGON ((-70.26815 43.6857...
## 3  2944263      0 +43.6972159 -070.2738949 POLYGON ((-70.28791 43.6995...
## 4  1693103      0 +43.6885853 -070.2875085 POLYGON ((-70.29414 43.6806...
## 5  1126775      0 +43.7137072 -070.2953324 POLYGON ((-70.30229 43.7154...
## 6  1461836      0 +43.7031421 -070.2965963 POLYGON ((-70.30406 43.7108...
## 7  2114891    243 +43.6922525 -070.2976300 POLYGON ((-70.31553 43.6995...
## 8   959877      0 +43.7088326 -070.2870054 POLYGON ((-70.29286 43.7179...
## 9  6145224  79429 +43.6895576 -070.3007839 POLYGON ((-70.33637 43.6899...
## 10  240873      0 +43.6456455 -070.2686088 POLYGON ((-70.27746 43.6442...
# flow summaries -------------------------------------------------------------

edges <- ghsf %>%
  activate(edges) %>%
  as_tibble()

tibble(edges)[Hmisc::Cs(n, tstr, dst)] %>% map(summary)
## $n
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.02   17.39   30.89   42.59   54.76  318.99 
## 
## $tstr
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.002775 0.007469 0.011341 0.014216 0.016861 0.079718 
## 
## $dst
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   202.4  1502.3  2571.6  2878.9  4037.5  7786.9
tibble(edges)[Hmisc::Cs(n, tstr, dst)] %>% map( ~quantile(.x, seq(0,1,.1)))
## $n
##        0%       10%       20%       30%       40%       50%       60%       70% 
##  10.01574  11.77308  15.05764  19.36020  25.16816  30.89337  38.03433  46.76472 
##       80%       90%      100% 
##  63.86684  86.54512 318.99439 
## 
## $tstr
##          0%         10%         20%         30%         40%         50% 
## 0.002774950 0.005540785 0.006776791 0.008154258 0.009643977 0.011340724 
##         60%         70%         80%         90%        100% 
## 0.013271820 0.015338410 0.019187395 0.024539496 0.079717728 
## 
## $dst
## Units: [m]
##        0%       10%       20%       30%       40%       50%       60%       70% 
##  202.4337  794.5940 1223.7401 1683.8339 2112.3178 2571.5879 3096.0358 3681.9834 
##       80%       90%      100% 
## 4361.6780 5498.3505 7786.9189
# trim by flow str -------------------------------------------------------------

bounds
## Simple feature collection with 1 feature and 0 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -70.3474 ymin: 43.63897 xmax: -70.23619 ymax: 43.72769
## Geodetic CRS:  WGS 84
##                         geometry
## 1 POLYGON ((-70.25029 43.6744...
sbgs
## Simple feature collection with 51 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -70.3474 ymin: 43.63897 xmax: -70.23619 ymax: 43.72769
## Geodetic CRS:  WGS 84
## First 10 features:
##    statefp countyfp tractce blkgrpce        geoid      namelsad mtfcc funcstat
## 1       23      005  002300        1 230050023001 Block Group 1 G5030        S
## 2       23      005  002300        2 230050023002 Block Group 2 G5030        S
## 3       23      005  002200        1 230050022001 Block Group 1 G5030        S
## 4       23      005  002200        2 230050022002 Block Group 2 G5030        S
## 5       23      005  002101        3 230050021013 Block Group 3 G5030        S
## 6       23      005  002101        4 230050021014 Block Group 4 G5030        S
## 7       23      005  002102        1 230050021021 Block Group 1 G5030        S
## 8       23      005  002101        2 230050021012 Block Group 2 G5030        S
## 9       23      005  002102        2 230050021022 Block Group 2 G5030        S
## 10      23      005  001300        3 230050013003 Block Group 3 G5030        S
##      aland awater    intptlat     intptlon                       geometry
## 1   861074 945502 +43.6952196 -070.2557216 POLYGON ((-70.27093 43.6841...
## 2  1537440  19107 +43.6931846 -070.2608912 POLYGON ((-70.26815 43.6857...
## 3  2944263      0 +43.6972159 -070.2738949 POLYGON ((-70.28791 43.6995...
## 4  1693103      0 +43.6885853 -070.2875085 POLYGON ((-70.29414 43.6806...
## 5  1126775      0 +43.7137072 -070.2953324 POLYGON ((-70.30229 43.7154...
## 6  1461836      0 +43.7031421 -070.2965963 POLYGON ((-70.30406 43.7108...
## 7  2114891    243 +43.6922525 -070.2976300 POLYGON ((-70.31553 43.6995...
## 8   959877      0 +43.7088326 -070.2870054 POLYGON ((-70.29286 43.7179...
## 9  6145224  79429 +43.6895576 -070.3007839 POLYGON ((-70.33637 43.6899...
## 10  240873      0 +43.6456455 -070.2686088 POLYGON ((-70.27746 43.6442...
# skipped; sample area seems small enough
trimmed.ghsf <- apply.flow.filters(ghsf
                                   ,frame.sf = NULL
                                   ,directed = F
                                   ,min.tie.str = NULL
                                   ,tie.str.deciles = 5 # filter out bottom x deciles
                                   ,min.flows = 10
                                   ,max.dst = units::set_units(20, 'miles'))

# map graph ------------------------------------------------------------

sbgs <- sbgs %>% st_transform(4326)

# get background for mapping
sttm <- visaux::get.stamen.bkg(
  sbgs
  , zoom = 12
  ,maptype = 'toner-background'
)

# lonlat matrix for notes
lonlats <- ghsf %>%
  activate('nodes') %>%
  as_tibble() %>%
  st_sf() %>%
  st_coordinates()

library(ggraph)

ggmap(sttm
      , base_layer =
        ggraph(trimmed.ghsf # ghsf #
               , layout = lonlats )
) +
  geom_edge_density(fill = '#00D0D0') +
  geom_edge_fan( aes( edge_alpha = tstr
                      ,edge_width = tstr)
                 ,color = '#008080'
  ) +
  flow.map.base() +
  visaux::bbox2ggcrop(bounds)

Generate divisions

Can use pre-loaded divisions. I kind of like just using the function to re-generate because it makes the process more self-contained and easier to adjust.

I allocate neighborhoods (defined as block groups) to sides-of-divisons based on interstates and all urban arterials for this example.

# merge in n'hood level ERGM controls -------------------------------

# (re)generate nhood divisions ----------------------------------------------

#' i was previously working with NHPN data (national highway planning network)
#' but it was often frustratingly messy. Switching to spatial census data, which is
#' much better, but more complex


#' Census codes for road features are referenced here:
#'
#' https://www2.census.gov/geo/pdfs/reference/mtfccs2019.pdf
#' (See MTFCCS: S1100, S1200, S1400)

sbgs
## Simple feature collection with 51 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -70.3474 ymin: 43.63897 xmax: -70.23619 ymax: 43.72769
## Geodetic CRS:  WGS 84
## First 10 features:
##    statefp countyfp tractce blkgrpce        geoid      namelsad mtfcc funcstat
## 1       23      005  002300        1 230050023001 Block Group 1 G5030        S
## 2       23      005  002300        2 230050023002 Block Group 2 G5030        S
## 3       23      005  002200        1 230050022001 Block Group 1 G5030        S
## 4       23      005  002200        2 230050022002 Block Group 2 G5030        S
## 5       23      005  002101        3 230050021013 Block Group 3 G5030        S
## 6       23      005  002101        4 230050021014 Block Group 4 G5030        S
## 7       23      005  002102        1 230050021021 Block Group 1 G5030        S
## 8       23      005  002101        2 230050021012 Block Group 2 G5030        S
## 9       23      005  002102        2 230050021022 Block Group 2 G5030        S
## 10      23      005  001300        3 230050013003 Block Group 3 G5030        S
##      aland awater    intptlat     intptlon                       geometry
## 1   861074 945502 +43.6952196 -070.2557216 POLYGON ((-70.27093 43.6841...
## 2  1537440  19107 +43.6931846 -070.2608912 POLYGON ((-70.26815 43.6857...
## 3  2944263      0 +43.6972159 -070.2738949 POLYGON ((-70.28791 43.6995...
## 4  1693103      0 +43.6885853 -070.2875085 POLYGON ((-70.29414 43.6806...
## 5  1126775      0 +43.7137072 -070.2953324 POLYGON ((-70.30229 43.7154...
## 6  1461836      0 +43.7031421 -070.2965963 POLYGON ((-70.30406 43.7108...
## 7  2114891    243 +43.6922525 -070.2976300 POLYGON ((-70.31553 43.6995...
## 8   959877      0 +43.7088326 -070.2870054 POLYGON ((-70.29286 43.7179...
## 9  6145224  79429 +43.6895576 -070.3007839 POLYGON ((-70.33637 43.6899...
## 10  240873      0 +43.6456455 -070.2686088 POLYGON ((-70.27746 43.6442...
rds <- tigris::roads(state = unique(sbgs$statefp)
                     ,county = unique(sbgs$countyfp)) %>%
  rename_with(tolower)

rds <- rds %>% st_transform(4326)
rds <- st_crop(rds, smplc)

# just interstates
interstates <- rds %>% 
  filter(rttyp %in% 'I')

# all primary & secondary roads (limited-access and arterials) and hwy ramps
arterials <- rds %>%
  filter(mtfcc %in%
           Hmisc::Cs(S1100, S1200, S1630))

# call divM function
bounds$placefp <- smplc$placefp

xnbd <- divM::gen.cross.tract.dividedness(
  bounds
  ,interstates
  ,nbd.query.fcn = tigris::block_groups
  ,region.id.colm = 'placefp'
  ,fill.nhpn.gaps = F # this can help in some cases and not others... nhpn data is not perfect
  ,erase.water = F
  ,year = 2019
) %>%
  select(geoid, int.poly = poly.id)


xnbd.a <- divM::gen.cross.tract.dividedness(
  bounds
  ,arterials
  ,nbd.query.fcn = tigris::block_groups
  ,region.id.colm = 'placefp'
  ,fill.nhpn.gaps = F # this can help in some cases and not others... nhpn data is not perfect
  ,erase.water = F
  ,year = 2019
) %>%
  select(geoid, arterial.poly = poly.id)

# merge in w/ nodes
ghsf <-
  ghsf %>%
  activate('nodes') %>%
  left_join(xnbd
            , by = c('name' = 'geoid')) %>% 
    left_join(xnbd.a
            , by = c('name' = 'geoid'))


# vis
ghsf %>%
  activate('nodes') %>%
  as_tibble() %>% st_sf() %>% mapview(zcol = 'int.poly') +
  mapview(interstates) +
  mapview(bounds)
ghsf %>%
  activate('nodes') %>%
  as_tibble() %>% st_sf() %>% mapview(zcol = 'arterial.poly') +
  mapview(arterials) +
  mapview(bounds)

Add other characteristics

Keep simple for this example

# get other tract-level characteristics ----------------------------------------

demos <- sfg.seg::demos.2019 %>%
  select(1,2,matches('wh$|bl$|hsp$'))

ghsf <-
  ghsf %>%
  activate('nodes') %>%
  left_join(demos
            , by = c('name' = 'geoid'))

#region-level demos
demos %>%
  select(matches('pop|^n')) %>%
  colSums() %>% format(big.mark = ',')
##           pop          n.wh          n.bl         n.hsp 
## "328,016,242" "197,132,096" " 39,980,733" " 61,755,289"
# play with democgraphic flow visualization ------------------------------------

# could be better if not undirected
dghsf <- ghsf %>%
  activate('edges') %>%
  mutate(flow.n.bl =
           n * (.N()$n.bl[from] + .N()$n.bl[to]) / 2
         ,flow.n.hsp =
           n * (.N()$n.hsp[from] + .N()$n.hsp[to]) / 2
         ,flow.n.wh =
           n * (.N()$n.wh[from] + .N()$n.wh[to]) / 2
  )
# .....

Run an ERGM

set.seed(1234)

# final setup for ergm ---------------------------------------------------------

# ergm can't handle a lot of things: no factors, no geometry, no bundled units, no
# NAs, etc....

# de-spatialize
ugh <- ghsf %>%
  as_tbl_graph() %>%
  activate('edges') %>%
  select(-geometry) %>%
  activate('nodes') %>%
  select(-geometry)

# remove units (should be meters)
ugh <- ugh %>%
  activate('edges') %>%
  mutate(dst = as.numeric(dst))

# convert using statnet packages
# hard to manipulate, but built for the ergm implementation
devtools::load_all()
library(statnet)
eugh <- ergm.clean.n.convert(ugh)

# run ergm ---------------------------------------------------------------------

#install.packages("ergm.count")
#install.packages("ergm.rank")
#install.packages("latentnet")
#update.packages()
library(ergm)
library(ergm.rank)
library(latentnet)

eugh
##  Network attributes:
##   vertices = 51 
##   directed = FALSE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = FALSE 
##   bipartite = FALSE 
##   total edges= 734 
##     missing edges= 0 
##     non-missing edges= 734 
## 
##  Vertex attribute names: 
##     arterial.poly int.poly n.bl n.hsp n.wh perc.bl perc.hsp perc.wh pop vertex.names 
## 
##  Edge attribute names: 
##     dst n tstr year
fo.base <- formula(
  eugh ~ edges +
    nodematch("int.poly", diff = FALSE) +
    #nodematch("hwy.poly", diff = FALSE) +
    absdiff("perc.wh", pow=1)
)
ugh
## # A tbl_graph: 51 nodes and 734 edges
## #
## # An undirected simple graph with 1 component
## #
## # Edge Data: 734 x 6 (active)
##    from    to     n  year   tstr   dst
##   <int> <int> <dbl> <dbl>  <dbl> <dbl>
## 1     1     2  19.1  2019 0.0236  629.
## 2     1     3  14.5  2019 0.0247  313.
## 3     2     3  40.8  4038 0.0425  412.
## 4     1     4  12.9  2019 0.0265  928.
## 5     2     4  25.9  2019 0.0297  834.
## 6     2     5  19.0  2019 0.0231  483.
## # ... with 728 more rows
## #
## # Node Data: 51 x 10
##   name   int.poly arterial.poly   pop  n.wh  n.bl n.hsp perc.wh perc.bl perc.hsp
##   <chr>  <fct>    <fct>         <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>    <dbl>
## 1 23005~ 7        121             519   487     0     0   0.938  0        0     
## 2 23005~ 7        121             867   781     0    68   0.901  0        0.0784
## 3 23005~ 7        121             782   718    13     0   0.918  0.0166   0     
## # ... with 48 more rows
# ergms take while to run. A good time to use Della
ergm1 <- ergm(formula = fo.base
              ,"n"
              ,reference = ~Poisson)

ergm1 %>% summary()
## Call:
## ergm(formula = fo.base, response = "n", reference = ~Poisson)
## 
## Monte Carlo Maximum Likelihood Results:
## 
##                        Estimate Std. Error MCMC % z value Pr(>|z|)
## nonzero                 -11.083     13.528      0  -0.819    0.413
## nodematch.sum.int.poly  -13.127         NA     NA      NA       NA
## absdiff.sum.perc.wh       1.267     30.427      0   0.042    0.967
## 
##      Null Deviance:     0  on 1275  degrees of freedom
##  Residual Deviance: -1050  on 1272  degrees of freedom
##  
## Note that the null model likelihood and deviance are defined to be 0.
## This means that all likelihood-based inference (LRT, Analysis of
## Deviance, AIC, BIC, etc.) is only valid between models with the same
## reference distribution and constraints.
## 
## AIC: -1044  BIC: -1029  (Smaller is better. MC Std. Err. = 10.16)

Incorporating Distance and Spatial Interaction Function

# generate spatial interaction fcn ---------------------------------------------

# my little SIF tutorial:
# https://rpubs.com/kmc39/sif

sfn <- ghsf %>%
  activate('nodes') %>%
  as_tibble()

sfe <- ghsf %>%
  activate('edges') %>%
  as_tibble()

# turn distance into numerics representing km
sfe <- sfe %>% mutate(dst = as.numeric(dst) / 1000 )

dst.mat <- build.pairwise.dst.matrix(sfn)
dst.mat %>% head() # values are kilometers
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]     [,7]
## [1,] 0.0000000 0.6306517 0.3133730 0.9284223 0.7525950 0.8926390 2.136144
## [2,] 0.6306517 0.0000000 0.4135880 0.8332277 0.4823137 0.3801857 1.552808
## [3,] 0.3133730 0.4135880 0.0000000 1.0097007 0.7328590 0.7623688 1.836698
## [4,] 0.9284223 0.8332277 1.0097007 0.0000000 0.3584428 0.6232966 2.176180
## [5,] 0.7525950 0.4823137 0.7328590 0.3584428 0.0000000 0.2975696 1.856713
## [6,] 0.8926390 0.3801857 0.7623688 0.6232966 0.2975696 0.0000000 1.561880
##          [,8]      [,9]     [,10]    [,11]     [,12]    [,13]    [,14]    [,15]
## [1,] 2.323630 0.9955458 1.1782157 1.998422 1.4414332 2.370744 2.665565 2.779401
## [2,] 1.696117 0.8807271 0.5475970 1.482462 0.9547701 1.912369 2.133219 2.162980
## [3,] 2.057720 0.7271432 0.9393668 1.686588 1.1290642 2.057910 2.354229 2.496344
## [4,] 2.123001 1.6822390 1.0864888 2.212853 1.7388021 2.677643 2.818205 2.642087
## [5,] 1.862794 1.3516897 0.7616538 1.867322 1.3824592 2.326207 2.485961 2.370808
## [6,] 1.571276 1.2403249 0.4670194 1.590294 1.1331190 2.058695 2.196988 2.075820
##         [,16]    [,17]    [,18]    [,19]    [,20]    [,21]    [,22]    [,23]
## [1,] 2.696962 2.496271 3.034928 2.921933 3.323017 3.188231 3.189973 2.011334
## [2,] 2.109208 1.882907 2.435018 2.343613 2.765798 2.695022 2.618873 1.795910
## [3,] 2.397364 2.211392 2.739935 2.618702 3.014347 2.874859 2.884367 1.736771
## [4,] 2.692430 2.391782 2.969831 2.940669 3.393506 3.412511 3.223134 2.628827
## [5,] 2.388507 2.108381 2.682820 2.634017 3.079512 3.073357 2.915603 2.276937
## [6,] 2.091206 1.811714 2.385577 2.337024 2.783806 2.789436 2.618774 2.090847
##         [,24]    [,25]    [,26]    [,27]    [,28]    [,29]    [,30]    [,31]
## [1,] 2.647417 2.777749 3.241730 4.120361 4.474590 3.632288 2.182517 2.351342
## [2,] 2.310214 2.546119 3.053679 3.892735 4.326906 3.492485 2.458709 2.452831
## [3,] 2.349487 2.504066 2.982259 3.852067 4.231241 3.389280 2.117736 2.196512
## [4,] 3.127015 3.378779 3.886832 4.724080 5.159093 4.323317 3.100755 3.205666
## [5,] 2.768618 3.023386 3.533902 4.367366 4.808420 3.974595 2.850540 2.903860
## [6,] 2.532358 2.818539 3.338311 4.152331 4.618215 3.792128 2.838868 2.821124
##         [,32]    [,33]    [,34]    [,35]    [,36]    [,37]    [,38]    [,39]
## [1,] 2.666912 3.379431 3.567083 4.331651 3.995861 5.628241 5.561291 3.985625
## [2,] 2.613335 3.409367 3.504927 4.277710 4.015144 5.629180 5.456876 3.600522
## [3,] 2.449542 3.199082 3.350988 4.120955 3.813254 5.441589 5.334551 3.682280
## [4,] 3.425992 4.199247 4.323172 5.095840 4.811015 6.434443 6.285648 4.388073
## [5,] 3.091654 3.878989 3.985294 4.758152 4.487572 6.105613 5.939185 4.033703
## [6,] 2.948531 3.759799 3.830686 4.601996 4.360570 5.965260 5.759905 3.772220
##         [,40]    [,41]    [,42]    [,43]    [,44]    [,45]    [,46]    [,47]
## [1,] 6.205575 5.989342 5.118975 5.957548 5.170438 5.709224 5.898353 5.626140
## [2,] 5.941275 6.484540 5.564456 6.384829 5.525160 6.026514 6.107043 5.804807
## [3,] 5.930244 6.078962 5.170681 5.996974 5.161677 5.678381 5.807765 5.519917
## [4,] 6.764678 6.863973 6.025246 6.869582 6.098103 6.637420 6.807995 6.525624
## [5,] 6.406260 6.741912 5.865318 6.700085 5.887043 6.409195 6.536175 6.243586
## [6,] 6.171864 6.839829 5.932134 6.756220 5.904037 6.406668 6.481877 6.176403
##         [,48]    [,49]    [,50]    [,51]
## [1,] 3.454124 3.613402 1.750762 2.100258
## [2,] 3.916304 3.875114 2.328375 2.642998
## [3,] 3.516553 3.546973 1.916888 2.229548
## [4,] 4.358181 4.533068 2.575750 2.960877
## [5,] 4.202458 4.279710 2.491683 2.850874
## [6,] 4.278917 4.254752 2.639762 2.975308
# binned relative frequencies (# of ties over distance, relative to crosswise
# distance distribution)
dst.freq <-
  relative.tie_dst.frequency(
    sfn, sfe
    , dst.mat = dst.mat
    , n.bins = 60
  )

dst.freq
## # A tibble: 50 x 5
##    dst.bin    dst.lvl xwise.count flow.count avg.flow
##    <fct>        <dbl>       <int>      <dbl>    <dbl>
##  1 (0.15,0.3]   0.225           6       365.     60.8
##  2 (0.3,0.45]   0.375          14      1071.     76.5
##  3 (0.45,0.6]   0.525          23      1276.     55.5
##  4 (0.6,0.75]   0.675          27      1323.     49.0
##  5 (0.75,0.9]   0.825          27      1284.     47.5
##  6 (0.9,1.05]   0.975          30      1434.     47.8
##  7 (1.05,1.2]   1.12           33      2005.     60.8
##  8 (1.2,1.35]   1.27           26      1020.     39.2
##  9 (1.35,1.5]   1.42           29      1222.     42.1
## 10 (1.5,1.65]   1.58           33      1171.     35.5
## # ... with 40 more rows
dst.freq %>%
  ggplot(aes(x = dst.lvl,
             y = avg.flow))  +
  geom_path()

# log-log'd
dst.freq %>%
  ggplot(aes(x = log(dst.lvl),
             y = log(avg.flow))) +
  geom_path()

# SIF typically shows decreasing flows with distance, until a certain point where the
# relationship is lost. About 2km here.

# use flow bins to parameterize a power law SIF
# below relies on the `dst.freq` in the general environment
param.fitting <-
param.fitting <-
  stats::optim(par= c(1,1.7,1, 10)
               ,fn = function(x) power.law.loss.fcn(x, dst.freq)
               , hessian = T)

# predicted tie probabilities, based on SIF
dst.freq$flow.hat <-
  power.law(dst.freq$dst.lvl,
            param.fitting$par[1],
            param.fitting$par[2],
            param.fitting$par[3]
  )

dst.freq %>%
  select(dst.lvl, avg.flow, flow.hat) %>%
  pivot_longer(cols = contains("flow"),
               values_to = "avg.flow") %>%
  ggplot() +
  geom_path(aes(color = name,
                y = avg.flow,
                x = dst.lvl)) +
  ggtitle("Relative avg flows",
          "actual vs. fitted power law")

decay.mat <- dst.mat %>%
  power.law(param.fitting$par[1],
            param.fitting$par[2],
            param.fitting$par[3])

ERGMS with SIF

# ERGM with sif, only interstates ----------------------------------------------------------------

fo.sif <-
  formula(
    eugh ~ edges +
      nodematch("int.poly", diff = FALSE) +
      #nodematch("hwy.poly", diff = FALSE) +
      absdiff("perc.wh", pow=1) +
      edgecov(decay.mat, "dst")
  )
eugh
##  Network attributes:
##   vertices = 51 
##   directed = FALSE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = FALSE 
##   bipartite = FALSE 
##   total edges= 734 
##     missing edges= 0 
##     non-missing edges= 734 
## 
##  Vertex attribute names: 
##     arterial.poly int.poly n.bl n.hsp n.wh perc.bl perc.hsp perc.wh pop vertex.names 
## 
##  Edge attribute names: 
##     dst n tstr year
# ergms take while to run. A good time to use Della
ergm2 <- ergm(formula = fo.sif
              ,"tstr"
              ,reference = ~Poisson)

ergm2 %>% summary()
## Call:
## ergm(formula = fo.sif, response = "tstr", reference = ~Poisson)
## 
## Monte Carlo Maximum Likelihood Results:
## 
##                        Estimate Std. Error MCMC % z value Pr(>|z|)
## nonzero                 -10.793     28.346      0  -0.381    0.703
## nodematch.sum.int.poly   -1.616     26.916      0  -0.060    0.952
## absdiff.sum.perc.wh       0.815     91.309      0   0.009    0.993
## edgecov.sum.dst          -0.038      1.172      0  -0.032    0.974
## 
##      Null Deviance:     0  on 1275  degrees of freedom
##  Residual Deviance: -1076  on 1271  degrees of freedom
##  
## Note that the null model likelihood and deviance are defined to be 0.
## This means that all likelihood-based inference (LRT, Analysis of
## Deviance, AIC, BIC, etc.) is only valid between models with the same
## reference distribution and constraints.
## 
## AIC: -1068  BIC: -1048  (Smaller is better. MC Std. Err. = 3.985)
# ERGM with sif with arterials ----------------------------------------------------------------

fo.sif <-
  formula(
    eugh ~ edges +
      nodematch("arterial.poly", diff = FALSE) +
      #nodematch("hwy.poly", diff = FALSE) +
      absdiff("perc.wh", pow=1) +
      edgecov(decay.mat, "dst")
  )
eugh
##  Network attributes:
##   vertices = 51 
##   directed = FALSE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = FALSE 
##   bipartite = FALSE 
##   total edges= 734 
##     missing edges= 0 
##     non-missing edges= 734 
## 
##  Vertex attribute names: 
##     arterial.poly int.poly n.bl n.hsp n.wh perc.bl perc.hsp perc.wh pop vertex.names 
## 
##  Edge attribute names: 
##     dst n tstr year
# ergms take while to run. A good time to use Della
ergm2 <- ergm(formula = fo.sif
              ,"tstr"
              ,reference = ~Poisson)

ergm2 %>% summary()
## Call:
## ergm(formula = fo.sif, response = "tstr", reference = ~Poisson)
## 
## Monte Carlo Maximum Likelihood Results:
## 
##                             Estimate Std. Error MCMC % z value Pr(>|z|)
## nonzero                     -8.92775   23.62625      0  -0.378    0.706
## nodematch.sum.arterial.poly  2.01767   54.01709      0   0.037    0.970
## absdiff.sum.perc.wh         -0.07279   60.11763      0  -0.001    0.999
## edgecov.sum.dst             -0.08414    1.43223      0  -0.059    0.953
## 
##      Null Deviance:     0  on 1275  degrees of freedom
##  Residual Deviance: -1072  on 1271  degrees of freedom
##  
## Note that the null model likelihood and deviance are defined to be 0.
## This means that all likelihood-based inference (LRT, Analysis of
## Deviance, AIC, BIC, etc.) is only valid between models with the same
## reference distribution and constraints.
## 
## AIC: -1064  BIC: -1043  (Smaller is better. MC Std. Err. = 8.139)