December 2020
age_cty <- age_cty %>% mutate(Age_gt_37 = Mean_Age > 37)
Combination | Count |
---|---|
Red-Red | 8 |
Red-Blue | 17 |
Blue-Blue | 33 |
\[ \textrm{E}(\textrm{BB}) = \frac{S_0}{2}\frac{n_B^{(2)}}{n^{(2)}} \]
\[ \textrm{Var}(\textrm{BB}) = \frac{S_1}{4}\left[\frac{n_B^{(2)}}{n^{(2)}} - \frac{2n_B^{(3)}}{n^{(3)}} + \frac{n_B^{(4)}}{n^{(4)}} \right] + \frac{S_2}{4}\left[\frac{n_B^{(3)}}{n^{(3)}} - \frac{n_B^{(4)}}{n^{(4)}} \right] + \frac{S_0^2n_B^{(4)}}{4n^{(4)}} - \textrm{E}(\textrm{BB})^2 \]
library(spdep) # A spatial statistics package for R col_cty <- ifelse(age_cty$Age_gt_37,'Red','Blue') %>% factor() joincount.test(col_cty, nb2listw(poly2nb(age_cty),style='B'))[[1]]
Join count test under nonfree sampling data: col_cty weights: nb2listw(poly2nb(age_cty), style = "B") Std. deviate for Blue = 2.6138, p-value = 0.004477 alternative hypothesis: greater sample estimates: Same colour statistic Expectation Variance 8.000000 3.747692 2.646642
spdep
librarycol_cty
is a factor variable for the colour of each countypoly2nb
extracts a neighbour list from a polygon sf
nb2listw
turns this into a listw
object - a weighted neighbour list, although here all the weights are 1 (specified by style="B"
)joincount.test
does the test - it returns a list with 2 results (one for each colour), we select the first item (for \(\textrm{BB}\))set.seed(19760604) # Reproducibility!! joincount.mc(col_cty, nb2listw(poly2nb(age_cty),style='B'),nsim=10000)[[1]]
Monte-Carlo simulation of join-count statistic data: col_cty weights: nb2listw(poly2nb(age_cty), style = "B") number of simulations + 1: 10001 Join-count statistic for Blue = 8, rank of observed statistic = 9868, p-value = 0.0133 alternative hypothesis: greater sample estimates: mean of simulation variance of simulation 3.733800 2.664204
joincount.mc
then compares the actual join count with the simulated distributionjoincount.mc
works pretty much like joincount.test
with an extra nsim
(number of simulations) parameter.\[ I = \frac{n}{\sum_{ij}W_{ij}}\frac{\sum_{ij}W_{ij}(x_i-\bar{x})(x_j-\bar{x})}{\sum_i(x_i-\bar{x})^2} \]
\[ I = \frac{\textsf{Mean of }(x_i - \bar{x})(x_j-\bar{x}) \textsf{ if i and j adjacent}}{\textsf{Variance of x}}\\ = \textsf{Mean of }{z_jz_j} \textsf{ for z-scores of adjacent zones} \]
style='W'
for \(W_{ij}\) as above)moran.plot(age_cty$Mean_Age, nb2listw(poly2nb(age_cty),style='W'))
moran.test(age_cty$Mean_Age, nb2listw(poly2nb(age_cty),style='W'))
Moran I test under randomisation data: age_cty$Mean_Age weights: nb2listw(poly2nb(age_cty), style = "W") Moran I statistic standard deviate = 4.2282, p-value = 1.178e-05 alternative hypothesis: greater sample estimates: Moran I statistic Expectation Variance 0.49267372 -0.04000000 0.01587125
set.seed(19981115) moran.mc(age_cty$Mean_Age, nb2listw(poly2nb(age_cty),style='W'),nsim=1000)
Monte-Carlo simulation of Moran I data: age_cty$Mean_Age weights: nb2listw(poly2nb(age_cty), style = "W") number of simulations + 1: 1001 statistic = 0.49267, observed rank = 1001, p-value = 0.000999 alternative hypothesis: greater
library(opendatatoronto) watermain <- search_packages("Watermain Breaks") %>% list_package_resources() %>% .[2,] %>% get_resource() neighbourhoods <- list_package_resources( "https://open.toronto.ca/dataset/neighbourhoods/") %>% get_resource() %>% st_transform(3857) cline <- search_packages("TCL") %>% list_package_resources() %>% .[1,] %>% get_resource() %>% st_transform(3857) %>% filter(FCODE_DESC %in% c("Major Arterial", "Minor Arterial")) ggplot(neighbourhoods) + geom_sf() + geom_sf(data=cline,col='darkred') + geom_sf(data=watermain %>% filter(BREAK_YEAR==2014))
RANN
package in Rnnd <- watermain %>% st_coordinates() %>% RANN::nn2(k=2) %>% .$nn.dists nnd <- nnd[,2] glue::glue( "Mean NND is {round(mean(nnd),2)} for {length(nnd)} points.")
Mean NND is 46.79 for 35465 points.
Skellam, JG, 1951, “Random Dispersal in Theoretical Populations”, Biometrika, 38, 196-218
sf
tools to estimate \(\lambda\)lambda <- length(nnd)/sum(st_area(neighbourhoods)) test_stat <- 2 * pi * lambda * sum(nnd^2) glue::glue( " Test statistic = {round(test_stat,1)} D.F. = {2*length(nnd)} p-value = {round(pchisq(test_stat,2*length(nnd)),2)}")
Test statistic = 37216.7 D.F. = 70930 p-value = 0
library(ggspatial) hexgrid <- st_make_grid(neighbourhoods,square=FALSE,n=30) %>% st_sf() %>% mutate(id=row_number()) hexgrid <- watermain %>% st_join(hexgrid) %>% count(id) %>% st_drop_geometry() %>% left_join(hexgrid,.) %>% mutate(n=ifelse(is.na(n),0,n)) ggplot(hexgrid) + annotation_map_tile(zoomin=0,type='osmgrayscale') + geom_sf(data=hexgrid,aes(fill=n),alpha=0.5,lwd=NA) + scale_fill_distiller(name='Breaks',direction=1, palette='Reds')