install.packages(c(“tigris”, “GWmodel”, “dplyr”))
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(GWmodel)
## Loading required package: maptools
## Loading required package: sp
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
## Loading required package: robustbase
## Loading required package: Rcpp
## Loading required package: spatialreg
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: Matrix
## Welcome to GWmodel version 2.2-7.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(tmap)
library(rgdal)
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
##
## rgdal: version: 1.5-27, (SVN revision 1148)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/sf/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## Overwritten PROJ_LIB was /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
library(dplyr)
setwd(“~/Desktop/CP 8883/Assignments/Untitled_Message”)
restaurantPts= read.csv('restaurantPts.csv')
OLSModel <- lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts)
OLSModel
##
## Call:
## lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts)
##
## Coefficients:
## (Intercept) old_pct bachelor_pct clinton_pct
## 2276.367 -3.093 -9.771 -15.901
summary(OLSModel)
##
## Call:
## lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2105.9 -1404.0 -1095.4 -671.8 21338.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2276.3671 16.8504 135.093 < 2e-16 ***
## old_pct -3.0934 0.5784 -5.348 8.91e-08 ***
## bachelor_pct -9.7705 0.3155 -30.971 < 2e-16 ***
## clinton_pct -15.9013 0.2789 -57.013 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3831 on 793318 degrees of freedom
## Multiple R-squared: 0.009048, Adjusted R-squared: 0.009045
## F-statistic: 2415 on 3 and 793318 DF, p-value: < 2.2e-16
#---- Prepare data for on the County level ----#
## Retrieve a county level shapefile from the census. the resolution is 20 m, which means that it is not a big dataset, it is generalized. Make GEOID an integer, because we will be joining it to an integer field below.
options(tigris_use_cache=TRUE)
cnty <- counties(cb=TRUE, resolution="20m", year=2019) %>% mutate(GEOID = as.integer(GEOID))
## Produce summary statistics on our data on the field "COUNTY"
GWR_input = restaurantPts %>%
select(c(County, fn, gn, old_pct, bachelor_pct, clinton_pct, median_income, pop_density)) %>%
mutate(median_income = median_income/10000, pop_density = pop_density/1000, gn = gn * 100) %>%
group_by(County) %>%
summarise_all(list(mean = ~mean(.), n=~n()))
## Delete those extra hanging columns---they all say the same thing and delete counties w/ fewer than 10 restaurants.
GWR_input <- GWR_input[,-c(10:15)]
GWR_input <- filter(GWR_input, fn_n > 10)
## Join the data to the county shapefile!
GWR_inputGeo = GWR_input %>%
left_join(cnty %>% filter(GEOID %in% GWR_input$County) %>%
select(c(GEOID, geometry)), by=c('County' = 'GEOID'), copy=FALSE) %>% st_as_sf() %>% as_Spatial()
##Let's take a look at colors (uncomment me!)
#install.packages("shinyjs")
#tmaptools::palette_explorer()
library(RColorBrewer)
## Make your map: You can ignore the warnings.
tmap_mode('plot')
## tmap mode set to plotting
tm_shape(cnty, bbox = st_bbox(GWR_inputGeo)) +
tm_polygons(col = "gray") +
tm_shape(GWR_inputGeo) +
tm_polygons("gn_mean", style="fixed", palette = "BrBG", breaks=c(0, 10, 20, 30, 40, 50, max(GWR_inputGeo$gn_mean)))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
#------END CNTY -----------------#
##Prints in the console!
########### We need to change the projection to a meter based projection. W are using lambert conformal conic
GWR_inputGeo <- spTransform(GWR_inputGeo, CRS("+proj=lcc +lon_0=-90 +lat_1=33 +lat_2=45"))
########## Let's figure out our bandwidth: first let's use number of neighbors
myBandwidthKNeighbors <- bw.gwr(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean,
adaptive = T,
data=GWR_inputGeo)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Adaptive bandwidth: 1778 CV score: 937594260
## Adaptive bandwidth: 1107 CV score: 916219188
## Adaptive bandwidth: 691 CV score: 892861737
## Adaptive bandwidth: 435 CV score: 873899444
## Adaptive bandwidth: 275 CV score: 862814121
## Adaptive bandwidth: 178 CV score: 863593215
## Adaptive bandwidth: 337 CV score: 866148677
## Adaptive bandwidth: 239 CV score: 861989901
## Adaptive bandwidth: 214 CV score: 862115357
## Adaptive bandwidth: 251 CV score: 862185663
## Adaptive bandwidth: 227 CV score: 862003375
## Adaptive bandwidth: 241 CV score: 862041658
## Adaptive bandwidth: 232 CV score: 861967351
## Adaptive bandwidth: 233 CV score: 862042394
## Adaptive bandwidth: 236 CV score: 861992148
## Adaptive bandwidth: 233 CV score: 862042394
## Adaptive bandwidth: 234 CV score: 861991447
## Adaptive bandwidth: 233 CV score: 862042394
## Adaptive bandwidth: 233 CV score: 862042394
## Adaptive bandwidth: 232 CV score: 861967351
######### Let's figure out our bandwidth: second, let's use distance!
myBandwidthDistance <- bw.gwr(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean,
adaptive = F,
data=GWR_inputGeo)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 2822927 CV score: 989790993
## Fixed bandwidth: 1745014 CV score: 943220953
## Fixed bandwidth: 1078827 CV score: 909492711
## Fixed bandwidth: 667100.5 CV score: 882730896
## Fixed bandwidth: 412639.7 CV score: 871354444
## Fixed bandwidth: 255374.3 CV score: 937868165
## Fixed bandwidth: 509835.1 CV score: 874570875
## Fixed bandwidth: 352569.6 CV score: 876062519
## Fixed bandwidth: 449765 CV score: 872209641
## Fixed bandwidth: 389695 CV score: 871788558
## Fixed bandwidth: 426820.3 CV score: 871533169
## Fixed bandwidth: 403875.6 CV score: 871405725
## Fixed bandwidth: 418056.2 CV score: 871389409
## Fixed bandwidth: 409292.1 CV score: 871357822
## Fixed bandwidth: 414708.6 CV score: 871361994
## Fixed bandwidth: 411361 CV score: 871353364
## Fixed bandwidth: 410570.8 CV score: 871354132
## Fixed bandwidth: 411849.4 CV score: 871353447
## Fixed bandwidth: 411059.2 CV score: 871353525
## Fixed bandwidth: 411547.6 CV score: 871353346
## Fixed bandwidth: 411662.9 CV score: 871353365
## Fixed bandwidth: 411476.3 CV score: 871353346
## Fixed bandwidth: 411432.3 CV score: 871353350
## Fixed bandwidth: 411503.6 CV score: 871353345
## Fixed bandwidth: 411520.4 CV score: 871353345
## Fixed bandwidth: 411493.2 CV score: 871353345
## Fixed bandwidth: 411510 CV score: 871353345
## Fixed bandwidth: 411513.9 CV score: 871353345
## Fixed bandwidth: 411507.5 CV score: 871353345
## Fixed bandwidth: 411511.5 CV score: 871353345
## Fixed bandwidth: 411509 CV score: 871353345
## Fixed bandwidth: 411510.6 CV score: 871353345
## Fixed bandwidth: 411509.6 CV score: 871353345
## Fixed bandwidth: 411510.2 CV score: 871353345
print(paste("Best number of neighbors for my kernel is:", myBandwidthKNeighbors))
## [1] "Best number of neighbors for my kernel is: 232"
print(paste("Best distance for my kernel is:", as.integer(myBandwidthDistance)/1000, "KM"))
## [1] "Best distance for my kernel is: 411.509 KM"
myGWR_model <- gwr.basic(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean,
adaptive = T,
data = GWR_inputGeo,
bw = myBandwidthKNeighbors)
## Warning in proj4string(data): CRS object has comment, which is lost in output
print(myGWR_model)
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2021-09-30 16:51:00
## Call:
## gwr.basic(formula = fn_mean ~ old_pct_mean + bachelor_pct_mean +
## clinton_pct_mean, data = GWR_inputGeo, bw = myBandwidthKNeighbors,
## adaptive = T)
##
## Dependent (y) variable: fn_mean
## Independent variables: old_pct_mean bachelor_pct_mean clinton_pct_mean
## Number of data points: 2865
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1950.8 -343.8 -4.4 348.8 3467.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2891.0184 59.9794 48.200 < 2e-16 ***
## old_pct_mean -35.3500 2.5779 -13.713 < 2e-16 ***
## bachelor_pct_mean -30.1110 1.8986 -15.860 < 2e-16 ***
## clinton_pct_mean -6.2971 0.9278 -6.787 1.39e-11 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 608.5 on 2861 degrees of freedom
## Multiple R-squared: 0.1884
## Adjusted R-squared: 0.1876
## F-statistic: 221.4 on 3 and 2861 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 1059203915
## Sigma(hat): 608.2458
## AIC: 44871.14
## AICc: 44871.16
## BIC: 42075.74
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: bisquare
## Adaptive bandwidth: 232 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 1419.9114 2432.7170 2677.5012 2993.2224 4653.0898
## old_pct_mean -103.8444 -48.9084 -31.5176 -21.0531 7.0644
## bachelor_pct_mean -46.0024 -28.7907 -17.2197 -6.3167 41.4822
## clinton_pct_mean -28.5349 -13.2060 -7.1344 -2.6663 14.9042
## ************************Diagnostic information*************************
## Number of data points: 2865
## Effective number of parameters (2trace(S) - trace(S'S)): 171.0258
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 2693.974
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 44296.6
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 44156.21
## BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42171.22
## Residual sum of squares: 792423429
## R-square value: 0.3928255
## Adjusted R-square value: 0.354265
##
## ***********************************************************************
## Program stops at: 2021-09-30 16:51:01
###Let's see how the overall model performed: where the coefficients were postitive and negative.
tm_shape(myGWR_model$SDF) +
tm_polygons("old_pct_mean", size = 0.1, midpoint=NA,
border.alpha = 0, palette = "BrBG", n = 5,
palette="Spectral",
title='G(n) Coeff: Seniors') +
tm_shape(cnty) +
tm_polygons(alpha=0) +
tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
#tmap_save(fig, filename='Cnty_GWR_gn.png', dpi=600)
tm_shape(myGWR_model$SDF) +
tm_polygons("clinton_pct_mean", size = 0.1, midpoint=NA,
border.alpha = 0, palette = "BrBG", n = 5,
palette="Spectral",
title='G(n) Coeff: Clinton Voters') +
tm_shape(cnty) +
tm_polygons(alpha=0) +
tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
myGWR_model2 <- gwr.basic(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean + pop_density_mean,
adaptive = T,
data = GWR_inputGeo,
bw = myBandwidthKNeighbors)
## Warning in proj4string(data): CRS object has comment, which is lost in output
print(myGWR_model2)
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2021-09-30 16:51:06
## Call:
## gwr.basic(formula = fn_mean ~ old_pct_mean + bachelor_pct_mean +
## clinton_pct_mean + pop_density_mean, data = GWR_inputGeo,
## bw = myBandwidthKNeighbors, adaptive = T)
##
## Dependent (y) variable: fn_mean
## Independent variables: old_pct_mean bachelor_pct_mean clinton_pct_mean pop_density_mean
## Number of data points: 2865
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1964.7 -346.7 -2.2 346.9 3460.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2890.9902 59.8138 48.333 < 2e-16 ***
## old_pct_mean -37.3531 2.6167 -14.275 < 2e-16 ***
## bachelor_pct_mean -28.2077 1.9492 -14.471 < 2e-16 ***
## clinton_pct_mean -5.4228 0.9494 -5.712 1.23e-08 ***
## pop_density_mean -17.7861 4.3314 -4.106 4.13e-05 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 606.8 on 2860 degrees of freedom
## Multiple R-squared: 0.1932
## Adjusted R-squared: 0.192
## F-statistic: 171.2 on 4 and 2860 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 1052995789
## Sigma(hat): 606.4607
## AIC: 44856.29
## AICc: 44856.32
## BIC: 42074.82
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: bisquare
## Adaptive bandwidth: 232 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 1364.38093 2422.75456 2712.17000 3018.34754 4473.1376
## old_pct_mean -118.72331 -50.24002 -37.19831 -26.93128 7.0973
## bachelor_pct_mean -43.41370 -21.16524 -10.15731 -1.44605 25.9557
## clinton_pct_mean -35.58512 -10.55604 -4.03994 -0.70695 12.5466
## pop_density_mean -399.24574 -173.96027 -67.03277 12.56304 343.9635
## ************************Diagnostic information*************************
## Number of data points: 2865
## Effective number of parameters (2trace(S) - trace(S'S)): 208.7937
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 2656.206
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 44270.06
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 44094.63
## BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42310.4
## Residual sum of squares: 767805307
## R-square value: 0.4116885
## Adjusted R-square value: 0.3654263
##
## ***********************************************************************
## Program stops at: 2021-09-30 16:51:08
tm_shape(myGWR_model2$SDF) +
tm_polygons("pop_density_mean", size = 0.1, midpoint=NA,
border.alpha = 0, palette = "BrBG", n = 5,
palette="Spectral",
title='G(n) Coeff: Mean County Population Density') +
tm_shape(cnty) +
tm_polygons(alpha=0) +
tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
###Let's see how the overall model performed: where it performed well!
tm_shape(myGWR_model$SDF) +
tm_polygons(c("Local_R2")) +
tm_shape(cnty) +
tm_polygons(alpha=0)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
###Let's see how the overall model performed: where it performed well!
tm_shape(myGWR_model2$SDF) +
tm_polygons(c("Local_R2")) +
tm_shape(cnty) +
tm_polygons(alpha=0)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output