install.packages(c(“tigris”, “GWmodel”, “dplyr”))

library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(GWmodel)
## Loading required package: maptools
## Loading required package: sp
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
## Loading required package: robustbase
## Loading required package: Rcpp
## Loading required package: spatialreg
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: Matrix
## Welcome to GWmodel version 2.2-7.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(tmap)
library(rgdal)
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## 
## rgdal: version: 1.5-27, (SVN revision 1148)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/sf/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## Overwritten PROJ_LIB was /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
library(dplyr)

setwd(“~/Desktop/CP 8883/Assignments/Untitled_Message”)

restaurantPts= read.csv('restaurantPts.csv')
OLSModel <- lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts)  
OLSModel
## 
## Call:
## lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts)
## 
## Coefficients:
##  (Intercept)       old_pct  bachelor_pct   clinton_pct  
##     2276.367        -3.093        -9.771       -15.901
summary(OLSModel)
## 
## Call:
## lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2105.9 -1404.0 -1095.4  -671.8 21338.7 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2276.3671    16.8504 135.093  < 2e-16 ***
## old_pct        -3.0934     0.5784  -5.348 8.91e-08 ***
## bachelor_pct   -9.7705     0.3155 -30.971  < 2e-16 ***
## clinton_pct   -15.9013     0.2789 -57.013  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3831 on 793318 degrees of freedom
## Multiple R-squared:  0.009048,   Adjusted R-squared:  0.009045 
## F-statistic:  2415 on 3 and 793318 DF,  p-value: < 2.2e-16
#---- Prepare data for on the County level ----#

## Retrieve a county level shapefile from the census. the resolution is 20 m, which means that it is not a big dataset, it is generalized. Make GEOID an integer, because we will be joining it to an integer field below.

options(tigris_use_cache=TRUE)
cnty <- counties(cb=TRUE, resolution="20m", year=2019) %>% mutate(GEOID = as.integer(GEOID))

## Produce summary statistics on our data on the field "COUNTY" 
GWR_input = restaurantPts %>% 
  select(c(County, fn, gn, old_pct, bachelor_pct, clinton_pct, median_income, pop_density)) %>%
  mutate(median_income = median_income/10000, pop_density = pop_density/1000, gn = gn * 100) %>%
  group_by(County) %>% 
  summarise_all(list(mean = ~mean(.), n=~n())) 

## Delete those extra hanging columns---they all say the same thing and delete counties w/ fewer than 10 restaurants.
GWR_input <- GWR_input[,-c(10:15)]
GWR_input <- filter(GWR_input, fn_n > 10) 

## Join the data to the county shapefile!
GWR_inputGeo = GWR_input %>% 
  left_join(cnty %>% filter(GEOID %in% GWR_input$County) %>% 
              select(c(GEOID, geometry)), by=c('County' = 'GEOID'), copy=FALSE) %>% st_as_sf() %>% as_Spatial() 

##Let's take a look at colors (uncomment me!)
#install.packages("shinyjs")
#tmaptools::palette_explorer() 
library(RColorBrewer)

## Make your map: You can ignore the warnings.
tmap_mode('plot')
## tmap mode set to plotting
tm_shape(cnty, bbox = st_bbox(GWR_inputGeo)) + 
  tm_polygons(col = "gray") +
tm_shape(GWR_inputGeo) + 
  tm_polygons("gn_mean", style="fixed", palette = "BrBG", breaks=c(0, 10, 20, 30, 40, 50, max(GWR_inputGeo$gn_mean)))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

#------END CNTY -----------------#
##Prints in the console!

########### We need to change the projection to a meter based projection. W are using lambert conformal conic

GWR_inputGeo <- spTransform(GWR_inputGeo, CRS("+proj=lcc +lon_0=-90 +lat_1=33 +lat_2=45"))

########## Let's figure out our bandwidth: first let's use number of neighbors

myBandwidthKNeighbors <- bw.gwr(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean,
             adaptive = T,
             data=GWR_inputGeo) 
## Take a cup of tea and have a break, it will take a few minutes.
##           -----A kind suggestion from GWmodel development group
## Adaptive bandwidth: 1778 CV score: 937594260 
## Adaptive bandwidth: 1107 CV score: 916219188 
## Adaptive bandwidth: 691 CV score: 892861737 
## Adaptive bandwidth: 435 CV score: 873899444 
## Adaptive bandwidth: 275 CV score: 862814121 
## Adaptive bandwidth: 178 CV score: 863593215 
## Adaptive bandwidth: 337 CV score: 866148677 
## Adaptive bandwidth: 239 CV score: 861989901 
## Adaptive bandwidth: 214 CV score: 862115357 
## Adaptive bandwidth: 251 CV score: 862185663 
## Adaptive bandwidth: 227 CV score: 862003375 
## Adaptive bandwidth: 241 CV score: 862041658 
## Adaptive bandwidth: 232 CV score: 861967351 
## Adaptive bandwidth: 233 CV score: 862042394 
## Adaptive bandwidth: 236 CV score: 861992148 
## Adaptive bandwidth: 233 CV score: 862042394 
## Adaptive bandwidth: 234 CV score: 861991447 
## Adaptive bandwidth: 233 CV score: 862042394 
## Adaptive bandwidth: 233 CV score: 862042394 
## Adaptive bandwidth: 232 CV score: 861967351
######### Let's figure out our bandwidth: second, let's use distance!

myBandwidthDistance <- bw.gwr(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean,
             adaptive = F,
             data=GWR_inputGeo) 
## Take a cup of tea and have a break, it will take a few minutes.
##           -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 2822927 CV score: 989790993 
## Fixed bandwidth: 1745014 CV score: 943220953 
## Fixed bandwidth: 1078827 CV score: 909492711 
## Fixed bandwidth: 667100.5 CV score: 882730896 
## Fixed bandwidth: 412639.7 CV score: 871354444 
## Fixed bandwidth: 255374.3 CV score: 937868165 
## Fixed bandwidth: 509835.1 CV score: 874570875 
## Fixed bandwidth: 352569.6 CV score: 876062519 
## Fixed bandwidth: 449765 CV score: 872209641 
## Fixed bandwidth: 389695 CV score: 871788558 
## Fixed bandwidth: 426820.3 CV score: 871533169 
## Fixed bandwidth: 403875.6 CV score: 871405725 
## Fixed bandwidth: 418056.2 CV score: 871389409 
## Fixed bandwidth: 409292.1 CV score: 871357822 
## Fixed bandwidth: 414708.6 CV score: 871361994 
## Fixed bandwidth: 411361 CV score: 871353364 
## Fixed bandwidth: 410570.8 CV score: 871354132 
## Fixed bandwidth: 411849.4 CV score: 871353447 
## Fixed bandwidth: 411059.2 CV score: 871353525 
## Fixed bandwidth: 411547.6 CV score: 871353346 
## Fixed bandwidth: 411662.9 CV score: 871353365 
## Fixed bandwidth: 411476.3 CV score: 871353346 
## Fixed bandwidth: 411432.3 CV score: 871353350 
## Fixed bandwidth: 411503.6 CV score: 871353345 
## Fixed bandwidth: 411520.4 CV score: 871353345 
## Fixed bandwidth: 411493.2 CV score: 871353345 
## Fixed bandwidth: 411510 CV score: 871353345 
## Fixed bandwidth: 411513.9 CV score: 871353345 
## Fixed bandwidth: 411507.5 CV score: 871353345 
## Fixed bandwidth: 411511.5 CV score: 871353345 
## Fixed bandwidth: 411509 CV score: 871353345 
## Fixed bandwidth: 411510.6 CV score: 871353345 
## Fixed bandwidth: 411509.6 CV score: 871353345 
## Fixed bandwidth: 411510.2 CV score: 871353345
print(paste("Best number of neighbors for my kernel is:", myBandwidthKNeighbors))
## [1] "Best number of neighbors for my kernel is: 232"
print(paste("Best distance for my kernel is:", as.integer(myBandwidthDistance)/1000, "KM"))
## [1] "Best distance for my kernel is: 411.509 KM"
myGWR_model <- gwr.basic(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean,
                adaptive = T,
                data = GWR_inputGeo,
                bw = myBandwidthKNeighbors)  
## Warning in proj4string(data): CRS object has comment, which is lost in output
print(myGWR_model)
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2021-09-30 16:51:00 
##    Call:
##    gwr.basic(formula = fn_mean ~ old_pct_mean + bachelor_pct_mean + 
##     clinton_pct_mean, data = GWR_inputGeo, bw = myBandwidthKNeighbors, 
##     adaptive = T)
## 
##    Dependent (y) variable:  fn_mean
##    Independent variables:  old_pct_mean bachelor_pct_mean clinton_pct_mean
##    Number of data points: 2865
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##     Min      1Q  Median      3Q     Max 
## -1950.8  -343.8    -4.4   348.8  3467.4 
## 
##    Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
##    (Intercept)       2891.0184    59.9794  48.200  < 2e-16 ***
##    old_pct_mean       -35.3500     2.5779 -13.713  < 2e-16 ***
##    bachelor_pct_mean  -30.1110     1.8986 -15.860  < 2e-16 ***
##    clinton_pct_mean    -6.2971     0.9278  -6.787 1.39e-11 ***
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 608.5 on 2861 degrees of freedom
##    Multiple R-squared: 0.1884
##    Adjusted R-squared: 0.1876 
##    F-statistic: 221.4 on 3 and 2861 DF,  p-value: < 2.2e-16 
##    ***Extra Diagnostic information
##    Residual sum of squares: 1059203915
##    Sigma(hat): 608.2458
##    AIC:  44871.14
##    AICc:  44871.16
##    BIC:  42075.74
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: bisquare 
##    Adaptive bandwidth: 232 (number of nearest neighbours)
##    Regression points: the same locations as observations are used.
##    Distance metric: Euclidean distance metric is used.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                           Min.   1st Qu.    Median   3rd Qu.      Max.
##    Intercept         1419.9114 2432.7170 2677.5012 2993.2224 4653.0898
##    old_pct_mean      -103.8444  -48.9084  -31.5176  -21.0531    7.0644
##    bachelor_pct_mean  -46.0024  -28.7907  -17.2197   -6.3167   41.4822
##    clinton_pct_mean   -28.5349  -13.2060   -7.1344   -2.6663   14.9042
##    ************************Diagnostic information*************************
##    Number of data points: 2865 
##    Effective number of parameters (2trace(S) - trace(S'S)): 171.0258 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 2693.974 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 44296.6 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 44156.21 
##    BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42171.22 
##    Residual sum of squares: 792423429 
##    R-square value:  0.3928255 
##    Adjusted R-square value:  0.354265 
## 
##    ***********************************************************************
##    Program stops at: 2021-09-30 16:51:01
###Let's see how the overall model performed: where the coefficients were postitive and negative.
tm_shape(myGWR_model$SDF) +
  tm_polygons("old_pct_mean", size = 0.1, midpoint=NA,
          border.alpha = 0, palette = "BrBG", n = 5, 
          palette="Spectral", 
          title='G(n) Coeff: Seniors') +
  tm_shape(cnty) +  
  tm_polygons(alpha=0) + 
  tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

#tmap_save(fig, filename='Cnty_GWR_gn.png', dpi=600)
tm_shape(myGWR_model$SDF) +
  tm_polygons("clinton_pct_mean", size = 0.1, midpoint=NA,
          border.alpha = 0, palette = "BrBG", n = 5, 
          palette="Spectral", 
          title='G(n) Coeff: Clinton Voters') +
  tm_shape(cnty) +  
  tm_polygons(alpha=0) + 
  tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

myGWR_model2 <- gwr.basic(fn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean + pop_density_mean,
                adaptive = T,
                data = GWR_inputGeo,
                bw = myBandwidthKNeighbors)  
## Warning in proj4string(data): CRS object has comment, which is lost in output
print(myGWR_model2)
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2021-09-30 16:51:06 
##    Call:
##    gwr.basic(formula = fn_mean ~ old_pct_mean + bachelor_pct_mean + 
##     clinton_pct_mean + pop_density_mean, data = GWR_inputGeo, 
##     bw = myBandwidthKNeighbors, adaptive = T)
## 
##    Dependent (y) variable:  fn_mean
##    Independent variables:  old_pct_mean bachelor_pct_mean clinton_pct_mean pop_density_mean
##    Number of data points: 2865
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##     Min      1Q  Median      3Q     Max 
## -1964.7  -346.7    -2.2   346.9  3460.9 
## 
##    Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
##    (Intercept)       2890.9902    59.8138  48.333  < 2e-16 ***
##    old_pct_mean       -37.3531     2.6167 -14.275  < 2e-16 ***
##    bachelor_pct_mean  -28.2077     1.9492 -14.471  < 2e-16 ***
##    clinton_pct_mean    -5.4228     0.9494  -5.712 1.23e-08 ***
##    pop_density_mean   -17.7861     4.3314  -4.106 4.13e-05 ***
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 606.8 on 2860 degrees of freedom
##    Multiple R-squared: 0.1932
##    Adjusted R-squared: 0.192 
##    F-statistic: 171.2 on 4 and 2860 DF,  p-value: < 2.2e-16 
##    ***Extra Diagnostic information
##    Residual sum of squares: 1052995789
##    Sigma(hat): 606.4607
##    AIC:  44856.29
##    AICc:  44856.32
##    BIC:  42074.82
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: bisquare 
##    Adaptive bandwidth: 232 (number of nearest neighbours)
##    Regression points: the same locations as observations are used.
##    Distance metric: Euclidean distance metric is used.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                            Min.    1st Qu.     Median    3rd Qu.      Max.
##    Intercept         1364.38093 2422.75456 2712.17000 3018.34754 4473.1376
##    old_pct_mean      -118.72331  -50.24002  -37.19831  -26.93128    7.0973
##    bachelor_pct_mean  -43.41370  -21.16524  -10.15731   -1.44605   25.9557
##    clinton_pct_mean   -35.58512  -10.55604   -4.03994   -0.70695   12.5466
##    pop_density_mean  -399.24574 -173.96027  -67.03277   12.56304  343.9635
##    ************************Diagnostic information*************************
##    Number of data points: 2865 
##    Effective number of parameters (2trace(S) - trace(S'S)): 208.7937 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 2656.206 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 44270.06 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 44094.63 
##    BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 42310.4 
##    Residual sum of squares: 767805307 
##    R-square value:  0.4116885 
##    Adjusted R-square value:  0.3654263 
## 
##    ***********************************************************************
##    Program stops at: 2021-09-30 16:51:08
tm_shape(myGWR_model2$SDF) +
  tm_polygons("pop_density_mean", size = 0.1, midpoint=NA,
          border.alpha = 0, palette = "BrBG", n = 5, 
          palette="Spectral", 
          title='G(n) Coeff: Mean County Population Density') +
  tm_shape(cnty) +  
  tm_polygons(alpha=0) + 
  tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01))
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

###Let's see how the overall model performed: where it performed well!
tm_shape(myGWR_model$SDF) + 
  tm_polygons(c("Local_R2")) + 
  tm_shape(cnty) + 
  tm_polygons(alpha=0)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

###Let's see how the overall model performed: where it performed well!
tm_shape(myGWR_model2$SDF) + 
  tm_polygons(c("Local_R2")) + 
  tm_shape(cnty) + 
  tm_polygons(alpha=0)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output