pkgs <- c("tigris", "GWmodel","tidyverse","psych","tmap","sf","rgdal")

#install.packages("shinjs")

list.packages <- pkgs[!(pkgs %in% 
                          installed.packages()[,"Package"])]
if(length(list.packages)) install.packages(list.packages)

invisible(sapply(pkgs, library, character.only = T))

restaurantPts= read.csv('restaurantPts.csv')

Q1.

The features that predict chain restaurants in the U.S. include low walkability, percent of voters who voted for Donald Trump, lower income and lower education.

Q2.

The total number of UAs, MSAs, and counties in the US is 3,486; 919; and 3,091 respectively.

UAs <-length(unique(restaurantPts$UA_NAME))
MSAs <-length(unique(restaurantPts$MSA_NAME))
counties <-length(unique(restaurantPts$County))
data.frame(UAs =UAs, MSAs=MSAs, counties=counties)

##    UAs MSAs counties
## 1 3486  919     3091

Q3.

chainniest <- aggregate(restaurantPts[,"fn"], by=list(restaurantPts$UA_NAME),FUN=mean)

chainniest1 <- aggregate(restaurantPts[,"gn"], by=list(restaurantPts$UA_NAME),FUN=mean)

final_Table <- cbind(chainniest,chainniest1)
chainniest_table <- final_Table[final_Table$Group.1 %in%
                                  c("Lake Land'Or, VA", "Mont Belvieu, TX", "Presidio, TX", "Castle Rock, WA","Harrisburg, SD"),]

colnames(chainniest_table)[1:4] <- c("UA_Name","fn","UA_Name","gn")
chainniest_table <- arrange(chainniest_table[,-3],desc(fn))

mutate(chainniest_table, fn,fn=round(fn,digits = 0))%>%
  mutate(gn,gn=round(gn,digits = 2))

##            UA_Name    fn   gn
## 1 Lake Land'Or, VA 21358 1.00
## 2 Mont Belvieu, TX 21358 1.00
## 3     Presidio, TX 21358 1.00
## 4  Castle Rock, WA 10870 0.67
## 5   Harrisburg, SD 10681 0.50

Q4.

One more unit increase in percent of “old” population result in a decrease of 3 unit in fn.

One more unit increase in percent who voted for Clinton result in a decrease of about 16 unit in fn.

One more unit increase in percent with bachelor degree result in a decrease of about 10 unit in fn.

OLSModel <- lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts) #bigger neighbor smoother  

summary(OLSModel)

## 
## Call:
## lm(formula = fn ~ old_pct + bachelor_pct + clinton_pct, data = restaurantPts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2105.9 -1404.0 -1095.4  -671.8 21338.7 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2276.3671    16.8504 135.093  < 2e-16 ***
## old_pct        -3.0934     0.5784  -5.348 8.91e-08 ***
## bachelor_pct   -9.7705     0.3155 -30.971  < 2e-16 ***
## clinton_pct   -15.9013     0.2789 -57.013  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3831 on 793318 degrees of freedom
## Multiple R-squared:  0.009048,   Adjusted R-squared:  0.009045 
## F-statistic:  2415 on 3 and 793318 DF,  p-value: < 2.2e-16

Q5

The best number of neighbors for my kernel is: 232.

The best distance for my kernel is: 411.509 KM.

options(tigris_use_cache=TRUE)
cnty <- counties(cb=TRUE, resolution="20m", year=2019) %>% mutate(GEOID = as.integer(GEOID))

GWR_input = restaurantPts %>% 
  select(c(County, fn, gn, old_pct, bachelor_pct, clinton_pct, median_income, pop_density)) %>%
  mutate(median_income = median_income/10000, pop_density = pop_density/1000, gn = gn * 100) %>%
  group_by(County) %>% 
  summarise_all(list(mean = ~mean(.), n=~n()))

GWR_input <- GWR_input[,-c(10:15)]

GWR_input <- filter(GWR_input, fn_n > 10)

## Join the data to the county shapefile!
GWR_inputGeo = GWR_input %>% 
  left_join(cnty %>% filter(GEOID %in% GWR_input$County) %>% 
              select(c(GEOID, geometry)), by=c('County' = 'GEOID'), copy=FALSE) %>% st_as_sf() %>% as_Spatial()

print(paste("Best number of neighbors for my kernel is:", myBandwidthKNeighbors))

## [1] "Best number of neighbors for my kernel is: 232"

print(paste("Best distance for my kernel is:", as.integer(myBandwidthDistance)/1000, "KM"))

## [1] "Best distance for my kernel is: 411.509 KM"

Q6

The bigger bandwidth means a more smooth results.

Q7

This model is mean g(n) as a function of average percent of older population, average percent with bachelors and average percent Clinton votes.

The map shows that one unit increase in average percent of County Clinton Votes result in between 0 to 0.6 unit increase in g(n) for counties color coded light to dark green. Most of such counties are located in Southern California, Arizona, Minnesota, Iowa, Oklahoma and Northern Texas.

myGWR_model <- gwr.basic(gn_mean ~ old_pct_mean + bachelor_pct_mean + clinton_pct_mean,
                adaptive = T,
                data = GWR_inputGeo,
                bw = myBandwidthKNeighbors)

## Warning in proj4string(data): CRS object has comment, which is lost in output

tmap_mode('plot')

## tmap mode set to plotting

tm_shape(myGWR_model$SDF, bbox = bbox_new) +
  tm_polygons("clinton_pct_mean", size = 0.1, midpoint=NA,
          border.alpha = 0, palette = "BrBG", n = 5, 
          palette="Spectral", 
          title='G(n) Coeff: for Mean Co. Clinton Votes') +
  tm_shape(cnty) +  
  tm_polygons(alpha=0) + 
  tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01))

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

Q8

This model is mean g(n) as a function of average median income, average population density and average percent with bachelors degree.

The map shows that there is a positive relation between average population density and average g(n) for counties color coded yellow to red; thus, as average population density increase, average g(n) also increases.Most of such counties are located in North and South Dakota.

tmap_mode('plot')

## tmap mode set to plotting

tm_shape(myGWR_model$SDF, bbox = bbox_new) +
  tm_polygons("pop_density_mean", size = 0.1, midpoint=NA,
          border.alpha = 0, n = 5, 
          palette=c('lightblue','khaki1','red3'), 
          title='G(n) Coeff: Mean Co. Pop.Density') +
  tm_shape(cnty, bbox = bbox_new) +  
  tm_polygons(alpha=0) + 
  tm_layout(legend.text.size = 0.6, title.size = 0.5, legend.position = c(0.01, 0.01), title.position = c('right','top'))

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

Q9

This model is mean g(n) as a function of average median income, average population density and average percent with bachelors degree.

The map shows that the model does very well in counties located in the north western, and north eastern part of the country. The R^2 for these counties is between 0.2 to 1.0.

tm_shape(myGWR_model$SDF) + 
  tm_polygons(c("Local_R2")) + 
  tm_shape(cnty) + 
  tm_polygons(alpha=0)

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

Assignment 3_Part2

Gabriel Appiah

9/29/2021

Q1.

Q2.

Q3.

Q4.

Q5

Q6

Q7

Q8

Q9