#Load in Packages
library(sf)
## Warning: package 'sf' was built under R version 4.3.2
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spData)
## Warning: package 'spData' was built under R version 4.3.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.2
library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 4.3.3
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(GWmodel)
## Warning: package 'GWmodel' was built under R version 4.3.3
## Loading required package: robustbase
## Warning: package 'robustbase' was built under R version 4.3.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.3.2
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-2.
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(scales)
## Warning: package 'scales' was built under R version 4.3.2
# Load in Data
setwd("C:/Users/kajsa/OneDrive/Documents/Binghamton/Senior - Spring/DIDA 370/finaldata")
chic <- st_read("ComArea_ACS14_f.shp")
## Reading layer `ComArea_ACS14_f' from data source
## `C:\Users\kajsa\OneDrive\Documents\Binghamton\Senior - Spring\DIDA 370\finaldata\ComArea_ACS14_f.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 86 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS: WGS84(DD)
chic1 <- chic %>%
mutate("perc_pov" = Pov14/Pop2014)
chic1
## Simple feature collection with 77 features and 87 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS: WGS84(DD)
## First 10 features:
## ComAreaID community TRACTCnt shape_area shape_len Pop2012 Pop2014
## 1 35 DOUGLAS 10 46004621 31027.05 18238 19430
## 2 36 OAKLAND 3 16913961 19565.51 5918 6473
## 3 37 FULLER PARK 2 19916705 25339.09 2876 2543
## 4 38 GRAND BOULEVARD 14 48492503 28196.84 21929 22531
## 5 39 KENWOOD 7 29071742 23325.17 17841 18217
## 6 4 LINCOLN SQUARE 11 71352328 36624.60 39493 39547
## 7 40 WASHINGTON PARK 6 42373881 28175.32 11717 11647
## 8 41 HYDE PARK 12 45105380 29746.71 25681 26705
## 9 42 WOODLAWN 11 57815180 46936.96 25983 25451
## 10 1 ROGERS PARK 12 51259902 34052.40 54991 55053
## PopChng PopM PopMP PopF PopFP Under5 Under5P Under18 Under18P Over18
## 1 3.1645 8294 42.6866 11136 57.3134 1888 9.7169 2904 14.9460 16526
## 2 4.4791 2499 38.6065 3974 61.3935 725 11.2004 1938 29.9397 4535
## 3 -6.1450 1218 47.8962 1325 52.1038 531 20.8808 590 23.2009 1953
## 4 1.3540 9681 42.9675 12850 57.0325 2505 11.1180 5659 25.1165 16872
## 5 1.0428 8543 46.8958 9674 53.1042 1191 6.5378 3537 19.4159 14680
## 6 0.0683 19760 49.9659 19787 50.0341 2839 7.1788 6610 16.7143 32937
## 7 -0.2996 4850 41.6416 6797 58.3584 1486 12.7587 4367 37.4946 7280
## 8 1.9547 12941 48.4591 13764 51.5409 1690 6.3284 3727 13.9562 22978
## 9 -1.0343 11345 44.5759 14106 55.4241 1854 7.2846 6718 26.3958 18733
## 10 0.0563 27309 49.6049 27744 50.3951 3601 6.5410 10281 18.6747 44772
## Over18P Over21 Over21P Over65 Over65P Wht14 Wht14P Blk14 Blk14P AI14
## 1 85.0540 15033 77.3700 2834 14.5857 2752 14.1637 14005 72.0793 86
## 2 70.0603 4213 65.0857 601 9.2847 234 3.6150 6072 93.8050 6
## 3 76.7991 1870 73.5352 542 21.3134 122 4.7975 2311 90.8769 0
## 4 74.8835 15795 70.1034 2914 12.9333 1050 4.6602 21150 93.8707 156
## 5 80.5841 14171 77.7900 2791 15.3209 3834 21.0463 12754 70.0115 129
## 6 83.2857 32196 81.4120 3660 9.2548 31209 78.9162 1780 4.5010 386
## 7 62.5054 6806 58.4356 915 7.8561 228 1.9576 11392 97.8106 157
## 8 86.0438 19792 74.1135 3264 12.2224 13821 51.7544 9110 34.1135 176
## 9 73.6042 16777 65.9188 2735 10.7461 2586 10.1607 22160 87.0693 167
## 10 81.3253 41477 75.3401 4677 8.4954 33374 60.6216 15860 28.8086 558
## AI14P AS14 AS14P NHP14 NHP14P Oth14 Oth14P Hisp14 Hisp14P Property_C
## 1 0.4426 2687 13.8291 50 0.2573 203 1.0448 426 2.1925 814
## 2 0.0927 140 2.1628 0 0.0000 33 0.5098 122 1.8848 200
## 3 0.0000 0 0.0000 0 0.0000 132 5.1907 191 7.5108 207
## 4 0.6924 138 0.6125 90 0.3994 369 1.6377 474 2.1038 884
## 5 0.7081 1660 9.1124 0 0.0000 331 1.8170 701 3.8481 497
## 6 0.9761 5565 14.0719 101 0.2554 1757 4.4428 6236 15.7686 722
## 7 1.3480 16 0.1374 0 0.0000 35 0.3005 246 2.1121 676
## 8 0.6591 3622 13.5630 19 0.0711 1025 3.8382 1681 6.2947 674
## 9 0.6562 903 3.5480 5 0.0196 238 0.9351 617 2.4243 1108
## 10 1.0136 4162 7.5600 108 0.1962 3084 5.6019 13530 24.5763 1104
## PropCrRt Violent_C VlntCrRt PerCInc14 PPop14 Pov14 field_37 ChldPov14 NoHS14
## 1 0.0419 793 0.0408 215254 16884 6229 36.8929 1496 1520
## 2 0.0309 203 0.0314 54204 6445 2500 38.7898 719 717
## 3 0.0814 249 0.0979 32828 2533 927 36.5969 204 414
## 4 0.0392 1044 0.0463 293535 22272 7588 34.0697 2487 2260
## 5 0.0273 426 0.0234 261582 17997 4232 23.5150 1141 1141
## 6 0.0183 463 0.0117 461996 39013 5145 13.1879 1148 3065
## 7 0.0580 1052 0.0903 87459 11546 5550 48.0686 2526 1311
## 8 0.0252 378 0.0142 414505 24047 5323 22.1358 664 686
## 9 0.0435 1467 0.0576 186586 24179 9139 37.7973 3243 2443
## 10 0.0201 1022 0.0186 287055 52115 14173 27.1956 3401 5712
## HSGrad14 SmClg14 ClgGrad14 LaborFrc Unemp14 Pov50 Pov50P Pov125 Pov125P
## 1 2314 3429 4703 8869 1620 1498 8.8723 7016 41.5541
## 2 718 1274 1084 3139 865 481 7.4631 3066 47.5718
## 3 592 492 230 1036 378 166 6.5535 1164 45.9534
## 4 3391 5035 3867 10756 2619 2164 9.7162 9349 41.9765
## 5 1589 2968 6861 9283 1304 1203 6.6844 5181 28.7881
## 6 3809 5771 17730 25794 2037 1504 3.8551 6846 17.5480
## 7 1682 2006 1100 4668 1388 1320 11.4325 6425 55.6470
## 8 1388 2734 12261 13566 1029 1450 6.0299 6330 26.3234
## 9 3271 5316 4016 10534 2356 2328 9.6282 11270 46.6107
## 10 6720 8009 15420 31100 2921 3138 6.0213 17629 33.8271
## Pov150 Pov150P Pov185 Pov185P Pov200 Pov200P COIave HISave SESave
## 1 7681 45.4928 8418 49.8579 8901 52.7186 -0.1271 42.7985 48.7474
## 2 3358 52.1024 3715 57.6416 3735 57.9519 -0.2499 51.4700 52.5017
## 3 1342 52.9807 1510 59.6131 1540 60.7975 -0.8429 56.4702 48.2353
## 4 10354 46.4889 11665 52.3752 12066 54.1756 -0.3428 41.7933 48.1779
## 5 5689 31.6108 6774 37.6396 7308 40.6068 0.2452 35.6855 58.4564
## 6 8350 21.4031 10136 25.9811 10600 27.1704 0.5169 26.7975 63.3062
## 7 7170 62.0994 8038 69.6172 8256 71.5053 -0.5520 53.9975 49.0668
## 8 6891 28.6564 7953 33.0727 8432 35.0647 0.3641 27.1333 60.8513
## 9 12882 53.2776 14440 59.7212 14852 61.4252 -0.1543 45.6919 49.8982
## 10 20569 39.4685 23774 45.6183 25890 49.6786 0.0026 38.1292 59.1711
## Hlitave BirthRate FertRate LoBirthR PrenScrn PretBrth TeenBirth Assault
## 1 233.4417 10.3 42 11 76 10.2 34.2 13.6
## 2 250.0172 17.5 63 13 75 11.5 54.5 18.9
## 3 243.1150 11.9 60 17 71 14.3 69.2 49.6
## 4 230.7388 14.3 58 12 74 13.7 54.8 32.1
## 5 252.5250 12.2 51 11 77 11.9 25.7 15.2
## 6 243.4111 17.1 61 8 80 9.7 38.4 5.0
## 7 234.1639 19.3 72 17 74 16.9 82.6 44.6
## 8 230.3696 9.7 33 5 80 5.5 7.6 5.8
## 9 239.6065 15.1 60 17 73 16.3 51.1 31.1
## 10 243.5293 16.4 62 11 73 11.2 40.8 7.7
## BrstCancr CancerAll Colorect DiabetM FirearmM InfntMR LungCancer ProstateC
## 1 34.3 269.9 33.2 119.1 9.1 13.4 74.5 85.5
## 2 20.6 159.7 14.5 88.7 12.6 8.2 54.5 54.2
## 3 8.5 258.9 21.1 111.7 22.6 22.6 89.6 70.5
## 4 22.6 218.3 27.7 82.6 25.8 12.1 63.8 39.0
## 5 30.9 196.4 34.2 45.5 17.4 8.9 49.1 46.2
## 6 21.7 153.2 8.6 55.4 6.1 3.8 43.1 27.6
## 7 26.0 258.0 31.9 88.2 39.5 19.3 79.8 67.0
## 8 33.6 144.0 11.7 34.0 5.0 10.4 34.9 24.1
## 9 32.9 241.3 23.9 82.1 25.0 11.5 69.8 58.0
## 10 23.3 176.9 25.3 77.1 5.2 6.4 36.7 21.7
## Stroke ChlBLLS ChlLeadP GonorrF GonorrM Tuberc
## 1 62.1 482.2 0 1063.3 727.4 4.2
## 2 43.7 435.4 0 1655.4 1629.3 6.7
## 3 82.4 489.9 2 1061.9 1556.4 0.0
## 4 46.7 590.4 1 1454.6 1680.0 13.2
## 5 31.5 397.9 0 610.2 549.1 0.0
## 6 36.9 273.3 0 98.8 195.5 8.5
## 7 43.3 502.4 0 2145.8 2058.0 5.0
## 8 26.0 369.0 0 216.6 168.4 5.3
## 9 60.0 444.3 1 1382.0 1818.6 17.4
## 10 33.7 364.7 0 322.5 423.3 11.4
## geometry perc_pov
## 1 MULTIPOLYGON (((-87.60914 4... 0.3205867
## 2 MULTIPOLYGON (((-87.59215 4... 0.3862197
## 3 MULTIPOLYGON (((-87.6288 41... 0.3645301
## 4 MULTIPOLYGON (((-87.60671 4... 0.3367804
## 5 MULTIPOLYGON (((-87.59215 4... 0.2323105
## 6 MULTIPOLYGON (((-87.67441 4... 0.1300984
## 7 MULTIPOLYGON (((-87.60604 4... 0.4765176
## 8 MULTIPOLYGON (((-87.58038 4... 0.1993260
## 9 MULTIPOLYGON (((-87.57714 4... 0.3590822
## 10 MULTIPOLYGON (((-87.65456 4... 0.2574428
##############################################################
#How do Health Indicators Predict Poverty
health_model <- chic1 %>%
select(c(perc_pov, CancerAll, DiabetM, InfntMR, LoBirthR, ComAreaID, shape_area, shape_len)) %>%
filter(!perc_pov %in% "NA") %>%
mutate(perc_pov = as.numeric(perc_pov))
summary(lm4 <- lm(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR, data = health_model))
##
## Call:
## lm(formula = perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR,
## data = health_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18722 -0.04413 0.00574 0.04517 0.26996
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0196693 0.0478056 0.411 0.68197
## CancerAll -0.0007147 0.0004023 -1.776 0.07990 .
## DiabetM 0.0023227 0.0006903 3.365 0.00123 **
## InfntMR 0.0076688 0.0035099 2.185 0.03215 *
## LoBirthR 0.0124491 0.0039317 3.166 0.00226 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08418 on 72 degrees of freedom
## Multiple R-squared: 0.5547, Adjusted R-squared: 0.5299
## F-statistic: 22.42 on 4 and 72 DF, p-value: 4.728e-12
#Testing for spatial autocorrelation in the residuals
list <- health_model %>%
poly2nb(st_geometry(health_model)) %>%
nb2listw(zero.policy = TRUE)
#run the Moran's I test for regression residuals
lm.morantest(lm4, list)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = perc_pov ~ CancerAll + DiabetM + InfntMR +
## LoBirthR, data = health_model)
## weights: list
##
## Moran I statistic standard deviate = 2.6787, p-value = 0.003696
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.165344909 -0.023103101 0.004949233
#From this output, we see that the Moran I p-value indicates statistically significant spatial autocorrelation because the value is below 0.05. This provides us strong evidence AGAINST the null hypothesis of NO spatial autocorrelation
#The observed Moran I is a positive number which indicates a positive spatial autocorrelation. Overall, we can conclude that there is statistically significant positive spatial autocorrelation in the residuals. This means that nearby observations are likely to have similar residual values.
# Spatial Error Model to Control for the Spatial Dependency
lm_health_error <- errorsarlm(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR,
data = health_model,
listw = list,
zero.policy = TRUE,
na.action = na.omit)
summary(lm_health_error)
##
## Call:errorsarlm(formula = perc_pov ~ CancerAll + DiabetM + InfntMR +
## LoBirthR, data = health_model, listw = list, na.action = na.omit,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1696996 -0.0433084 0.0053114 0.0424771 0.2845409
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04977972 0.05041855 0.9873 0.3234811
## CancerAll -0.00064657 0.00037894 -1.7063 0.0879581
## DiabetM 0.00224488 0.00065166 3.4449 0.0005714
## InfntMR 0.00571237 0.00320467 1.7825 0.0746655
## LoBirthR 0.01002130 0.00347658 2.8825 0.0039451
##
## Lambda: 0.43128, LR test value: 5.812, p-value: 0.015917
## Asymptotic standard error: 0.13553
## z-value: 3.1821, p-value: 0.0014621
## Wald statistic: 10.126, p-value: 0.0014621
##
## Log likelihood: 86.79523 for error model
## ML residual variance (sigma squared): 0.0058883, (sigma: 0.076735)
## Number of observations: 77
## Number of parameters estimated: 7
## AIC: -159.59, (AIC for lm: -155.78)
# We can see that the lamda value is 0.43 which is statistically significant because it is larger than 0.05. This tells us there is spatial autocorrelation within the residuals of our models.
# The AIC value is -159.59 compared to the AIC for the linear model of -155.78. Because the error model's AIC is low, we can suggest a small improvement in the model.
seResiduals4 <- rep(0, length(health_model$perc_pov))
resIndex4 <- lm_health_error$residuals %>% names() %>% as.integer();
seResiduals4[resIndex4] <- lm_health_error$residuals
#perform the Moran Test
list %>%
moran.test(seResiduals4, ., zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: seResiduals4
## weights: .
##
## Moran I statistic standard deviate = 0.17123, p-value = 0.432
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.0009837955 -0.0131578947 0.0050549195
# Because our p-value is above 0.05, we can see the Moran test is insignificant which tells us that we have controlled for the spatial variation in the error terms
bwVal2 <-GWmodel::bw.gwr(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR,
data = health_model %>% sf::as_Spatial(),
approach = 'AICc', kernel = 'bisquare',
adaptive = TRUE)
## Adaptive bandwidth (number of nearest neighbours): 55 AICc value: -161.5842
## Adaptive bandwidth (number of nearest neighbours): 42 AICc value: -165.8242
## Adaptive bandwidth (number of nearest neighbours): 33 AICc value: -162.85
## Adaptive bandwidth (number of nearest neighbours): 46 AICc value: -164.5742
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: -164.6257
## Adaptive bandwidth (number of nearest neighbours): 42 AICc value: -165.8242
bwVal2
## [1] 42
lm.gwr2 <- gwr.basic(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR,
data = health_model %>% sf::as_Spatial(),
bw = bwVal2, kernel = "bisquare", adaptive = TRUE)
print(lm.gwr2)
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2024-04-25 18:00:21.843685
## Call:
## gwr.basic(formula = perc_pov ~ CancerAll + DiabetM + InfntMR +
## LoBirthR, data = health_model %>% sf::as_Spatial(), bw = bwVal2,
## kernel = "bisquare", adaptive = TRUE)
##
## Dependent (y) variable: perc_pov
## Independent variables: CancerAll DiabetM InfntMR LoBirthR
## Number of data points: 77
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18722 -0.04413 0.00574 0.04517 0.26996
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0196693 0.0478056 0.411 0.68197
## CancerAll -0.0007147 0.0004023 -1.776 0.07990 .
## DiabetM 0.0023227 0.0006903 3.365 0.00123 **
## InfntMR 0.0076688 0.0035099 2.185 0.03215 *
## LoBirthR 0.0124491 0.0039317 3.166 0.00226 **
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.08418 on 72 degrees of freedom
## Multiple R-squared: 0.5547
## Adjusted R-squared: 0.5299
## F-statistic: 22.42 on 4 and 72 DF, p-value: 4.728e-12
## ***Extra Diagnostic information
## Residual sum of squares: 0.5101695
## Sigma(hat): 0.08247581
## AIC: -155.7784
## AICc: -154.5784
## BIC: -192.6528
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: bisquare
## Adaptive bandwidth: 42 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -0.26026907 -0.01077319 0.03276930 0.08130904 0.1820
## CancerAll -0.00256234 -0.00097222 -0.00068624 -0.00021006 0.0004
## DiabetM -0.00032220 0.00096532 0.00163113 0.00361353 0.0097
## InfntMR -0.00198885 0.00187100 0.00767581 0.01667062 0.0225
## LoBirthR 0.00171588 0.00703163 0.00937281 0.01557529 0.0259
## ************************Diagnostic information*************************
## Number of data points: 77
## Effective number of parameters (2trace(S) - trace(S'S)): 23.68848
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 53.31152
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -165.8242
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -200.8561
## BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): -215.519
## Residual sum of squares: 0.2606169
## R-square value: 0.7724975
## Adjusted R-square value: 0.6694765
##
## ***********************************************************************
## Program stops at: 2024-04-25 18:00:22.208215
health_model$gwr_exp_coeff <- lm.gwr2$SDF$DiabetM
model_one <- as(health_model, Class = "Spatial")
plot_one <- spplot(model_one, "gwr_exp_coeff")
plot_one

health_model$gwr_exp_coeff <- lm.gwr2$SDF$LoBirthR
model_one <- as(health_model, Class = "Spatial")
plot_two <- spplot(model_one, "gwr_exp_coeff")
plot_two

library(cowplot)
## Warning: package 'cowplot' was built under R version 4.3.2
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
plot_grid(plot_one, plot_two,
labels = c('Diabetes-related deaths per 100,000 persons', 'Infant mortality rate: deaths per 1000 live births'),
hjust = -0.1, vjust = 4,label_size = 10)
