######Libraries#######
######################
library(MASS) #
library(elsa) #
## Loading required package: sp
## Loading required package: raster
##
## Attaching package: 'raster'
## The following objects are masked from 'package:MASS':
##
## area, select
library(ggplot2) #
library(ggExtra) #
library(grid) #
library(gridExtra) #
library(kableExtra) #
library(leaflet) #
library(maptools) #
## Checking rgeos availability: TRUE
library(raster) #
library(RColorBrewer)#
library(rgdal) #
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/Lawrence Fulton/OneDrive - Texas State University/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/Lawrence Fulton/OneDrive - Texas State University/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(rgeos) #
## rgeos version: 0.5-5, (SVN revision 640)
## GEOS runtime version: 3.8.0-CAPI-1.13.1
## Linking to sp version: 1.4-4
## Polygon checking: TRUE
library(shiny) #
##
## Attaching package: 'shiny'
## The following object is masked from 'package:ggExtra':
##
## runExample
library(sf) #
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(sp) #
library(spatialreg) #
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: Matrix
## Registered S3 methods overwritten by 'spatialreg':
## method from
## residuals.stsls spdep
## deviance.stsls spdep
## coef.stsls spdep
## print.stsls spdep
## summary.stsls spdep
## print.summary.stsls spdep
## residuals.gmsar spdep
## deviance.gmsar spdep
## coef.gmsar spdep
## fitted.gmsar spdep
## print.gmsar spdep
## summary.gmsar spdep
## print.summary.gmsar spdep
## print.lagmess spdep
## summary.lagmess spdep
## print.summary.lagmess spdep
## residuals.lagmess spdep
## deviance.lagmess spdep
## coef.lagmess spdep
## fitted.lagmess spdep
## logLik.lagmess spdep
## fitted.SFResult spdep
## print.SFResult spdep
## fitted.ME_res spdep
## print.ME_res spdep
## print.lagImpact spdep
## plot.lagImpact spdep
## summary.lagImpact spdep
## HPDinterval.lagImpact spdep
## print.summary.lagImpact spdep
## print.sarlm spdep
## summary.sarlm spdep
## residuals.sarlm spdep
## deviance.sarlm spdep
## coef.sarlm spdep
## vcov.sarlm spdep
## fitted.sarlm spdep
## logLik.sarlm spdep
## anova.sarlm spdep
## predict.sarlm spdep
## print.summary.sarlm spdep
## print.sarlm.pred spdep
## as.data.frame.sarlm.pred spdep
## residuals.spautolm spdep
## deviance.spautolm spdep
## coef.spautolm spdep
## fitted.spautolm spdep
## print.spautolm spdep
## summary.spautolm spdep
## logLik.spautolm spdep
## print.summary.spautolm spdep
## print.WXImpact spdep
## summary.WXImpact spdep
## print.summary.WXImpact spdep
## predict.SLX spdep
library(spData) #
library(spdep) #
##
## Attaching package: 'spdep'
## The following objects are masked from 'package:spatialreg':
##
## anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
## cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
## create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
## deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
## fitted.SFResult, fitted.spautolm, get.ClusterOption,
## get.coresOption, get.mcOption, get.VerboseOption,
## get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
## gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
## LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
## predict.SLX, print.gmsar, print.ME_res, print.sarlm,
## print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
## print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
## print.summary.stsls, residuals.gmsar, residuals.sarlm,
## residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
## SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
## SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
## stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
## summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
## The following objects are masked from 'package:elsa':
##
## geary, moran
library(tmap) #
library(tmaptools) #
library(tidyverse) #
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::combine() masks gridExtra::combine()
## x tidyr::expand() masks Matrix::expand()
## x tidyr::extract() masks raster::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::select() masks raster::select(), MASS::select()
## x tidyr::unpack() masks Matrix::unpack()
library(usmap) #
library(leaps) #
library(caret) #
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(glmnet) #
## Loaded glmnet 4.0-2
library(spatialEco) #
##
## Attaching package: 'spatialEco'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:spData':
##
## elev
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:grid':
##
## explode
## The following object is masked from 'package:elsa':
##
## correlogram
## The following object is masked from 'package:raster':
##
## shift
######################
#Pre-Process
setwd("C:/Users/Lawrence Fulton/OneDrive - Texas State University/D_Drive/Jerry")
#Load shape and merge with data
jerry=read.csv("mapping.csv",fileEncoding="UTF-8-BOM")
colnames(jerry)%>%kbl()%>%kable_classic(html_font = "Cambria")
| x |
|---|
| record_id |
| thcic_id |
| pat_state |
| pat_zip |
| pat_country |
| pat_county |
| public_health_region |
| pat_status |
| sex_code |
| race |
| ethnicity |
| pat_age |
| oth_diag_code_1 |
| poa_oth_diag_code_1 |
| oth_diag_code_2 |
| poa_oth_diag_code_2 |
| oth_diag_code_3 |
| poa_oth_diag_code_3 |
| oth_diag_code_4 |
| poa_oth_diag_code_4 |
| oth_diag_code_5 |
| poa_oth_diag_code_5 |
| oth_diag_code_6 |
| poa_oth_diag_code_6 |
| oth_diag_code_7 |
| poa_oth_diag_code_7 |
| oth_diag_code_8 |
| poa_oth_diag_code_8 |
| oth_diag_code_9 |
| poa_oth_diag_code_9 |
| oth_diag_code_10 |
| poa_oth_diag_code_10 |
| oth_diag_code_11 |
| poa_oth_diag_code_11 |
| oth_diag_code_12 |
| poa_oth_diag_code_12 |
| oth_diag_code_13 |
| poa_oth_diag_code_13 |
| oth_diag_code_14 |
| poa_oth_diag_code_14 |
| oth_diag_code_15 |
| poa_oth_diag_code_15 |
| oth_diag_code_16 |
| poa_oth_diag_code_16 |
| oth_diag_code_17 |
| poa_oth_diag_code_17 |
| oth_diag_code_18 |
| poa_oth_diag_code_18 |
| oth_diag_code_19 |
| poa_oth_diag_code_19 |
| oth_diag_code_20 |
| poa_oth_diag_code_20 |
| oth_diag_code_21 |
| poa_oth_diag_code_21 |
| oth_diag_code_22 |
| poa_oth_diag_code_22 |
| oth_diag_code_23 |
| poa_oth_diag_code_23 |
| oth_diag_code_24 |
| poa_oth_diag_code_24 |
| CD_B570 |
| CD_B571 |
| CD_B572 |
| I429 |
| I428 |
| CD_B573 |
| CD_B5730 |
| CD_B5731 |
| CD_B5732 |
| CD_B5739 |
| CD_B574 |
| CD_B5740 |
| CD_B5741 |
| CD_B5742 |
| CD_B5749 |
| CD_B575 |
| I4510 |
| I452 |
| I441 |
| I442 |
| I472 |
| I443 |
| I444 |
| I445 |
| I446 |
| I447 |
| I450 |
| I451 |
| I453 |
| I454 |
| I455 |
| I458 |
| I459 |
| I44 |
| I440 |
| YQ |
jerry$total=rowSums(jerry[,61:95], na.rm=TRUE)
jerry[,13:96]=NULL
jerry=jerry[jerry$pat_state=="TX",]
jerry=jerry[jerry$total>0,]
jerry[, c(1:5, 7:8 )]=NULL
jerry$sex_code[jerry$sex_code==""]="U"
jerry$sex_code=as.factor(jerry$sex_code)
jerry$male=rep(0, nrow(jerry))
jerry$male[jerry$sex_code=="M"]=1
jerry$female=rep(0,nrow(jerry))
jerry$female[jerry$sex_code=="F"]=1
jerry$sex_code=NULL
jerry$pat_age=as.factor(jerry$pat_age)
jerry$race=as.factor(jerry$race)
jerry$race2=rep(0,nrow(jerry))
jerry$race3=rep(0,nrow(jerry))
jerry$race4=rep(0,nrow(jerry))
jerry$race5=rep(0,nrow(jerry))
jerry$race2[jerry$race==2]=1
jerry$race3[jerry$race==3]=1
jerry$race4[jerry$race==4]=1
jerry$race5[jerry$race==5]=1
jerry$race=NULL
jerry$ethnicity[jerry$ethnicity=="`"]="U"
jerry$ethnicity[jerry$ethnicity==""]="U"
jerry$ethnicity=as.factor(jerry$ethnicity)
jerry$eth1=rep(0,nrow(jerry))
jerry$eth2=rep(0,nrow(jerry))
jerry$eth1[jerry$ethnicity==1]=1
jerry$eth2[jerry$ethnicity==2]=1
jerry$ethnicity=NULL
jerry$pat_age=as.numeric(jerry$pat_age)
jerry$pat_county=as.factor(jerry$pat_county)
write.csv(jerry,"jerry.csv", row.names=FALSE)
#Load Preprocessed Files
#rm(jerry)
formap=read.csv("final.csv")
myshape=shapefile("cb_2018_us_county_500k")
#Reduce Space to Texas and Merge
myshape=subset(myshape,myshape$STATEFP=="48" )
counties=merge(myshape,formap, by="GEOID",all.x=TRUE)
#Omit incomplete cases
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
missmap(counties@data)
counties2=sp.na.omit((counties))
## Deleting rows: 60210
missmap(counties2@data)
#Describe
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
round(describe(counties2@data[10:22]),3)%>%kbl(caption="Descriptives")%>%kable_classic(html_font = "Cambria")
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Total | 1 | 252 | 347.575 | 1157.378 | 76.500 | 126.658 | 94.145 | 1.000 | 12983.000 | 12982.000 | 7.693 | 69.247 | 72.908 |
| MeanAge | 2 | 252 | 17.114 | 0.876 | 17.098 | 17.105 | 0.559 | 12.000 | 23.000 | 11.000 | 0.087 | 13.778 | 0.055 |
| Male | 3 | 252 | 165.278 | 540.208 | 39.500 | 61.728 | 48.184 | 0.000 | 6158.000 | 6158.000 | 7.750 | 71.207 | 34.030 |
| Female | 4 | 252 | 123.310 | 408.708 | 25.500 | 44.574 | 33.358 | 0.000 | 4597.000 | 4597.000 | 7.636 | 68.730 | 25.746 |
| Eth1 | 5 | 252 | 64.060 | 243.830 | 7.000 | 13.931 | 8.896 | 0.000 | 1944.000 | 1944.000 | 5.885 | 36.358 | 15.360 |
| Eth2 | 6 | 252 | 245.730 | 860.698 | 53.000 | 91.668 | 69.682 | 0.000 | 9888.000 | 9888.000 | 8.201 | 77.007 | 54.219 |
| Race2 | 7 | 252 | 1.274 | 6.847 | 0.000 | 0.079 | 0.000 | 0.000 | 80.000 | 80.000 | 8.156 | 77.526 | 0.431 |
| Race3 | 8 | 252 | 17.571 | 97.493 | 1.000 | 3.149 | 1.483 | 0.000 | 1217.000 | 1217.000 | 10.104 | 110.210 | 6.142 |
| Race4 | 9 | 252 | 69.944 | 194.634 | 16.000 | 29.396 | 19.274 | 0.000 | 1801.000 | 1801.000 | 6.233 | 44.787 | 12.261 |
| Race5 | 10 | 252 | 13.202 | 60.067 | 1.000 | 2.842 | 1.483 | 0.000 | 787.000 | 787.000 | 9.665 | 111.422 | 3.784 |
| Population | 11 | 252 | 115054.536 | 410461.235 | 19211.000 | 33417.619 | 23047.758 | 272.000 | 4713325.000 | 4713053.000 | 7.569 | 69.643 | 25856.627 |
| PopKM2 | 12 | 252 | 47.109 | 137.891 | 8.602 | 15.897 | 10.749 | 0.107 | 1166.873 | 1166.766 | 5.584 | 35.662 | 8.686 |
| Rate | 13 | 252 | 394.257 | 159.947 | 385.607 | 385.500 | 160.420 | 35.868 | 1166.667 | 1130.799 | 1.002 | 3.025 | 10.076 |
#######################Initial LM#################################
mylm=lm(Rate ~ Male+Eth1+Race2+Race3+Race4+Race5
+PopKM2,
data = counties2@data)
counties2@data$res1 <- residuals(mylm)
counties2@data$sd_breaks1=scale(counties2@data$res1)[,1]
###########################LM1####################################
summary(mylm)
##
## Call:
## lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5 +
## PopKM2, data = counties2@data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -360.78 -111.33 -2.75 88.34 770.09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 397.83104 11.12800 35.750 <2e-16 ***
## Male 0.67037 0.46333 1.447 0.1492
## Eth1 -0.15229 0.08746 -1.741 0.0829 .
## Race2 -7.40576 4.41657 -1.677 0.0949 .
## Race3 -0.48880 0.66057 -0.740 0.4600
## Race4 -0.52273 0.76661 -0.682 0.4960
## Race5 -1.37937 0.93516 -1.475 0.1415
## PopKM2 -0.67545 0.27332 -2.471 0.0141 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155.8 on 244 degrees of freedom
## Multiple R-squared: 0.07755, Adjusted R-squared: 0.05108
## F-statistic: 2.93 on 7 and 244 DF, p-value: 0.0058
#my_breaks <- c(-18,-14,-3,-2,-1,1,2,3,14, 18)
list.queen<-poly2nb(counties2, queen=TRUE)
W=rwm=nb2listw(list.queen, style="W", zero.policy=TRUE)
lm.morantest(mylm, rwm, alternative="two.sided", zero.policy=TRUE)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
##
## Moran I statistic standard deviate = 5.6292, p-value = 1.811e-08
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I Expectation Variance
## 0.204760917 -0.007003787 0.001415207
lm.LMtests(mylm, rwm, test=c("LMerr","LMlag","RLMerr","RLMlag","SARMA"), zero.policy=TRUE)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
##
## LMerr = 28.55, df = 1, p-value = 9.129e-08
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
##
## LMlag = 32.708, df = 1, p-value = 1.071e-08
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
##
## RLMerr = 1.6385, df = 1, p-value = 0.2005
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
##
## RLMlag = 5.7958, df = 1, p-value = 0.01606
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
##
## SARMA = 34.346, df = 2, p-value = 3.482e-08
#Run Appropriate Spatial Model, Check Residuals
mylag1=stsls(Rate~Male+Eth1+Race2+Race3+Race4+Race5
+PopKM2,
zero.policy=TRUE, data=counties2@data, W)
## Warning: Function stsls moved to the spatialreg package
counties2@data$reslag1=mylag1$res
counties2@data$sd_breaks2=scale(counties2@data$reslag1)[,1]
moran1=moran.mc(counties2@data$reslag1, W, 50, zero.policy=TRUE)
#LISA
#mylisa1=lisa(counties2, d1=0, d2=2000, statistic="I", zcol="Deaths.100K")
summary(mylag1)
##
## Call:spatialreg::stsls(formula = formula, data = data, listw = listw,
## zero.policy = zero.policy, na.action = na.action, robust = robust,
## HC = HC, legacy = legacy, W2X = W2X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -333.4712 -90.7110 5.7683 75.9161 660.4662
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Rho 0.814825 0.233505 3.4895 0.0004838
## (Intercept) 76.588111 92.629802 0.8268 0.4083395
## Male 0.190201 0.449183 0.4234 0.6719754
## Eth1 -0.089724 0.082677 -1.0852 0.2778163
## Race2 -6.565064 4.082957 -1.6079 0.1078529
## Race3 0.230699 0.643532 0.3585 0.7199774
## Race4 -0.030058 0.721417 -0.0417 0.9667656
## Race5 -0.467407 0.901717 -0.5184 0.6042124
## PopKM2 -0.359646 0.267977 -1.3421 0.1795703
##
## Residual variance (sigma squared): 20675, (sigma: 143.79)
moran1
##
## Monte-Carlo simulation of Moran I
##
## data: counties2@data$reslag1
## weights: W
## number of simulations + 1: 51
##
## statistic = -0.17115, observed rank = 1, p-value = 0.9804
## alternative hypothesis: greater
#mylisa1
SS=var(counties2@data$Rate)*(nrow(counties2@data)-1)
R2=(SS-mylag1$sse)/SS
R2
## [1] 0.2176077
#Map Residuals
#mycol=colorRampPalette(c("red","blue"), alpha=TRUE)
#mycol=mycol(12)
qpal<-colorNumeric("Blues", counties2@data$sd_breaks2, alpha=TRUE, na.color="white")
leaflet(counties2) %>%
#Overlays
addPolygons(stroke = FALSE,
fillOpacity = 1,
smoothFactor = 0.2,
color=~qpal(sd_breaks2),
group="Residuals",
popup = paste("County: ", counties2@data$NAME, "<br>",
"Residual: ",
round(counties2@data$reslag1,2), "<br>")) %>%
#Base Diagrams
addPolylines(data = counties2,
color = "black",
opacity = 1,
weight = 2,
group="County Boundaries")%>%
fitBounds(-124.8, -66.9, 24.4,49.4) %>%
setView(-100, 31.8, zoom = 5)%>%
addLegend("bottomright", opacity=1, pal = qpal,
values = ~sd_breaks2,
title = "Residuals")%>%
addLayersControl(
baseGroups=c("County Boundaries", "Reset for Overlay"),
overlayGroups = c("Residuals"),
options = layersControlOptions(collapsed = FALSE)
)
#Interactive Plot
library(leaflet)
qpal<-colorNumeric("Reds", counties2@data$Rate)
leaflet(counties2) %>%
#Overlays
addPolygons(stroke = FALSE,
fillOpacity = 1,
smoothFactor = 0.2,
color=~qpal(Rate),
group="Rate",
popup = paste("County: ", counties2@data$NAME, "<br>",
"Rate: ",
round(counties2@data$Rate,2), "<br>")) %>%
#Base Diagrams
addPolylines(data = counties2,
color = "black",
opacity = 1,
weight = 2,
group="County Boundaries")%>%
fitBounds(-124.8, -66.9, 24.4,49.4) %>%
setView(-100, 31.8, zoom = 5)%>%
addLegend("bottomright", opacity=1, pal = qpal,
values = ~Rate,
title = "Rate per 100K")%>%
addLayersControl(
baseGroups=c("County Boundaries", "Reset for Overlay"),
overlayGroups = c("Rates per 100K"),
options = layersControlOptions(collapsed = FALSE)
)