######Libraries#######
######################
library(MASS)        #
library(elsa)        #
## Loading required package: sp
## Loading required package: raster
## 
## Attaching package: 'raster'
## The following objects are masked from 'package:MASS':
## 
##     area, select
library(ggplot2)     #
library(ggExtra)     #
library(grid)        #
library(gridExtra)   #
library(kableExtra)  #
library(leaflet)     #
library(maptools)    #
## Checking rgeos availability: TRUE
library(raster)      #
library(RColorBrewer)#
library(rgdal)       #
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/Lawrence Fulton/OneDrive - Texas State University/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/Lawrence Fulton/OneDrive - Texas State University/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(rgeos)       #
## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-4 
##  Polygon checking: TRUE
library(shiny)       #
## 
## Attaching package: 'shiny'
## The following object is masked from 'package:ggExtra':
## 
##     runExample
library(sf)          #
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(sp)          #
library(spatialreg)  #
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: Matrix
## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep
library(spData)      #
library(spdep)       #
## 
## Attaching package: 'spdep'
## The following objects are masked from 'package:spatialreg':
## 
##     anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
## The following objects are masked from 'package:elsa':
## 
##     geary, moran
library(tmap)        #
library(tmaptools)   #
library(tidyverse)   #
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::combine()    masks gridExtra::combine()
## x tidyr::expand()     masks Matrix::expand()
## x tidyr::extract()    masks raster::extract()
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x tidyr::pack()       masks Matrix::pack()
## x dplyr::select()     masks raster::select(), MASS::select()
## x tidyr::unpack()     masks Matrix::unpack()
library(usmap)       #
library(leaps)       #
library(caret)       #
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(glmnet)      #
## Loaded glmnet 4.0-2
library(spatialEco)  #
## 
## Attaching package: 'spatialEco'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:spData':
## 
##     elev
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:grid':
## 
##     explode
## The following object is masked from 'package:elsa':
## 
##     correlogram
## The following object is masked from 'package:raster':
## 
##     shift
######################

#Pre-Process

setwd("C:/Users/Lawrence Fulton/OneDrive - Texas State University/D_Drive/Jerry")
#Load shape and merge with data
jerry=read.csv("mapping.csv",fileEncoding="UTF-8-BOM")
colnames(jerry)%>%kbl()%>%kable_classic(html_font = "Cambria")
x
record_id
thcic_id
pat_state
pat_zip
pat_country
pat_county
public_health_region
pat_status
sex_code
race
ethnicity
pat_age
oth_diag_code_1
poa_oth_diag_code_1
oth_diag_code_2
poa_oth_diag_code_2
oth_diag_code_3
poa_oth_diag_code_3
oth_diag_code_4
poa_oth_diag_code_4
oth_diag_code_5
poa_oth_diag_code_5
oth_diag_code_6
poa_oth_diag_code_6
oth_diag_code_7
poa_oth_diag_code_7
oth_diag_code_8
poa_oth_diag_code_8
oth_diag_code_9
poa_oth_diag_code_9
oth_diag_code_10
poa_oth_diag_code_10
oth_diag_code_11
poa_oth_diag_code_11
oth_diag_code_12
poa_oth_diag_code_12
oth_diag_code_13
poa_oth_diag_code_13
oth_diag_code_14
poa_oth_diag_code_14
oth_diag_code_15
poa_oth_diag_code_15
oth_diag_code_16
poa_oth_diag_code_16
oth_diag_code_17
poa_oth_diag_code_17
oth_diag_code_18
poa_oth_diag_code_18
oth_diag_code_19
poa_oth_diag_code_19
oth_diag_code_20
poa_oth_diag_code_20
oth_diag_code_21
poa_oth_diag_code_21
oth_diag_code_22
poa_oth_diag_code_22
oth_diag_code_23
poa_oth_diag_code_23
oth_diag_code_24
poa_oth_diag_code_24
CD_B570
CD_B571
CD_B572
I429
I428
CD_B573
CD_B5730
CD_B5731
CD_B5732
CD_B5739
CD_B574
CD_B5740
CD_B5741
CD_B5742
CD_B5749
CD_B575
I4510
I452
I441
I442
I472
I443
I444
I445
I446
I447
I450
I451
I453
I454
I455
I458
I459
I44
I440
YQ
jerry$total=rowSums(jerry[,61:95], na.rm=TRUE)
jerry[,13:96]=NULL
jerry=jerry[jerry$pat_state=="TX",]
jerry=jerry[jerry$total>0,]
jerry[, c(1:5, 7:8 )]=NULL
jerry$sex_code[jerry$sex_code==""]="U"
jerry$sex_code=as.factor(jerry$sex_code)
jerry$male=rep(0, nrow(jerry))
jerry$male[jerry$sex_code=="M"]=1
jerry$female=rep(0,nrow(jerry))
jerry$female[jerry$sex_code=="F"]=1
jerry$sex_code=NULL

jerry$pat_age=as.factor(jerry$pat_age)

jerry$race=as.factor(jerry$race)
jerry$race2=rep(0,nrow(jerry))
jerry$race3=rep(0,nrow(jerry))
jerry$race4=rep(0,nrow(jerry))
jerry$race5=rep(0,nrow(jerry))
jerry$race2[jerry$race==2]=1
jerry$race3[jerry$race==3]=1
jerry$race4[jerry$race==4]=1
jerry$race5[jerry$race==5]=1
jerry$race=NULL

jerry$ethnicity[jerry$ethnicity=="`"]="U"
jerry$ethnicity[jerry$ethnicity==""]="U"
jerry$ethnicity=as.factor(jerry$ethnicity)
jerry$eth1=rep(0,nrow(jerry))
jerry$eth2=rep(0,nrow(jerry))
jerry$eth1[jerry$ethnicity==1]=1
jerry$eth2[jerry$ethnicity==2]=1
jerry$ethnicity=NULL
jerry$pat_age=as.numeric(jerry$pat_age)
jerry$pat_county=as.factor(jerry$pat_county)
write.csv(jerry,"jerry.csv", row.names=FALSE)

#Load Preprocessed Files

#rm(jerry)

formap=read.csv("final.csv")
myshape=shapefile("cb_2018_us_county_500k")

#Reduce Space to Texas and Merge

myshape=subset(myshape,myshape$STATEFP=="48" )
counties=merge(myshape,formap, by="GEOID",all.x=TRUE)

#Omit incomplete cases

library(Amelia)
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
missmap(counties@data)

counties2=sp.na.omit((counties))
## Deleting rows: 60210
missmap(counties2@data)

#Describe

library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
round(describe(counties2@data[10:22]),3)%>%kbl(caption="Descriptives")%>%kable_classic(html_font = "Cambria")
Descriptives
vars n mean sd median trimmed mad min max range skew kurtosis se
Total 1 252 347.575 1157.378 76.500 126.658 94.145 1.000 12983.000 12982.000 7.693 69.247 72.908
MeanAge 2 252 17.114 0.876 17.098 17.105 0.559 12.000 23.000 11.000 0.087 13.778 0.055
Male 3 252 165.278 540.208 39.500 61.728 48.184 0.000 6158.000 6158.000 7.750 71.207 34.030
Female 4 252 123.310 408.708 25.500 44.574 33.358 0.000 4597.000 4597.000 7.636 68.730 25.746
Eth1 5 252 64.060 243.830 7.000 13.931 8.896 0.000 1944.000 1944.000 5.885 36.358 15.360
Eth2 6 252 245.730 860.698 53.000 91.668 69.682 0.000 9888.000 9888.000 8.201 77.007 54.219
Race2 7 252 1.274 6.847 0.000 0.079 0.000 0.000 80.000 80.000 8.156 77.526 0.431
Race3 8 252 17.571 97.493 1.000 3.149 1.483 0.000 1217.000 1217.000 10.104 110.210 6.142
Race4 9 252 69.944 194.634 16.000 29.396 19.274 0.000 1801.000 1801.000 6.233 44.787 12.261
Race5 10 252 13.202 60.067 1.000 2.842 1.483 0.000 787.000 787.000 9.665 111.422 3.784
Population 11 252 115054.536 410461.235 19211.000 33417.619 23047.758 272.000 4713325.000 4713053.000 7.569 69.643 25856.627
PopKM2 12 252 47.109 137.891 8.602 15.897 10.749 0.107 1166.873 1166.766 5.584 35.662 8.686
Rate 13 252 394.257 159.947 385.607 385.500 160.420 35.868 1166.667 1130.799 1.002 3.025 10.076

Run A Model and Check Residuals

#######################Initial LM#################################
mylm=lm(Rate ~ Male+Eth1+Race2+Race3+Race4+Race5
        +PopKM2,
          data = counties2@data)
counties2@data$res1 <- residuals(mylm)                               
counties2@data$sd_breaks1=scale(counties2@data$res1)[,1] 
###########################LM1####################################
summary(mylm)
## 
## Call:
## lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5 + 
##     PopKM2, data = counties2@data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -360.78 -111.33   -2.75   88.34  770.09 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 397.83104   11.12800  35.750   <2e-16 ***
## Male          0.67037    0.46333   1.447   0.1492    
## Eth1         -0.15229    0.08746  -1.741   0.0829 .  
## Race2        -7.40576    4.41657  -1.677   0.0949 .  
## Race3        -0.48880    0.66057  -0.740   0.4600    
## Race4        -0.52273    0.76661  -0.682   0.4960    
## Race5        -1.37937    0.93516  -1.475   0.1415    
## PopKM2       -0.67545    0.27332  -2.471   0.0141 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 155.8 on 244 degrees of freedom
## Multiple R-squared:  0.07755,    Adjusted R-squared:  0.05108 
## F-statistic:  2.93 on 7 and 244 DF,  p-value: 0.0058
#my_breaks <- c(-18,-14,-3,-2,-1,1,2,3,14, 18)

list.queen<-poly2nb(counties2, queen=TRUE)
W=rwm=nb2listw(list.queen, style="W", zero.policy=TRUE)
lm.morantest(mylm, rwm, alternative="two.sided", zero.policy=TRUE)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## Moran I statistic standard deviate = 5.6292, p-value = 1.811e-08
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.204760917     -0.007003787      0.001415207
lm.LMtests(mylm, rwm, test=c("LMerr","LMlag","RLMerr","RLMlag","SARMA"),        zero.policy=TRUE)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## LMerr = 28.55, df = 1, p-value = 9.129e-08
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## LMlag = 32.708, df = 1, p-value = 1.071e-08
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## RLMerr = 1.6385, df = 1, p-value = 0.2005
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## RLMlag = 5.7958, df = 1, p-value = 0.01606
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## SARMA = 34.346, df = 2, p-value = 3.482e-08

#Run Appropriate Spatial Model, Check Residuals

mylag1=stsls(Rate~Male+Eth1+Race2+Race3+Race4+Race5
        +PopKM2,
          zero.policy=TRUE, data=counties2@data, W)
## Warning: Function stsls moved to the spatialreg package
counties2@data$reslag1=mylag1$res
counties2@data$sd_breaks2=scale(counties2@data$reslag1)[,1] 
moran1=moran.mc(counties2@data$reslag1, W, 50, zero.policy=TRUE) 
#LISA
#mylisa1=lisa(counties2, d1=0, d2=2000, statistic="I", zcol="Deaths.100K")
summary(mylag1)
## 
## Call:spatialreg::stsls(formula = formula, data = data, listw = listw, 
##     zero.policy = zero.policy, na.action = na.action, robust = robust, 
##     HC = HC, legacy = legacy, W2X = W2X)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -333.4712  -90.7110    5.7683   75.9161  660.4662 
## 
## Coefficients: 
##              Estimate Std. Error t value  Pr(>|t|)
## Rho          0.814825   0.233505  3.4895 0.0004838
## (Intercept) 76.588111  92.629802  0.8268 0.4083395
## Male         0.190201   0.449183  0.4234 0.6719754
## Eth1        -0.089724   0.082677 -1.0852 0.2778163
## Race2       -6.565064   4.082957 -1.6079 0.1078529
## Race3        0.230699   0.643532  0.3585 0.7199774
## Race4       -0.030058   0.721417 -0.0417 0.9667656
## Race5       -0.467407   0.901717 -0.5184 0.6042124
## PopKM2      -0.359646   0.267977 -1.3421 0.1795703
## 
## Residual variance (sigma squared): 20675, (sigma: 143.79)
moran1
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  counties2@data$reslag1 
## weights: W  
## number of simulations + 1: 51 
## 
## statistic = -0.17115, observed rank = 1, p-value = 0.9804
## alternative hypothesis: greater
#mylisa1


SS=var(counties2@data$Rate)*(nrow(counties2@data)-1)
R2=(SS-mylag1$sse)/SS
R2
## [1] 0.2176077

#Map Residuals

#mycol=colorRampPalette(c("red","blue"), alpha=TRUE)
#mycol=mycol(12)
qpal<-colorNumeric("Blues", counties2@data$sd_breaks2, alpha=TRUE, na.color="white") 

leaflet(counties2) %>%
  #Overlays
  addPolygons(stroke = FALSE, 
              fillOpacity = 1, 
              smoothFactor = 0.2, 
              color=~qpal(sd_breaks2),
              group="Residuals",
              popup = paste("County: ", counties2@data$NAME, "<br>",
                            "Residual: ", 
                            round(counties2@data$reslag1,2), "<br>")) %>%
 
  #Base Diagrams
  addPolylines(data = counties2, 
               color = "black", 
               opacity = 1, 
               weight = 2, 
               group="County Boundaries")%>%
  
  fitBounds(-124.8, -66.9, 24.4,49.4) %>% 
  setView(-100, 31.8, zoom = 5)%>%
  
  addLegend("bottomright", opacity=1, pal = qpal, 
            values = ~sd_breaks2,
            title = "Residuals")%>%
  

  
  addLayersControl(
    baseGroups=c("County Boundaries", "Reset for Overlay"),
    overlayGroups = c("Residuals"),
    options = layersControlOptions(collapsed = FALSE)
  )

#Interactive Plot

library(leaflet)
qpal<-colorNumeric("Reds", counties2@data$Rate) 

leaflet(counties2) %>%
  #Overlays
  addPolygons(stroke = FALSE, 
              fillOpacity = 1, 
              smoothFactor = 0.2, 
              color=~qpal(Rate),
              group="Rate",
              popup = paste("County: ", counties2@data$NAME, "<br>",
                            "Rate: ", 
                            round(counties2@data$Rate,2), "<br>")) %>%
 
  #Base Diagrams
  addPolylines(data = counties2, 
               color = "black", 
               opacity = 1, 
               weight = 2, 
               group="County Boundaries")%>%
  
  fitBounds(-124.8, -66.9, 24.4,49.4) %>% 
  setView(-100, 31.8, zoom = 5)%>%
  
  addLegend("bottomright", opacity=1, pal = qpal, 
            values = ~Rate,
            title = "Rate per 100K")%>%
  

  
  addLayersControl(
    baseGroups=c("County Boundaries", "Reset for Overlay"),
    overlayGroups = c("Rates per 100K"),
    options = layersControlOptions(collapsed = FALSE)
  )