######Libraries#######
######################
library(MASS)        #
library(elsa)        #

## Loading required package: sp

## Loading required package: raster

## 
## Attaching package: 'raster'

## The following objects are masked from 'package:MASS':
## 
##     area, select

library(ggplot2)     #
library(ggExtra)     #
library(grid)        #
library(gridExtra)   #
library(kableExtra)  #
library(leaflet)     #
library(maptools)    #

## Checking rgeos availability: TRUE

library(raster)      #
library(RColorBrewer)#
library(rgdal)       #

## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/Lawrence Fulton/OneDrive - Texas State University/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/Lawrence Fulton/OneDrive - Texas State University/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.

library(rgeos)       #

## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-4 
##  Polygon checking: TRUE

library(shiny)       #

## 
## Attaching package: 'shiny'

## The following object is masked from 'package:ggExtra':
## 
##     runExample

library(sf)          #

## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1

library(sp)          #
library(spatialreg)  #

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Loading required package: Matrix

## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep

library(spData)      #
library(spdep)       #

## 
## Attaching package: 'spdep'

## The following objects are masked from 'package:spatialreg':
## 
##     anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm

## The following objects are masked from 'package:elsa':
## 
##     geary, moran

library(tmap)        #
library(tmaptools)   #
library(tidyverse)   #

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::combine()    masks gridExtra::combine()
## x tidyr::expand()     masks Matrix::expand()
## x tidyr::extract()    masks raster::extract()
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
## x tidyr::pack()       masks Matrix::pack()
## x dplyr::select()     masks raster::select(), MASS::select()
## x tidyr::unpack()     masks Matrix::unpack()

library(usmap)       #
library(leaps)       #
library(caret)       #

## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following object is masked from 'package:purrr':
## 
##     lift

library(glmnet)      #

## Loaded glmnet 4.0-2

library(spatialEco)  #

## 
## Attaching package: 'spatialEco'

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:spData':
## 
##     elev

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following object is masked from 'package:grid':
## 
##     explode

## The following object is masked from 'package:elsa':
## 
##     correlogram

## The following object is masked from 'package:raster':
## 
##     shift

######################

#Pre-Process

setwd("C:/Users/Lawrence Fulton/OneDrive - Texas State University/D_Drive/Jerry")
#Load shape and merge with data
jerry=read.csv("mapping.csv",fileEncoding="UTF-8-BOM")
colnames(jerry)%>%kbl()%>%kable_classic(html_font = "Cambria")

x
record_id
thcic_id
pat_state
pat_zip
pat_country
pat_county
public_health_region
pat_status
sex_code
race
ethnicity
pat_age
oth_diag_code_1
poa_oth_diag_code_1
oth_diag_code_2
poa_oth_diag_code_2
oth_diag_code_3
poa_oth_diag_code_3
oth_diag_code_4
poa_oth_diag_code_4
oth_diag_code_5
poa_oth_diag_code_5
oth_diag_code_6
poa_oth_diag_code_6
oth_diag_code_7
poa_oth_diag_code_7
oth_diag_code_8
poa_oth_diag_code_8
oth_diag_code_9
poa_oth_diag_code_9
oth_diag_code_10
poa_oth_diag_code_10
oth_diag_code_11
poa_oth_diag_code_11
oth_diag_code_12
poa_oth_diag_code_12
oth_diag_code_13
poa_oth_diag_code_13
oth_diag_code_14
poa_oth_diag_code_14
oth_diag_code_15
poa_oth_diag_code_15
oth_diag_code_16
poa_oth_diag_code_16
oth_diag_code_17
poa_oth_diag_code_17
oth_diag_code_18
poa_oth_diag_code_18
oth_diag_code_19
poa_oth_diag_code_19
oth_diag_code_20
poa_oth_diag_code_20
oth_diag_code_21
poa_oth_diag_code_21
oth_diag_code_22
poa_oth_diag_code_22
oth_diag_code_23
poa_oth_diag_code_23
oth_diag_code_24
poa_oth_diag_code_24
CD_B570
CD_B571
CD_B572
I429
I428
CD_B573
CD_B5730
CD_B5731
CD_B5732
CD_B5739
CD_B574
CD_B5740
CD_B5741
CD_B5742
CD_B5749
CD_B575
I4510
I452
I441
I442
I472
I443
I444
I445
I446
I447
I450
I451
I453
I454
I455
I458
I459
I44
I440
YQ

jerry$total=rowSums(jerry[,61:95], na.rm=TRUE)

jerry[,13:96]=NULL
jerry=jerry[jerry$pat_state=="TX",]
jerry=jerry[jerry$total>0,]
jerry[, c(1:5, 7:8 )]=NULL
jerry$sex_code[jerry$sex_code==""]="U"
jerry$sex_code=as.factor(jerry$sex_code)
jerry$male=rep(0, nrow(jerry))
jerry$male[jerry$sex_code=="M"]=1
jerry$female=rep(0,nrow(jerry))
jerry$female[jerry$sex_code=="F"]=1
jerry$sex_code=NULL

jerry$pat_age=as.factor(jerry$pat_age)

jerry$race=as.factor(jerry$race)
jerry$race2=rep(0,nrow(jerry))
jerry$race3=rep(0,nrow(jerry))
jerry$race4=rep(0,nrow(jerry))
jerry$race5=rep(0,nrow(jerry))
jerry$race2[jerry$race==2]=1
jerry$race3[jerry$race==3]=1
jerry$race4[jerry$race==4]=1
jerry$race5[jerry$race==5]=1
jerry$race=NULL

jerry$ethnicity[jerry$ethnicity=="`"]="U"
jerry$ethnicity[jerry$ethnicity==""]="U"
jerry$ethnicity=as.factor(jerry$ethnicity)
jerry$eth1=rep(0,nrow(jerry))
jerry$eth2=rep(0,nrow(jerry))
jerry$eth1[jerry$ethnicity==1]=1
jerry$eth2[jerry$ethnicity==2]=1
jerry$ethnicity=NULL
jerry$pat_age=as.numeric(jerry$pat_age)
jerry$pat_county=as.factor(jerry$pat_county)
write.csv(jerry,"jerry.csv", row.names=FALSE)

#Load Preprocessed Files

#rm(jerry)

formap=read.csv("final.csv")
myshape=shapefile("cb_2018_us_county_500k")

#Reduce Space to Texas and Merge

myshape=subset(myshape,myshape$STATEFP=="48" )
counties=merge(myshape,formap, by="GEOID",all.x=TRUE)

#Omit incomplete cases

library(Amelia)

## Loading required package: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

missmap(counties@data)

counties2=sp.na.omit((counties))

## Deleting rows: 60210

missmap(counties2@data)

#Describe

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

round(describe(counties2@data[10:22]),3)%>%kbl(caption="Descriptives")%>%kable_classic(html_font = "Cambria")

Descriptives
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Total	1	252	347.575	1157.378	76.500	126.658	94.145	1.000	12983.000	12982.000	7.693	69.247	72.908
MeanAge	2	252	17.114	0.876	17.098	17.105	0.559	12.000	23.000	11.000	0.087	13.778	0.055
Male	3	252	165.278	540.208	39.500	61.728	48.184	0.000	6158.000	6158.000	7.750	71.207	34.030
Female	4	252	123.310	408.708	25.500	44.574	33.358	0.000	4597.000	4597.000	7.636	68.730	25.746
Eth1	5	252	64.060	243.830	7.000	13.931	8.896	0.000	1944.000	1944.000	5.885	36.358	15.360
Eth2	6	252	245.730	860.698	53.000	91.668	69.682	0.000	9888.000	9888.000	8.201	77.007	54.219
Race2	7	252	1.274	6.847	0.000	0.079	0.000	0.000	80.000	80.000	8.156	77.526	0.431
Race3	8	252	17.571	97.493	1.000	3.149	1.483	0.000	1217.000	1217.000	10.104	110.210	6.142
Race4	9	252	69.944	194.634	16.000	29.396	19.274	0.000	1801.000	1801.000	6.233	44.787	12.261
Race5	10	252	13.202	60.067	1.000	2.842	1.483	0.000	787.000	787.000	9.665	111.422	3.784
Population	11	252	115054.536	410461.235	19211.000	33417.619	23047.758	272.000	4713325.000	4713053.000	7.569	69.643	25856.627
PopKM2	12	252	47.109	137.891	8.602	15.897	10.749	0.107	1166.873	1166.766	5.584	35.662	8.686
Rate	13	252	394.257	159.947	385.607	385.500	160.420	35.868	1166.667	1130.799	1.002	3.025	10.076

Run A Model and Check Residuals

#######################Initial LM#################################
mylm=lm(Rate ~ Male+Eth1+Race2+Race3+Race4+Race5
        +PopKM2,
          data = counties2@data)
counties2@data$res1 <- residuals(mylm)                               
counties2@data$sd_breaks1=scale(counties2@data$res1)[,1] 
###########################LM1####################################
summary(mylm)

## 
## Call:
## lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5 + 
##     PopKM2, data = counties2@data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -360.78 -111.33   -2.75   88.34  770.09 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 397.83104   11.12800  35.750   <2e-16 ***
## Male          0.67037    0.46333   1.447   0.1492    
## Eth1         -0.15229    0.08746  -1.741   0.0829 .  
## Race2        -7.40576    4.41657  -1.677   0.0949 .  
## Race3        -0.48880    0.66057  -0.740   0.4600    
## Race4        -0.52273    0.76661  -0.682   0.4960    
## Race5        -1.37937    0.93516  -1.475   0.1415    
## PopKM2       -0.67545    0.27332  -2.471   0.0141 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 155.8 on 244 degrees of freedom
## Multiple R-squared:  0.07755,    Adjusted R-squared:  0.05108 
## F-statistic:  2.93 on 7 and 244 DF,  p-value: 0.0058

#my_breaks <- c(-18,-14,-3,-2,-1,1,2,3,14, 18)

list.queen<-poly2nb(counties2, queen=TRUE)
W=rwm=nb2listw(list.queen, style="W", zero.policy=TRUE)
lm.morantest(mylm, rwm, alternative="two.sided", zero.policy=TRUE)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## Moran I statistic standard deviate = 5.6292, p-value = 1.811e-08
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.204760917     -0.007003787      0.001415207

lm.LMtests(mylm, rwm, test=c("LMerr","LMlag","RLMerr","RLMlag","SARMA"),        zero.policy=TRUE)

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## LMerr = 28.55, df = 1, p-value = 9.129e-08
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## LMlag = 32.708, df = 1, p-value = 1.071e-08
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## RLMerr = 1.6385, df = 1, p-value = 0.2005
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## RLMlag = 5.7958, df = 1, p-value = 0.01606
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Rate ~ Male + Eth1 + Race2 + Race3 + Race4 + Race5
## + PopKM2, data = counties2@data)
## weights: rwm
## 
## SARMA = 34.346, df = 2, p-value = 3.482e-08

#Run Appropriate Spatial Model, Check Residuals

mylag1=stsls(Rate~Male+Eth1+Race2+Race3+Race4+Race5
        +PopKM2,
          zero.policy=TRUE, data=counties2@data, W)

## Warning: Function stsls moved to the spatialreg package

counties2@data$reslag1=mylag1$res
counties2@data$sd_breaks2=scale(counties2@data$reslag1)[,1] 
moran1=moran.mc(counties2@data$reslag1, W, 50, zero.policy=TRUE) 
#LISA
#mylisa1=lisa(counties2, d1=0, d2=2000, statistic="I", zcol="Deaths.100K")
summary(mylag1)

## 
## Call:spatialreg::stsls(formula = formula, data = data, listw = listw, 
##     zero.policy = zero.policy, na.action = na.action, robust = robust, 
##     HC = HC, legacy = legacy, W2X = W2X)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -333.4712  -90.7110    5.7683   75.9161  660.4662 
## 
## Coefficients: 
##              Estimate Std. Error t value  Pr(>|t|)
## Rho          0.814825   0.233505  3.4895 0.0004838
## (Intercept) 76.588111  92.629802  0.8268 0.4083395
## Male         0.190201   0.449183  0.4234 0.6719754
## Eth1        -0.089724   0.082677 -1.0852 0.2778163
## Race2       -6.565064   4.082957 -1.6079 0.1078529
## Race3        0.230699   0.643532  0.3585 0.7199774
## Race4       -0.030058   0.721417 -0.0417 0.9667656
## Race5       -0.467407   0.901717 -0.5184 0.6042124
## PopKM2      -0.359646   0.267977 -1.3421 0.1795703
## 
## Residual variance (sigma squared): 20675, (sigma: 143.79)

moran1

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  counties2@data$reslag1 
## weights: W  
## number of simulations + 1: 51 
## 
## statistic = -0.17115, observed rank = 1, p-value = 0.9804
## alternative hypothesis: greater

#mylisa1


SS=var(counties2@data$Rate)*(nrow(counties2@data)-1)
R2=(SS-mylag1$sse)/SS
R2

## [1] 0.2176077

#Map Residuals

#mycol=colorRampPalette(c("red","blue"), alpha=TRUE)
#mycol=mycol(12)
qpal<-colorNumeric("Blues", counties2@data$sd_breaks2, alpha=TRUE, na.color="white") 

leaflet(counties2) %>%
  #Overlays
  addPolygons(stroke = FALSE, 
              fillOpacity = 1, 
              smoothFactor = 0.2, 
              color=~qpal(sd_breaks2),
              group="Residuals",
              popup = paste("County: ", counties2@data$NAME, "<br>",
                            "Residual: ", 
                            round(counties2@data$reslag1,2), "<br>")) %>%
 
  #Base Diagrams
  addPolylines(data = counties2, 
               color = "black", 
               opacity = 1, 
               weight = 2, 
               group="County Boundaries")%>%
  
  fitBounds(-124.8, -66.9, 24.4,49.4) %>% 
  setView(-100, 31.8, zoom = 5)%>%
  
  addLegend("bottomright", opacity=1, pal = qpal, 
            values = ~sd_breaks2,
            title = "Residuals")%>%
  

  
  addLayersControl(
    baseGroups=c("County Boundaries", "Reset for Overlay"),
    overlayGroups = c("Residuals"),
    options = layersControlOptions(collapsed = FALSE)
  )

#Interactive Plot

library(leaflet)
qpal<-colorNumeric("Reds", counties2@data$Rate) 

leaflet(counties2) %>%
  #Overlays
  addPolygons(stroke = FALSE, 
              fillOpacity = 1, 
              smoothFactor = 0.2, 
              color=~qpal(Rate),
              group="Rate",
              popup = paste("County: ", counties2@data$NAME, "<br>",
                            "Rate: ", 
                            round(counties2@data$Rate,2), "<br>")) %>%
 
  #Base Diagrams
  addPolylines(data = counties2, 
               color = "black", 
               opacity = 1, 
               weight = 2, 
               group="County Boundaries")%>%
  
  fitBounds(-124.8, -66.9, 24.4,49.4) %>% 
  setView(-100, 31.8, zoom = 5)%>%
  
  addLegend("bottomright", opacity=1, pal = qpal, 
            values = ~Rate,
            title = "Rate per 100K")%>%
  

  
  addLayersControl(
    baseGroups=c("County Boundaries", "Reset for Overlay"),
    overlayGroups = c("Rates per 100K"),
    options = layersControlOptions(collapsed = FALSE)
  )

Jerry

Larry

7/24/2020

Run A Model and Check Residuals