Libraries and Working Directory

######Libraries#######
######################
library(Amelia)      #

## Loading required package: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

library(MASS)        #
library(elsa)        #

## Loading required package: sp

## Loading required package: raster

## 
## Attaching package: 'raster'

## The following objects are masked from 'package:MASS':
## 
##     area, select

library(ggplot2)     #
library(ggExtra)     #
library(ggplot2)     #
library(ggpubr)      #

## Warning: package 'ggpubr' was built under R version 4.0.2

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:raster':
## 
##     rotate

library(glmnet)      #

## Warning: package 'glmnet' was built under R version 4.0.2

## Loading required package: Matrix

## Loaded glmnet 4.0-2

library(grid)        #
library(gridExtra)   #
library(leaflet)     #

## Warning: package 'leaflet' was built under R version 4.0.2

library(maptools)    #

## Checking rgeos availability: TRUE

library(raster)      #
library(RColorBrewer)#
library(rgdal)       #

## Warning: package 'rgdal' was built under R version 4.0.2

## rgdal: version: 1.5-10, (SVN revision 1006)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: C:/Users/lfult/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: C:/Users/lfult/Documents/R/win-library/4.0/rgdal/proj
## Linking to sp version:1.4-2
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.

library(rgeos)       #

## rgeos version: 0.5-3, (SVN revision 634)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-2 
##  Polygon checking: TRUE

library(shiny)       #

## 
## Attaching package: 'shiny'

## The following object is masked from 'package:ggExtra':
## 
##     runExample

library(sf)          #

## Warning: package 'sf' was built under R version 4.0.2

## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1

library(sp)          #
library(spatialreg)  #

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep

library(spData)      #
library(spdep)       #

## 
## Attaching package: 'spdep'

## The following objects are masked from 'package:spatialreg':
## 
##     anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm

## The following objects are masked from 'package:elsa':
## 
##     geary, moran

library(tmap)        #
library(tmaptools)   #
library(tidyverse)   #

## -- Attaching packages ----------------------------------------------- tidyverse 1.3.0 --

## v tibble  3.0.3     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## v purrr   0.3.4

## Warning: package 'tibble' was built under R version 4.0.2

## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::combine() masks gridExtra::combine()
## x tidyr::expand()  masks Matrix::expand()
## x tidyr::extract() masks raster::extract()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## x tidyr::pack()    masks Matrix::pack()
## x dplyr::select()  masks raster::select(), MASS::select()
## x tidyr::unpack()  masks Matrix::unpack()

library(usmap)       #
library(scales)      #

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

library(psych)       #

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:scales':
## 
##     alpha, rescale

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(caret)       #

## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following object is masked from 'package:purrr':
## 
##     lift

library(DAAG)        #

## Warning: package 'DAAG' was built under R version 4.0.2

## 
## Attaching package: 'DAAG'

## The following object is masked from 'package:MASS':
## 
##     hills

library(leaps)       #
library(biscale)     #

## Warning: package 'biscale' was built under R version 4.0.2

library(ResourceSelection)

## ResourceSelection 0.3-5   2019-07-22

###############################################################
#########################Preset Data###########################
setwd("D:/covid19")
###############################################################

Read Geography

###############Shapes#####################
state=shapefile("states")
landarea=shapefile("cb_2018_us_state_500k.shp")
###############Master#####################

Read Data

masterfile=read.csv("redostate.csv")
##########################################

Read Online File

##############LiveData#####################
#deaths=read.csv("https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_deaths_usafacts.csv", fileEncoding="UTF-8-BOM")
deaths=read.csv("deaths.csv")
##########################################

Aggregate

##############Aggregate###################
deaths=aggregate(deaths[,ncol(deaths)],by=list(deaths$stateFIPS), FUN=sum, dissolve=T)
colnames(deaths)=c("GEOID", "deaths")
##########################################

############Merge#########################
master=merge(masterfile, deaths, by = "GEOID", type="left")
##########################################

Convert FIPS to Character

##############Convert FIPS to Character###
master$GEOID=as.character(master$GEOID)
for (i in 1:nrow(master)){
  if (nchar(master$GEOID[i])==1){
    master$GEOID[i]=paste0("0", master$GEOID[i])
  }
}
##########################################

Feature Engineering / Variable Removal

master$DeathRate=master$deaths/(master$Pop2020/100000)
master$deaths=master$X=master$State=NULL
master$STATE_FIPS=master$GEOID
landarea@data$STATE_FIPS=landarea@data$STATEFP

Final Merge

state=merge(state, master, 
  by="STATE_FIPS", type="left", all.x=TRUE)
state=merge(state, landarea@data,  by="STATE_FIPS", type="left", all.x=TRUE)
state@data$ALAND=as.numeric(state@data$ALAND)
state@data$PopDensity=as.numeric(state@data$Pop2020)/as.numeric(state@data$ALAND)
state@data$STATENS=state@data$AFFGEOID=state@data$GEOID=
  state@data$STUSPS=state@data$NAME=state@data$LSAD=
  state@data$ALAND=state@data$AWATER=state@data$STATEFP=NULL
state@data$GEOID.x=state@data$GEOID.y=NULL
colnames(state@data)

##  [1] "STATE_FIPS"     "STATE_NAME"     "DRAWSEQ"        "SUB_REGION"    
##  [5] "STATE_ABBR"     "Minority"       "NoPlurality"    "Wparty"        
##  [9] "Pop2020"        "UE2019"         "Poverty"        "Smoking"       
## [13] "Obesity"        "Alcohol"        "Alzheimers"     "Asthma"        
## [17] "Atrial_Fib"     "Cancer"         "Chr_Kidney"     "COPD"          
## [21] "Depression"     "Diabetes"       "Drug_Abuse"     "HIV"           
## [25] "Heart_Fail"     "Hep_B_C"        "Hyperlipidemia" "Hypertension"  
## [29] "Isch_Ht_Dis"    "Stroke"         "DeathRate"      "PopDensity"

Validate No Missing

missmap(state@data)

#Interactive Plot

library(viridis)

## Loading required package: viridisLite

## 
## Attaching package: 'viridis'

## The following object is masked from 'package:scales':
## 
##     viridis_pal

#colfunc <- colorRampPalette(c("dark green","dark red"))

qpal<-colorNumeric("Reds",state@data$DeathRate, alpha=.1)
pal <- colorFactor(c("blue", "red"), domain=c("R","D"))

state2=state
lab=read.csv("labs.csv")
lab$Long=as.numeric(lab$Long)
lab$Lat=as.numeric(lab$Lat)
lab$Party=as.factor(lab$Name)
lab$STATE_NAME=as.character(lab$State)
state2@data$FIPS2=as.numeric(state2$STATE_FIPS)
state2=merge(state2,lab,by="FIPS2", type="left")
leaflet() %>%
  #Overlays
  addPolygons(data=state2,
              stroke = FALSE, 
              fillOpacity = 1, 
              smoothFactor = 0.2, 
              color=~qpal(DeathRate),
              popup = paste("State: ", state@data$STATE_NAME, "<br>",
                            "Deaths / 100K: ", 
                            round(state@data$DeathRate,2), "<br>")) %>%
  #Base Diagrams
  addPolylines(data = state2, 
               color = "black", 
               opacity = 1, 
               weight = 2)%>%
   fitBounds(-124.8, -66.9, 24.4,49.4) %>% 
  setView(-98.6, 39.83, zoom = 4)%>%
  
  
  addCircleMarkers(data=state2,
             lng=state2@data$Long, 
             lat=state2@data$Lat, 
             label=state2@data$Party, 
             color=~pal(state2@data$Party),
             radius=1,
             opacity=1,
            labelOptions = labelOptions(noHide=T,  textsize = "15px", direction='bottom', textOnly='true'))%>%
  

  addLegend(data=state2,
            "bottomright", opacity=1, pal = qpal, 
            values = ~DeathRate,
            title = "Deaths/100K")

Scale

state@data$Pop2020=NULL
for (i in 6:ncol(state@data)){state@data[,i]=
  as.numeric(scale(state@data[,i]))}

Select Non-Correlated PCs

We find a single PC that accounts for more than 1/2 of the variance and is unidirectional. This PC represents the population health status.

a=prcomp(state@data[, 11:29], center=TRUE, scale=TRUE, retx=TRUE)
checkvar=a$sdev^2/sum(a$sdev^2)
checkvar #75%+ capture

##  [1] 0.434807070 0.200819671 0.118016482 0.097549533 0.038993931 0.028466408
##  [7] 0.016725112 0.015425404 0.010613091 0.008198213 0.006745493 0.005815484
## [13] 0.005183298 0.004035386 0.002473606 0.002075681 0.001726863 0.001455126
## [19] 0.000874147

state@data$PC1=a$x[,1]
state@data$PC2=a$x[,2]
state@data$PC3=a$x[,3]

state@data[, 11:29]=NULL

Plot Death Rate by Percent Democrat

forplot=read.csv("forsplot.csv")
forplot$Wparty=as.factor(forplot$Wparty)
ggplot(forplot, aes(x=Proportion_Democrat, y=Death_Rate))+
  geom_point(aes(x=Proportion_Democrat, y=Death_Rate,col=Wparty))+
  ylab("Deaths with COVID-19 per 100K")+
  xlab("Proportion Voting Clinton")+
  geom_smooth()+
  geom_rug()+
  geom_hline(yintercept=75, linetype="dashed",color="black")+
  scale_color_manual(values = c("blue", "red"))

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

boxplot(forplot$Death_Rate~forplot$Wparty, 
        horizontal=TRUE, col=c("blue", "red"), xlab="Death Rate", ylab="Party")

Interactions and Scaling

We generate an interaction term between health of the population and the governors’ party.

state@data$WpPC1=state@data$Wparty*state@data$PC1

Describe

library(psych)
describe(state@data[, 6:ncol(state@data)])

##             vars  n mean   sd median trimmed  mad   min  max range  skew
## Minority       1 51 0.00 1.00  -0.12   -0.09 0.99 -1.42 3.75  5.17  1.22
## NoPlurality    2 51 0.00 1.00  -0.46   -0.20 0.00 -0.46 2.14  2.60  1.65
## Wparty         3 51 0.00 1.00  -0.97    0.00 0.00 -0.97 1.01  1.98  0.04
## UE2019         4 51 0.00 1.00  -0.11   -0.09 0.93 -1.51 2.94  4.45  0.85
## Poverty        5 51 0.00 1.00  -0.02   -0.06 1.12 -1.89 2.49  4.38  0.52
## DeathRate      6 51 0.00 1.00  -0.32   -0.18 0.72 -1.03 3.38  4.41  1.64
## PopDensity     7 51 0.00 1.00  -0.20   -0.17 0.07 -0.26 6.91  7.17  6.46
## PC1            8 51 0.00 2.87   0.99    0.17 3.18 -5.94 4.61 10.56 -0.45
## PC2            9 51 0.00 1.95   0.01   -0.16 1.92 -3.30 8.18 11.48  1.36
## PC3           10 51 0.00 1.50  -0.03   -0.08 1.89 -2.38 4.34  6.72  0.50
## WpPC1         11 51 0.08 2.84  -0.07    0.06 3.31 -5.33 5.77 11.10  0.09
##             kurtosis   se
## Minority        2.32 0.14
## NoPlurality     0.73 0.14
## Wparty         -2.04 0.14
## UE2019          0.37 0.14
## Poverty        -0.35 0.14
## DeathRate       2.44 0.14
## PopDensity     41.63 0.14
## PC1            -1.07 0.40
## PC2             3.95 0.27
## PC3            -0.23 0.21
## WpPC1          -1.10 0.40

LM Data

state@data$Pop2020=NULL

bs=lm(DeathRate ~ .,data=state@data[,6:ncol(state@data)])
summary(bs)

## 
## Call:
## lm(formula = DeathRate ~ ., data = state@data[, 6:ncol(state@data)])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.19979 -0.38047 -0.05135  0.29777  1.80447 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.006926   0.092002  -0.075 0.940363    
## Minority    -0.231005   0.130014  -1.777 0.083212 .  
## NoPlurality  0.048641   0.099191   0.490 0.626543    
## Wparty      -0.056386   0.109244  -0.516 0.608591    
## UE2019       0.187787   0.135756   1.383 0.174257    
## Poverty      0.198107   0.167246   1.185 0.243198    
## PopDensity  -0.258286   0.160657  -1.608 0.115769    
## PC1          0.200730   0.043536   4.611 4.06e-05 ***
## PC2          0.388402   0.098644   3.937 0.000321 ***
## PC3         -0.213214   0.094105  -2.266 0.028954 *  
## WpPC1        0.084256   0.036778   2.291 0.027314 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6567 on 40 degrees of freedom
## Multiple R-squared:  0.655,  Adjusted R-squared:  0.5688 
## F-statistic: 7.595 on 10 and 40 DF,  p-value: 1.235e-06

state@data$res0 <- residuals(bs)  
state@data$sd_breaks0=scale(state@data$res0)[,1] 

my_breaks <- c(-14,-3,-2,-1,1,2,3,14)

tm_shape(state, bbox=tmaptools::bb(xlim=c(-14, -5), ylim=c(2, 9),width=5, ext=5, relative = TRUE)) + tm_fill("sd_breaks0", title = "Residuals", style = "fixed", breaks = my_breaks, palette = "-RdBu") +  tm_borders(alpha = 0.1) +  tm_layout(main.title = "Residuals", main.title.size = 1,
legend.position = c("right", "bottom"), legend.title.size = 0.8)

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

## Variable(s) "sd_breaks0" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

par(mfrow=c(2,1))
hist(bs$residuals)
plot(scale(state@data$DeathRate)~scale(bs$fitted.values))
abline(lm(scale(state@data$DeathRate)~scale(bs$fitted.values)))

summary(bs)

## 
## Call:
## lm(formula = DeathRate ~ ., data = state@data[, 6:ncol(state@data)])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.19979 -0.38047 -0.05135  0.29777  1.80447 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.006926   0.092002  -0.075 0.940363    
## Minority    -0.231005   0.130014  -1.777 0.083212 .  
## NoPlurality  0.048641   0.099191   0.490 0.626543    
## Wparty      -0.056386   0.109244  -0.516 0.608591    
## UE2019       0.187787   0.135756   1.383 0.174257    
## Poverty      0.198107   0.167246   1.185 0.243198    
## PopDensity  -0.258286   0.160657  -1.608 0.115769    
## PC1          0.200730   0.043536   4.611 4.06e-05 ***
## PC2          0.388402   0.098644   3.937 0.000321 ***
## PC3         -0.213214   0.094105  -2.266 0.028954 *  
## WpPC1        0.084256   0.036778   2.291 0.027314 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6567 on 40 degrees of freedom
## Multiple R-squared:  0.655,  Adjusted R-squared:  0.5688 
## F-statistic: 7.595 on 10 and 40 DF,  p-value: 1.235e-06

vif(bs)

##    Minority NoPlurality      Wparty      UE2019     Poverty  PopDensity 
##      1.9600      1.1408      1.3838      2.1369      3.2433      2.9927 
##         PC1         PC2         PC3       WpPC1 
##      1.8156      4.3050      2.3025      1.2633

Moran’s I and LaGrangian Multiplier Diagnostics

list.queen<-poly2nb(state, queen=TRUE)
W=rwm=nb2listw(list.queen, style="W", zero.policy=TRUE)
test1=lm.morantest(bs, rwm, alternative="two.sided", zero.policy=TRUE)
temptest=lm.LMtests(bs, rwm, test=c("LMerr","LMlag","RLMerr","RLMlag","SARMA"), 
              zero.policy=TRUE)
m=test1$estimate[1]
n=test1$p.value
a=temptest$LMerr$p.value
b=temptest$LMlag$p.value
y=temptest$RLMerr$p.value
z=temptest$RLMlag$p.value
ans=c(m,n,a,b,y,z)
names(ans)=c("Moran's I","p-value","LMerror", "LMLag", "RLMerr", "RLMlag")
ans

##  Moran's I    p-value    LMerror      LMLag     RLMerr     RLMlag 
## 0.07372044 0.13500090 0.44821902 0.09784855 0.54460509 0.11154570

Outliers Omitted

###############Shapes#####################
state=shapefile("states")
landarea=shapefile("cb_2018_us_state_500k.shp")
###############Master#####################
deaths=read.csv("withoutoutliers.csv")
##########################################

##############Aggregate###################
deaths=aggregate(deaths[,ncol(deaths)],by=list(deaths$STATE_FIPS), FUN=sum, dissolve=T)
colnames(deaths)=c("GEOID", "deaths")
##########################################

############Merge#########################
master=merge(masterfile, deaths, by = "GEOID", type="left")
##########################################


##############Convert FIPS to Character###
master$GEOID=as.character(master$GEOID)
for (i in 1:nrow(master)){
  if (nchar(master$GEOID[i])==1){
    master$GEOID[i]=paste0("0", master$GEOID[i])
  }
}
##########################################

master$DeathRate=master$deaths/(master$Pop2020/100000)
master$deaths=master$X=master$State=NULL
master$STATE_FIPS=master$GEOID
landarea@data$STATE_FIPS=landarea@data$STATEFP

state=merge(state, master, 
  by="STATE_FIPS", type="left", all.x=TRUE)
state=merge(state, landarea@data,  by="STATE_FIPS", type="left", all.x=TRUE)
state@data$ALAND=as.numeric(state@data$ALAND)
state@data$PopDensity=as.numeric(state@data$Pop2020)/as.numeric(state@data$ALAND)
state@data$STATENS=state@data$AFFGEOID=state@data$GEOID=
  state@data$STUSPS=state@data$NAME=state@data$LSAD=
  state@data$ALAND=state@data$AWATER=state@data$STATEFP=NULL
state@data$GEOID.x=state@data$GEOID.y=NULL
colnames(state@data)

##  [1] "STATE_FIPS"     "STATE_NAME"     "DRAWSEQ"        "SUB_REGION"    
##  [5] "STATE_ABBR"     "Minority"       "NoPlurality"    "Wparty"        
##  [9] "Pop2020"        "UE2019"         "Poverty"        "Smoking"       
## [13] "Obesity"        "Alcohol"        "Alzheimers"     "Asthma"        
## [17] "Atrial_Fib"     "Cancer"         "Chr_Kidney"     "COPD"          
## [21] "Depression"     "Diabetes"       "Drug_Abuse"     "HIV"           
## [25] "Heart_Fail"     "Hep_B_C"        "Hyperlipidemia" "Hypertension"  
## [29] "Isch_Ht_Dis"    "Stroke"         "DeathRate"      "PopDensity"

state@data$Pop2020=NULL
for (i in 6:ncol(state@data)){state@data[,i]=
  as.numeric(scale(state@data[,i]))}

library(spatialEco)

## Warning: package 'spatialEco' was built under R version 4.0.2

## 
## Attaching package: 'spatialEco'

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:spData':
## 
##     elev

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following object is masked from 'package:grid':
## 
##     explode

## The following object is masked from 'package:elsa':
## 
##     correlogram

## The following object is masked from 'package:raster':
## 
##     shift

state=sp.na.omit((state))

## Deleting rows: 15171921

a=prcomp(state@data[, 11:29], center=TRUE, scale=TRUE, retx=TRUE)
checkvar=a$sdev^2/sum(a$sdev^2)
checkvar

##  [1] 0.445671698 0.200331078 0.114981838 0.084651232 0.041118619 0.032819976
##  [7] 0.017356539 0.016452778 0.010497278 0.007536200 0.006816099 0.005742985
## [13] 0.004324818 0.004088729 0.002547805 0.001957423 0.001334424 0.001106613
## [19] 0.000663867

state@data$PC1=a$x[,1]
state@data$PC2=a$x[,2]
state@data$PC3=a$x[,3]

state@data$WpPC1=state@data$Wparty*state@data$PC1
#tate@data$WpPC2=state@data$Wparty*state@data$PC2
#state@data$WpPC3=state@data$Wparty*state@data$PC3
state@data[, 11:29]=NULL

mylm2=lm(DeathRate ~ .,zero.policy=TRUE, data=state@data[6:ncol(state@data)])

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'zero.policy' will be disregarded

summary(mylm2)

## 
## Call:
## lm(formula = DeathRate ~ ., data = state@data[6:ncol(state@data)], 
##     zero.policy = TRUE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.50364 -0.50346 -0.08804  0.26056  2.47239 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.00676    0.13769   0.049   0.9611  
## Minority     0.42075    0.22493   1.871   0.0695 .
## NoPlurality  0.07817    0.15140   0.516   0.6088  
## Wparty      -0.26009    0.17083  -1.522   0.1366  
## UE2019       0.15871    0.20189   0.786   0.4369  
## Poverty     -0.27032    0.24294  -1.113   0.2732  
## PopDensity  -0.01297    0.25244  -0.051   0.9593  
## PC1         -0.07417    0.06647  -1.116   0.2719  
## PC2          0.00493    0.17228   0.029   0.9773  
## PC3          0.14498    0.12746   1.137   0.2629  
## WpPC1        0.05341    0.05427   0.984   0.3316  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9428 on 36 degrees of freedom
## Multiple R-squared:  0.3044, Adjusted R-squared:  0.1112 
## F-statistic: 1.575 on 10 and 36 DF,  p-value: 0.1539

vif(mylm2)

##    Minority NoPlurality      Wparty      UE2019     Poverty  PopDensity 
##      2.6183      1.1863      1.5104      2.1093      3.0544      3.2979 
##         PC1         PC2         PC3       WpPC1 
##      1.9360      5.8470      1.8367      1.2367

Flu Analysis

myflu=read.csv("2018flu.csv")
t.test(myflu$flu~myflu$gov, var.equal=TRUE)

## 
##  Two Sample t-test
## 
## data:  myflu$flu by myflu$gov
## t = 1.0344, df = 49, p-value = 0.306
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3397552  1.0605044
## sample estimates:
## mean in group 0 mean in group 1 
##        3.973819        3.613445

var.test(myflu$flu~myflu$gov)

## 
##  F test to compare two variances
## 
## data:  myflu$flu by myflu$gov
## F = 1.4319, num df = 25, denom df = 24, p-value = 0.3825
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6343314 3.2107438
## sample estimates:
## ratio of variances 
##           1.431947

t.test(state@data$DeathRate~state@data$Wparty)

## 
##  Welch Two Sample t-test
## 
## data:  state@data$DeathRate by state@data$Wparty
## t = 0.65427, df = 44.984, p-value = 0.5163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3971477  0.7793133
## sample estimates:
## mean in group -0.928049888607655   mean in group 1.05460214614506 
##                       0.08944301                      -0.10163978

Initial Descriptives and Testing

myflu=read.csv("flupneumonia.csv",stringsAsFactors =TRUE)
describe(myflu)

##          vars   n  mean    sd median trimmed   mad min  max range skew kurtosis
## STATE*      1 250 25.50 14.46  25.50   25.50 18.53   1 50.0  49.0 0.00    -1.22
## Year*       2 250  3.00  1.42   3.00    3.00  1.48   1  5.0   4.0 0.00    -1.31
## Rate        3 250 15.10  3.76  14.65   14.86  3.34   7 29.6  22.6 0.68     0.80
## Governor    4 250  0.48  0.50   0.00    0.48  0.00   0  1.0   1.0 0.08    -2.00
##            se
## STATE*   0.91
## Year*    0.09
## Rate     0.24
## Governor 0.03

attach(myflu)


boxplot(Rate~Governor, data=myflu, notch=TRUE, horizontal=TRUE, 
        col=c("blue","red"), main="Flu / Pneumonia Deaths / 100K, Blue=Democrat, Red=Republican", xlab="Deaths / 100K", ylab="Governor Party, 1 = Democrat")

myaov=aov(Rate~Governor+Year, myflu)
summary(myaov)

##              Df Sum Sq Mean Sq F value Pr(>F)  
## Governor      1     21   21.14   1.531 0.2171  
## Year          4    132   32.88   2.382 0.0522 .
## Residuals   244   3368   13.80                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

State Analysis

Doc Larry

as of 8/31/2020