Exploratory Data Analysis

Libraries

print("Loading R Libraries...")
## [1] "Loading R Libraries..."
#############################################################################
knitr::opts_chunk$set(echo = TRUE)  
library(Amelia)
library(car)
library(corrplot)
library(ggcorrplot)
library(ggplot2)
library(kableExtra)
library(knitr)
library(leaflet)  
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 4.0.3
library(maptools)    
library(MASS)
library(psych)
library(QuantPsyc)
library(raster)   
library(reticulate)
library(sf)          
library(shiny)       
library(sp)          
library(spatialEco)
library(spatialreg)  
library(spData)      
library(spdep)       
library(tmap)        
library(tmaptools)   
library(totalcensus)
library(rmapshaper)
#############################################################################

Load County Data

print("Loading data for county analysis...")
## [1] "Loading data for county analysis..."
#############################################################################
h=read.csv("d:/heart/updated thf4.csv")
counties0<- shapefile("d:/heart/cb_2018_us_county_500k.shp")
counties0=ms_simplify(counties0, method="vis", keep_shapes=TRUE )
## Warning in sp::proj4string(sp): CRS object has comment, which is lost in output
geodata=read.csv("d:/heart/geoinputnew.csv")
geodata$GEOID=as.character(geodata$GEOID)
for (i in 1:nrow(geodata)){
  if (nchar(geodata$GEOID[i])<5){
    geodata$GEOID[i]=paste0("0", geodata$GEOID[i])}
}

Missing

#Merge and delete counties without data

counties=merge(counties0, geodata, by="GEOID", type="left", all.x=TRUE)
counties=sp.na.omit((counties))
## Deleting rows: 13272829303436395661637576788081838687889092949596102103109110125127130147148153160166170172179189204206209210212214216231237243255267269270279280283284286291292297299300303304305306307308309321322323324325326327328331332334335337340343348360362370372383387393394396402404414416421427431435451464467468474482483484490494502503504505506507508509510511512516517562563566569570571574575578583584586592595596597602605606607608609610611621627629631634639640641643650657659666667675676679681683685689690692694695697700702704705723733740741744749750751752755761762765766772784786805808809811814816817818819821824827829830832844846851856867868874875877882883884899903904916924927928932935940941948951957961962966968969985990994998100210031011101410151016101910231027103410361039104010451046106210651068106910701073107610811087109310961097111111161118111911231129113511381145114911501151115511561161117411751193119611991200120112021203120612091210121812201223123112351238124212431248125012531255125612571265127712781283128512871288129112931294130013021303130413051317133913521354135513561363137213781384139413951396139714021405141214151430143114321439144114421443144414481452145814591460146114631465146714681470147114751476147714801482148514871488149114961497150115061507151015141515152015221526152715281530153415371538154115421544155015521553155415621569157215741580158415871589159215951601160316041605161316141615161616171618161916251628164416481649165416641665166816741678168116831686168916911697170017151720172617321733173517411749175017511763176417651766177217771783178418051808181318181826182918321841184318441846184718511857186018641867187018711872187318741880188118821885188618911896189919091910191319161918191919231924192519271932193319511954195619581959196419771980198219861989199720082020202220242038204120502052205420582061206220632064206820812082209821122117212121222123213921452150215621582164216921752181218221852186218721932195219622032204220622172218222222282232224122432247224922502256225722582259226122642267227522792290229222952296230223122313231923232328233323352343234523572361236223702371237423752379238423912400241724192425242624272429243024532457246024622466247024722477247824792485248925042523252525292532253325352536253925412542254625622563256825732574257625772578257925882589259225982605260626072609261626172623262526302632263526422649265326542656265926602666266726692676267826792680268326842689270127092711271627182719272327252737274027412743274827622763276627672780278627872788279228022803280828132816281828232824283028312837283828442849285128522860286428682869287128762883288528902891289228932895289728992901290629072911291529182920292129252931293229342943294429482952295329572964296629722980298229852986298729902998300330063009301330163020302130413053305430563061306730693071307530783079308130883091310131023103311331253138315031513153316131653167316931713181318431923193319531983205320732113212321332223229
counties@data$SumAdmits=NULL
#############################################################################

missmap(counties@data)

Environment Set Up

print("Loading Python Environment...")
## [1] "Loading Python Environment..."
#############################################################################
Sys.setenv('RETICULATE_PYTHON'='C://ProgramData//Anaconda3//')
use_python('C://ProgramData//Anaconda3//python.exe')
use_condaenv("base") 
py$RANDOM_SEED = 1234
matplotlib=import('matplotlib.pyplot')
options(scipen=9); options(digits=3)
#############################################################################

Prepare Functions

print("Load Functions...")
## [1] "Load Functions..."
################################PRINT########################################
myprint=function(x,nm) {
  x%>%kbl(col.names = nm)%>%kable_classic(full_width = F, html_font = "Cambria")}
#############################################################################

##############################COREELATION####################################
corfunction=function(d){
  mycorr=cor(d[, 1:ncol(d)]); p.mat=cor_pmat(d[,1:ncol(d)])
  myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",colors=c("red", "white","green"),tl.cex = 8, tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat, insig="pch", pch=4)
  print(myplot)}
#############################################################################

#################################SCALE#######################################
myscale=function(x) {(x-min(x))/(max(x)-min(x))}
#############################################################################

Correlations

corfunction(counties@data[, 12:29])

Principal Components

#############################################################################
a=prcomp(counties@data[, 18:24], center=TRUE, scale=TRUE, retx=TRUE)
checkvar=a$sdev^2/sum(a$sdev^2)
names(checkvar)=c("PC1","PC2", "PC3", "PC4", "PC5", "PC6", "PC7")
counties@data$PC1=a$x[,1]
counties@data[, 18:24]=NULL
#############################################################################

checkvar%>%kbl(col.names="% Variance")%>%kable_classic(full_width = F, html_font = "Cambria") 
% Variance
PC1 0.974
PC2 0.012
PC3 0.005
PC4 0.004
PC5 0.003
PC6 0.001
PC7 0.000
corfunction(counties@data[, 12:ncol(counties@data)])

Min Max Scaling

print("Min-Max Scaling...")
## [1] "Min-Max Scaling..."
#############################################################################
temp=counties@data$AdmitRate
for (i in 12:ncol(counties@data)){
  counties@data[,i]=myscale(counties@data[,i])}
counties@data$AdmitRate2=temp
#############################################################################

Models

Linear

#############################################################################
fit1=lm(AdmitRate~., data=counties@data[13:23])
counties@data$m0=residuals(fit1)
#############################################################################

summary(fit1)
## 
## Call:
## lm(formula = AdmitRate ~ ., data = counties@data[13:23])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.1523 -0.0281 -0.0102  0.0147  0.9092 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.11521    0.04364   -2.64   0.0083 ** 
## MeanIncome     0.13202    0.05053    2.61   0.0090 ** 
## MeanProfitMgn  0.12637    0.05549    2.28   0.0229 *  
## MeanCash      -0.09749    0.05063   -1.93   0.0543 .  
## MeanEquity     0.14395    0.04555    3.16   0.0016 ** 
## MeanMedicare   0.04461    0.00824    5.41  6.8e-08 ***
## MeanMedicaid   0.00738    0.01232    0.60   0.5491    
## MeanNonProfit  0.01173    0.00260    4.52  6.6e-06 ***
## MeanAffil      0.05355    0.00686    7.80  8.9e-15 ***
## MeanSTAC       0.04117    0.00300   13.72  < 2e-16 ***
## PC1            0.05083    0.02938    1.73   0.0837 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0575 on 2420 degrees of freedom
## Multiple R-squared:  0.15,   Adjusted R-squared:  0.146 
## F-statistic: 42.7 on 10 and 2420 DF,  p-value: <2e-16
print("####################################################################")
## [1] "####################################################################"
anova(fit1)
## Analysis of Variance Table
## 
## Response: AdmitRate
##                 Df Sum Sq Mean Sq F value         Pr(>F)    
## MeanIncome       1   0.14   0.143   43.17 0.000000000061 ***
## MeanProfitMgn    1   0.08   0.076   22.90 0.000001811236 ***
## MeanCash         1   0.01   0.009    2.86         0.0912 .  
## MeanEquity       1   0.13   0.130   39.25 0.000000000439 ***
## MeanMedicare     1   0.03   0.034   10.28         0.0014 ** 
## MeanMedicaid     1   0.00   0.000    0.06         0.8136    
## MeanNonProfit    1   0.07   0.073   21.98 0.000002909443 ***
## MeanAffil        1   0.31   0.305   92.23        < 2e-16 ***
## MeanSTAC         1   0.63   0.633  191.30        < 2e-16 ***
## PC1              1   0.01   0.010    2.99         0.0837 .  
## Residuals     2420   8.00   0.003                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print("####################################################################")
## [1] "####################################################################"
vif(fit1)
##    MeanIncome MeanProfitMgn      MeanCash    MeanEquity  MeanMedicare 
##          1.10          1.03          1.20          1.26          1.58 
##  MeanMedicaid MeanNonProfit     MeanAffil      MeanSTAC           PC1 
##          1.17          1.04          1.17          1.40          1.23

Spatial Models

#############################################################################
#We coerce the sf object into a new sp object
ncovr_sp <- as(counties, "Spatial")
list.queen<-poly2nb(counties, queen=TRUE)
list.rook<-poly2nb(counties, queen=FALSE)
W=rwm=nb2listw(list.queen, style="W", zero.policy=TRUE)
W2=rwm2=nb2listw(list.rook, style="W", zero.policy=TRUE)
m1=lm.morantest(fit1, rwm, alternative="two.sided", zero.policy=TRUE)
m2=lm.LMtests(fit1, rwm, test = c("LMerr","LMlag","RLMerr","RLMlag","SARMA"), zero.policy=TRUE) 
counties@data$STATEFP=paste0("S",counties@data$STATEFP)
write.csv(counties@data[, c(13:23, 2)], "d:/heart/countiespython.csv", row.names = FALSE)
#############################################################################

m1
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = AdmitRate ~ ., data = counties@data[13:23])
## weights: rwm
## 
## Moran I statistic standard deviate = 2, p-value = 0.1
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I      Expectation         Variance 
##         0.021527        -0.001022         0.000187
print("####################################################################")
## [1] "####################################################################"
m2
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = AdmitRate ~ ., data = counties@data[13:23])
## weights: rwm
## 
## LMerr = 3, df = 1, p-value = 0.1
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = AdmitRate ~ ., data = counties@data[13:23])
## weights: rwm
## 
## LMlag = 2, df = 1, p-value = 0.2
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = AdmitRate ~ ., data = counties@data[13:23])
## weights: rwm
## 
## RLMerr = 61, df = 1, p-value = 6e-15
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = AdmitRate ~ ., data = counties@data[13:23])
## weights: rwm
## 
## RLMlag = 60, df = 1, p-value = 1e-14
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = AdmitRate ~ ., data = counties@data[13:23])
## weights: rwm
## 
## SARMA = 62, df = 2, p-value = 3e-14
print("####################################################################")
## [1] "####################################################################"

Queen FOC

#############################################################################
#Queen

mylag1=stsls(AdmitRate ~ ., zero.policy=TRUE, data = counties@data[13:23], W)
## Warning: Function stsls moved to the spatialreg package
counties@data$queen=mylag1$res
moran1=moran.mc(counties@data$queen, W, 50, zero.policy=TRUE) #LISA
#mylisa1=lisa(counties, d1=0, d2=2000, statistic="I", zcol="AdmitRate")
part1=as.data.frame(summary(mylag1)$Coef)
myr1=1-sum(mylag1$residuals^2 / (var(counties@data$AdmitRate)*(nrow(counties@data)-1)))
#############################################################################

summary(mylag1)
## 
## Call:spatialreg::stsls(formula = formula, data = data, listw = listw, 
##     zero.policy = zero.policy, na.action = na.action, robust = robust, 
##     HC = HC, legacy = legacy, W2X = W2X)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1439575 -0.0304633 -0.0095213  0.0166718  0.8816833 
## 
## Coefficients: 
##                 Estimate Std. Error t value  Pr(>|t|)
## Rho           -0.4968078  0.0790875 -6.2817 3.348e-10
## (Intercept)   -0.1005310  0.0445953 -2.2543  0.024177
## MeanIncome     0.1356061  0.0515700  2.6296  0.008550
## MeanProfitMgn  0.1400301  0.0566759  2.4707  0.013484
## MeanCash      -0.0948724  0.0516759 -1.8359  0.066371
## MeanEquity     0.1426501  0.0464818  3.0689  0.002148
## MeanMedicare   0.0465374  0.0084175  5.5287 3.227e-08
## MeanMedicaid  -0.0045307  0.0127110 -0.3564  0.721510
## MeanNonProfit  0.0133506  0.0026635  5.0124 5.375e-07
## MeanAffil      0.0540647  0.0070041  7.7190 1.177e-14
## MeanSTAC       0.0437893  0.0030917 14.1635 < 2.2e-16
## PC1            0.0410687  0.0300187  1.3681  0.171279
## 
## Residual variance (sigma squared): 0.00344, (sigma: 0.0587)
moran1
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  counties@data$queen 
## weights: W  
## number of simulations + 1: 51 
## 
## statistic = 0.2, observed rank = 51, p-value = 0.02
## alternative hypothesis: greater

Rook FOC

#############################################################################
mylag2=stsls(AdmitRate~.,zero.policy=TRUE, data=counties@data[13:23], W2)
## Warning: Function stsls moved to the spatialreg package
counties@data$rook=mylag2$res
moran2=moran.mc(counties@data$rook, W2, 50, zero.policy=TRUE) #LISA
#mylisa2=lisa(counties, d1=0, d2=2000, statistic="I", zcol="AdmitRate")
part2=as.data.frame(summary(mylag2)$Coef)
#############################################################################

summary(mylag2)
## 
## Call:spatialreg::stsls(formula = formula, data = data, listw = listw, 
##     zero.policy = zero.policy, na.action = na.action, robust = robust, 
##     HC = HC, legacy = legacy, W2X = W2X)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1448378 -0.0306670 -0.0093664  0.0168486  0.8811411 
## 
## Coefficients: 
##                 Estimate Std. Error t value  Pr(>|t|)
## Rho           -0.5087675  0.0781682 -6.5086 7.584e-11
## (Intercept)   -0.0992554  0.0446623 -2.2224  0.026259
## MeanIncome     0.1339721  0.0516383  2.5944  0.009475
## MeanProfitMgn  0.1400778  0.0567507  2.4683  0.013576
## MeanCash      -0.0939002  0.0517478 -1.8146  0.069590
## MeanEquity     0.1434405  0.0465450  3.0818  0.002058
## MeanMedicare   0.0467089  0.0084296  5.5411 3.006e-08
## MeanMedicaid  -0.0046416  0.0127208 -0.3649  0.715198
## MeanNonProfit  0.0132299  0.0026646  4.9651 6.868e-07
## MeanAffil      0.0539419  0.0070135  7.6912 1.465e-14
## MeanSTAC       0.0437442  0.0030930 14.1428 < 2.2e-16
## PC1            0.0413981  0.0300544  1.3774  0.168377
## 
## Residual variance (sigma squared): 0.00345, (sigma: 0.0588)
moran2
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  counties@data$rook 
## weights: W2  
## number of simulations + 1: 51 
## 
## statistic = 0.2, observed rank = 51, p-value = 0.02
## alternative hypothesis: greater

R^2 OLS/Queen/Rook

#############################################################################
myr2=1-sum(mylag2$residuals^2 / (var(counties@data$AdmitRate)*(nrow(counties@data)-1)))
mydf=rbind(summary(fit1)$r.squared, myr1, myr2)
row.names(mydf)=c("OLS", "Queen", "Rook")
#############################################################################

myprint(mydf, "R2")
R2
OLS 0.150
Queen 0.115
Rook 0.113

Marginal Models

print("Running submodels...")
## [1] "Running submodels..."
#############################################################################
counties@data$m1=residuals(lm(AdmitRate~MeanIncome, data=counties@data))
counties@data$m2=residuals(lm(AdmitRate~MeanProfitMgn, data=counties@data))
counties@data$m3=residuals(lm(AdmitRate~MeanCash, data=counties@data))
counties@data$m4=residuals(lm(AdmitRate~MeanEquity, data=counties@data))
counties@data$m5=residuals(lm(AdmitRate~MeanMedicare, data=counties@data))
counties@data$m6=residuals(lm(AdmitRate~MeanMedicaid, data=counties@data))
counties@data$m7=residuals(lm(AdmitRate~MeanNonProfit, data=counties@data))
counties@data$m8=residuals(lm(AdmitRate~MeanAffil, data=counties@data))
counties@data$m9=residuals(lm(AdmitRate~MeanSTAC, data=counties@data))
counties@data$m10=residuals(lm(AdmitRate~PC1, data=counties@data))
#############################################################################

Coefficients for County Analysis

#############################################################################
#LM Coefficients for Table 
part0=as.data.frame(summary(fit1)$coefficients)
z=rep(0, 4)
part0=as.data.frame(rbind(z, part0))
rownames(part0)[1]="Rho"

part1=as.data.frame(summary(mylag1)$Coef)
part2=as.data.frame(summary(mylag2)$Coef)
mydf=as.data.frame(cbind(round(part0,3), round(part1,3), round(part2,3)))
colnames(mydf)=c("Linear Model", "SE", "t", "p", 
                  "Queen Est", "SE", "t", "p", 
                 "Rook Est", "SE", "t", "p")

temp=as.data.frame(counties@data[, 13:23])
#############################################################################

mydf%>% kbl()%>%kable_classic(full_width = F, html_font = "Cambria")
Linear Model SE t p Queen Est SE t p Rook Est SE t p
Rho 0.000 0.000 0.000 0.000 -0.497 0.079 -6.282 0.000 -0.509 0.078 -6.509 0.000
(Intercept) -0.115 0.044 -2.640 0.008 -0.101 0.045 -2.254 0.024 -0.099 0.045 -2.222 0.026
MeanIncome 0.132 0.051 2.613 0.009 0.136 0.052 2.630 0.009 0.134 0.052 2.594 0.009
MeanProfitMgn 0.126 0.055 2.277 0.023 0.140 0.057 2.471 0.013 0.140 0.057 2.468 0.014
MeanCash -0.097 0.051 -1.925 0.054 -0.095 0.052 -1.836 0.066 -0.094 0.052 -1.815 0.070
MeanEquity 0.144 0.046 3.161 0.002 0.143 0.046 3.069 0.002 0.143 0.047 3.082 0.002
MeanMedicare 0.045 0.008 5.413 0.000 0.047 0.008 5.529 0.000 0.047 0.008 5.541 0.000
MeanMedicaid 0.007 0.012 0.599 0.549 -0.005 0.013 -0.356 0.722 -0.005 0.013 -0.365 0.715
MeanNonProfit 0.012 0.003 4.516 0.000 0.013 0.003 5.012 0.000 0.013 0.003 4.965 0.000
MeanAffil 0.054 0.007 7.803 0.000 0.054 0.007 7.719 0.000 0.054 0.007 7.691 0.000
MeanSTAC 0.041 0.003 13.716 0.000 0.044 0.003 14.163 0.000 0.044 0.003 14.143 0.000
PC1 0.051 0.029 1.730 0.084 0.041 0.030 1.368 0.171 0.041 0.030 1.377 0.168

Interactive Map

#############################################################################

counties@data$ABB=convert_fips_to_names(counties@data$STATEFP)
qpal<-colorBin("Reds",c(-4, -3,-2,-1,0,1,2,3,4),bins=9,pretty=TRUE, alpha=.1)
qpal2<-colorBin("Blues",c(0,150, 300, 450,600,750,900,1050,1200),bins=9,pretty=TRUE, alpha=.1)

leaf=leaflet() %>%
  #Overlays
  addTiles(group = "OSM (default)") %>%
  #Panes
   addMapPane("borders", zIndex = 410) %>%

  #Base Diagrams

  addPolylines(data = counties, 
               color = "black", 
               opacity = 1, 
               weight = 1, group="Borders", options = pathOptions(pane = "borders"))%>%
  fitBounds(-124.8, -66.9, 24.4,49.4) %>% 

  setView(-98.6, 39.83, zoom = 4)%>%
  addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal2(1000*counties@data$AdmitRate2/3),
              popup = paste("County: ", counties@data$NAME, "<br>", 
                            "Admit Rate/1K: ", 1000*counties@data$AdmitRate2/3), 
              group="Admission Rate") %>%
   addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m0),
              popup = paste("County: ", counties@data$NAME, "<br>", "OLS Residuals: ", counties@data$m0), group="OLS Residuals") %>%
  addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(queen),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "Queen Residuals: ", counties@data$queen), group="Queen Residuals") %>%
    addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(rook),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "Rook Residuals: ", counties@data$rook), group="Rook Residuals") %>%
      addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m1),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "Mean Income Res: ", counties@data$MeanIncome), group="Income") %>%
      addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m2),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "Mean Profit Mgn Res: ", counties@data$MeanProfitMgn), group="Profit Margin") %>%
      addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m3),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "Mean Cash on Hand Res: ", counties@data$MeanCash), group="Cash on Hand") %>%
      addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m4),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "Mean Equity Res: ", counties@data$MeanEquity), group="Equity") %>%
        addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m5),popup = paste("County: ", counties@data$NAME, "<br>", 
              "% Medicare Res: ", round(counties@data$MeanMedicare),3), group="% Medicare") %>%
        addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m6),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "% Medicare Res: ", counties@data$MeanMedicaid), group="% Medicaid") %>%
      addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m7),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "% Non-profit: ", counties@data$MeanProfitMgn), group="% Non-Profit") %>%
      addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m8),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "% Major Med Sch Res: ", counties@data$MeanAffil), group="Med School Aff.") %>%
        addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m9),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "STAC: ", counties@data$MeanSTAC)
              , group="% STAC") %>%
        addPolygons(data=counties,stroke = FALSE, 
              fillOpacity = 1, smoothFactor = 0.2, 
              color=~qpal(m10),
              popup = paste("County: ", counties@data$NAME, "<br>", 
              "Workload Res: ", counties@data$PC1), group="Workload") %>%
  
      addLegend(data=counties, 
            "bottomleft", opacity=1, pal = qpal, 
            values = ~m10,
            title = "Residuals")%>%
      addLegend(data=counties, group="Admission Rate",
            "topleft", opacity=1, pal = qpal2, 
            values = ~AdmitRate2*1000/3,
            title = "Admit Rate")%>%
  
  addLayersControl(
    baseGroups = c("Admission Rate","OLS Residuals", "Queen Residuals", "Rook Residuals", "Income", "Profit Margin", "Cash on Hand","Equity",  "% Medicare", "% Medicaid", "%STAC", "Workload"),
    overlayGroups = c("Borders"),
    options = layersControlOptions(collapsed = TRUE)
  )
  
leaf

ML Models

Tree Models


#############################################################################
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import gc
import sklearn.linear_model   #Need the linear model components
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor as RFR # Random Forest package
from sklearn.ensemble import ExtraTreesRegressor as ETR # Extra Trees package
from sklearn.ensemble import BaggingRegressor as BR 
from sklearn.model_selection import train_test_split as tts
from xgboost import XGBRegressor as XGB
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet 
from sklearn.linear_model import LinearRegression as OLS
from sklearn.metrics import mean_squared_error, r2_score # evaluation metrics

mydata=r.temp

xtrain,xtest,ytrain, ytest=tts(mydata.loc[:,'MeanIncome':'PC1'],mydata.loc[:,'AdmitRate'], test_size=.20, random_state=42 )

t2=RFR(n_estimators = 10,max_depth=40, criterion='mse',n_jobs = -1, random_state = 1234)
t3=ETR(n_estimators = 90,max_depth=40, criterion='mse', n_jobs = -1, random_state = 1234)
t4=XGB(n_estimators=100, n_jobs=-1,learning_rate=0.1, max_depth=1,objective='reg:squarederror', random_state=1234) 
t5=BR(n_estimators=500, random_state=1234)

def myf(x,name):
    temp=x.fit(xtrain,ytrain)
    print(name,": ", round(temp.score(xtest, ytest),4))
#############################################################################
    
myf(t2, 'RFR')
## RFR :  -0.007
myf(t3, 'ETR')
## ETR :  0.2551
myf(t4, 'XGB')
## XGB :  0.3367
myf(t5, 'BR')
## BR :  0.1682

Importance Metrics



#############################################################################
model=XGB(n_estimators=100, n_jobs=-1,learning_rate=0.1, max_depth=1,objective='reg:squarederror', random_state=1234)  
modelfit=model.fit(xtrain, ytrain)
mypred=model.predict(xtest)
mynames=mydata.columns
a=pd.DataFrame(modelfit.feature_importances_)
a.index, a.columns=mynames[1:len(mynames)],["Importance"]
#############################################################################

print(modelfit.score(xtest,ytest))
## 0.3366680308752463
tempdata={'Variable':mynames[1:len(mynames)],'Importance':modelfit.feature_importances_}

Importance Graph

#############################################################################
imp=reticulate::py$a
mynames=row.names(imp)
row.names(imp)=NULL
testdf=data.frame(Name=mynames, Importance=imp)
testdf$Name=as.factor(testdf$Name)
testdf <- testdf[order(testdf$Importance, decreasing=TRUE), ]

#############################################################################


ggplot(testdf, aes(x=reorder(Name,-Importance), y=Importance, fill=Name)) +   geom_bar(stat = "identity") + theme(legend.position = "none",axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),axis.title.x = element_blank())+ geom_text(aes(label=round(Importance,3)), position=position_dodge(width=0.9), vjust=-0.25)

Citations

myf=function(x)
{citation(x)}

Amelia Package

myf("Amelia")
## 
## To cite Amelia in publications use:
## 
##   James Honaker, Gary King, Matthew Blackwell (2011). Amelia II: A
##   Program for Missing Data. Journal of Statistical Software, 45(7),
##   1-47. URL http://www.jstatsoft.org/v45/i07/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {{Amelia II}: A Program for Missing Data},
##     author = {James Honaker and Gary King and Matthew Blackwell},
##     journal = {Journal of Statistical Software},
##     year = {2011},
##     volume = {45},
##     number = {7},
##     pages = {1--47},
##     url = {http://www.jstatsoft.org/v45/i07/},
##   }

car Package

myf("car")
## 
## To cite the car package in publications use:
## 
##   John Fox and Sanford Weisberg (2019). An {R} Companion to Applied
##   Regression, Third Edition. Thousand Oaks CA: Sage. URL:
##   https://socialsciences.mcmaster.ca/jfox/Books/Companion/
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     title = {An {R} Companion to Applied Regression},
##     edition = {Third},
##     author = {John Fox and Sanford Weisberg},
##     year = {2019},
##     publisher = {Sage},
##     address = {Thousand Oaks {CA}},
##     url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
##   }

corrplot Package

myf("corrplot")
## 
## To cite corrplot in publications use:
## 
##   Taiyun Wei and Viliam Simko (2017). R package "corrplot":
##   Visualization of a Correlation Matrix (Version 0.84). Available from
##   https://github.com/taiyun/corrplot
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{corrplot2017,
##     title = {R package "corrplot": Visualization of a Correlation Matrix},
##     author = {Taiyun Wei and Viliam Simko},
##     year = {2017},
##     note = {(Version 0.84)},
##     url = {https://github.com/taiyun/corrplot},
##   }

ggcorrplot / ggplot2 Packages

myf("ggcorrplot")
## 
## To cite package 'ggcorrplot' in publications use:
## 
##   Alboukadel Kassambara (2019). ggcorrplot: Visualization of a
##   Correlation Matrix using 'ggplot2'. R package version 0.1.3.
##   https://CRAN.R-project.org/package=ggcorrplot
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ggcorrplot: Visualization of a Correlation Matrix using 'ggplot2'},
##     author = {Alboukadel Kassambara},
##     year = {2019},
##     note = {R package version 0.1.3},
##     url = {https://CRAN.R-project.org/package=ggcorrplot},
##   }
myf("ggplot2")
## 
## To cite ggplot2 in publications, please use:
## 
##   H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
##   Springer-Verlag New York, 2016.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     author = {Hadley Wickham},
##     title = {ggplot2: Elegant Graphics for Data Analysis},
##     publisher = {Springer-Verlag New York},
##     year = {2016},
##     isbn = {978-3-319-24277-4},
##     url = {https://ggplot2.tidyverse.org},
##   }

kableExtra / knitr Packages

myf("kableExtra")
## 
## To cite package 'kableExtra' in publications use:
## 
##   Hao Zhu (2020). kableExtra: Construct Complex Table with 'kable' and
##   Pipe Syntax. R package version 1.2.1.
##   https://CRAN.R-project.org/package=kableExtra
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {kableExtra: Construct Complex Table with 'kable' and Pipe Syntax},
##     author = {Hao Zhu},
##     year = {2020},
##     note = {R package version 1.2.1},
##     url = {https://CRAN.R-project.org/package=kableExtra},
##   }
myf("knitr")
## 
## To cite the 'knitr' package in publications use:
## 
##   Yihui Xie (2020). knitr: A General-Purpose Package for Dynamic Report
##   Generation in R. R package version 1.29.
## 
##   Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition.
##   Chapman and Hall/CRC. ISBN 978-1498716963
## 
##   Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible
##   Research in R. In Victoria Stodden, Friedrich Leisch and Roger D.
##   Peng, editors, Implementing Reproducible Computational Research.
##   Chapman and Hall/CRC. ISBN 978-1466561595
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.

leaflet / leaflet.extras Packages

myf("leaflet")  
## 
## To cite package 'leaflet' in publications use:
## 
##   Joe Cheng, Bhaskar Karambelkar and Yihui Xie (2019). leaflet: Create
##   Interactive Web Maps with the JavaScript 'Leaflet' Library. R package
##   version 2.0.3. https://CRAN.R-project.org/package=leaflet
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {leaflet: Create Interactive Web Maps with the JavaScript 'Leaflet'
## Library},
##     author = {Joe Cheng and Bhaskar Karambelkar and Yihui Xie},
##     year = {2019},
##     note = {R package version 2.0.3},
##     url = {https://CRAN.R-project.org/package=leaflet},
##   }
myf("leaflet.extras")
## 
## To cite package 'leaflet.extras' in publications use:
## 
##   Bhaskar Karambelkar and Barret Schloerke (2018). leaflet.extras:
##   Extra Functionality for 'leaflet' Package. R package version 1.0.0.
##   https://CRAN.R-project.org/package=leaflet.extras
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {leaflet.extras: Extra Functionality for 'leaflet' Package},
##     author = {Bhaskar Karambelkar and Barret Schloerke},
##     year = {2018},
##     note = {R package version 1.0.0},
##     url = {https://CRAN.R-project.org/package=leaflet.extras},
##   }

maptools Package

myf("maptools")
## 
## To cite package 'maptools' in publications use:
## 
##   Roger Bivand and Nicholas Lewin-Koh (2020). maptools: Tools for
##   Handling Spatial Objects. R package version 1.0-2.
##   https://CRAN.R-project.org/package=maptools
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {maptools: Tools for Handling Spatial Objects},
##     author = {Roger Bivand and Nicholas Lewin-Koh},
##     year = {2020},
##     note = {R package version 1.0-2},
##     url = {https://CRAN.R-project.org/package=maptools},
##   }

MASS Package

myf("MASS")
## 
## To cite the MASS package in publications use:
## 
##   Venables, W. N. & Ripley, B. D. (2002) Modern Applied Statistics with
##   S. Fourth Edition. Springer, New York. ISBN 0-387-95457-0
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     title = {Modern Applied Statistics with S},
##     author = {W. N. Venables and B. D. Ripley},
##     publisher = {Springer},
##     edition = {Fourth},
##     address = {New York},
##     year = {2002},
##     note = {ISBN 0-387-95457-0},
##     url = {http://www.stats.ox.ac.uk/pub/MASS4/},
##   }

##pscyh/QuantPsyc Packages

myf("psych")
## 
## To cite the psych package in publications use:
## 
##   Revelle, W. (2020) psych: Procedures for Personality and
##   Psychological Research, Northwestern University, Evanston, Illinois,
##   USA, https://CRAN.R-project.org/package=psych Version = 2.0.8,.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {psych: Procedures for Psychological, Psychometric, and Personality Research},
##     author = {William Revelle},
##     organization = { Northwestern University},
##     address = { Evanston, Illinois},
##     year = {2020},
##     note = {R package version 2.0.8},
##     url = {https://CRAN.R-project.org/package=psych},
##   }
myf("QuantPsyc")
## 
## To cite package 'QuantPsyc' in publications use:
## 
##   Thomas D. Fletcher (2012). QuantPsyc: Quantitative Psychology Tools.
##   R package version 1.5. https://CRAN.R-project.org/package=QuantPsyc
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {QuantPsyc: Quantitative Psychology Tools},
##     author = {Thomas D. Fletcher},
##     year = {2012},
##     note = {R package version 1.5},
##     url = {https://CRAN.R-project.org/package=QuantPsyc},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.

raster Package

myf("raster")
## 
## To cite package 'raster' in publications use:
## 
##   Robert J. Hijmans (2020). raster: Geographic Data Analysis and
##   Modeling. R package version 3.3-13.
##   https://CRAN.R-project.org/package=raster
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {raster: Geographic Data Analysis and Modeling},
##     author = {Robert J. Hijmans},
##     year = {2020},
##     note = {R package version 3.3-13},
##     url = {https://CRAN.R-project.org/package=raster},
##   }

reticulate Package

myf("reticulate")
## 
## To cite package 'reticulate' in publications use:
## 
##   Kevin Ushey, JJ Allaire and Yuan Tang (2020). reticulate: Interface
##   to 'Python'. R package version 1.16.
##   https://CRAN.R-project.org/package=reticulate
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {reticulate: Interface to 'Python'},
##     author = {Kevin Ushey and JJ Allaire and Yuan Tang},
##     year = {2020},
##     note = {R package version 1.16},
##     url = {https://CRAN.R-project.org/package=reticulate},
##   }

sf Package

myf("sf")
## 
## To cite package sf in publications, please use:
## 
##   Pebesma, E., 2018. Simple Features for R: Standardized Support for
##   Spatial Vector Data. The R Journal 10 (1), 439-446,
##   https://doi.org/10.32614/RJ-2018-009
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     author = {Edzer Pebesma},
##     title = {{Simple Features for R: Standardized Support for Spatial Vector Data}},
##     year = {2018},
##     journal = {{The R Journal}},
##     doi = {10.32614/RJ-2018-009},
##     url = {https://doi.org/10.32614/RJ-2018-009},
##     pages = {439--446},
##     volume = {10},
##     number = {1},
##   }

shiny Package

myf("shiny")
## 
## To cite package 'shiny' in publications use:
## 
##   Winston Chang, Joe Cheng, JJ Allaire, Yihui Xie and Jonathan
##   McPherson (2020). shiny: Web Application Framework for R. R package
##   version 1.5.0. https://CRAN.R-project.org/package=shiny
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {shiny: Web Application Framework for R},
##     author = {Winston Chang and Joe Cheng and JJ Allaire and Yihui Xie and Jonathan McPherson},
##     year = {2020},
##     note = {R package version 1.5.0},
##     url = {https://CRAN.R-project.org/package=shiny},
##   }

sp Package

myf("sp")
## 
## To cite package sp in publications use:
## 
##   Pebesma, E.J., R.S. Bivand, 2005. Classes and methods for spatial
##   data in R. R News 5 (2), https://cran.r-project.org/doc/Rnews/.
## 
##   Roger S. Bivand, Edzer Pebesma, Virgilio Gomez-Rubio, 2013. Applied
##   spatial data analysis with R, Second edition. Springer, NY.
##   https://asdar-book.org/
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.

spatialEco/spatialreg Packages

myf("spatialEco")
## 
## To cite spatialEco in publications use:
## 
## Evans JS (2020). _spatialEco_. R package version 1.3-1, <URL:
## https://github.com/jeffreyevans/spatialEco>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{spatialEco-package,
##     title = {spatialEco},
##     author = {Jeffrey S. Evans},
##     year = {2020},
##     note = {R package version 1.3-1},
##     url = {https://github.com/jeffreyevans/spatialEco},
##   }
myf("spatialreg")
## 
## To cite spatialreg in publications use one or more of the following as
## appropriate:
## 
##   Roger Bivand, Gianfranco Piras (2015). Comparing Implementations of
##   Estimation Methods for Spatial Econometrics. Journal of Statistical
##   Software, 63(18), 1-36. URL http://www.jstatsoft.org/v63/i18/.
## 
##   Bivand, R. S., Hauke, J., and Kossowski, T. (2013). Computing the
##   Jacobian in Gaussian spatial autoregressive models: An illustrated
##   comparison of available methods. Geographical Analysis, 45(2),
##   150-179. URL https://doi.org/10.1111/gean.12008
## 
##   Roger S. Bivand, Edzer Pebesma, Virgilio Gomez-Rubio, 2013. Applied
##   spatial data analysis with R, Second edition. Springer, NY.
##   http://www.asdar-book.org/
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.

spData/spdep Packages

myf("spData")      
## 
## To cite package 'spData' in publications use:
## 
##   Roger Bivand, Jakub Nowosad and Robin Lovelace (2020). spData:
##   Datasets for Spatial Analysis. R package version 0.3.8.
##   https://CRAN.R-project.org/package=spData
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {spData: Datasets for Spatial Analysis},
##     author = {Roger Bivand and Jakub Nowosad and Robin Lovelace},
##     year = {2020},
##     note = {R package version 0.3.8},
##     url = {https://CRAN.R-project.org/package=spData},
##   }
myf("spdep")
## 
## To cite spdep in publications use one or more of the following as
## appropriate:
## 
##   Bivand, Roger S. and Wong, David W. S. (2018) Comparing
##   implementations of global and local indicators of spatial association
##   TEST, 27(3), 716-748. URL https://doi.org/10.1007/s11749-018-0599-x
## 
##   Roger S. Bivand, Edzer Pebesma, Virgilio Gomez-Rubio, 2013. Applied
##   spatial data analysis with R, Second edition. Springer, NY.
##   http://www.asdar-book.org/
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.

tmap/tmaptools Packages

myf("tmap")        
## 
## To cite tmap/tmaptools in publications use:
## 
## Tennekes M (2018). "tmap: Thematic Maps in R." _Journal of Statistical
## Software_, *84*(6), 1-39. doi: 10.18637/jss.v084.i06 (URL:
## https://doi.org/10.18637/jss.v084.i06).
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {{tmap}: Thematic Maps in {R}},
##     author = {Martijn Tennekes},
##     journal = {Journal of Statistical Software},
##     year = {2018},
##     volume = {84},
##     number = {6},
##     pages = {1--39},
##     doi = {10.18637/jss.v084.i06},
##   }
myf("tmaptools")
## 
## To cite package 'tmaptools' in publications use:
## 
##   Martijn Tennekes (2020). tmaptools: Thematic Map Tools. R package
##   version 3.1. https://CRAN.R-project.org/package=tmaptools
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {tmaptools: Thematic Map Tools},
##     author = {Martijn Tennekes},
##     year = {2020},
##     note = {R package version 3.1},
##     url = {https://CRAN.R-project.org/package=tmaptools},
##   }

totalcensus Package

myf("totalcensus")
## 
## To cite package 'totalcensus' in publications use:
## 
##   Guanglai Li (2020). totalcensus: Extract Decennial Census and
##   American Community Survey Data. R package version 0.6.4.
##   https://CRAN.R-project.org/package=totalcensus
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {totalcensus: Extract Decennial Census and American Community Survey Data},
##     author = {Guanglai Li},
##     year = {2020},
##     note = {R package version 0.6.4},
##     url = {https://CRAN.R-project.org/package=totalcensus},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.

rmapshaper Package

myf("rmapshaper")
## 
## To cite package 'rmapshaper' in publications use:
## 
##   Andy Teucher and Kenton Russell (2020). rmapshaper: Client for
##   'mapshaper' for 'Geospatial' Operations. R package version 0.4.4.
##   https://CRAN.R-project.org/package=rmapshaper
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {rmapshaper: Client for 'mapshaper' for 'Geospatial' Operations},
##     author = {Andy Teucher and Kenton Russell},
##     year = {2020},
##     note = {R package version 0.4.4},
##     url = {https://CRAN.R-project.org/package=rmapshaper},
##   }

browsable( tagList( list( tags\(head( tags\)style( “.leaflet .legend { line-height: 10px; font-size: 10px; }”, “.leaflet .legend i{ width: 30px; height: 30px; }” ) ), leaf)))