#Load in Packages
library(sf)     
## Warning: package 'sf' was built under R version 4.3.2
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(dplyr)   
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(spData) 
## Warning: package 'spData' was built under R version 4.3.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.2
library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 4.3.3
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
library(GWmodel)
## Warning: package 'GWmodel' was built under R version 4.3.3
## Loading required package: robustbase
## Warning: package 'robustbase' was built under R version 4.3.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.3.2
## Loading required package: Rcpp
## Welcome to GWmodel version 2.3-2.
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(scales)
## Warning: package 'scales' was built under R version 4.3.2
# Load in Data
setwd("C:/Users/kajsa/OneDrive/Documents/Binghamton/Senior - Spring/DIDA 370/finaldata")
chic <- st_read("ComArea_ACS14_f.shp")
## Reading layer `ComArea_ACS14_f' from data source 
##   `C:\Users\kajsa\OneDrive\Documents\Binghamton\Senior - Spring\DIDA 370\finaldata\ComArea_ACS14_f.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 86 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS:  WGS84(DD)
chic1 <- chic %>%
  mutate("perc_pov" = Pov14/Pop2014)
chic1
## Simple feature collection with 77 features and 87 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## Geodetic CRS:  WGS84(DD)
## First 10 features:
##    ComAreaID       community TRACTCnt shape_area shape_len Pop2012 Pop2014
## 1         35         DOUGLAS       10   46004621  31027.05   18238   19430
## 2         36         OAKLAND        3   16913961  19565.51    5918    6473
## 3         37     FULLER PARK        2   19916705  25339.09    2876    2543
## 4         38 GRAND BOULEVARD       14   48492503  28196.84   21929   22531
## 5         39         KENWOOD        7   29071742  23325.17   17841   18217
## 6          4  LINCOLN SQUARE       11   71352328  36624.60   39493   39547
## 7         40 WASHINGTON PARK        6   42373881  28175.32   11717   11647
## 8         41       HYDE PARK       12   45105380  29746.71   25681   26705
## 9         42        WOODLAWN       11   57815180  46936.96   25983   25451
## 10         1     ROGERS PARK       12   51259902  34052.40   54991   55053
##    PopChng  PopM   PopMP  PopF   PopFP Under5 Under5P Under18 Under18P Over18
## 1   3.1645  8294 42.6866 11136 57.3134   1888  9.7169    2904  14.9460  16526
## 2   4.4791  2499 38.6065  3974 61.3935    725 11.2004    1938  29.9397   4535
## 3  -6.1450  1218 47.8962  1325 52.1038    531 20.8808     590  23.2009   1953
## 4   1.3540  9681 42.9675 12850 57.0325   2505 11.1180    5659  25.1165  16872
## 5   1.0428  8543 46.8958  9674 53.1042   1191  6.5378    3537  19.4159  14680
## 6   0.0683 19760 49.9659 19787 50.0341   2839  7.1788    6610  16.7143  32937
## 7  -0.2996  4850 41.6416  6797 58.3584   1486 12.7587    4367  37.4946   7280
## 8   1.9547 12941 48.4591 13764 51.5409   1690  6.3284    3727  13.9562  22978
## 9  -1.0343 11345 44.5759 14106 55.4241   1854  7.2846    6718  26.3958  18733
## 10  0.0563 27309 49.6049 27744 50.3951   3601  6.5410   10281  18.6747  44772
##    Over18P Over21 Over21P Over65 Over65P Wht14  Wht14P Blk14  Blk14P AI14
## 1  85.0540  15033 77.3700   2834 14.5857  2752 14.1637 14005 72.0793   86
## 2  70.0603   4213 65.0857    601  9.2847   234  3.6150  6072 93.8050    6
## 3  76.7991   1870 73.5352    542 21.3134   122  4.7975  2311 90.8769    0
## 4  74.8835  15795 70.1034   2914 12.9333  1050  4.6602 21150 93.8707  156
## 5  80.5841  14171 77.7900   2791 15.3209  3834 21.0463 12754 70.0115  129
## 6  83.2857  32196 81.4120   3660  9.2548 31209 78.9162  1780  4.5010  386
## 7  62.5054   6806 58.4356    915  7.8561   228  1.9576 11392 97.8106  157
## 8  86.0438  19792 74.1135   3264 12.2224 13821 51.7544  9110 34.1135  176
## 9  73.6042  16777 65.9188   2735 10.7461  2586 10.1607 22160 87.0693  167
## 10 81.3253  41477 75.3401   4677  8.4954 33374 60.6216 15860 28.8086  558
##     AI14P AS14   AS14P NHP14 NHP14P Oth14 Oth14P Hisp14 Hisp14P Property_C
## 1  0.4426 2687 13.8291    50 0.2573   203 1.0448    426  2.1925        814
## 2  0.0927  140  2.1628     0 0.0000    33 0.5098    122  1.8848        200
## 3  0.0000    0  0.0000     0 0.0000   132 5.1907    191  7.5108        207
## 4  0.6924  138  0.6125    90 0.3994   369 1.6377    474  2.1038        884
## 5  0.7081 1660  9.1124     0 0.0000   331 1.8170    701  3.8481        497
## 6  0.9761 5565 14.0719   101 0.2554  1757 4.4428   6236 15.7686        722
## 7  1.3480   16  0.1374     0 0.0000    35 0.3005    246  2.1121        676
## 8  0.6591 3622 13.5630    19 0.0711  1025 3.8382   1681  6.2947        674
## 9  0.6562  903  3.5480     5 0.0196   238 0.9351    617  2.4243       1108
## 10 1.0136 4162  7.5600   108 0.1962  3084 5.6019  13530 24.5763       1104
##    PropCrRt Violent_C VlntCrRt PerCInc14 PPop14 Pov14 field_37 ChldPov14 NoHS14
## 1    0.0419       793   0.0408    215254  16884  6229  36.8929      1496   1520
## 2    0.0309       203   0.0314     54204   6445  2500  38.7898       719    717
## 3    0.0814       249   0.0979     32828   2533   927  36.5969       204    414
## 4    0.0392      1044   0.0463    293535  22272  7588  34.0697      2487   2260
## 5    0.0273       426   0.0234    261582  17997  4232  23.5150      1141   1141
## 6    0.0183       463   0.0117    461996  39013  5145  13.1879      1148   3065
## 7    0.0580      1052   0.0903     87459  11546  5550  48.0686      2526   1311
## 8    0.0252       378   0.0142    414505  24047  5323  22.1358       664    686
## 9    0.0435      1467   0.0576    186586  24179  9139  37.7973      3243   2443
## 10   0.0201      1022   0.0186    287055  52115 14173  27.1956      3401   5712
##    HSGrad14 SmClg14 ClgGrad14 LaborFrc Unemp14 Pov50  Pov50P Pov125 Pov125P
## 1      2314    3429      4703     8869    1620  1498  8.8723   7016 41.5541
## 2       718    1274      1084     3139     865   481  7.4631   3066 47.5718
## 3       592     492       230     1036     378   166  6.5535   1164 45.9534
## 4      3391    5035      3867    10756    2619  2164  9.7162   9349 41.9765
## 5      1589    2968      6861     9283    1304  1203  6.6844   5181 28.7881
## 6      3809    5771     17730    25794    2037  1504  3.8551   6846 17.5480
## 7      1682    2006      1100     4668    1388  1320 11.4325   6425 55.6470
## 8      1388    2734     12261    13566    1029  1450  6.0299   6330 26.3234
## 9      3271    5316      4016    10534    2356  2328  9.6282  11270 46.6107
## 10     6720    8009     15420    31100    2921  3138  6.0213  17629 33.8271
##    Pov150 Pov150P Pov185 Pov185P Pov200 Pov200P  COIave  HISave  SESave
## 1    7681 45.4928   8418 49.8579   8901 52.7186 -0.1271 42.7985 48.7474
## 2    3358 52.1024   3715 57.6416   3735 57.9519 -0.2499 51.4700 52.5017
## 3    1342 52.9807   1510 59.6131   1540 60.7975 -0.8429 56.4702 48.2353
## 4   10354 46.4889  11665 52.3752  12066 54.1756 -0.3428 41.7933 48.1779
## 5    5689 31.6108   6774 37.6396   7308 40.6068  0.2452 35.6855 58.4564
## 6    8350 21.4031  10136 25.9811  10600 27.1704  0.5169 26.7975 63.3062
## 7    7170 62.0994   8038 69.6172   8256 71.5053 -0.5520 53.9975 49.0668
## 8    6891 28.6564   7953 33.0727   8432 35.0647  0.3641 27.1333 60.8513
## 9   12882 53.2776  14440 59.7212  14852 61.4252 -0.1543 45.6919 49.8982
## 10  20569 39.4685  23774 45.6183  25890 49.6786  0.0026 38.1292 59.1711
##     Hlitave BirthRate FertRate LoBirthR PrenScrn PretBrth TeenBirth Assault
## 1  233.4417      10.3       42       11       76     10.2      34.2    13.6
## 2  250.0172      17.5       63       13       75     11.5      54.5    18.9
## 3  243.1150      11.9       60       17       71     14.3      69.2    49.6
## 4  230.7388      14.3       58       12       74     13.7      54.8    32.1
## 5  252.5250      12.2       51       11       77     11.9      25.7    15.2
## 6  243.4111      17.1       61        8       80      9.7      38.4     5.0
## 7  234.1639      19.3       72       17       74     16.9      82.6    44.6
## 8  230.3696       9.7       33        5       80      5.5       7.6     5.8
## 9  239.6065      15.1       60       17       73     16.3      51.1    31.1
## 10 243.5293      16.4       62       11       73     11.2      40.8     7.7
##    BrstCancr CancerAll Colorect DiabetM FirearmM InfntMR LungCancer ProstateC
## 1       34.3     269.9     33.2   119.1      9.1    13.4       74.5      85.5
## 2       20.6     159.7     14.5    88.7     12.6     8.2       54.5      54.2
## 3        8.5     258.9     21.1   111.7     22.6    22.6       89.6      70.5
## 4       22.6     218.3     27.7    82.6     25.8    12.1       63.8      39.0
## 5       30.9     196.4     34.2    45.5     17.4     8.9       49.1      46.2
## 6       21.7     153.2      8.6    55.4      6.1     3.8       43.1      27.6
## 7       26.0     258.0     31.9    88.2     39.5    19.3       79.8      67.0
## 8       33.6     144.0     11.7    34.0      5.0    10.4       34.9      24.1
## 9       32.9     241.3     23.9    82.1     25.0    11.5       69.8      58.0
## 10      23.3     176.9     25.3    77.1      5.2     6.4       36.7      21.7
##    Stroke ChlBLLS ChlLeadP GonorrF GonorrM Tuberc
## 1    62.1   482.2        0  1063.3   727.4    4.2
## 2    43.7   435.4        0  1655.4  1629.3    6.7
## 3    82.4   489.9        2  1061.9  1556.4    0.0
## 4    46.7   590.4        1  1454.6  1680.0   13.2
## 5    31.5   397.9        0   610.2   549.1    0.0
## 6    36.9   273.3        0    98.8   195.5    8.5
## 7    43.3   502.4        0  2145.8  2058.0    5.0
## 8    26.0   369.0        0   216.6   168.4    5.3
## 9    60.0   444.3        1  1382.0  1818.6   17.4
## 10   33.7   364.7        0   322.5   423.3   11.4
##                          geometry  perc_pov
## 1  MULTIPOLYGON (((-87.60914 4... 0.3205867
## 2  MULTIPOLYGON (((-87.59215 4... 0.3862197
## 3  MULTIPOLYGON (((-87.6288 41... 0.3645301
## 4  MULTIPOLYGON (((-87.60671 4... 0.3367804
## 5  MULTIPOLYGON (((-87.59215 4... 0.2323105
## 6  MULTIPOLYGON (((-87.67441 4... 0.1300984
## 7  MULTIPOLYGON (((-87.60604 4... 0.4765176
## 8  MULTIPOLYGON (((-87.58038 4... 0.1993260
## 9  MULTIPOLYGON (((-87.57714 4... 0.3590822
## 10 MULTIPOLYGON (((-87.65456 4... 0.2574428
##############################################################
#How do Health Indicators Predict Poverty
health_model <- chic1 %>% 
  select(c(perc_pov, CancerAll, DiabetM, InfntMR, LoBirthR, ComAreaID, shape_area, shape_len)) %>% 
  filter(!perc_pov %in% "NA") %>% 
  mutate(perc_pov = as.numeric(perc_pov))

summary(lm4 <- lm(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR, data = health_model))
## 
## Call:
## lm(formula = perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR, 
##     data = health_model)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18722 -0.04413  0.00574  0.04517  0.26996 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.0196693  0.0478056   0.411  0.68197   
## CancerAll   -0.0007147  0.0004023  -1.776  0.07990 . 
## DiabetM      0.0023227  0.0006903   3.365  0.00123 **
## InfntMR      0.0076688  0.0035099   2.185  0.03215 * 
## LoBirthR     0.0124491  0.0039317   3.166  0.00226 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08418 on 72 degrees of freedom
## Multiple R-squared:  0.5547, Adjusted R-squared:  0.5299 
## F-statistic: 22.42 on 4 and 72 DF,  p-value: 4.728e-12
#Testing for spatial autocorrelation in the residuals
list <- health_model %>% 
  poly2nb(st_geometry(health_model)) %>% 
  nb2listw(zero.policy = TRUE) 

#run the Moran's I test for regression residuals
lm.morantest(lm4, list)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = perc_pov ~ CancerAll + DiabetM + InfntMR +
## LoBirthR, data = health_model)
## weights: list
## 
## Moran I statistic standard deviate = 2.6787, p-value = 0.003696
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.165344909     -0.023103101      0.004949233
#From this output, we see that the Moran I p-value indicates statistically significant spatial autocorrelation because the value is below 0.05. This provides us strong evidence AGAINST the null hypothesis of NO spatial autocorrelation 

#The observed Moran I is a positive number which indicates a positive spatial autocorrelation. Overall, we can conclude that there is statistically significant positive spatial autocorrelation in the residuals. This means that nearby observations are likely to have similar residual values. 
# Spatial Error Model to Control for the Spatial Dependency 
lm_health_error <- errorsarlm(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR, 
                              data = health_model,
              listw = list,
              zero.policy = TRUE, 
              na.action = na.omit)

summary(lm_health_error)
## 
## Call:errorsarlm(formula = perc_pov ~ CancerAll + DiabetM + InfntMR + 
##     LoBirthR, data = health_model, listw = list, na.action = na.omit, 
##     zero.policy = TRUE)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1696996 -0.0433084  0.0053114  0.0424771  0.2845409 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  0.04977972  0.05041855  0.9873 0.3234811
## CancerAll   -0.00064657  0.00037894 -1.7063 0.0879581
## DiabetM      0.00224488  0.00065166  3.4449 0.0005714
## InfntMR      0.00571237  0.00320467  1.7825 0.0746655
## LoBirthR     0.01002130  0.00347658  2.8825 0.0039451
## 
## Lambda: 0.43128, LR test value: 5.812, p-value: 0.015917
## Asymptotic standard error: 0.13553
##     z-value: 3.1821, p-value: 0.0014621
## Wald statistic: 10.126, p-value: 0.0014621
## 
## Log likelihood: 86.79523 for error model
## ML residual variance (sigma squared): 0.0058883, (sigma: 0.076735)
## Number of observations: 77 
## Number of parameters estimated: 7 
## AIC: -159.59, (AIC for lm: -155.78)
# We can see that the lamda value is 0.43 which is statistically significant because it is larger than 0.05. This tells us there is spatial autocorrelation within the residuals of our models.

# The AIC value is -159.59 compared to the AIC for the linear model of -155.78. Because the error model's AIC is low, we can suggest a small improvement in the model. 
seResiduals4 <- rep(0, length(health_model$perc_pov))
resIndex4 <- lm_health_error$residuals %>% names() %>% as.integer();
seResiduals4[resIndex4] <- lm_health_error$residuals

#perform the Moran Test
list %>%
  moran.test(seResiduals4, ., zero.policy = TRUE) 
## 
##  Moran I test under randomisation
## 
## data:  seResiduals4  
## weights: .    
## 
## Moran I statistic standard deviate = 0.17123, p-value = 0.432
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0009837955     -0.0131578947      0.0050549195
# Because our p-value is above 0.05, we can see the Moran test is insignificant which tells us that we have controlled for the spatial variation in the error terms 
bwVal2 <-GWmodel::bw.gwr(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR, 
                              data = health_model %>% sf::as_Spatial(), 
                         approach = 'AICc', kernel = 'bisquare', 
                         adaptive = TRUE)
## Adaptive bandwidth (number of nearest neighbours): 55 AICc value: -161.5842 
## Adaptive bandwidth (number of nearest neighbours): 42 AICc value: -165.8242 
## Adaptive bandwidth (number of nearest neighbours): 33 AICc value: -162.85 
## Adaptive bandwidth (number of nearest neighbours): 46 AICc value: -164.5742 
## Adaptive bandwidth (number of nearest neighbours): 37 AICc value: -164.6257 
## Adaptive bandwidth (number of nearest neighbours): 42 AICc value: -165.8242
 bwVal2
## [1] 42
lm.gwr2 <- gwr.basic(perc_pov ~ CancerAll + DiabetM + InfntMR + LoBirthR, 
                              data = health_model %>% sf::as_Spatial(),
                     bw = bwVal2, kernel = "bisquare", adaptive = TRUE)
print(lm.gwr2)
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2024-04-25 18:00:21.843685 
##    Call:
##    gwr.basic(formula = perc_pov ~ CancerAll + DiabetM + InfntMR + 
##     LoBirthR, data = health_model %>% sf::as_Spatial(), bw = bwVal2, 
##     kernel = "bisquare", adaptive = TRUE)
## 
##    Dependent (y) variable:  perc_pov
##    Independent variables:  CancerAll DiabetM InfntMR LoBirthR
##    Number of data points: 77
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18722 -0.04413  0.00574  0.04517  0.26996 
## 
##    Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
##    (Intercept)  0.0196693  0.0478056   0.411  0.68197   
##    CancerAll   -0.0007147  0.0004023  -1.776  0.07990 . 
##    DiabetM      0.0023227  0.0006903   3.365  0.00123 **
##    InfntMR      0.0076688  0.0035099   2.185  0.03215 * 
##    LoBirthR     0.0124491  0.0039317   3.166  0.00226 **
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 0.08418 on 72 degrees of freedom
##    Multiple R-squared: 0.5547
##    Adjusted R-squared: 0.5299 
##    F-statistic: 22.42 on 4 and 72 DF,  p-value: 4.728e-12 
##    ***Extra Diagnostic information
##    Residual sum of squares: 0.5101695
##    Sigma(hat): 0.08247581
##    AIC:  -155.7784
##    AICc:  -154.5784
##    BIC:  -192.6528
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: bisquare 
##    Adaptive bandwidth: 42 (number of nearest neighbours)
##    Regression points: the same locations as observations are used.
##    Distance metric: Euclidean distance metric is used.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                     Min.     1st Qu.      Median     3rd Qu.   Max.
##    Intercept -0.26026907 -0.01077319  0.03276930  0.08130904 0.1820
##    CancerAll -0.00256234 -0.00097222 -0.00068624 -0.00021006 0.0004
##    DiabetM   -0.00032220  0.00096532  0.00163113  0.00361353 0.0097
##    InfntMR   -0.00198885  0.00187100  0.00767581  0.01667062 0.0225
##    LoBirthR   0.00171588  0.00703163  0.00937281  0.01557529 0.0259
##    ************************Diagnostic information*************************
##    Number of data points: 77 
##    Effective number of parameters (2trace(S) - trace(S'S)): 23.68848 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 53.31152 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -165.8242 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -200.8561 
##    BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): -215.519 
##    Residual sum of squares: 0.2606169 
##    R-square value:  0.7724975 
##    Adjusted R-square value:  0.6694765 
## 
##    ***********************************************************************
##    Program stops at: 2024-04-25 18:00:22.208215
health_model$gwr_exp_coeff <- lm.gwr2$SDF$DiabetM
model_one <- as(health_model, Class = "Spatial")
plot_one <- spplot(model_one, "gwr_exp_coeff")
plot_one

health_model$gwr_exp_coeff <- lm.gwr2$SDF$LoBirthR
model_one <- as(health_model, Class = "Spatial")
plot_two <- spplot(model_one, "gwr_exp_coeff")
plot_two

library(cowplot)
## Warning: package 'cowplot' was built under R version 4.3.2
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
## 
##     theme_map
plot_grid(plot_one, plot_two,
          labels = c('Diabetes-related deaths per 100,000 persons', 'Infant mortality rate: deaths per 1000 live births'),
          hjust = -0.1, vjust = 4,label_size = 10)