R Markdown

#Q1. Plot at least two high-quality maps with one using the COVID-19 data and one using a related factor
# You can use either plot method for sf or ggplot method

#read in ACS Pop Data by ZIP
ACS_Pop_ZIP <- st_read('acsPopByZIP.shp')
## Reading layer `acsPopByZip' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module13\R-Spatial_III_Lab\acsPopByZip.shp' using driver `ESRI Shapefile'
## Simple feature collection with 180 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067113 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
crs(ACS_Pop_ZIP)
## CRS arguments:
##  +proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333
## +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft
## +no_defs
plot(ACS_Pop_ZIP)
## Warning: plotting the first 9 out of 13 attributes; use max.plot = 13 to plot
## all

sf::st_transform(ACS_Pop_ZIP, sf::st_crs(2263))-> ACS_Pop_Local
st_crs(ACS_Pop_Local)
## Coordinate Reference System:
##   User input: EPSG:2263 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
histogram(ACS_Pop_Local$Positiv)

pal <- brewer.pal(7,"OrRd")

plot(ACS_Pop_Local['Positiv'],
     main = 'Number of Positive Cases Per ZIP',
     breaks = "quantile",nbreaks = 7,
     pal = pal,
     border = NA,
     graticule = st_crs(2263),
     reset=FALSE)

ggplot(ACS_Pop_Local)+
  geom_sf(aes(fill=Positiv))

#use classInt package to determine breaks
#add 0.00001 offset to catch the lowest value

breaks_qt <- classIntervals(c(min(ACS_Pop_Local$Positiv)-0.00001,
                              ACS_Pop_Local$Positiv),n=7,style="quantile")
breaks_qt
## style: quantile
##   one of 28,530,983,404 possible partitions of this variable into 7 classes
##      [13.99999,256)      [256,377.8571) [377.8571,490.8571) [490.8571,709.4286) 
##                  25                  27                  26                  25 
## [709.4286,999.7143) [999.7143,1302.143)     [1302.143,2817] 
##                  26                  26                  26
breaks_qt$brks
## [1]   13.99999  256.00000  377.85714  490.85714  709.42857  999.71429 1302.14286
## [8] 2817.00000
ACS_Pop_Local <- mutate(ACS_Pop_Local, posi_cat = cut(Positiv,breaks_qt$brks))

ggplot(ACS_Pop_Local)+
  geom_sf(aes(fill=posi_cat))+
  scale_fill_brewer(palette="OrRd",name='Pos. Cases Per ZIP')+
  labs(x='longitude',y='latitude',
       title="New York City Positive Cases Per ZIP")

names(ACS_Pop_Local)
##  [1] "ZIPCODE"  "PO_NAME"  "POPULAT"  "COUNTY"   "Positiv"  "Total"   
##  [7] "totPop"   "malPctg"  "asianPp"  "blackPp"  "hspncPp"  "whitePp" 
## [13] "eldrlyP"  "geometry" "posi_cat"
#see max of elderlyP
max(ACS_Pop_Local$eldrlyP)
## [1] NA
#correct for NAs
ACS_Pop_Local$eldrlyP[is.na(ACS_Pop_Local$eldrlyP)]<-0
#check to make sure the replacement worked
ACS_Pop_Local$eldrlyP
##   [1]  3239 15770  6391     0   206    66   601  8602  4716  8176  3269  4285
##  [13]  4410  6613  3383   949  6587  9176  8987 13620 12836 16000  4085  6520
##  [25] 10191 10855  3150  5638  8579  8347  5712  3458  2866  2928  3820  2622
##  [37]  6053  1418  7651  1110  4636 10082   887   330  5721  1675  2474  5516
##  [49]  6247  8028  1801  4901  5436  3234 10781 14751  6198  6123  6445  3156
##  [61]  3814  7769  6825  6468  4786  6314  7287  8878 11209  1340  1290  7158
##  [73]  8899 10346  7164 10485  2544  4753  7508  8616   904  9464  2650  2735
##  [85]  3333  3935  3064  4932  5276     0  6671 13807 10483  3771  9131 10407
##  [97]  8977 11484  8904  6083  8554  7167 14059  7126  6610  3963  8236  9856
## [109]  9721  7999  3787 13461 11792  7674 11905  7803 14805 12823     0  2681
## [121]  8583 16223 16567 13690  3157  5339  3080  9839 14027  2569  9159  7082
## [133]  5091  5034  4678  1517  6654  6210  2252  6548  9375  4958     0  4197
## [145]  9517 12724  9131 13983 12297  4792  6452 11067  4414  5620  7348  6257
## [157]  3128  2517  3828  4170  6108  5639  5117  3709  4255  3398  3648  2758
## [169]  3442  8000  4554  9361  6820  2248  6856  1794     0     0  3616   766
#repeat for Positive Cases
ACS_Pop_Local$Positiv
##   [1]  260  712  347   24   44   14   40  518  201  394  116  179  233  502  107
##  [16]  168  441  431  250  433  472  875  428  654  371 1162  351  807  937  801
##  [31]  439  636  301  297  166  328  614  118  244   44  302  489   32   56  806
##  [46]  428  613 1001  883 1087  305  504  672  506 1204 1955 1059 1412 1474  719
##  [61]  795 1518 1286 1269  956 1041 1266 1529 1168 1168   95 1078 1650 2245 1591
##  [76] 2042  321  388 1239 1331  223 1225  397  423  379  362  316  344  417   41
##  [91]  447 1329 1325  475  975 1233 1383  772 1162 1399 1086  972 1016  432  484
## [106]  397 1163 1961  997  801  306 1134  657  814 1336  449 1064 1573  289  337
## [121]  802 1602 1293 1828  567  529  339  675  777  358  560  377  209  305  202
## [136]   76  330  562  298  694 2817 1055  998  998 1732 2196  792 1138 1404  486
## [151]  531 1325  486  859  917  588  366  412  535  675  720  863  760  679  653
## [166]  356  541  470  660 1077  612 1356 1009  342 1496  392  256  256  458   73
ACS_Pop_Local$eldrlyP[is.na(ACS_Pop_Local$Positiv)]<-0

#mutate to create ranking columns for elderly pop and positive cases
#mutate to create percent elderly as well, could be useful to see
ACS_Pop_Local<-mutate(ACS_Pop_Local,Pct_EldrlyP = round((eldrlyP/totPop)*100,digits=1))
ACS_Pop_Local<-mutate(ACS_Pop_Local,Rank_EldrlyP = round((eldrlyP/max(eldrlyP))*100,digits=2))
ACS_Pop_Local<-mutate(ACS_Pop_Local,Rank_Positive = round((Positiv/max(Positiv))*100,digits=2))
#add two ranked columns together and divide by 2 (for 2 variables) to create
#the final index score
ACS_Pop_Local<-mutate(ACS_Pop_Local,Rank_Final = round((Rank_EldrlyP+Rank_Positive)/2,digits=1))

ACS_Pop_Local<-ACS_Pop_Local%>%mutate(eldrlyP=as.integer(eldrlyP))

str(ACS_Pop_Local)
## Classes 'sf' and 'data.frame':   180 obs. of  19 variables:
##  $ ZIPCODE      : chr  "10001" "10002" "10003" "10004" ...
##  $ PO_NAME      : chr  "New York" "New York" "New York" "New York" ...
##  $ POPULAT      : num  22413 81305 55878 2187 8107 ...
##  $ COUNTY       : chr  "New York" "New York" "New York" "New York" ...
##  $ Positiv      : int  260 712 347 24 44 14 40 518 201 394 ...
##  $ Total        : int  571 1358 830 64 137 54 130 1180 561 852 ...
##  $ totPop       : num  25645 76815 54181 NA 8809 ...
##  $ malPctg      : num  51.9 48.2 50.3 NA 42.8 ...
##  $ asianPp      : num  6788 32502 8027 NA 1974 ...
##  $ blackPp      : num  1897 7882 3443 NA 291 ...
##  $ hspncPp      : num  3679 21621 4375 NA 504 ...
##  $ whitePp      : num  16725 27156 42259 NA 6603 ...
##  $ eldrlyP      : int  3239 15770 6391 0 206 66 601 8602 4716 8176 ...
##  $ geometry     :sfc_MULTIPOLYGON of length 180; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:110, 1:2] 981959 981980 981988 981993 982052 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  $ posi_cat     : Factor w/ 7 levels "(14,256]","(256,378]",..: 2 5 2 1 1 1 1 4 1 3 ...
##  $ Pct_EldrlyP  : num  12.6 20.5 11.8 NA 2.3 2.8 6.6 15.3 14.2 15.9 ...
##  $ Rank_EldrlyP : num  19.55 95.19 38.58 0 1.24 ...
##  $ Rank_Positive: num  9.23 25.28 12.32 0.85 1.56 ...
##  $ Rank_Final   : num  14.4 60.2 25.4 0.4 1.4 0.4 2.5 35.2 17.8 31.7 ...
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:18] "ZIPCODE" "PO_NAME" "POPULAT" "COUNTY" ...
#set up breaks for all new variables
#breaks elderly pop
breaks_qt_eldrly <- classIntervals(c(min(ACS_Pop_Local$eldrlyP), #-0.00001,
                                     ACS_Pop_Local$eldrlyP),n=7,style="quantile")
#breaks elderly percentage
breaks_qt_eldPct <- classIntervals(c(min(ACS_Pop_Local$Pct_EldrlyP), #-0.00001,
                                     ACS_Pop_Local$Pct_EldrlyP),n=7,style="quantile")
## Warning in classIntervals(c(min(ACS_Pop_Local$Pct_EldrlyP),
## ACS_Pop_Local$Pct_EldrlyP), : var has missing values, omitted in finding classes
#breaks final ranking (quantiles)
breaks_qt_finRank <- classIntervals(c(min(ACS_Pop_Local$Rank_Final), #-0.00001,
                                      ACS_Pop_Local$Rank_Final),n=7,style="quantile")
  
#breaks final ranking (jenks)
breaks_jenks_finRank <- classIntervals(c(min(ACS_Pop_Local$Rank_Final), #-0.00001,
                                      ACS_Pop_Local$Rank_Final),n=7,style="jenks")

#set up category columns for all new variables
ACS_Pop_Local <- mutate(ACS_Pop_Local, eld_cat = cut(eldrlyP,breaks_qt_eldrly$brks))
ACS_Pop_Local<- mutate(ACS_Pop_Local,eldPct_cat = cut(Pct_EldrlyP,breaks_qt_eldPct$brks))
ACS_Pop_Local<- mutate(ACS_Pop_Local,finRank_cat_qt = cut(Rank_Final,breaks_qt_finRank$brks))
ACS_Pop_Local<- mutate(ACS_Pop_Local,finRank_cat_jenks = cut(Rank_Final,breaks_jenks_finRank$brks))

breaks_qt_eldrly$brks
## [1]     0.000  2504.714  3674.143  5042.143  6544.000  8299.429 10428.714
## [8] 16567.000
ggplot(ACS_Pop_Local)+
  geom_sf(aes(fill=finRank_cat_qt))+
  labs(x='longitude',y='latitude',title="Index Map Ranking by ZIP (Quantile Method)")+
  scale_fill_brewer(palette="OrRd",name='Index Map Ranking Score')

#now show ggplot for natural jenks method with labels
#and ggrepel to disperse labels

require(ggrepel);
labelCoords <- ACS_Pop_Local %>% sf::st_centroid() %>%
  dplyr::filter(Rank_Final>70) %>%
  sf::st_coordinates();
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
labelData <- ACS_Pop_Local%>% sf::st_centroid()%>%
  dplyr::filter(Rank_Final>70)%>%
  dplyr::mutate(x=labelCoords[,1],y=labelCoords[,2])
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
ggplot(ACS_Pop_Local)+
  geom_sf(aes(fill=finRank_cat_jenks))+
  geom_label_repel(data=labelData,
                aes(x=x, y=y, label=Rank_Final),
                label.size=0.9,
                size=3,
                segment.color=rgb(0.5,0.5,0.9),
                segment.size=0.8)+
  labs(x='longitude',y='latitude',title="Index Map Ranking by ZIP (Natural Jenks Method)")+
  scale_fill_brewer(palette="OrRd",name='Index Map Ranking Score')

#using sample code data
ACS_Pop_Local %>% sf::st_buffer(30000) %>% sf::st_transform(4326) %>% st_bbox() %>% as.vector() %>%
  ggmap::get_stamenmap(zoom = 10, maptype = 'terrain-background') -> baseMap 
## Source : http://tile.stamen.com/terrain-background/10/300/383.png
## Source : http://tile.stamen.com/terrain-background/10/301/383.png
## Source : http://tile.stamen.com/terrain-background/10/302/383.png
## Source : http://tile.stamen.com/terrain-background/10/300/384.png
## Source : http://tile.stamen.com/terrain-background/10/301/384.png
## Source : http://tile.stamen.com/terrain-background/10/302/384.png
## Source : http://tile.stamen.com/terrain-background/10/300/385.png
## Source : http://tile.stamen.com/terrain-background/10/301/385.png
## Source : http://tile.stamen.com/terrain-background/10/302/385.png
## Source : http://tile.stamen.com/terrain-background/10/300/386.png
## Source : http://tile.stamen.com/terrain-background/10/301/386.png
## Source : http://tile.stamen.com/terrain-background/10/302/386.png
colPal <- RColorBrewer::brewer.pal(8,"OrRd")
colPalBl <- RColorBrewer::brewer.pal(10,"RdBu")

#?RColorBrewer
#check basemap
ggmap(baseMap)

#plotting number of positive cases, want to reverse the color palette
#so that high values, aka "hot" values are red
plot(ACS_Pop_Local%>%sf::st_transform(3857)%>%magrittr::extract("Positiv"),
     main = "Number of Positive COVID-19 Cases",
     nbreaks = 10,
     breaks = 'quantile',
     pal = rev(colPalBl),
     border = 'grey',
     graticulte = st_crs(4326),
     axes = TRUE,
     logz = TRUE,
     bgMap = baseMap)
## Warning in axis(side = side, ...): "graticulte" is not a graphical parameter
## Warning in axis(side = side, ...): "graticulte" is not a graphical parameter
## Warning in title(...): "graticulte" is not a graphical parameter

## Warning in title(...): "graticulte" is not a graphical parameter

# Facet
summary(ACS_Pop_Local$Rank_Final)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.40   17.62   29.05   32.11   44.95   79.20
breaks_rank <- classIntervals(ACS_Pop_Local$Rank_Final, n = 3, style = "quantile")
breaks_rank$brks[1] %<>% magrittr::subtract(0.0001)
ACS_Pop_Local %<>% mutate(Rank_Level = cut(Rank_Final, 
                                              breaks_rank$brks, 
                                              labels = c('Low','Medium','High')))

#names(ACS_Pop_Local)
ggplot(ACS_Pop_Local) + 
  geom_sf(aes(fill=finRank_cat_qt)) +
  #geom_sf(data=philly_crimes_pt, colour=ptColor, size=0.1) +
  scale_fill_brewer(palette = "OrRd", name='Index Ranking') +
  labs(title='Index Ranking for COVID Cases & Elderly Pop',
       caption = 'Data Source: [...]') +
  facet_wrap(~Rank_Level)+
  theme(axis.text.x = element_blank(), 
        axis.text.y = element_blank(),
        legend.position = 'bottom');

Including Plots

Task two will include: -Use ggplot and two other ggplot-compatible packages to create a multi-map figure illustrating the possible relationship between # of food stores, neighborhood racial composition, elderly pop, etc. -These maps should be side by side on one page. Add graticule to at least one of those maps and label some of the features on the map where applicable and appropriate. -The below code illustrates how I layered on top the number of nursing homes (dots sized by number of nursing homes per ZIP) in order to show the relationship between high scoring index ZIPs and potentially more nursing homes per ZIP. Again, the index is based on the number of positive cases and the number of elderly people in each ZIP code.

NYS_Health <- read_csv("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/NYS_Health_Facility.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character(),
##   `Facility ID` = col_double(),
##   `Facility Phone Number` = col_double(),
##   `Facility Fax Number` = col_double(),
##   `Facility County Code` = col_double(),
##   `Regional Office ID` = col_double(),
##   `Main Site Facility ID` = col_double(),
##   `Facility Latitude` = col_double(),
##   `Facility Longitude` = col_double()
## )
## i Use `spec()` for the full column specifications.
names(NYS_Health)
##  [1] "Facility ID"                  "Facility Name"               
##  [3] "Short Description"            "Description"                 
##  [5] "Facility Open Date"           "Facility Address 1"          
##  [7] "Facility Address 2"           "Facility City"               
##  [9] "Facility State"               "Facility Zip Code"           
## [11] "Facility Phone Number"        "Facility Fax Number"         
## [13] "Facility Website"             "Facility County Code"        
## [15] "Facility County"              "Regional Office ID"          
## [17] "Regional Office"              "Main Site Name"              
## [19] "Main Site Facility ID"        "Operating Certificate Number"
## [21] "Operator Name"                "Operator Address 1"          
## [23] "Operator Address 2"           "Operator City"               
## [25] "Operator State"               "Operator Zip Code"           
## [27] "Cooperator Name"              "Cooperator Address"          
## [29] "Cooperator Address 2"         "Cooperator City"             
## [31] "Cooperator State"             "Cooperator Zip Code"         
## [33] "Ownership Type"               "Facility Latitude"           
## [35] "Facility Longitude"           "Facility Location"
#sf::st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/nycFoodStore.shp") -> nycFoodStoreSF
#st_crs(nycFoodStoreSF) #ESPG 4326 (aka WGS 1984)

#want to reproject food store CRS into the same as the ZIPs for a more local projects
#sf::st_transform(nycFoodStoreSF, sf::st_crs(2263)) -> nycFoodStoreLocal
#st_crs(nycFoodStoreLocal)

#join ZIPs (that also have covid data) to the food stores
#join the zips that contain food stores, group by the ZIP and by the establishment type

#clean data
NYS_Health<-NYS_Health%>%filter(NYS_Health$`Facility Latitude`>0)

NYS_Health%>%filter(!is.na(NYS_Health$`Facility Latitude`))
## # A tibble: 3,843 x 36
##    `Facility ID` `Facility Name`  `Short Descripti~ Description `Facility Open ~
##            <dbl> <chr>            <chr>             <chr>       <chr>           
##  1           204 Hospice at Lour~ HSPC              Hospice     06/01/1985      
##  2           620 Charles T Sitri~ NH                Residentia~ 02/01/1989      
##  3          1156 East Side Nursi~ NH                Residentia~ 08/01/1979      
##  4          2589 Wellsville Mano~ NH                Residentia~ 02/01/1989      
##  5          3455 Harris Hill Nur~ NH                Residentia~ 04/08/1992      
##  6          3853 Garden City Sur~ DTC               Diagnostic~ 04/07/2008      
##  7          4249 Willcare         CHHA              Certified ~ 05/15/1990      
##  8          4473 Good Shepherd H~ HSPC              Hospice     09/01/2002      
##  9          6230 NYU Langone Rut~ HOSP-EC           Hospital E~ 01/01/2006      
## 10          6482 Endoscopy Cente~ DTC               Diagnostic~ 01/20/2003      
## # ... with 3,833 more rows, and 31 more variables: Facility Address 1 <chr>,
## #   Facility Address 2 <chr>, Facility City <chr>, Facility State <chr>,
## #   Facility Zip Code <chr>, Facility Phone Number <dbl>,
## #   Facility Fax Number <dbl>, Facility Website <chr>,
## #   Facility County Code <dbl>, Facility County <chr>,
## #   Regional Office ID <dbl>, Regional Office <chr>, Main Site Name <chr>,
## #   Main Site Facility ID <dbl>, Operating Certificate Number <chr>,
## #   Operator Name <chr>, Operator Address 1 <chr>, Operator Address 2 <chr>,
## #   Operator City <chr>, Operator State <chr>, Operator Zip Code <chr>,
## #   Cooperator Name <chr>, Cooperator Address <chr>,
## #   Cooperator Address 2 <chr>, Cooperator City <chr>, Cooperator State <chr>,
## #   Cooperator Zip Code <chr>, Ownership Type <chr>, Facility Latitude <dbl>,
## #   Facility Longitude <dbl>, Facility Location <chr>
NYC_Health<-NYS_Health%>%
  filter(NYS_Health$`Facility State`=='New York' & NYS_Health$`Facility County`=='Bronx'|NYS_Health$`Facility County`=='Kings' |NYS_Health$`Facility County`=='New York'| NYS_Health$`Facility County`=='Richmond'| NYS_Health$`Facility County`=='Queens')


unique(NYC_Health$`Facility State`)
## [1] "New York"
names(NYC_Health)
##  [1] "Facility ID"                  "Facility Name"               
##  [3] "Short Description"            "Description"                 
##  [5] "Facility Open Date"           "Facility Address 1"          
##  [7] "Facility Address 2"           "Facility City"               
##  [9] "Facility State"               "Facility Zip Code"           
## [11] "Facility Phone Number"        "Facility Fax Number"         
## [13] "Facility Website"             "Facility County Code"        
## [15] "Facility County"              "Regional Office ID"          
## [17] "Regional Office"              "Main Site Name"              
## [19] "Main Site Facility ID"        "Operating Certificate Number"
## [21] "Operator Name"                "Operator Address 1"          
## [23] "Operator Address 2"           "Operator City"               
## [25] "Operator State"               "Operator Zip Code"           
## [27] "Cooperator Name"              "Cooperator Address"          
## [29] "Cooperator Address 2"         "Cooperator City"             
## [31] "Cooperator State"             "Cooperator Zip Code"         
## [33] "Ownership Type"               "Facility Latitude"           
## [35] "Facility Longitude"           "Facility Location"
NYC_Nursing<-NYC_Health%>%
  filter(NYC_Health$`Short Description`=='NH')%>%
  mutate(lat = as.numeric(`Facility Latitude`),long = as.numeric(`Facility Longitude`),
         ZipCode = as.numeric(`Facility Zip Code`))

NYC_Nursing_sf<-st_as_sf(NYC_Nursing, coords = c("long","lat"))
class(NYC_Nursing_sf)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"
st_crs(NYC_Nursing_sf)<-4326
st_crs(NYC_Nursing_sf)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
sf::st_transform(NYC_Nursing_sf, sf::st_crs(2263))-> Nursing_Local


#join Local Nursing data to ZIPs
NYC_Nursing_Join <- sf::st_join(Nursing_Local,ACS_Pop_Local, join = st_within)

#filter out any bad data for nursing
NYC_Nursing_Join<- NYC_Nursing_Join%>%filter(NYC_Nursing_Join$`Facility Longitude`>-75)

#create number of nursing homes per ZIP
NYC_Nursing_Join <-NYC_Nursing_Join %>%
  group_by(ZipCode) %>%
  summarise(Num_NS = n())

#create object to hold the plot
NYC_Nursing_Join_plt <-NYC_Nursing_Join%>%magrittr::extract('Num_NS') %>%
  plot(breaks = "jenks", main = 'Number of Health Facilities')

plot(NYC_Nursing_Join['Num_NS'])

#want to plot number of nursing homes, colorized by # on top of the final 
#ranking thematic map

labelCoords <- ACS_Pop_Local %>% sf::st_centroid() %>%
  dplyr::filter(Rank_Final>70) %>%
  sf::st_coordinates();
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
labelData <- ACS_Pop_Local%>% sf::st_centroid()%>%
  dplyr::filter(Rank_Final>70)%>%
  dplyr::mutate(x=labelCoords[,1],y=labelCoords[,2])
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
#plot1<-
ptColor <- rgb(0.1,0.9,0.1,0.4);
ggplot(ACS_Pop_Local) + 
  geom_sf(aes(fill=finRank_cat_qt)) +
  geom_sf(data=NYC_Nursing_Join, colour=ptColor, size=(NYC_Nursing_Join$Num_NS/2)) +
  geom_label_repel(data = labelData,
                   aes(x=x,y=y,label = Rank_Final),
                   label.size = .09,
                   size = 3,
                   segment.color = rgb(0.5,0.5,0.9),
                   segment.size = 0.8) +
  #graticulte = st_crs(4326)+
  scale_fill_brewer(palette = "OrRd", name='Index Rank') +
  labs(x='Longitude', y='Latitude', 
       title='Final Index Map with Labels & Sized Dots = # Nursing Homes Per ZIP');

#labelCoords2 <- ACS_Pop_Local %>% sf::st_centroid() %>%
#  dplyr::filter(Positiv>1000) %>%
#  sf::st_coordinates();

#labelData2 <- ACS_Pop_Local%>% sf::st_centroid()%>%
#  dplyr::filter(Positiv>1000)%>%
#  dplyr::mutate(x=labelCoords[,1],y=labelCoords[,2])


#NEED TO MAKE THE 2ND PLOT AGGREGATED TO NURSING HOMES
plot1<-ggplot(ACS_Pop_Local) + 
  geom_sf(aes(fill=posi_cat)) +
  #geom_sf(data=NYC_Nursing_PerZIP_sf, size = 0.1) +#colour=ptColor, size=0.1) +
  #geom_label_repel(data = labelData2,
  #                 aes(x=x,y=y,label = Rank_Final),
  #                 label.size = .09,
  #                 size = 3,
  #                 segment.color = rgb(0.5,0.5,0.9),
  #                 segment.size = 0.8) +
  scale_fill_brewer(palette = "OrRd", name='Positive Cases') +
  labs(x='Longitude', y='Latitude', 
       title='Positive Covid Cases');


plot2<-ggplot(ACS_Pop_Local) + 
  geom_sf(aes(fill=posi_cat)) +
  #geom_sf(data=NYC_Nursing_PerZIP_sf, size = 0.1) +#colour=ptColor, size=0.1) +
  #geom_label_repel(data = labelData2,
  #                 aes(x=x,y=y,label = Rank_Final),
  #                 label.size = .09,
  #                 size = 3,
  #                 segment.color = rgb(0.5,0.5,0.9),
  #                 segment.size = 0.8) +
  scale_fill_brewer(palette = "OrRd", name='Positive Cases') +
  labs(x='Longitude', y='Latitude', 
       title='Positive Covid Cases');



ggarrange(plot1, plot2, nrow = 2, ncol = 1) %>%
  ggexport(filename = file.path(getwd(), '/sf_plot_ggarrange.pdf'),
           width=6.50, height=8.00, # the unit is inch
           pointsize = 9)
## file saved to D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module13/R-Spatial_III_Lab//sf_plot_ggarrange.pdf

Task 3 -Create a web-based interactive map for COIVD-19 data using tmap or leaflet package and save it as a HTML file.