##Add packages

library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(RColorBrewer)
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
library(knitr)

##Import the raw data of Asian Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")

raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      asian = "B02001_005"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
asian <- raw_ancestry |> 
  mutate(pct_asian = asianE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

asian_map <- ggplot()  + 
  geom_sf(data = asian, 
          mapping = aes(fill = pct_asian,
                        text = paste0(NAME, ":",
                                      "<br>Percent Asian ancestry: ",
                                      percent(pct_asian, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       na.value = "#fafafa",
                       # na.value = "transparent",
                       name="Percent Asian Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, Asian Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) + 
  geom_sf(data = boros |> filter(boro_name == "Brooklyn"), 
          color = "White", fill = NA, lwd = .5)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text

##Asian ancestry living in NYC.

ggplotly(asian_map, tooltip = "text")

##st_crs(asian)to print in console the projections of data frame

## import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
## Reading layer `nynta2020' from data source 
##   `D:\Parson Schoool Of Design\Design Method\Lesson\Week1\Part2\data\raw\geo\nynta2020_23c\nynta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
st_crs(nabes)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
asian_2263 <- st_transform(asian, 2263)
st_crs(asian_2263)
## Coordinate Reference System:
##   User input: EPSG:2263 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

asian_nabes <- asian_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

###Asian ancestry statistic in each borough

asian_nabe_stats <- st_drop_geometry(asian_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total Asian Population` = sum(asianM)) |> 
  mutate(`Est. Percent Asian Ancestry` = percent(`Est. Total Asian Population`/`Est. Total Population`, accuracy = 1)) 

##Import the raw data of Hispanic Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")
## Reading layer `Borough Boundaries' from data source 
##   `D:\Parson Schoool Of Design\Design Method\Lesson\Week1\Part2\data\processed\geo\Borough Boundaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS 84
raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      hispanic = "B03001_003"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
hispanic <- raw_ancestry |> 
  mutate(pct_hispanic = hispanicE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

hispanic_map <- ggplot()  + 
  geom_sf(data = hispanic, 
          mapping = aes(fill = pct_hispanic,
                        text = paste0(NAME, ":",
                                      "<br>Percent Hispanic ancestry: ",
                                      percent(pct_hispanic, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       palette = "Oranges",
                       na.value = "#Greens",
                       # na.value = "transparent",
                       name="Percent Hispanic Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, Hispanic Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) + 
  geom_sf(data = boros |> filter(boro_name == "Brooklyn"), 
          color = "White", fill = NA, lwd = .5)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(hispanic_map, tooltip = "text")

##st_crs(hispanic)to print in console the projections of data frame

## import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
## Reading layer `nynta2020' from data source 
##   `D:\Parson Schoool Of Design\Design Method\Lesson\Week1\Part2\data\raw\geo\nynta2020_23c\nynta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
st_crs(nabes)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
hispanic_2263 <- st_transform(hispanic, 2263)
st_crs(hispanic_2263)
## Coordinate Reference System:
##   User input: EPSG:2263 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

hispanic_nabes <- hispanic_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
hispanic_nabe_stats <- st_drop_geometry(hispanic_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total Hispanic Population` = sum(hispanicM)) |> 
  mutate(`Est. Percent Hispanic Ancestry` = percent(`Est. Total Hispanic Population`/`Est. Total Population`, accuracy = 1)) 

##Import the raw data of Black Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")
## Reading layer `Borough Boundaries' from data source 
##   `D:\Parson Schoool Of Design\Design Method\Lesson\Week1\Part2\data\processed\geo\Borough Boundaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS 84
raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      black = "B02001_003"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
black <- raw_ancestry |> 
  mutate(pct_black = blackE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

black_map <- ggplot()  + 
  geom_sf(data = black, 
          mapping = aes(fill = pct_black,
                        text = paste0(NAME, ":",
                                      "<br>Percent Black ancestry: ",
                                      percent(pct_black, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       palette = "PuRd",
                       na.value = "#YlOrRd",
                       # na.value = "transparent",
                       name="Percent Black Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, Black Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) + 
  geom_sf(data = boros |> filter(boro_name == "Brooklyn"), 
          color = "White", fill = NA, lwd = .5)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(black_map, tooltip = "text")

##st_crs(black)to print in console the projections of data frame

## import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
## Reading layer `nynta2020' from data source 
##   `D:\Parson Schoool Of Design\Design Method\Lesson\Week1\Part2\data\raw\geo\nynta2020_23c\nynta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
st_crs(nabes)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
black_2263 <- st_transform(black, 2263)
st_crs(black_2263)
## Coordinate Reference System:
##   User input: EPSG:2263 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

black_nabes <- black_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

###Black ancestry statistic in each borough

black_nabe_stats <- st_drop_geometry(black_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total black Population` = sum(blackM)) |> 
  mutate(`Est. Percent black Ancestry` = percent(`Est. Total black Population`/`Est. Total Population`, accuracy = 1)) 

##Import the raw data of White Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")
## Reading layer `Borough Boundaries' from data source 
##   `D:\Parson Schoool Of Design\Design Method\Lesson\Week1\Part2\data\processed\geo\Borough Boundaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS 84
raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      white = "B02001_002"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
white <- raw_ancestry |> 
  mutate(pct_white = whiteE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

white_map <- ggplot()  + 
  geom_sf(data = white, 
          mapping = aes(fill = pct_white,
                        text = paste0(NAME, ":",
                                      "<br>Percent White ancestry: ",
                                      percent(pct_white, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       palette = "BuPu",
                       na.value = "#Set3",
                       # na.value = "transparent",
                       name="Percent White Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, White Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) + 
  geom_sf(data = boros |> filter(boro_name == "Brooklyn"), 
          color = "White", fill = NA, lwd = .5)
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(white_map, tooltip = "text")

##st_crs(white)to print in console the projections of data frame

## import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
## Reading layer `nynta2020' from data source 
##   `D:\Parson Schoool Of Design\Design Method\Lesson\Week1\Part2\data\raw\geo\nynta2020_23c\nynta2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 262 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
st_crs(nabes)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
white_2263 <- st_transform(white, 2263)
st_crs(white_2263)
## Coordinate Reference System:
##   User input: EPSG:2263 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

white_nabes <- white_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

###White ancestry statistic in each borough

white_nabe_stats <- st_drop_geometry(white_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total white Population` = sum(whiteM)) |> 
  mutate(`Est. Percent white Ancestry` = percent(`Est. Total white Population`/`Est. Total Population`, accuracy = 1))