##Add packages

library(tidyverse)
library(tidycensus)
library(sf)
library(scales)
library(RColorBrewer)
library(plotly)
library(knitr)

##Import the raw data of Asian Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")

raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      asian = "B02001_005"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
asian <- raw_ancestry |> 
  mutate(pct_asian = asianE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

asian_map <- suppressWarnings( ggplot()  + 
  geom_sf(data = asian, 
          mapping = aes(fill = pct_asian,
                        text = paste0(NAME, ":",
                                      "<br>Percent Asian ancestry: ",
                                      percent(pct_asian, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       na.value = "#fafafa",
                       # na.value = "transparent",
                       name="Percent Asian Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, Asian Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ))

##Asian ancestry living in NYC.

ggplotly(asian_map, tooltip = "text")

##st_crs(asian)to print in console the projections of data frame

## import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
st_crs(nabes)

asian_2263 <- st_transform(asian, 2263)
st_crs(asian_2263)

#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

asian_nabes <- asian_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)

###Asian ancestry statistic in each borough

asian_nabe_stats <- st_drop_geometry(asian_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total Asian Population` = sum(asianM)) |> 
  mutate(`Est. Percent Asian Ancestry` = percent(`Est. Total Asian Population`/`Est. Total Population`, accuracy = 1)) 

##Import the raw data of Hispanic Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")

raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      hispanic = "B03001_003"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 

hispanic <- raw_ancestry |> 
  mutate(pct_hispanic = hispanicE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

hispanic_map <- ggplot()  + 
  geom_sf(data = hispanic, 
          mapping = aes(fill = pct_hispanic,
                        text = paste0(NAME, ":",
                                      "<br>Percent Hispanic ancestry: ",
                                      percent(pct_hispanic, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       palette = "Oranges",
                       na.value = "#Greens",
                       # na.value = "transparent",
                       name="Percent Hispanic Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, Hispanic Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  )

## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(hispanic_map, tooltip = "text")

##st_crs(hispanic)to print in console the projections of data frame

import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
st_crs(nabes)

hispanic_2263 <- st_transform(hispanic, 2263)
st_crs(hispanic_2263)

#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

hispanic_nabes <- hispanic_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)
hispanic_nabe_stats <- st_drop_geometry(hispanic_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total Hispanic Population` = sum(hispanicM)) |> 
  mutate(`Est. Percent Hispanic Ancestry` = percent(`Est. Total Hispanic Population`/`Est. Total Population`, accuracy = 1)) 

##Import the raw data of Black Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")

raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      black = "B02001_003"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 

black <- raw_ancestry |> 
  mutate(pct_black = blackE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

black_map <- ggplot()  + 
  geom_sf(data = black, 
          mapping = aes(fill = pct_black,
                        text = paste0(NAME, ":",
                                      "<br>Percent Black ancestry: ",
                                      percent(pct_black, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       palette = "PuRd",
                       na.value = "#YlOrRd",
                       # na.value = "transparent",
                       name="Percent Black Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, Black Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  )

## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(black_map, tooltip = "text")

##st_crs(black)to print in console the projections of data frame

## import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
st_crs(nabes)

black_2263 <- st_transform(black, 2263)
st_crs(black_2263)

#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

black_nabes <- black_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)

###Black ancestry statistic in each borough

black_nabe_stats <- st_drop_geometry(black_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total black Population` = sum(blackM)) |> 
  mutate(`Est. Percent black Ancestry` = percent(`Est. Total black Population`/`Est. Total Population`, accuracy = 1)) 

##Import the raw data of White Ancestry from the American Community Survey

#### import borough shapefiles from NYC Open Data
boros <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/processed/geo/Borough Boundaries.geojson")

raw_ancestry <- get_acs(geography = "tract", 
                        variables = c(ancestry_pop = "B04006_001",
                                      white = "B02001_002"), 
                        state='NY',
                        county = c('Kings','Bronx','New York','Queens','Richmond'),
                        geometry = T, 
                        year = 2020,
                        output = "wide") 

white <- raw_ancestry |> 
  mutate(pct_white = whiteE/ancestry_popE) |> 
  filter(ancestry_popE > 0)

##Create map by using ggplot

white_map <- ggplot()  + 
  geom_sf(data = white, 
          mapping = aes(fill = pct_white,
                        text = paste0(NAME, ":",
                                      "<br>Percent White ancestry: ",
                                      percent(pct_white, accuracy=1))),
          color = "transparent") +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       palette = "BuPu",
                       na.value = "#Set3",
                       # na.value = "transparent",
                       name="Percent White Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "NYC, White Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2016-20"
  ) 

## Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
## : Ignoring unknown aesthetics: text
ggplotly(white_map, tooltip = "text")

##st_crs(white)to print in console the projections of data frame

## import Neighborhood Tabulation Areas for NYC

nabes <- st_read("D:/Parson Schoool Of Design/Design Method/Lesson/Week1/Part2/data/raw/geo/nynta2020_23c/nynta2020.shp")
st_crs(nabes)

white_2263 <- st_transform(white, 2263)
st_crs(white_2263)

#remove unnecessary fields in the neighborhood shapefile
nabes_selected <- nabes |>
  select(BoroCode, BoroName, NTA2020, NTAName)

white_nabes <- white_2263 |>
  st_join(nabes_selected, 
          left = TRUE,
          join = st_intersects,
          largest = T)

###White ancestry statistic in each borough

white_nabe_stats <- st_drop_geometry(white_nabes) |> 
  group_by(NTAName) |> 
  summarise(Borough = first(BoroName),
            `Est. Total Population` = sum(ancestry_popE),
            `Est. Total white Population` = sum(whiteM)) |> 
  mutate(`Est. Percent white Ancestry` = percent(`Est. Total white Population`/`Est. Total Population`, accuracy = 1))