library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidycensus)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
library(jsonlite)
##
## 载入程辑包:'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library(tidyverse)
library(httr)
library(jsonlite)
library(reshape2)
##
## 载入程辑包:'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(here)
## here() starts at C:/Users/11969/Desktop/Intro to Urban_Analytics/Assignment3
library(yelpr)
library(knitr)
yelpbike<-readRDS("C:/Users/11969/Desktop/Intro to Urban_Analytics/Assignment3/bikerentals.rds")
#Tidying Yelp data
yelp_unique <- yelpbike %>%
distinct(id, .keep_all=TRUE)
glue::glue("Before dropping NA, there were {nrow(yelpbike)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>%
print()
yelp_flat <- yelp_unique %>%
jsonlite::flatten() %>%
as_tibble()
yelp_concat <- yelp_flat %>%
mutate(transactions = transactions %>%
map_chr(., function(x) str_c(x, collapse=", ")),
location.display_address = location.display_address %>%
map_chr(., function(x) str_c(x, collapse=", ")))
concate_list <- function(x){
titles <- x[["title"]] %>% str_c(collapse = ", ")
return(titles)
}
yelp_flat2 <- yelp_concat %>%
mutate(categories = categories %>% map_chr(concate_list))
#identify whether there exists any NA values.
yelp_flat2 %>%
map_dbl(., function(x) sum(is.na(x)))
#verify that missing values in lat/long columns are in the same rows.
identical(is.na(yelp_flat2$coordinates.latitude),
is.na(yelp_flat2$coordinates.longitude))
# Drop them.
yelp_dropna1 <- yelp_flat2 %>%
drop_na(coordinates.longitude)
# Converting yelp_dropna1 into a sf object
yelp_sf <- yelp_dropna1 %>%
st_as_sf(coords=c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
# Make them in the same 'EPSG'
st_crs(yelp_sf) <- 4269
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
## To install your API key for use in future sessions, run this function with `install = TRUE`.
## To install your API key for use in future sessions, run this function with `install = TRUE`.
FD_tract <- suppressMessages(
get_acs(geography = "tract",
state = "GA",
county = c("Fulton", "Dekalb"),
variables = c(hhincome = 'B19019_001',
race.tot = "B02001_001",
race.white = "B02001_002",
race.black = "B02001_003",
trans.total = "B08006_001",
trans.car = "B08006_002",
trans.drovealone = "B08006_003",
trans.carpooled = "B08006_004",
trans.pubtrans = "B08006_008",
trans.bicycle = "B08006_014",
trans.walk = "B08006_015",
trans.WfH = "B08006_017",
med_housexp = "B25104_001",
med_realestate_taxes = "B25103_001"
),
year = 2020,
survey = "acs5",
geometry = TRUE,
output = "wide")
)
FD_tract <- FD_tract %>%
select(GEOID,
hhincome = hhincomeE, # New name = old name
race.tot = race.totE,
race.white = race.whiteE,
race.black = race.blackE,
trans.total = trans.totalE,
trans.car = trans.carE,
trans.drovealone = trans.drovealoneE,
trans.carpooled = trans.carpooledE,
trans.pubtrans = trans.pubtransE,
trans.bicycle = trans.bicycleE,
trans.walk = trans.walkE,
trans.WfH = trans.WfHE,
Med_HHExp = med_housexpE,
med_RETaxes = med_realestate_taxesE)
# sf subsets
yelp_in <- yelp_sf[FD_tract %>%
st_union(), ,op = st_intersects]
# Make them have same CRS
FD_tract_Geom <- st_transform(FD_tract, crs=4326)
Yelp_in_Geom <- st_transform(yelp_in, crs=4326)
#Show the bikerentals location
Bike_in_tract <- st_join(FD_tract_Geom, Yelp_in_Geom, join = st_intersects)
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(yelp_in) + tm_dots(col="red") + tm_shape(FD_tract) + tm_borders()
#The relationship between number of bicycle commuters and the location of Bikerentals
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(FD_tract) + tm_polygons( "trans.bicycle") + tm_shape(yelp_in) + tm_dots(col="blue")
All but one of the bikerentals are surrounded by bicycle Commuters areas, a side reflection of the fact that where there is demand, there is supply. The most densely populated bike commuters areas have the highest number of bikerentals.
# Join tract geometry with the number of Bikerentals in tract
test <- st_join(FD_tract_Geom, Yelp_in_Geom %>% mutate(count = 1))
out <- test %>%
group_by(GEOID) %>%
summarise(count = sum(count, na.rm = T))
# Now count the Bikerentals by tract
bike_count_tract <- count(as_tibble(Bike_in_tract), GEOID) %>%
print()
# join back the counts of Bikerentals to the Tract data
FD_tract_Geom_bike <- FD_tract_Geom %>%
left_join(out %>% st_set_geometry(NULL), by = "GEOID")
# Dropping the missing values
bike_census_dropnaHH2 <- FD_tract_Geom_bike[!is.na(FD_tract_Geom_bike$hhincome),]
# Drop the missing values of med_RETaxes
y_census_dropnaHHTX <- bike_census_dropnaHH2[!is.na(bike_census_dropnaHH2$med_RETaxes),]
Are bikerentals in places with high median household incomes?
#ggplot
ggplot(y_census_dropnaHHTX, aes(x=hhincome, y=count)) +
geom_point() +
ylab("Number of Bikerentals and Household Median Income in Tract")
#boxplot
y_census_dropnaHHTX$Bike <- ifelse(y_census_dropnaHHTX$count>0, 1, 0)
boxplot(hhincome~Bike, data=y_census_dropnaHHTX, main="Boxplot of Bikerentals by Income", xlab="Whether Bikerentals are present", ylab="Household median income")
#tmap
tm_shape(y_census_dropnaHHTX) + tm_polygons(col = "hhincome") + tm_shape(yelp_in) + tm_dots(col="blue")
As can be seen from this map, all Bikerentals are not far from higher income areas, and bicycle travel has become one of the most important travel options for people with higher income. The highest density of bikerentals also exists around the areas with the highest incomes. The denser the area, the more enclosed it is.