In this report, we download the 2020 ACS 5-year census-tract data for Atlanta (Fulton and DeKalb counties), together with Yelp data on the bike rental shops within Atlanta, to explore how the location of bike rental shops is associated with the bike-commuting share and the income level of local residents.
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
# Download tract-level ACS 5-year estimates, with tract geometry, for Fulton
# and DeKalb counties, GA. The result is a wide sf object in which each
# requested variable appears as an <name>E (estimate) column.
FD_tract <- suppressMessages(
  get_acs(
    geography = "tract", # or "block group", "county", "state" etc.
    state = "GA",
    county = c("Fulton", "Dekalb"),
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003",
      # Commuting modes (table B08006):
      trans.total = "B08006_001",
      trans.car = "B08006_002",
      trans.drovealone = "B08006_003",
      trans.carpooled = "B08006_004", # 005-007 (carpool size detail) not needed
      trans.pubtrans = "B08006_008", # 009-013 (public transport detail) not needed
      trans.bicycle = "B08006_014",
      trans.walk = "B08006_015",
      trans.WfH = "B08006_017",
      # Community environment:
      # B25105_001 is the median monthly housing cost in dollars. The code
      # previously requested B25104_001, which is the total-households count
      # of the "Monthly Housing Costs" table and not a median at all.
      med_housexp = "B25105_001"
    ),
    year = 2020,
    survey = "acs5", # American Community Survey 5-year estimate
    geometry = TRUE, # returns sf objects
    output = "wide" # wide vs. long
  )
)
##
|
| | 0%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 8%
|
|====== | 9%
|
|========== | 14%
|
|================= | 24%
|
|=================== | 27%
|
|===================== | 29%
|
|====================== | 31%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 46%
|
|========================================== | 60%
|
|================================================= | 69%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|====================================================== | 78%
|
|============================================================= | 87%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|======================================================================| 100%
# Keep GEOID plus the estimate ("E") columns, dropping the "E" suffix.
# The named vector reads new_name = "old_name"; Med_HHExp is the only column
# whose new name is more than the old name without its suffix.
FD_tract <- select(
  FD_tract,
  GEOID,
  all_of(c(
    hhincome = "hhincomeE",
    race.tot = "race.totE",
    race.white = "race.whiteE",
    race.black = "race.blackE",
    trans.total = "trans.totalE",
    trans.car = "trans.carE",
    trans.drovealone = "trans.drovealoneE",
    trans.carpooled = "trans.carpooledE",
    trans.pubtrans = "trans.pubtransE",
    trans.bicycle = "trans.bicycleE",
    trans.walk = "trans.walkE",
    trans.WfH = "trans.WfHE",
    Med_HHExp = "med_housexpE"
  ))
)
# Boundary of the place named "Atlanta" among all Georgia places (tigris).
atlanta <- filter(places("GA"), NAME == "Atlanta")
## Retrieving data for the year 2021
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 31%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 39%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|====================================== | 54%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|=================================================== | 72%
|
|===================================================== | 75%
|
|====================================================== | 77%
|
|========================================================= | 82%
|
|============================================================== | 88%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|=================================================================== | 95%
|
|======================================================================| 100%
# Overlay the county tracts with the Atlanta boundary (in red).
tm_shape(FD_tract) +
  tm_borders() +
  tm_shape(atlanta) +
  tm_borders(col = "red")
# Spatial subset: keep the tracts that intersect the Atlanta boundary
# (st_intersects is the predicate sf uses by default for `[`).
FD_tract_filter <- FD_tract[atlanta, , op = st_intersects]
# Map the retained tracts
tm_shape(FD_tract_filter) +
  tm_borders(lwd = 0.5, col = "blue") +
  tm_fill() +
  tm_legend()
# Working copy used by the rest of the analysis.
Atlanta_Cities <- FD_tract_filter
# Define Function: Get tract-wise radius
get_r <- function(poly, epsg_id){
  # Builds the search circle for one polygon.
  #   poly:    a single geometry; only its bounding box is used.
  #   epsg_id: CRS assigned to the points built from the bounding box.
  # Returns a one-row sf object holding the bounding-box centre point and a
  # `radius` column (1.2 x the centre-to-corner distance).

  # Bounding box of the polygon: xmin, ymin, xmax, ymax
  bb <- st_bbox(poly)
  # One corner of the bounding box (xmin, ymin)
  corner_pt <- st_sfc(st_point(c(bb[1], bb[2])), crs = epsg_id)
  # Mid-point of the bounding box
  center_xy <- c((bb[3] + bb[1]) / 2, (bb[4] + bb[2]) / 2)
  bb_center <- st_point(center_xy) %>% st_sfc(crs = epsg_id) %>% st_sf()
  # Corner-to-centre distance, enlarged by 20% so that the circle is a bit
  # larger than the Census Tract.
  # See the Yelp explanation of their radius parameter to see why we do this.
  bb_center$radius <- st_distance(corner_pt, bb_center) * 1.2
  bb_center
}
# Compute the search centre and radius (get_r) for every tract.
epsg_id <- 4326
# Re-project the layer once, outside the loop: the transform does not depend
# on the loop index, so repeating it per tract only wasted time.
tract_geoms <- Atlanta_Cities %>%
  st_transform(crs = epsg_id) %>%
  st_geometry()
# Pre-allocated list with one slot per tract
r4all_loop <- vector("list", length(tract_geoms))
# seq_along() yields an empty sequence for zero tracts, whereas 1:nrow()
# would iterate over c(1, 0)
for (i in seq_along(tract_geoms)) {
  r4all_loop[[i]] <- get_r(tract_geoms[[i]], epsg_id = epsg_id)
}
# Stack the one-row results into a single sf object
r4all_loop <- bind_rows(r4all_loop)
# Append the X and Y coordinates of each centre point as separate columns
ready_4_yelp <- mutate(
  r4all_loop,
  x = st_coordinates(r4all_loop)[, 1],
  y = st_coordinates(r4all_loop)[, 2]
)
# Visual check of the search circles
# Switch tmap to interactive viewing
tmap_mode("view")
# Use every row of the result.
# Each centre point is buffered by the radius computed above and drawn in
# red; the original tract polygons are outlined in blue on top.
ready_4_yelp[1:nrow(Atlanta_Cities), ] %>%
  st_buffer(., dist = .$radius) %>%
  tm_shape(.) +
  tm_polygons(alpha = 0.5, col = "red") +
  tm_shape(Atlanta_Cities) +
  tm_borders(col = "blue")
# Define a function for accessing Yelp API
get_yelp <- function(tract, category) {
  # ----------------------------------
  # Fetches every Yelp business of `category` around one search centre.
  #   tract:    one row of tract information with columns x (longitude),
  #             y (latitude) and radius.
  #   category: Yelp category name (string).
  # Returns a data frame of businesses, or, when Yelp reports >= 1000 matches,
  # the list holding only the first page (named with the number of pages that
  # would have been required), so the caller can split the area up.
  # The API key is read from the global `yelp_api` if it exists, otherwise
  # from the environment variable of the same name.

  # Pause before hitting the API (presumably for rate limiting)
  Sys.sleep(1)

  # Resolve the key once so that every request uses the same one. Previously
  # the first request used the global `yelp_api` and the following pages used
  # Sys.getenv("yelp_api"), which broke whenever only one of them was set.
  api_key <- if (exists("yelp_api")) get("yelp_api") else Sys.getenv("yelp_api")
  if (!is.character(api_key) || length(api_key) != 1 || !nzchar(api_key)) {
    stop("No Yelp API key found in `yelp_api` (object or environment variable).",
         call. = FALSE)
  }

  page_size <- 50
  # One request for the given 1-based page of results
  fetch_page <- function(page) {
    business_search(api_key = api_key,
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size, # = 0 for the first page
                    radius = round(tract$radius),
                    limit = page_size)
  }

  # First request --------------------------------------------------------------
  resp <- fetch_page(1)
  # Calculate how many requests are needed in total
  required_n <- ceiling(resp$total / page_size)
  # out is where the results will be appended to.
  out <- vector("list", required_n)
  out[[1]] <- resp$businesses
  # Name the first element with required_n so that, if there are 1000 or more
  # businesses, we know how many pages there were.
  names(out)[1] <- required_n

  if (resp$total >= 1000) {
    # Stop before requesting more pages because we need to break down the
    # Census Tract to something smaller. (The old message printed the page
    # counter, always 1, as if it were a row number.)
    print(glue::glue("Search area at ({tract$y}, {tract$x}) has >= 1000 businesses."))
    return(out)
  }

  # Remaining pages (none when required_n <= 1) --------------------------------
  for (page in seq_len(required_n)[-1]) {
    out[[page]] <- fetch_page(page)$businesses
  }

  # Merge all elements in the list into a single data frame
  bind_rows(out)
}
# Query Yelp once per tract search circle and collect the results.
# One pre-allocated slot per row of ready_4_yelp
yelp_bikerental_list <- vector("list", nrow(ready_4_yelp))
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_bikerental_list[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp[row, ], "bikerentals")
  )
  # Progress indicator every 10 rows
  if (row %% 10 == 0) {
    print(paste0("Current row:", row))
  }
}
# Stack the per-tract results into one tibble
yelp_bikerental <- yelp_bikerental_list %>% bind_rows() %>% as_tibble()
# calculate the number
paste0(" how many bike rental places?: ", nrow(yelp_bikerental))
yelp_bikerental %>% print(width = 1000)
* Save the Yelp data as an .RData file
```{r,eval=FALSE}
# Write the Yelp results to an .RData file so the API need not be queried again
save(list = "yelp_bikerental", file = "yelp_bikerental.RData")
# Restore the Yelp results from that .RData file
load(file = "yelp_bikerental.RData")
# 1.Delete Duplicated Rows
# Search circles overlap, so the same business is returned for several
# tracts; keep the first row for each Yelp business id.
yelp_unique <- yelp_bikerental %>%
  distinct(id, .keep_all = TRUE) # TRUE rather than T, which can be reassigned
glue::glue("Before dropping duplicated rows, there were {nrow(yelp_bikerental)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>%
  print()
## Before dropping duplicated rows, there were 91 rows. After dropping them, there are 10 rows
# 2.Flatten nested columns
concate_list <- function(x){
  # Joins the "title" element of x into one comma-separated string.
  str_c(x[["title"]], collapse = ", ")
}
yelp_flat <- yelp_unique %>%
  # Spread data-frame columns out into ordinary columns
  jsonlite::flatten() %>%
  # Collapse each list-column entry into one comma-separated string
  mutate(
    transactions = map_chr(transactions, ~ str_c(.x, collapse = ", ")),
    location.display_address = map_chr(
      location.display_address,
      ~ str_c(.x, collapse = ", ")
    ),
    # concate_list is the custom function defined for the category titles
    categories = map_chr(categories, concate_list)
  )
# 3.Delete rows that have missing data in coordinates
# Count the NAs in every column to see whether the coordinates have any
map_dbl(yelp_flat, ~ sum(is.na(.x)))
## id alias name
## 0 0 0
## image_url is_closed url
## 0 0 0
## review_count categories rating
## 0 0 0
## transactions price phone
## 0 6 0
## display_phone distance coordinates.latitude
## 0 0 0
## coordinates.longitude location.address1 location.address2
## 0 1 3
## location.address3 location.city location.zip_code
## 3 0 0
## location.country location.state location.display_address
## 0 0 0
# woohoo~ there is no NA in 'coordinates' variable
# 4.Delete rows that fall outside of the boundary of Atlanta
# Turn the longitude/latitude columns into point geometry (EPSG:4326)
yelp_sf <- st_as_sf(
  yelp_flat,
  coords = c("coordinates.longitude", "coordinates.latitude"),
  crs = 4326
)
# Put the Atlanta boundary in the same CRS as the points
atlanta_sf <- atlanta %>% st_transform(crs = 4326)
# Keep the Yelp points that lie inside the Atlanta boundary; the boundary's
# attribute columns are attached to each retained point
yelp_in <- st_intersection(yelp_sf, atlanta_sf)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# Shops (red dots) over the tract outlines
tm_shape(yelp_in) +
  tm_dots(col = "red") +
  tm_shape(Atlanta_Cities) +
  tm_borders()
## Bring both layers into the same CRS
FD_tract_Geom <- Atlanta_Cities %>% st_transform(crs = 4326)
Yelp_in_Geom <- yelp_in %>% st_transform(crs = 4326)
# Attach the attributes of each intersecting shop to its tract; tracts
# without a shop are kept with NA in the shop columns
rentalshop_in_tract <- st_join(FD_tract_Geom, Yelp_in_Geom, join = st_intersects)
skim(rentalshop_in_tract)
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No
## user-defined `sfl` provided. Falling back to `character`.
| Name | rentalshop_in_tract |
| Number of rows | 214 |
| Number of columns | 53 |
| _______________________ | |
| Column type frequency: | |
| character | 34 |
| logical | 1 |
| numeric | 18 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| GEOID.x | 0 | 1.00 | 11 | 11 | 0 | 214 | 0 |
| id | 207 | 0.03 | 22 | 22 | 0 | 7 | 0 |
| alias | 207 | 0.03 | 14 | 39 | 0 | 7 | 0 |
| name | 207 | 0.03 | 4 | 29 | 0 | 7 | 0 |
| image_url | 207 | 0.03 | 68 | 68 | 0 | 7 | 0 |
| url | 207 | 0.03 | 171 | 196 | 0 | 7 | 0 |
| categories | 207 | 0.03 | 29 | 52 | 0 | 7 | 0 |
| transactions | 207 | 0.03 | 0 | 0 | 7 | 1 | 0 |
| price | 212 | 0.01 | 2 | 2 | 0 | 1 | 0 |
| phone | 207 | 0.03 | 12 | 12 | 0 | 7 | 0 |
| display_phone | 207 | 0.03 | 14 | 14 | 0 | 7 | 0 |
| location.address1 | 208 | 0.03 | 0 | 22 | 2 | 5 | 0 |
| location.address2 | 210 | 0.02 | 0 | 7 | 2 | 3 | 0 |
| location.address3 | 209 | 0.02 | 0 | 0 | 5 | 1 | 0 |
| location.city | 207 | 0.03 | 7 | 7 | 0 | 1 | 0 |
| location.zip_code | 207 | 0.03 | 5 | 5 | 0 | 6 | 0 |
| location.country | 207 | 0.03 | 2 | 2 | 0 | 1 | 0 |
| location.state | 207 | 0.03 | 2 | 2 | 0 | 1 | 0 |
| location.display_address | 207 | 0.03 | 17 | 48 | 0 | 7 | 0 |
| STATEFP | 207 | 0.03 | 2 | 2 | 0 | 1 | 0 |
| PLACEFP | 207 | 0.03 | 5 | 5 | 0 | 1 | 0 |
| PLACENS | 207 | 0.03 | 8 | 8 | 0 | 1 | 0 |
| GEOID.y | 207 | 0.03 | 7 | 7 | 0 | 1 | 0 |
| NAME | 207 | 0.03 | 7 | 7 | 0 | 1 | 0 |
| NAMELSAD | 207 | 0.03 | 12 | 12 | 0 | 1 | 0 |
| LSAD | 207 | 0.03 | 2 | 2 | 0 | 1 | 0 |
| CLASSFP | 207 | 0.03 | 2 | 2 | 0 | 1 | 0 |
| PCICBSA | 207 | 0.03 | 1 | 1 | 0 | 1 | 0 |
| PCINECTA | 207 | 0.03 | 1 | 1 | 0 | 1 | 0 |
| MTFCC | 207 | 0.03 | 5 | 5 | 0 | 1 | 0 |
| FUNCSTAT | 207 | 0.03 | 1 | 1 | 0 | 1 | 0 |
| INTPTLAT | 207 | 0.03 | 11 | 11 | 0 | 1 | 0 |
| INTPTLON | 207 | 0.03 | 12 | 12 | 0 | 1 | 0 |
| geometry | 0 | 1.00 | 193 | 1792 | 0 | 214 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| is_closed | 207 | 0.03 | 0 | FAL: 7 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| hhincome | 7 | 0.97 | 77985.67 | 53112.94 | 13577.00 | 39150.50 | 67115.0 | 96380.50 | 250001.00 | ▇▇▂▁▁ |
| race.tot | 0 | 1.00 | 2971.30 | 1217.12 | 0.00 | 2151.75 | 2873.5 | 3753.25 | 7401.00 | ▂▇▆▂▁ |
| race.white | 0 | 1.00 | 1236.98 | 1120.31 | 0.00 | 200.00 | 1006.0 | 1945.50 | 4890.00 | ▇▅▂▁▁ |
| race.black | 0 | 1.00 | 1439.59 | 1355.30 | 0.00 | 367.50 | 984.5 | 2295.50 | 6314.00 | ▇▃▂▁▁ |
| trans.total | 0 | 1.00 | 1521.87 | 712.68 | 0.00 | 1042.00 | 1450.0 | 1977.50 | 3905.00 | ▃▇▆▂▁ |
| trans.car | 0 | 1.00 | 1070.67 | 578.75 | 0.00 | 609.00 | 1001.0 | 1357.75 | 2893.00 | ▅▇▅▂▁ |
| trans.drovealone | 0 | 1.00 | 980.86 | 549.44 | 0.00 | 557.00 | 922.5 | 1270.00 | 2751.00 | ▅▇▅▂▁ |
| trans.carpooled | 0 | 1.00 | 89.81 | 85.59 | 0.00 | 32.25 | 63.5 | 127.50 | 651.00 | ▇▂▁▁▁ |
| trans.pubtrans | 0 | 1.00 | 136.57 | 144.27 | 0.00 | 48.00 | 108.0 | 176.00 | 1194.00 | ▇▁▁▁▁ |
| trans.bicycle | 0 | 1.00 | 14.71 | 29.26 | 0.00 | 0.00 | 0.0 | 18.00 | 220.00 | ▇▁▁▁▁ |
| trans.walk | 0 | 1.00 | 65.24 | 116.51 | 0.00 | 0.00 | 20.0 | 69.75 | 788.00 | ▇▁▁▁▁ |
| trans.WfH | 0 | 1.00 | 194.77 | 154.54 | 0.00 | 76.25 | 156.5 | 298.50 | 816.00 | ▇▅▂▁▁ |
| Med_HHExp | 0 | 1.00 | 1272.24 | 529.83 | 0.00 | 924.00 | 1244.0 | 1652.75 | 2877.00 | ▂▇▇▅▁ |
| review_count | 207 | 0.03 | 43.00 | 51.90 | 1.00 | 6.00 | 15.0 | 72.50 | 128.00 | ▇▂▁▁▃ |
| rating | 207 | 0.03 | 3.43 | 1.67 | 1.00 | 2.50 | 4.5 | 4.50 | 4.50 | ▃▁▁▁▇ |
| distance | 207 | 0.03 | 2522.64 | 3445.54 | 274.39 | 399.03 | 449.6 | 3766.39 | 8603.62 | ▇▁▁▂▂ |
| ALAND | 207 | 0.03 | 350476504.00 | 0.00 | 350476504.00 | 350476504.00 | 350476504.0 | 350476504.00 | 350476504.00 | ▁▁▇▁▁ |
| AWATER | 207 | 0.03 | 2565326.00 | 0.00 | 2565326.00 | 2565326.00 | 2565326.0 | 2565326.00 | 2565326.00 | ▁▁▇▁▁ |
# Now count the bike rental shops by tract.
# rentalshop_in_tract keeps one row (with NA shop columns) for every tract
# that has no shop, so counting rows would report 1 for those tracts. Weight
# each row by whether it actually carries a shop id instead.
rental_count_tract <- count(
  as_tibble(rentalshop_in_tract),
  GEOID.x,
  wt = as.integer(!is.na(id))
)
# Join tract geometry with the bike rental shops, flagging each shop with
# count = 1 (tracts without a shop get NA)
test <- st_join(FD_tract_Geom, Yelp_in_Geom %>% mutate(count = 1))
# Sum the flags per tract; na.rm turns shop-less tracts into 0
out <- test %>%
  group_by(GEOID.x) %>%
  summarise(count = sum(count, na.rm = TRUE))
# Visualization: shop counts per tract with the shop locations on top
tm_shape(out) +
  tm_polygons(col = "count") +
  tm_shape(Yelp_in_Geom) +
  tm_dots()
## Attach the per-tract shop counts to the tract data (the geometry of `out`
## is dropped first so that this is a plain attribute join)
FD_tract_Geom_rental <- left_join(
  FD_tract_Geom,
  st_drop_geometry(out),
  by = c("GEOID" = "GEOID.x")
)
# Derived variables: share of commuters who bike / who use public transport
FD_Add_Var <- FD_tract_Geom_rental
FD_Add_Var$PctBike <- with(FD_Add_Var, trans.bicycle / trans.total)
FD_Add_Var$PctPub <- with(FD_Add_Var, trans.pubtrans / trans.total)
# Map each variable with the shop locations (blue dots) on top
tmap_mode("view")
## tmap mode set to interactive viewing
## tmap mode set to interactive viewing
# Housing expense and household income, side by side
HH_exp <- tm_shape(FD_Add_Var) +
  tm_polygons("Med_HHExp") +
  tm_shape(yelp_in) +
  tm_dots(col = "blue")
HH_income <- tm_shape(FD_Add_Var) +
  tm_polygons("hhincome") +
  tm_shape(yelp_in) +
  tm_dots(col = "blue")
tmap_arrange(HH_income, HH_exp)
# Share of biking and share of public transportation, side by side
PBiking <- tm_shape(FD_Add_Var) +
  tm_polygons("PctBike") +
  tm_shape(yelp_in) +
  tm_dots(col = "blue")
PPub <- tm_shape(FD_Add_Var) +
  tm_polygons("PctPub") +
  tm_shape(yelp_in) +
  tm_dots(col = "blue")
tmap_arrange(PBiking, PPub)
It seems that household income and housing expense are a bit more correlated.
# Binary indicator: 1 when the tract contains at least one bike rental shop,
# 0 otherwise
FD_Add_Var$HaveBikeRental <- as.numeric(FD_Add_Var$count > 0)
FD_Add_Var$HaveBikeRental
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Clean up the data: drop the tracts whose household income is missing.
# is.na() on the whole data frame returns a logical matrix (rows x columns);
# using that matrix as a row index removes nothing and instead appends
# thousands of all-NA rows. Index with the NA mask of the one column instead.
FD_Add_Var_dropnaHH2 <- FD_Add_Var[!is.na(FD_Add_Var$hhincome), ]
# To check if it is still a sf file
class(FD_Add_Var_dropnaHH2)
## [1] "sf" "data.frame"
# Compare tracts with and without a bike rental shop, one variable per plot.
# Household income
boxplot(
  hhincome ~ HaveBikeRental,
  data = FD_Add_Var_dropnaHH2,
  main = "Boxplot of Bike Rental Shop by Income",
  xlab = "Whether Bike Rental Shop are present",
  ylab = "Household median income"
)
# Household expense
boxplot(
  Med_HHExp ~ HaveBikeRental,
  data = FD_Add_Var_dropnaHH2,
  main = "Boxplot of Bike Rental Shop by Household Expense",
  xlab = "Whether Bike Rental Shop are present",
  ylab = "Household median Expense"
)
# Share of commuters who bike
boxplot(
  PctBike ~ HaveBikeRental,
  data = FD_Add_Var_dropnaHH2,
  main = "Boxplot of Bike Rental Shop by Percentage of Biking Mode",
  xlab = "Whether Bike Rental Shop are present",
  ylab = "% Biking"
)
# Share of commuters who use public transportation
boxplot(
  PctPub ~ HaveBikeRental,
  data = FD_Add_Var_dropnaHH2,
  main = "Boxplot of Bike Rental Shop by Percentage of Public Trans",
  xlab = "Whether Bike Rental Shop are present",
  ylab = "% Public Transportation"
)
# Logistic regression: presence of a bike rental shop on household income
binary_Bikerental1 <- glm(
  HaveBikeRental ~ hhincome,
  family = binomial,
  data = FD_Add_Var_dropnaHH2
)
summary(binary_Bikerental1)
##
## Call:
## glm(formula = HaveBikeRental ~ hhincome, family = binomial, data = FD_Add_Var_dropnaHH2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.063e+00 7.483e-01 -5.429 5.65e-08 ***
## hhincome 6.297e-06 6.320e-06 0.996 0.319
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.316 on 206 degrees of freedom
## Residual deviance: 53.441 on 205 degrees of freedom
## (3846 observations deleted due to missingness)
## AIC: 57.441
##
## Number of Fisher Scoring iterations: 6
# Logistic regression: presence of a bike rental shop on housing expense
binary_Bikerental2 <- glm(
  HaveBikeRental ~ Med_HHExp,
  family = binomial,
  data = FD_Add_Var_dropnaHH2
)
summary(binary_Bikerental2)
##
## Call:
## glm(formula = HaveBikeRental ~ Med_HHExp, family = binomial,
## data = FD_Add_Var_dropnaHH2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.2360750 1.1112540 -3.812 0.000138 ***
## Med_HHExp 0.0006271 0.0007258 0.864 0.387619
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 61.649 on 213 degrees of freedom
## Residual deviance: 60.901 on 212 degrees of freedom
## (3839 observations deleted due to missingness)
## AIC: 64.901
##
## Number of Fisher Scoring iterations: 6
For binary_Bikerental1, the p-value for hhincome is approximately 0.319, so hhincome is not statistically significant at the 0.05 significance level (p > 0.05). In other words, this model provides no evidence that household income (hhincome) has a statistically significant effect on the probability of a tract having a bike rental shop (HaveBikeRental).
For binary_Bikerental2, the p-value for Med_HHExp is approximately 0.388, so Med_HHExp is not statistically significant at the 0.05 significance level (p > 0.05). The output above reports the logistic regression model's coefficients, their significance, and the goodness of fit of the model. It suggests that the median household expenses (Med_HHExp) do not have a statistically significant effect on the probability of a tract having a bike rental shop (HaveBikeRental) in this model.
# Logistic regression: presence of a bike rental shop on the share of
# commuters who bike
binary_Bikerental3 <- glm(
  HaveBikeRental ~ PctBike,
  family = binomial,
  data = FD_Add_Var_dropnaHH2
)
summary(binary_Bikerental3)
##
## Call:
## glm(formula = HaveBikeRental ~ PctBike, family = binomial, data = FD_Add_Var_dropnaHH2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.4998 0.4399 -7.956 1.78e-15 ***
## PctBike 11.3858 15.6457 0.728 0.467
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 61.448 on 210 degrees of freedom
## Residual deviance: 61.022 on 209 degrees of freedom
## (3842 observations deleted due to missingness)
## AIC: 65.022
##
## Number of Fisher Scoring iterations: 6
For PctBike, the p-value is approximately 0.467, so PctBike is not statistically significant at the 0.05 significance level (p > 0.05). It suggests that the percentage of people commuting by bike (PctBike) does not have a statistically significant effect on the probability of a tract having a bike rental shop (HaveBikeRental) in this model.
The location and number of bike rental shops in Atlanta do not appear to be associated with transportation mode, household income, or household expense at the tract level. Further research is needed.