Step 1. Download the Census ACS 5-year estimate data for Census Tracts in Fulton and DeKalb counties, focusing on commuting.

# Download 2019 ACS 5-year tract estimates, with geometry, for Fulton and
# DeKalb counties. Each entry of `variables` maps a friendly name to an ACS
# variable code.
FD_tract <- suppressMessages(
  get_acs(
    geography = "tract", # or "block group", "county", "state" etc.
    state = "GA",
    county = c("Fulton", "Dekalb"),
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003",
      trans.total = "B08006_001",
      trans.car = "B08006_002",
      trans.drovealone = "B08006_003",
      trans.carpooled = "B08006_004",
      trans.pubtrans = "B08006_008",
      trans.bicycle = "B08006_014",
      trans.walk = "B08006_015",
      trans.WfH = "B08006_017",
      # B25105_001 is the median monthly housing cost in dollars. The code
      # used before (B25104_001) is the count of occupied housing units,
      # which is not a median expense.
      med_housexp = "B25105_001",
      med_realestate_taxes = "B25103_001"
    ),
    year = 2019,
    survey = "acs5", # American Community Survey 5-year estimate
    geometry = TRUE, # returns sf objects
    output = "wide"  # wide vs. long
  )
)

Clean the variables

# Keep GEOID plus the estimate ("E") columns, dropping the "E" suffix.
# Syntax inside select() is new_name = old_name.
FD_tract <- select(
  FD_tract,
  GEOID,
  hhincome         = hhincomeE,
  race.tot         = race.totE,
  race.white       = race.whiteE,
  race.black       = race.blackE,
  trans.total      = trans.totalE,
  trans.car        = trans.carE,
  trans.drovealone = trans.drovealoneE,
  trans.carpooled  = trans.carpooledE,
  trans.pubtrans   = trans.pubtransE,
  trans.bicycle    = trans.bicycleE,
  trans.walk       = trans.walkE,
  trans.WfH        = trans.WfHE,
  Med_HHExp        = med_housexpE,
  med_RETaxes      = med_realestate_taxesE
)

# Interactive maps of the income and housing-expense columns, side by side.
tmap_mode("view")
## tmap mode set to interactive viewing
hhincome <- tm_shape(FD_tract) +
  tm_polygons("hhincome")
HH_exp <- tm_shape(FD_tract) +
  tm_polygons("Med_HHExp")
tmap_arrange(hhincome, HH_exp)

Step 2. Download the City of Atlanta boundary and use it to filter the Census Tracts that either fall within or intersect with the City of Atlanta.

# Fetch every Georgia place and keep the one named "Atlanta".
atlanta <- filter(places("GA"), NAME == "Atlanta")

# Quick visual check of the boundary.
tmap_mode("view")
tm_shape(atlanta) +
  tm_polygons("NAMELSAD")


epsg_id <- 4326

# FD_tract was downloaded with geometry = TRUE, so it is already an sf object;
# no st_as_sf() conversion is needed. Re-project it with st_transform():
# assigning through `st_crs(x) <-` only relabels the coordinates and does not
# re-project them.
FD_tract_sf <- st_transform(FD_tract, crs = epsg_id)

# City of Atlanta boundary, re-projected to the same CRS as the tracts
atl_sf <- atlanta %>%
  st_sf() %>%
  st_transform(crs = epsg_id)

# Keep the tracts that fall within or touch the city boundary.
# NOTE: the name `intersect` masks base::intersect(); it is kept because the
# later steps refer to this object by that name.
intersect <- FD_tract_sf[st_union(atl_sf), , op = st_intersects]
kable(head(intersect, 5))
GEOID hhincome race.tot race.white race.black trans.total trans.car trans.drovealone trans.carpooled trans.pubtrans trans.bicycle trans.walk trans.WfH Med_HHExp med_RETaxes geometry
1 13121001100 109426 5193 4060 299 3972 2482 2350 132 276 71 632 455 3432 3699 MULTIPOLYGON (((-84.38782 3…
2 13121009603 83243 4830 3561 749 3293 2786 2742 44 63 0 22 303 3071 4869 MULTIPOLYGON (((-84.38738 3…
3 13121005800 42679 1479 300 1057 736 398 341 57 271 12 43 12 576 493 MULTIPOLYGON (((-84.41692 3…
5 13121009502 86053 3867 2627 796 2087 1774 1593 181 17 0 62 161 2254 1993 MULTIPOLYGON (((-84.39472 3…
9 13121004900 84655 2835 1531 1106 1665 1097 1023 74 162 28 90 277 1108 2864 MULTIPOLYGON (((-84.38779 3…
# Interactive map of the selected tracts, coloured by GEOID.
tmap_mode("view")
tm_shape(intersect) +
  tm_polygons("GEOID")

Step 3. Download Yelp data on categories = bikerentals for the City of Atlanta.

Function to get tract-wise radius

get_r <- function(poly, epsg_id, scale = 1.2){
  #---------------------
  # Takes: poly    - a single POLYGON or LINESTRING
  #        epsg_id - CRS assigned to the points built from the bounding box
  #        scale   - multiplier applied to the centre-to-corner distance
  #                  (default 1.2, the value previously hard-coded)
  # Outputs: a one-row sf object holding the centre of the bounding box as its
  #          geometry and a `radius` column equal to `scale` times the distance
  #          between that centre and a corner of the bounding box
  #---------------------

  # Get bounding box of a given polygon
  bb <- st_bbox(poly)

  # Lower-left corner of the bounding box
  bb_corner <- st_sfc(st_point(c(bb[["xmin"]], bb[["ymin"]])), crs = epsg_id)

  # Centre of the bounding box
  bb_center_x <- (bb[["xmax"]] + bb[["xmin"]]) / 2
  bb_center_y <- (bb[["ymax"]] + bb[["ymin"]]) / 2
  bb_center <- st_point(c(bb_center_x, bb_center_y)) %>%
    st_sfc(crs = epsg_id) %>%
    st_sf()

  # Distance between the corner and the centre
  r <- st_distance(bb_corner, bb_center)

  # Enlarge the radius so the circle is a bit larger than the Census Tract.
  # See the Yelp explanation of their radius parameter to see why we do this.
  bb_center$radius <- r * scale
  bb_center
}

Apply the above function to each Census Tract

# Use a functional (lapply) to run get_r() on every tract geometry, giving one
# centre point plus radius per Census Tract.
epsg_id <- 4326
r4all_apply <- lapply(
  st_transform(st_geometry(intersect), crs = epsg_id),
  function(tract_geom) get_r(tract_geom, epsg_id = epsg_id)
)

# Stack the one-row results, then expose the point coordinates as plain
# x / y columns.
r4all_apply <- bind_rows(r4all_apply)
ready_4_yelp <- mutate(
  r4all_apply,
  x = st_coordinates(r4all_apply)[, 1],
  y = st_coordinates(r4all_apply)[, 2]
)

Visualize the buffers

# Draw a buffer around each centre point using the radius computed for it
# (shown in red), with the original tract borders overlaid in blue.
tm_shape(st_buffer(ready_4_yelp, dist = ready_4_yelp$radius)) +
  tm_polygons(alpha = 0.3, col = "red") +
  tm_shape(intersect) +
  tm_borders(col = "blue")

Function to get the yelp data through the yelp API

# FUNCTION
get_yelp <- function(tract, category){
  # ----------------------------------
  # Queries the Yelp business search for the circle described by one tract row.
  # Takes: tract    - one row of tract information; reads tract$x (longitude),
  #                   tract$y (latitude) and tract$radius
  #        category - category name (str) sent with EVERY request
  # Outputs: a data frame with all pages of businesses bound together. If Yelp
  #          reports >= 1000 businesses, returns instead a list holding only
  #          the first page, named with the number of pages that were required.
  # ----------------------------------
  page_size <- 50

  # One page of results; `page` is 1-based.
  fetch_page <- function(page){
    business_search(api_key = Sys.getenv("yelp_api"),
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size,
                    radius = round(tract$radius),
                    limit = page_size)
  }

  # Pause before querying the API
  Sys.sleep(1)
  n <- 1

  # First request --------------------------------------------------------------
  # The first page previously hard-coded "bikerentals" and ignored `category`.
  resp <- fetch_page(n)

  # Calculate how many requests are needed in total
  required_n <- ceiling(as.numeric(resp$total) / page_size)

  # out is where the results will be appended to.
  out <- vector("list", required_n)

  # Store the business information to nth slot in out
  out[[n]] <- resp$businesses

  # Name the first element with required_n, so that if there are
  # >= 1000 businesses we know how many pages there were.
  names(out)[n] <- required_n

  # Report and stop early if there are 1000 or more businesses
  if (as.numeric(resp$total) >= 1000) {
    # glue formats string by inserting {n} with what's currently stored in object n.
    print(glue::glue("{n}th row has >= 1000 businesses."))
    # Stop before going into the loop because we need to
    # break down Census Tract to something smaller.
    return(out)
  }

  # Now we know required_n -----------------------------------------------------
  # Fetch the remaining pages
  n <- n + 1
  while (n <= required_n) {
    resp <- fetch_page(n)
    out[[n]] <- resp$businesses
    n <- n + 1
  } #<< end of while loop

  # Merge all elements in the list into a single data frame
  bind_rows(out)
}

Apply the get_yelp function to retrieve business data for each category (bikerentals)

# Prepare a collector with one slot per Census Tract
yelp_all_list <- vector("list", length = nrow(ready_4_yelp))

# Looping through all Census Tracts.
# seq_len() yields an empty sequence when there are no rows, whereas
# 1:nrow() would iterate over c(1, 0).
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_all_list[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp[row, ], "bikerentals")
  )
}

Step 4. Clean the data and map it out

Collapse the list into a single data frame

# Row-bind the per-tract results into a single tibble
yelp_bikes <- as_tibble(bind_rows(yelp_all_list))

Remove duplicates and count the number of businesses in each category

# Overlapping search circles return the same business more than once.
# De-duplicate on the Yelp business `id`: two different businesses can share
# a `name`, so de-duplicating on the name could drop a distinct business.
yelp_no_duplicates <- yelp_bikes[!duplicated(yelp_bikes$id), ]
yelp_bikes_count <- nrow(yelp_no_duplicates)
print(paste("The number of businesses under the category 'bikerentals' in Atlanta is:", yelp_bikes_count))

# Save the de-duplicated result to disk
save(yelp_no_duplicates, file = "/Users/helenalindsay/Documents/Fall_23/CP8883/yelp_bikes.RData")

Flatten nested columns that have multiple variables in one column.

# Load the saved Yelp results back from disk (the same path used by save()
# earlier in this file, which stored `yelp_no_duplicates`).
load("/Users/helenalindsay/Documents/Fall_23/CP8883/yelp_bikes.RData")
concate_list <- function(x){
  # Takes: x - a data frame with columns "alias" and "title", i.e. one
  #            element of Yelp$categories
  # Outputs: a single string with every title joined by ", "
  str_c(x[["title"]], collapse = ", ")
}

# 1. Flatten the data-frame columns into ordinary columns.
# 2. Collapse each list-column element into one comma-separated string.
yelp_flat <- jsonlite::flatten(yelp_no_duplicates) %>%
  mutate(
    transactions = map_chr(transactions, str_c, collapse = ", "),
    location.display_address = map_chr(location.display_address, str_c,
                                       collapse = ", "),
    categories = map_chr(categories, concate_list)
  )

Delete rows that have missing data in coordinates variable. It’s okay to have NAs in other variables.

# Drop rows where either coordinate is missing; NAs elsewhere are kept.
yelp_dropna <- drop_na(yelp_flat, coordinates.longitude, coordinates.latitude)

Extract coordinates for each business and filter by the City of Atlanta boundary

# Pull longitude / latitude out of the nested `coordinates` column, drop rows
# where either is missing, and build point geometries from them.
yelp_sf <- mutate(yelp_no_duplicates,
                  x = yelp_no_duplicates$coordinates$longitude,
                  y = yelp_no_duplicates$coordinates$latitude)
yelp_sf <- filter(yelp_sf, !(is.na(x) | is.na(y)))
yelp_sf <- st_as_sf(yelp_sf, coords = c("x", "y"), crs = epsg_id)

# Keep the points that intersect the union of the points whose Yelp city
# field is "Atlanta".
# NOTE(review): this selects on Yelp's `location$city` value, not on the
# `atlanta` boundary polygon -- confirm that is intended, since a business
# with an Atlanta address can lie outside the city limits.
yelp_sf <- yelp_sf[
  st_union(filter(yelp_sf, yelp_sf$location$city %in% c("Atlanta"))), ,
  op = st_intersects
]
kable(head(yelp_sf))
id alias name image_url is_closed url review_count categories rating coordinates transactions location phone display_phone distance price geometry
JkkHRgYj0mvdgbMXFm436w civil-bikes-atlanta Civil Bikes https://s3-media4.fl.yelpcdn.com/bphoto/JqTLT-chrqtbyuoB-52gdw/o.jpg FALSE https://www.yelp.com/biz/civil-bikes-atlanta?adjust_creative=2fwKSTvgAQf6j-1QQXYBxw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2fwKSTvgAQf6j-1QQXYBxw 11 bikerentals , historicaltours , Bike Rentals , Historical Tours 4.5 33.74468 NULL +14043238754 (404) 323-8754 366.1830 NA POINT (-84.37784 33.74468)
UmftRC3h0h_owHEm5ZLp7Q jump-atlanta-2 JUMP https://s3-media2.fl.yelpcdn.com/bphoto/D87H00XdLWZJS-LvQkTalA/o.jpg FALSE https://www.yelp.com/biz/jump-atlanta-2?adjust_creative=2fwKSTvgAQf6j-1QQXYBxw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2fwKSTvgAQf6j-1QQXYBxw 1 scooterrentals , bikerentals , Scooter Rentals, Bike Rentals 1.0 33.74827 NULL +18333006106 (833) 300-6106 1238.0773 NA POINT (-84.39146 33.74827)
b3nacMG8PR77GNCaI4RBKA atlanta-bicycle-barn-atlanta Atlanta Bicycle Barn https://s3-media3.fl.yelpcdn.com/bphoto/Ik2pMce41_MRcg3svjTbSQ/o.jpg FALSE https://www.yelp.com/biz/atlanta-bicycle-barn-atlanta?adjust_creative=2fwKSTvgAQf6j-1QQXYBxw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2fwKSTvgAQf6j-1QQXYBxw 128 bikerentals , bike_repair_maintenance, bikes , Bike Rentals , Bike Repair/Maintenance, Bikes 4.5 33.75833 NULL 151 Sampson St NE +17708732413 (770) 873-2413 1191.7885 \[ |POINT (-84.36515 33.75833) | |tMNV5bj4rqud0cRRQiPbWA |outback-bikes-atlanta |Outback Bikes |https://s3-media4.fl.yelpcdn.com/bphoto/rnpOTcs3WwjTq1JsfV1b8w/o.jpg |FALSE |https://www.yelp.com/biz/outback-bikes-atlanta?adjust_creative=2fwKSTvgAQf6j-1QQXYBxw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2fwKSTvgAQf6j-1QQXYBxw | 103|bikes , bike_repair_maintenance, bikerentals , Bikes , Bike Repair/Maintenance, Bike Rentals | 4.0|33.76756 |NULL |484 Moreland Ave NE |+14046884878 |(404) 688-4878 | 675.3105|\] POINT (-84.3495 33.76756)
rbf8bVY0cuqyGZtbn691lg pedego-electric-bikes-atlanta-atlanta-2 Pedego Electric Bikes Atlanta https://s3-media2.fl.yelpcdn.com/bphoto/Z7KlxC0vcyoxOCSxbH3Rrg/o.jpg FALSE https://www.yelp.com/biz/pedego-electric-bikes-atlanta-atlanta-2?adjust_creative=2fwKSTvgAQf6j-1QQXYBxw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2fwKSTvgAQf6j-1QQXYBxw 15 bikes , bikerentals , bike_repair_maintenance, Bikes , Bike Rentals , Bike Repair/Maintenance 4.5 33.74172 NULL 414 Bill Kennedy Way +14049753915 (404) 975-3915 1452.7682 NA POINT (-84.35797 33.74172)
BozJwfoXvoDEUj-sgr7WDg podium-multisport-atlanta Podium Multisport https://s3-media1.fl.yelpcdn.com/bphoto/T78pUkBulcQ5osv4mVGTiQ/o.jpg FALSE https://www.yelp.com/biz/podium-multisport-atlanta?adjust_creative=2fwKSTvgAQf6j-1QQXYBxw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2fwKSTvgAQf6j-1QQXYBxw 20 bikes , bikerentals , Bikes , Bike Rentals 4.5 33.80510 NULL 1167 Zonolite Pl NE +14048923400 (404) 892-3400 2221.7502 $$$ POINT (-84.34143 33.8051)
# Tract borders with the bike rental businesses on top, coloured by rating.
tm_shape(intersect) +
  tm_borders() +
  tm_shape(yelp_sf) +
  tm_dots(
    col = "rating",
    id = "name",
    popup.vars = c("review_count", "rating")
  )

Step 5. Examine the associations among the variables

Appending Census data

# One row per tract-business pair (tracts with no business are kept)
bike_in_tract <- st_join(intersect, yelp_sf, join = st_intersects)

# Join tract geometry with the number of bike rentals in tract
test <- st_join(intersect, yelp_sf %>% mutate(count = 1))
out <- test %>%
  group_by(GEOID) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  summarise(count = sum(count, na.rm = TRUE))

# Check to see if the polygons and the point data on bike rentals match
tm_shape(out) +
  tm_polygons(col = "count") +
  tm_shape(yelp_sf) +
  tm_dots()

## Join back the counts of bike rentals to the Tract data
FD_tract_Geom_bike <- intersect %>%
  left_join(out %>% st_set_geometry(NULL), by = "GEOID")

# Stored as a factor; it is later used as the response of the binomial glm()
FD_tract_Geom_bike$count <- as.factor(FD_tract_Geom_bike$count)

Calculate the distance to the nearest bike rental business for each tract

# Distance from every tract (rows) to every bike rental business (columns)
stopifnot(nrow(yelp_sf) > 0)
distance <- as.data.frame(sf::st_distance(FD_tract_Geom_bike, yelp_sf))

# Row-wise minimum across ALL business columns. The previous version listed
# V1..V7 by hand, which only works when there are exactly 7 businesses.
distance$min_value <- do.call(pmin, unname(as.list(distance)))

FD_tract_distance <- cbind(FD_tract_Geom_bike, distance$min_value)
FD_tract_distance <- FD_tract_distance %>%
  rename(Distance_to_nearest_rental = distance.min_value)

# Distance bins in meters. With right = FALSE each bin is [lower, upper), so
# the first bin covers 0 up to (but not including) 500.
breakpoints <- c(0, 500, 1000, 2000, 5000, 10000, 20000)
labels <- c("0-500", "500-1000", "1000-2000", "2000-5000", "5000-10000", "10000-20000")

# Create a new column with the categories
FD_tract_distance$Categories <- cut(
  FD_tract_distance$Distance_to_nearest_rental,
  breaks = breakpoints,
  labels = labels,
  right = FALSE
)

Visualize the output

# Map the nearest-rental distance per tract, with the businesses as dots
tm_shape(FD_tract_distance) +
  tm_polygons(
    col = "Distance_to_nearest_rental",
    breaks = c(0, 500, 1000, 2000, 5000, 10000, 20000)
  ) +
  tm_shape(yelp_sf) +
  tm_dots()

# Look for missing values
print(skim(FD_tract_distance))

# Remove tracts lacking real estate taxes first, then those lacking income
bike_census_dropna1 <- FD_tract_distance[
  which(!is.na(FD_tract_distance$med_RETaxes)),
]
bike_census_dropna2 <- bike_census_dropna1[
  which(!is.na(bike_census_dropna1$hhincome)),
]

# Confirm the 8 NAs are gone from hhincome
print(skim(bike_census_dropna2))

# Confirm the result is still an sf object
class(bike_census_dropna2)

Data exploration

# Household income by distance category
plt <- boxplot(
  hhincome ~ Categories,
  data = bike_census_dropna2,
  main = "Boxplot of Bike Rentals by Income",
  xlab = "Distance from nearest bike rental (meters)",
  ylab = "Household median income",
  cex.axis = 0.8
)

The boxplot of bike rentals by income seems to show a slightly inverse correlation between higher income tracts and distance from bike rental businesses, implying that higher income tracts tend to be in closer proximity with a bike rental business.

# Number of bike commuters by distance category
boxplot(
  trans.bicycle ~ Categories,
  data = bike_census_dropna2,
  main = "Boxplot of Bike Rentals by Number of Bike Commuters",
  xlab = "Distance from nearest bike rental (meters)",
  ylab = "Number of Bike Commuters",
  cex.axis = 0.8
)

The boxplot of bike rentals by number of bike commuters seems to show an inverse correlation between tracts with higher number of bike commuters and distance from bike rental businesses, implying that tracts with higher number of bike commuters tend to be in closer proximity with a bike rental business.

# Median real estate taxes by distance category
boxplot(
  med_RETaxes ~ Categories,
  data = bike_census_dropna2,
  main = "Boxplot of Bike Rentals by Median Real Estate Taxes",
  xlab = "Distance from nearest bike rental (meters)",
  ylab = "Median Real Estate Taxes",
  cex.axis = 0.8
)

The boxplot of bike rentals by median real estate taxes seems to show an inverse correlation between tracts with higher median real estate taxes and distance from bike rental businesses, implying that tracts with higher median real estate taxes tend to be in closer proximity with a bike rental business.

Binary logistic regression

# Logistic regression of the tract's `count` column on household income
binary_bike1 <- glm(
  count ~ hhincome,
  family = binomial,
  data = bike_census_dropna2
)
summary(binary_bike1)
## 
## Call:
## glm(formula = count ~ hhincome, family = binomial, data = bike_census_dropna2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.3181  -0.3132  -0.3119  -0.3108   2.4723  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.015e+00  7.520e-01  -4.009 6.09e-05 ***
## hhincome     2.715e-07  9.065e-06   0.030    0.976    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 56.285  on 146  degrees of freedom
## Residual deviance: 56.284  on 145  degrees of freedom
## AIC: 60.284
## 
## Number of Fisher Scoring iterations: 5
# Logistic regression of the tract's `count` column on bike commuters
binary_bike2 <- glm(
  count ~ trans.bicycle,
  family = binomial,
  data = bike_census_dropna2
)
summary(binary_bike2)
## 
## Call:
## glm(formula = count ~ trans.bicycle, family = binomial, data = bike_census_dropna2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.3394  -0.3125  -0.3097  -0.3097   2.4745  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.0136283  0.4429701  -6.803 1.02e-11 ***
## trans.bicycle  0.0008377  0.0098037   0.085    0.932    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 56.285  on 146  degrees of freedom
## Residual deviance: 56.277  on 145  degrees of freedom
## AIC: 60.277
## 
## Number of Fisher Scoring iterations: 5
# Logistic regression of the tract's `count` column on real estate taxes
binary_bike3 <- glm(
  count ~ med_RETaxes,
  family = binomial,
  data = bike_census_dropna2
)
summary(binary_bike3)
## 
## Call:
## glm(formula = count ~ med_RETaxes, family = binomial, data = bike_census_dropna2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.3356  -0.3175  -0.3082  -0.3043   2.4823  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.064e+00  5.947e-01  -5.152 2.58e-07 ***
## med_RETaxes  2.151e-05  1.398e-04   0.154    0.878    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 56.285  on 146  degrees of freedom
## Residual deviance: 56.261  on 145  degrees of freedom
## AIC: 60.261
## 
## Number of Fisher Scoring iterations: 5

Linear regression

# Household income as the response, distance to nearest rental as predictor
lm_bike1 <- lm(
  hhincome ~ Distance_to_nearest_rental,
  data = bike_census_dropna2
)
summary(lm_bike1)
## 
## Call:
## lm(formula = hhincome ~ Distance_to_nearest_rental, data = bike_census_dropna2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -56676 -36109  -7671  20899 141361 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                74037.855   5529.321  13.390   <2e-16 ***
## Distance_to_nearest_rental    -0.839      1.060  -0.792     0.43    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42700 on 145 degrees of freedom
## Multiple R-squared:  0.004304,   Adjusted R-squared:  -0.002563 
## F-statistic: 0.6268 on 1 and 145 DF,  p-value: 0.4298
# Bike commuters as the response, distance to nearest rental as predictor
lm_bike2 <- lm(
  trans.bicycle ~ Distance_to_nearest_rental,
  data = bike_census_dropna2
)
summary(lm_bike2)
## 
## Call:
## lm(formula = trans.bicycle ~ Distance_to_nearest_rental, data = bike_census_dropna2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.185 -20.483  -9.615   5.774 195.340 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                37.1850336  4.6255209   8.039 2.91e-13 ***
## Distance_to_nearest_rental -0.0040733  0.0008866  -4.595 9.35e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.72 on 145 degrees of freedom
## Multiple R-squared:  0.1271, Adjusted R-squared:  0.1211 
## F-statistic: 21.11 on 1 and 145 DF,  p-value: 9.346e-06
# Real estate taxes as the response, distance to nearest rental as predictor
lm_bike3 <- lm(
  med_RETaxes ~ Distance_to_nearest_rental,
  data = bike_census_dropna2
)
summary(lm_bike3)
## 
## Call:
## lm(formula = med_RETaxes ~ Distance_to_nearest_rental, data = bike_census_dropna2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3207.6 -2064.5  -696.4  1486.6  7306.0 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3480.96730  349.07002   9.972   <2e-16 ***
## Distance_to_nearest_rental   -0.09919    0.06690  -1.482     0.14    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2696 on 145 degrees of freedom
## Multiple R-squared:  0.01493,    Adjusted R-squared:  0.008137 
## F-statistic: 2.198 on 1 and 145 DF,  p-value: 0.1404

Correlation analysis

# Correlation of each variable with distance to the nearest rental
cor_bike1 <- with(bike_census_dropna2,
                  cor(hhincome, Distance_to_nearest_rental))
cor_bike1
## [1] -0.06560464
cor_bike2 <- with(bike_census_dropna2,
                  cor(trans.bicycle, Distance_to_nearest_rental))
cor_bike2
## [1] -0.3564872
cor_bike3 <- with(bike_census_dropna2,
                  cor(med_RETaxes, Distance_to_nearest_rental))
cor_bike3
## [1] -0.1221917

Conclusion

From the binary logistic regression analysis, I found that neither median household income, nor the number of bicycle commuters, nor median real estate taxes is statistically significantly related to the presence or absence of a bike rental business in a given tract.

I then used linear regression to relate each tract’s distance to the nearest bike rental business to the same three variables: median household income, number of bicycle commuters, and median real estate taxes. In each model the variable is the response and the distance is the predictor. I found that the number of bicycle commuters is statistically significantly related to the tract’s distance to the closest bike rental business, with a p-value of 9.35e-06. The other two variables were not statistically significantly related to distance.

Finally, I examined the correlation coefficients between each of the three variables and the distance to the nearest bike rental business. There is a weak negative correlation between median household income and distance from a bike rental business for each tract with a coefficient of -0.06560464. There is a moderate negative correlation between the number of bicycle commuters and distance from a bike rental business for each tract with a coefficient of -0.3564872. Lastly, there is a weak negative correlation between median real estate taxes and distance from a bike rental business for each tract with a coefficient of -0.1221917.

Overall, the analysis conducted in this project showed a glimpse of the spatial relationships between bike rental businesses and other socioeconomic variables. Although the scale of this analysis was small with only 7 data points for the dependent variable, I would be interested in exploring whether there could be a more significant finding when expanding the study area.