TP5-Canas Videla

Parte 1

Para este trabajo vamos a usar la ciudad de Boston. La idea es ver cómo funciona el sistema de prevención y apagado de incendios de la ciudad. Para esto vamos a usar dos datasets: 1) La ubicación de los departamentos de bomberos 2) La ubicación de los hidrantes

Bomberos de boston respondiendo ante una emergencia

Primero vamos a cargar los datasets

Vamos a graficar primero los hidrantes y los departamentos de bomberos

ggplot()+
  geom_sf (data = barrios)+
  geom_sf(data = fire_departments, color = "red", alpha = .8)+
  geom_sf(data = hydrants, color = "green", alpha = .02)+
  labs(title = "Departamentos de bomberos e hidrantes") +
  theme_void()

Ahora vamos a ver cuatos departamentos de bomberos hay en cada barrio

fire_dep <- fire_departments %>% 
  select(OBJECTID_1, LOCADDR, CT90,geometry)
barrio_firedep <- st_join(barrios, fire_dep)

## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Gbarrio_firedep <- barrio_firedep %>% 
  group_by(Name, SqMiles) %>% 
  summarise(cantidad = n())
  
ggplot()+
  geom_sf(data = Gbarrio_firedep, aes(fill = cantidad))+
  theme_void()+
  scale_fill_viridis_c(option = "D", direction = -1)+
  labs(title = "Cantidad de departamentos de bomberos por barrio",
         fill = "Cantidad")

Y ahora la cantidad de hidrantes

barrio_hydrants <- st_join(barrios, hydrants)

## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Gbarrio_hydrants <- barrio_hydrants %>% 
  group_by(Name, SqMiles) %>% 
  summarise(cantidad = n()) 
  

ggplot()+
  geom_sf(data = Gbarrio_hydrants, aes(fill = cantidad))+
  theme_void()+
  scale_fill_viridis_c(option = "D", direction = -1)+
  labs(title = "Cantidad de hidrantes por barrio",
         fill = "Cantidad")

Veamos cómo se distribuye la cantidad de hidrantes según cada barrio.

z_Gbarrio_fh <- Gbarrio_hydrants %>% st_set_geometry(NULL) %>%
  arrange(desc(cantidad))

z_Gbarrio_fh$Name <- as.factor(z_Gbarrio_fh$Name)

ggplot(z_Gbarrio_fh) +
  geom_bar(aes(x= levels(Name), weight = cantidad, fill = cantidad)) +
  labs(title = "Cantidad de hidrantes por barrio",
       x = "barrio",
       y = "cantidad") +
  theme(axis.text.x = element_text(angle = 90)) + 
  scale_fill_viridis_c(option = "D", direction = -1)

Por último, queremos crear un indicador de cobertura contra incendios. Para eso vamos a unir las dos tablas.

z_Gbarrio_firedep <- Gbarrio_firedep %>% st_set_geometry(NULL) %>%
  rename(cant_firedep = cantidad)
barrio_fd_hyd <- left_join(Gbarrio_hydrants, z_Gbarrio_firedep)

## Joining, by = c("Name", "SqMiles")

Ahora vamos a crear una nueva variable que determine la cantidad de fire hidrant por milla cuadrada.

barrio_fd_hyd <- barrio_fd_hyd %>%
  mutate(fh_sqmile = (cantidad/SqMiles))

Veamos un resumen de la nueva variable y de la cantidad de departamentos de bomberos.

summary(barrio_fd_hyd$fh_sqmile)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##    0.7752  284.7486  334.1065  430.4108  382.0455 1850.0000

summary(barrio_fd_hyd$cant_firedep)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.615   2.000   6.000

Por último vamos a construir el siguiente indicador de cobertura contra incendios en base a las estadísticas descriptivas anteriores: - Alta: más de 450 FH/SqMiles o por lo menos 2 departamentos de bomberos - Media: más de 330 FH/SqMiles y por lo menos 1 departamento de bomberos - Baja: menos de 330 FH/SqMiles

barrio_fd_hyd <- barrio_fd_hyd %>%
  mutate(cobertura = case_when(
    fh_sqmile >= 450 | cant_firedep >= 2 ~ "Alta",
    fh_sqmile >= 330 & cant_firedep >= 1 ~ "Media",
    fh_sqmile <= 330 ~ "Baja"
  ))

barrio_fd_hyd$cobertura <- factor(barrio_fd_hyd$cobertura, levels= c("Alta", "Media", "Baja"))

barrio_fd_hyd <- barrio_fd_hyd %>% mutate(cobertura = case_when( fh_sqmile >= 450 | cant_firedep >= 2 ~ “Alta”, fh_sqmile >= 330 & cant_firedep >= 1 ~ “Media”, fh_sqmile <= 330 ~ “Baja” )) Así quedaría entonces el mapa de cobertura:

ggplot()+
  geom_sf(data = barrio_fd_hyd, aes(fill = cobertura)) +
  scale_fill_brewer(type= "seq", palette =8, direction = -1 ) +
  labs(title = "Nivel de Cobertura contra incendio en Boston",
       fill = "Cobertura")+
  theme_void()

Parte 2

Primero vamos generar un bounding box y un polígono de los límites de la ciudad de Boston.

bounding_box <- getbb("Boston, MA, USA")
limites <- getbb ("Boston, MA, USA", format_out =  "sf_polygon")

Veamos si nuestros límites son los correctos.

leaflet() %>% 
  addTiles() %>%
  addPolygons(data = limites)

La geometría indica los límites políticos y administrativos de la ciudad de Boston, incluyendo el mar.

Descarguemos ahora las calles de la ciudad.

require(osmdata)
boston <- opq(bounding_box) %>% 
    add_osm_feature(key = "highway")
boston_sf <- boston %>% 
    osmdata_sf()
calles <- boston_sf$osm_lines
calles <- st_intersection(calles, limites)

## although coordinates are longitude/latitude, st_intersection assumes that they are planar

## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries

Veamos el mapa de calles

ggplot() +
    geom_sf(data = calles) +
  labs(title = "Calles de Boston",
       caption = "Fuente: OpenStreetMap") +
  theme_void()

Ahora veamos como son las velocidades máximas de las calles

table(calles$maxspeed)

## 
##  10 mph  15 mph  20 mph  24 mph  25 mph  30 mph  35 mph  40 mph  45 mph 
##       4       5      34       2    4902     301      63      14      50 
##   5 mph  55 mph   6 mph   7 mph signals 
##       7      97       2       2       4

Queremos obtener solo los valores numéricos de las calles, y las queremos pasar a kmh.

calles2 <- calles %>% 
  mutate(maxspeed_kmh = substr(maxspeed,1,3)) %>%
  mutate(maxspeed_kmh = 1.609*as.numeric(maxspeed_kmh))

## Warning: NAs introducidos por coerción

Veamos un resumen de las velocidades máximas

table(calles2$maxspeed_kmh)

## 
##  16.09 24.135  32.18 38.616 40.225  48.27 56.315  64.36 72.405 88.495 
##      4      5     34      2   4902    301     63     14     50     97

Veamos un gráfico de velocidades:

ggplot(calles2) +
    geom_sf(aes(color = maxspeed_kmh), alpha = 0.5) +
    scale_color_viridis_c(option = "D", direction = -1) +
      theme_void() +
    labs(title = "Boston",
         subtitle = "Vías de circulación",
         caption = "fuente: OpenStreetMap",
         color = "velocidad máxima")

A efectos de simplificar el gráfico, vamos a generar 3 categorías de calles: - Velocidades mayores a 60kmh: arteria de tránsito rápido - Velocidades mayores o iguales a 40kmh: calle - Velocidades menores a 40kmh: calle de velocidad reducida

calles2 %>%
  mutate(tipo = case_when(
    maxspeed_kmh >= 60 ~ "Arteria de tránsito rápido",
    maxspeed_kmh < 40 ~ "Calle de velocidad reducida",
    TRUE ~ "Calle")) %>%
  ggplot() +
    geom_sf(aes(color = tipo, fill = tipo)) +
    scale_color_brewer(type = "div", palette = 7) +
    scale_fill_brewer(type = "div", palette = 7) +
      theme_void() +
    labs(title = "Boston",
         subtitle = "Vías de circulación",
         caption = "fuente: OpenStreetMap")

Ahora vamos a trabajar con los hidrantes de la ciudad de Boston. Descarguemos los datos.

boston_fh <- opq(limites) %>% 
  add_osm_feature(key = "emergency", value = "fire_hydrant") %>% 
  osmdata_sf()
boston_fh <- st_intersection(boston_fh$osm_points, limites)

## although coordinates are longitude/latitude, st_intersection assumes that they are planar

## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries

ggplot() +
  geom_sf(data = barrios) +
  geom_sf(data = boston_fh, alpha = 0.5) +
  labs(title = "Boston",
       subtitle = "Hidrantes",
       caption = "fuente: OpenStreetMap") +
  theme_void()

Se observa en este mapa que hay pocos hidrantes en la ciudad de Boston, y se localizan concentrados por áreas. ¿Cuántos hidrantes hay por barrio según OpenStreetMap?

barrio_hydrantsOSM <- st_join(barrios, boston_fh)

## although coordinates are longitude/latitude, st_intersects assumes that they are planar

Gbarrio_hydrantsOSM <- barrio_hydrantsOSM %>% 
  group_by(Name, SqMiles) %>% 
  summarise(cantidadOSM = n())

ggplot()+
  geom_sf(data = Gbarrio_hydrantsOSM, aes(fill = cantidadOSM))+
  theme_void()+
  scale_fill_viridis_c(option = "D", direction = -1)+
  labs(title = "Cantidad de hidrantes por barrio",
         fill = "Cantidad")

¿Hay diferencias entre lo relevado en OpenStreetMap y la fuente oficial?

Gbarrio_hydrantsOSM <- Gbarrio_hydrantsOSM %>% st_set_geometry(NULL)
barrio_comp_hydrants <- left_join(Gbarrio_hydrants, Gbarrio_hydrantsOSM) %>%
  rename(Datos_Boston = cantidad,
         Datos_OSM = cantidadOSM)

## Joining, by = c("Name", "SqMiles")

gather_barrio <- gather(barrio_comp_hydrants, "tipo", "cantidad", 3:4)

gather_barrio2 <- gather_barrio %>% st_set_geometry(NULL) 


ggplot(gather_barrio2) +
  geom_bar(aes(x= Name, weight = cantidad, fill = cantidad)) +
  labs(title = "Cantidad de hidrantes por barrio",
       x = "barrio",
       y = "cantidad") +
  theme(axis.text.x = element_text(angle = 90)) + 
  scale_fill_viridis_c(option = "D", direction = -1) +
  facet_wrap(~tipo)

ggplot() +
  geom_sf(data = gather_barrio, aes(fill = cantidad)) +
  labs(title = "Cantidad de hidrantes por barrio",
       x = "barrio",
       y = "cantidad") +
  facet_wrap(~tipo) +
  theme_void()

Se observa que existe una diferencia entre los datos de OSM y los datos provistos por Boston. ¿Cuanta diferencia hay?

barrio_comp_hydrants  <- barrio_comp_hydrants  %>% 
  mutate(diferencia = Datos_Boston - Datos_OSM)

ggplot(barrio_comp_hydrants) +
  geom_bar(aes(x= Name, weight = diferencia, fill = diferencia)) +
  labs(title = "Diferencia de Hidrantes por Barrio",
       subtitle = "Diferencia entre los hidrantes según gobierno y según OSM",
       x = "barrio",
       y = "diferencia de hidrantes") +
  theme(axis.text.x = element_text(angle = 90)) + 
  scale_fill_viridis_c(option = "D", direction = -1)

ggplot() +
  geom_sf(data = barrio_comp_hydrants, aes(fill = diferencia)) +
  labs(title = "Diferencia de Cantidad de Hidrantes",
       subtitle = "Comparativa datos Ciudad de Boston y OSM") +
  scale_fill_viridis_c(option = "D", direction = -1) + 
  theme_void()

El histograma y el mapa muestran una gran diferencia entre los datos brindados por el gobierno de Boston, y los disponibles en OpenStreetMap. ¿A qué se deben las diferencias? Creemos que los hidrantes, al ser un tema de seguridad ante una emergencia, cobran mayor importancia para el gobierno local. Por otro lado, no parecería ser un tema de interés para toda la población ya que el mapeo de hidrantes se concentra por áreas. Es importante entender que los datos de OSM son datos brindados por los usuarios y dependen del interés e importancia que dichos usuarios consideren para cada tema.

Parte 3

Tweets sobre emergencias

Para este trabajo vamos a seguir usando la ciudad de Boston y vamos a capturar los tweets que contengan las palabras “fire”, “emergency” o “firefighter”.

require(rtweet)

## Loading required package: rtweet

## Warning: package 'rtweet' was built under R version 3.6.1

## 
## Attaching package: 'rtweet'

## The following object is masked from 'package:purrr':
## 
##     flatten

Para eso usamos el código

tweet_boston<- search_tweets(q = “fire OR emergency OR firefighter”, geocode = “42.364506,-71.038887,20mi”, n = 100000, retryonratelimit = TRUE)

Primero vamos a generar las columnas lan y long como vimos en clase.

Los guardamos como csv con el siguiente codigo: save_as_csv(tweet_boston, “tweets2”, prepend_ids = TRUE, na = "“, fileEncoding =”UTF-8")

Y lo volvemos a abrir

tweet_boston <- read_csv("tweets2.csv")

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   created_at = col_datetime(format = ""),
##   display_text_width = col_double(),
##   is_quote = col_logical(),
##   is_retweet = col_logical(),
##   favorite_count = col_double(),
##   retweet_count = col_double(),
##   quote_count = col_logical(),
##   reply_count = col_logical(),
##   symbols = col_logical(),
##   ext_media_type = col_logical(),
##   quoted_created_at = col_datetime(format = ""),
##   quoted_favorite_count = col_double(),
##   quoted_retweet_count = col_double(),
##   quoted_followers_count = col_double(),
##   quoted_friends_count = col_double(),
##   quoted_statuses_count = col_double(),
##   quoted_verified = col_logical(),
##   retweet_created_at = col_datetime(format = ""),
##   retweet_favorite_count = col_double(),
##   retweet_retweet_count = col_double()
##   # ... with 15 more columns
## )

## See spec(...) for full column specifications.

## Warning: 5 parsing failures.
##  row     col           expected     actual          file
## 2704 symbols 1/0/T/F/TRUE/FALSE fire       'tweets2.csv'
## 2705 symbols 1/0/T/F/TRUE/FALSE ACB FIRE   'tweets2.csv'
## 2706 symbols 1/0/T/F/TRUE/FALSE FIRE SPRWF 'tweets2.csv'
## 2800 symbols 1/0/T/F/TRUE/FALSE GE         'tweets2.csv'
## 8492 symbols 1/0/T/F/TRUE/FALSE LYFT       'tweets2.csv'

Exploramos un poco que contiene cada columna

head(tweet_boston)

## # A tibble: 6 x 89
##   user_id status_id created_at          screen_name text  source
##   <chr>   <chr>     <dttm>              <chr>       <chr> <chr> 
## 1 x26532~ x1163196~ 2019-08-18 21:11:12 Kwasiagyem~ "Pro~ Twitt~
## 2 x27940~ x1163196~ 2019-08-18 21:09:03 nebulisms   i fe~ Twitt~
## 3 x19597~ x1163195~ 2019-08-18 21:07:56 RyanJFiske  Here~ Twitt~
## 4 x51504~ x1163195~ 2019-08-18 21:07:02 Hedge_Fund~ Nove~ Twitt~
## 5 x11877~ x1163195~ 2019-08-18 21:06:37 BlindMike_  Here~ Twitt~
## 6 x18133~ x1163195~ 2019-08-18 21:05:16 Zaradia     It's~ Twitt~
## # ... with 83 more variables: display_text_width <dbl>,
## #   reply_to_status_id <chr>, reply_to_user_id <chr>,
## #   reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## #   favorite_count <dbl>, retweet_count <dbl>, quote_count <lgl>,
## #   reply_count <lgl>, hashtags <chr>, symbols <lgl>, urls_url <chr>,
## #   urls_t.co <chr>, urls_expanded_url <chr>, media_url <chr>,
## #   media_t.co <chr>, media_expanded_url <chr>, media_type <chr>,
## #   ext_media_url <chr>, ext_media_t.co <chr>,
## #   ext_media_expanded_url <chr>, ext_media_type <lgl>,
## #   mentions_user_id <chr>, mentions_screen_name <chr>, lang <chr>,
## #   quoted_status_id <chr>, quoted_text <chr>, quoted_created_at <dttm>,
## #   quoted_source <chr>, quoted_favorite_count <dbl>,
## #   quoted_retweet_count <dbl>, quoted_user_id <chr>,
## #   quoted_screen_name <chr>, quoted_name <chr>,
## #   quoted_followers_count <dbl>, quoted_friends_count <dbl>,
## #   quoted_statuses_count <dbl>, quoted_location <chr>,
## #   quoted_description <chr>, quoted_verified <lgl>,
## #   retweet_status_id <chr>, retweet_text <chr>,
## #   retweet_created_at <dttm>, retweet_source <chr>,
## #   retweet_favorite_count <dbl>, retweet_retweet_count <dbl>,
## #   retweet_user_id <chr>, retweet_screen_name <chr>, retweet_name <chr>,
## #   retweet_followers_count <dbl>, retweet_friends_count <dbl>,
## #   retweet_statuses_count <dbl>, retweet_location <chr>,
## #   retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## #   place_name <chr>, place_full_name <chr>, place_type <chr>,
## #   country <chr>, country_code <chr>, status_url <chr>, name <chr>,
## #   location <chr>, description <chr>, url <chr>, protected <lgl>,
## #   followers_count <dbl>, friends_count <dbl>, listed_count <dbl>,
## #   statuses_count <dbl>, favourites_count <dbl>,
## #   account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## #   profile_expanded_url <chr>, account_lang <lgl>,
## #   profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>, lon <dbl>, lat <dbl>

names(tweet_boston)

##  [1] "user_id"                 "status_id"              
##  [3] "created_at"              "screen_name"            
##  [5] "text"                    "source"                 
##  [7] "display_text_width"      "reply_to_status_id"     
##  [9] "reply_to_user_id"        "reply_to_screen_name"   
## [11] "is_quote"                "is_retweet"             
## [13] "favorite_count"          "retweet_count"          
## [15] "quote_count"             "reply_count"            
## [17] "hashtags"                "symbols"                
## [19] "urls_url"                "urls_t.co"              
## [21] "urls_expanded_url"       "media_url"              
## [23] "media_t.co"              "media_expanded_url"     
## [25] "media_type"              "ext_media_url"          
## [27] "ext_media_t.co"          "ext_media_expanded_url" 
## [29] "ext_media_type"          "mentions_user_id"       
## [31] "mentions_screen_name"    "lang"                   
## [33] "quoted_status_id"        "quoted_text"            
## [35] "quoted_created_at"       "quoted_source"          
## [37] "quoted_favorite_count"   "quoted_retweet_count"   
## [39] "quoted_user_id"          "quoted_screen_name"     
## [41] "quoted_name"             "quoted_followers_count" 
## [43] "quoted_friends_count"    "quoted_statuses_count"  
## [45] "quoted_location"         "quoted_description"     
## [47] "quoted_verified"         "retweet_status_id"      
## [49] "retweet_text"            "retweet_created_at"     
## [51] "retweet_source"          "retweet_favorite_count" 
## [53] "retweet_retweet_count"   "retweet_user_id"        
## [55] "retweet_screen_name"     "retweet_name"           
## [57] "retweet_followers_count" "retweet_friends_count"  
## [59] "retweet_statuses_count"  "retweet_location"       
## [61] "retweet_description"     "retweet_verified"       
## [63] "place_url"               "place_name"             
## [65] "place_full_name"         "place_type"             
## [67] "country"                 "country_code"           
## [69] "status_url"              "name"                   
## [71] "location"                "description"            
## [73] "url"                     "protected"              
## [75] "followers_count"         "friends_count"          
## [77] "listed_count"            "statuses_count"         
## [79] "favourites_count"        "account_created_at"     
## [81] "verified"                "profile_url"            
## [83] "profile_expanded_url"    "account_lang"           
## [85] "profile_banner_url"      "profile_background_url" 
## [87] "profile_image_url"       "lon"                    
## [89] "lat"

users_data(tweet_boston) %>% head()

## # A tibble: 6 x 20
##   user_id screen_name name  location description url   protected
##   <chr>   <chr>       <chr> <chr>    <chr>       <chr> <lgl>    
## 1 x26532~ Kwasiagyem~ Isaa~ Republi~ "Any idea ~ http~ FALSE    
## 2 x27940~ nebulisms   peop~ Boston,~ IF I EVER ~ http~ FALSE    
## 3 x19597~ RyanJFiske  Ryan~ Rhode I~ #MAGA, #Tr~ http~ FALSE    
## 4 x51504~ Hedge_Fund~ RO-G~ <NA>     <U+9084><U~ http~ FALSE    
## 5 x11877~ BlindMike_  Blin~ Boston,~ Undefined ~ http~ FALSE    
## 6 x18133~ Zaradia     Deny~ Riversi~ Mother and~ <NA>  FALSE    
## # ... with 13 more variables: followers_count <dbl>, friends_count <dbl>,
## #   listed_count <dbl>, statuses_count <dbl>, favourites_count <dbl>,
## #   account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## #   profile_expanded_url <chr>, account_lang <lgl>,
## #   profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>

Ahora vamos a ver cual es el mensaje con mayor cantidad de retweets

tweet_boston %>%
  filter(!is_retweet) %>%
  filter(retweet_count == max(retweet_count)) %>%
  select(screen_name, retweet_count, followers_count, location, text)

## # A tibble: 1 x 5
##   screen_name retweet_count followers_count location  text                 
##   <chr>               <dbl>           <dbl> <chr>     <chr>                
## 1 notgojira            6378            1366 Boston, ~ Fire Emblem: Three H~

No exactamente el tipo de tweet sobre emergencias y fuego que esperabamos…..

Veamos como se distribuyen los retweets:

ggplot(filter(tweet_boston, !is_retweet))+
  geom_histogram(aes(x= retweet_count))+
  labs(title = "Distribución de retweets",
       x = "cantidad retweets",
       y = "usuarios") +
  theme_minimal()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Como vemos que la enorme mayoría de los tweets tienen muy pocos retweets. Así que para mejorar el gráfico vamos a filtrar aquellos que fueron retweeteados más de 50 veces

tweet_retweet <- tweet_boston %>%
    filter(!is_retweet) %>%
  filter(retweet_count > 50)

ggplot(data = tweet_retweet)+
  geom_histogram(aes(x= retweet_count))+
  labs(title = "Distribución de retweets",
       subtitle = "en tweets con mas de 50 rt",
       x = "cantidad retweets",
       y = "usuarios") +
  theme_minimal()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Capturamos los tweets de apróximadamente una semana. Veamos como se distribuyeron en dicha semana.

ts_plot(tweet_boston, "days")

Observamos un pico de tweets el 10 de agosto, suponemos que es porque es sábado. Separemoslos.

tweet_boston10ago <- tweet_boston %>%
  mutate(dia = as.Date(substr(created_at, 1,10))) %>%
  filter(dia == as.Date("2019-08-10"))

ts_plot(tweet_boston10ago, "hours")

Por lo que vemos en el gráfico, pareciera que la mayor cantidad de tweets se dan a las 0.00am del sábado. ¿Será por el inicio del fin de semana?

Ahora veamos cuales son los usuarios que tienen más seguidores

tweet_boston %>%
  top_n(5, followers_count) %>%
  arrange(desc(followers_count)) %>%
  select(screen_name, followers_count, location, text)

## # A tibble: 6 x 4
##   screen_name   followers_count location   text                            
##   <chr>                   <dbl> <chr>      <chr>                           
## 1 BenjaminEnfi~         1180034 New York,~ Fire, only if! https://t.co/37E~
## 2 MIT                   1025853 Cambridge~ Novel class of “ionic liquids” ~
## 3 MIT                   1025853 Cambridge~ Through her startup, MBA studen~
## 4 stoolpreside~          992633 Boston     Big Cat texted me.  What @Steve~
## 5 stoolpreside~          992633 Boston     If you work for @barstoolsports~
## 6 stoolpreside~          992633 Boston     @Hooters @barstoolsports Live l~

Y grafiquemos también la popularidad de los usuarios

options(scipen = 20)
ggplot(tweet_boston) +
  geom_histogram(aes(x=followers_count))+
   labs(title = "Distribución de seguidores",
       x = "cantidad seguidores",
       y = "usuarios") +
  theme_minimal()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Como vemos tenemos el mismo problema que con los retweets: La gran mayoria tiene muy pocos

Ahora vamos a ver desde donde tweetean los usuarios

head(tweet_boston$created_at)

## [1] "2019-08-18 21:11:12 UTC" "2019-08-18 21:09:03 UTC"
## [3] "2019-08-18 21:07:56 UTC" "2019-08-18 21:07:02 UTC"
## [5] "2019-08-18 21:06:37 UTC" "2019-08-18 21:05:16 UTC"

tweet_boston %>%
    filter(location != "", !is.na(location)) %>% 
    count(location) %>% 
    top_n(10, n) %>% 
    ggplot() +
      geom_col(aes(x = reorder(location, n), y = n)) + 
      coord_flip() +
      labs(title = "Procedencia de los usuarios",
           x = "ubicación",
           y = "cantidad")

Ahora vamos a mapear los datos

Para empezar, filtramos nuestros tweets para conservar sólo los que contienen coordenadas exactas de posición.

tweets_boston_geo <- tweet_boston %>% 
    filter(!is.na(lat), !is.na(lon))

bbox <- c(min(tweets_boston_geo$lon),
          min(tweets_boston_geo$lat),
          max(tweets_boston_geo$lon),
          max(tweets_boston_geo$lat))

require(leaflet)

paleta <- colorNumeric(
  palette = "viridis",
  domain = tweets_boston_geo$followers_count)

leaflet(tweets_boston_geo) %>% 
    addTiles() %>% 
    addCircleMarkers(popup = ~text,
                     color = ~paleta(followers_count)) %>% 
    addLegend(title = "seguidores", pal = paleta, values = ~followers_count)

## Assuming "lon" and "lat" are longitude and latitude, respectively

Parte 4

Para esta parte vamos a usar los datos de los crímenes reportados en la ciudad de Boston desde 2015 a la fecha.

crime <- read.csv("crime_incident.csv",
                   encoding = "UTF-8",
                   stringsAsFactors = FALSE)

Veamos que contiene este dataset.

names(crime)

##  [1] "INCIDENT_NUMBER"     "OFFENSE_CODE"        "OFFENSE_CODE_GROUP" 
##  [4] "OFFENSE_DESCRIPTION" "DISTRICT"            "REPORTING_AREA"     
##  [7] "SHOOTING"            "OCCURRED_ON_DATE"    "YEAR"               
## [10] "MONTH"               "DAY_OF_WEEK"         "HOUR"               
## [13] "UCR_PART"            "STREET"              "Lat"                
## [16] "Long"                "Location"

summary(crime)

##  INCIDENT_NUMBER     OFFENSE_CODE  OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION
##  Length:415262      Min.   : 111   Length:415262      Length:415262      
##  Class :character   1st Qu.:1102   Class :character   Class :character   
##  Mode  :character   Median :3001   Mode  :character   Mode  :character   
##                     Mean   :2328                                         
##                     3rd Qu.:3201                                         
##                     Max.   :3831                                         
##                                                                          
##    DISTRICT         REPORTING_AREA    SHOOTING         OCCURRED_ON_DATE  
##  Length:415262      Min.   :  0.0   Length:415262      Length:415262     
##  Class :character   1st Qu.:178.0   Class :character   Class :character  
##  Mode  :character   Median :345.0   Mode  :character   Mode  :character  
##                     Mean   :384.2                                        
##                     3rd Qu.:543.0                                        
##                     Max.   :962.0                                        
##                     NA's   :26519                                        
##       YEAR          MONTH        DAY_OF_WEEK             HOUR      
##  Min.   :2015   Min.   : 1.000   Length:415262      Min.   : 0.00  
##  1st Qu.:2016   1st Qu.: 4.000   Class :character   1st Qu.: 9.00  
##  Median :2017   Median : 7.000   Mode  :character   Median :14.00  
##  Mean   :2017   Mean   : 6.591                      Mean   :13.12  
##  3rd Qu.:2018   3rd Qu.: 9.000                      3rd Qu.:18.00  
##  Max.   :2019   Max.   :12.000                      Max.   :23.00  
##                                                                    
##    UCR_PART            STREET               Lat             Long       
##  Length:415262      Length:415262      Min.   :-1.00   Min.   :-71.18  
##  Class :character   Class :character   1st Qu.:42.30   1st Qu.:-71.10  
##  Mode  :character   Mode  :character   Median :42.33   Median :-71.08  
##                                        Mean   :42.22   Mean   :-70.92  
##                                        3rd Qu.:42.35   3rd Qu.:-71.06  
##                                        Max.   :42.40   Max.   : -1.00  
##                                        NA's   :27085   NA's   :27085   
##    Location        
##  Length:415262     
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
##

Veamos como se agrupan los incidentes

table(crime$OFFENSE_CODE_GROUP)

## 
##                        Aggravated Assault 
##                                     10342 
##                                  Aircraft 
##                                        58 
##                                     Arson 
##                                       109 
##          Assembly or Gathering Violations 
##                                      1127 
##                                Auto Theft 
##                                      6044 
##                       Auto Theft Recovery 
##                                      1352 
##                                Ballistics 
##                                      1289 
##                         Biological Threat 
##                                         2 
##                                 Bomb Hoax 
##                                       109 
##              Burglary - No Property Taken 
##                                         5 
##                       Commercial Burglary 
##                                      1645 
##                          Confidence Games 
##                                      3938 
##                            Counterfeiting 
##                                      1864 
##                       Criminal Harassment 
##                                       156 
##                        Disorderly Conduct 
##                                      3218 
##                            Drug Violation 
##                                     21523 
##                              Embezzlement 
##                                       396 
##                              Evading Fare 
##                                       515 
##                                Explosives 
##                                        33 
##                      Fire Related Reports 
##                                      2429 
##                         Firearm Discovery 
##                                       874 
##                        Firearm Violations 
##                                      2409 
##                                     Fraud 
##                                      8015 
##                                  Gambling 
##                                         8 
##                                Harassment 
##                                      5371 
##                  Harbor Related Incidents 
##                                       301 
##                             HOME INVASION 
##                                        93 
##                                  Homicide 
##                                       270 
##                         HUMAN TRAFFICKING 
##                                         8 
## HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE 
##                                         3 
##                        Investigate Person 
##                                     24231 
##                        INVESTIGATE PERSON 
##                                         5 
##                      Investigate Property 
##                                     14708 
##                  Landlord/Tenant Disputes 
##                                      1298 
##                                   Larceny 
##                                     33751 
##                Larceny From Motor Vehicle 
##                                     13571 
##           License Plate Related Incidents 
##                                       739 
##                         License Violation 
##                                      2240 
##                          Liquor Violation 
##                                      1534 
##                              Manslaughter 
##                                        10 
##                        Medical Assistance 
##                                     31724 
##                    Missing Person Located 
##                                      7049 
##                   Missing Person Reported 
##                                      4711 
##           Motor Vehicle Accident Response 
##                                     48261 
##           Offenses Against Child / Family 
##                                       664 
##             Operating Under the Influence 
##                                       720 
##                                     Other 
##                                     23372 
##                            Other Burglary 
##                                       572 
##                     Phone Call Complaints 
##                                        44 
##                  Police Service Incidents 
##                                      3931 
##                Prisoner Related Incidents 
##                                       339 
##                            Property Found 
##                                      5112 
##                             Property Lost 
##                                     13061 
##                   Property Related Damage 
##                                      1158 
##                              Prostitution 
##                                       264 
##                 Recovered Stolen Property 
##                                      1845 
##                      Residential Burglary 
##                                      6722 
##              Restraining Order Violations 
##                                      2048 
##                                   Robbery 
##                                      5665 
##                           Search Warrants 
##                                      1295 
##                                   Service 
##                                       375 
##                            Simple Assault 
##                                     20717 
##                                     Towed 
##                                     14507 
##                                 Vandalism 
##                                     19568 
##                           Verbal Disputes 
##                                     17299 
##                                Violations 
##                                      7818 
##                           Warrant Arrests 
##                                     10828

Vamos a separar los incidentes relacionados con el fuego. No incluimos la categoría “Firearm” dado que se refiere a las armas de fuego, y no a incendios.

fire_incidents <- crime %>%
  filter(OFFENSE_CODE_GROUP == "Fire Related Reports") %>%
  filter(!is.na(Lat))

Primero vamos a trabajar con los datos temporales. Vemos que el dataset ya incluye variables muy útiles como year, month, day_of_week y hour.

class(fire_incidents$OCCURRED_ON_DATE)

## [1] "character"

fire_incidents <- fire_incidents %>% 
  mutate(fecha = ymd(substr(OCCURRED_ON_DATE,1,10)),
         hora = hms(substr(OCCURRED_ON_DATE,12,18)),
         periodo = substr(OCCURRED_ON_DATE,1,7))

Veamos ahora como son los reportes de según fecha

fire_incidents %>% count(periodo) %>%
  select(n) %>%
  summary(n)

##        n        
##  Min.   :21.00  
##  1st Qu.:42.00  
##  Median :47.00  
##  Mean   :46.25  
##  3rd Qu.:53.00  
##  Max.   :63.00

ggplot() + 
    geom_bar(data= fire_incidents, aes(x = periodo)) +
  geom_hline(yintercept=46.25, color="red") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Reportes de Incendio en Boston",
       subtitle = "Periodo junio 2015 - Agosto 2019", 
       caption = "Fuente: Datos Abiertos Boston",
       y = "cantidad")

Observando el gráfico, no se detecta un patrón de reportes que este relacionado con el mes de ocurrencia. Se observa un leve incremento en los meses más cálidos (junio-julio).

Veamos ahora los tipos de reportes

ggplot() + 
    geom_bar(data= fire_incidents, 
             aes(x = YEAR, fill= OFFENSE_DESCRIPTION), 
             position = "dodge") +
    labs(title = "Reportes de Incendio en Boston",
       subtitle = "Según tipo y año", 
       caption = "Fuente: Datos Abiertos Boston",
       fill = "Tipo de reporte",
       y = "cantidad",
       x= "año")

Ahora vamos a ver si los reportes de incendio están relacionados con el mes.

fire_mes <- fire_incidents %>% 
    count(OFFENSE_DESCRIPTION, mes = month(fecha, label = TRUE, abbr = FALSE))
ggplot(fire_mes) +
    geom_line(aes(x = mes, y = n, group = OFFENSE_DESCRIPTION, color = OFFENSE_DESCRIPTION)) +
    theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Reportes de Incendio en Boston",
       subtitle = "Periodo Junio 2015 - Agosto 2019 (discriminado por mes)", 
       caption = "Fuente: Datos Abiertos Boston",
       y = "cantidad")

Vemos que en el periodo analizado, hay un incremento de los reportes de incendio en autos y edificios en los meses más calidos.

Por último vamos a observar el horario en el que se reportan los incendios.

fire_hora <-  fire_incidents %>% 
    count(OFFENSE_DESCRIPTION, hora_base = hour(hora)) %>%
    group_by(OFFENSE_DESCRIPTION) %>% 
    mutate(prom_hora = n / sum(n) *100)
ggplot(fire_hora) +
    geom_line(aes(x = hora_base, 
                  y = prom_hora, 
                  group = OFFENSE_DESCRIPTION, 
                  color = OFFENSE_DESCRIPTION)) +
    labs(title = "Reportes de Incendio en Boston",
         x = "hora", y = "%") +
    expand_limits(y = 0) +
    scale_x_continuous(breaks = 0:23)

Se observa un pico mínimo a las 6am.

¿Dónde se localizan los reportes de incendio en la ciudad?

## Source : http://tile.stamen.com/terrain-lines/12/1238/1514.png

## Source : http://tile.stamen.com/terrain-lines/12/1239/1514.png

## Source : http://tile.stamen.com/terrain-lines/12/1240/1514.png

## Source : http://tile.stamen.com/terrain-lines/12/1241/1514.png

## Source : http://tile.stamen.com/terrain-lines/12/1242/1514.png

## Source : http://tile.stamen.com/terrain-lines/12/1238/1515.png

## Source : http://tile.stamen.com/terrain-lines/12/1239/1515.png

## Source : http://tile.stamen.com/terrain-lines/12/1240/1515.png

## Source : http://tile.stamen.com/terrain-lines/12/1241/1515.png

## Source : http://tile.stamen.com/terrain-lines/12/1242/1515.png

## Source : http://tile.stamen.com/terrain-lines/12/1238/1516.png

## Source : http://tile.stamen.com/terrain-lines/12/1239/1516.png

## Source : http://tile.stamen.com/terrain-lines/12/1240/1516.png

## Source : http://tile.stamen.com/terrain-lines/12/1241/1516.png

## Source : http://tile.stamen.com/terrain-lines/12/1242/1516.png

## Source : http://tile.stamen.com/terrain-lines/12/1238/1517.png

## Source : http://tile.stamen.com/terrain-lines/12/1239/1517.png

## Source : http://tile.stamen.com/terrain-lines/12/1240/1517.png

## Source : http://tile.stamen.com/terrain-lines/12/1241/1517.png

## Source : http://tile.stamen.com/terrain-lines/12/1242/1517.png

ggmap(BOSTON) +
    geom_bin2d(data = fire_incidents, aes(x = Long, y = Lat), bins = 100) +
  scale_fill_viridis_c(option = "D", direction = -1)

## Warning: Removed 8 rows containing non-finite values (stat_bin2d).

Ahora vamos a realizar un mapa de densidad de reportes de incendio.

barrios_resumen <- barrios %>%
  select(Name, SqMiles) %>%
  mutate(id = row_number())

barrios_centroide <- barrios_resumen %>% 
  st_centroid() %>%
  st_coordinates() %>%
  as.data.frame() %>%
  mutate( id = row_number())

## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant
## over geometries of x

## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for
## longitude/latitude data

barrios_text <- left_join(barrios_resumen, barrios_centroide)

## Joining, by = "id"

fire_dep_point <- fire_departments %>%
  mutate(Y = YCOORD/1000000,
         X = XCOORD / -1000000) %>%
    select(PD, X, Y)

ggmap(BOSTON) +
  geom_text(data = barrios_text, aes(x = X, y = Y, label = Name), size = 3) + 
  geom_point(data = fire_dep_point, aes(x = X, y = Y), 
             size = 2, color = "purple", alpha = 1) +
  geom_density2d(data = fire_incidents, aes(x = Long, y = Lat, color=stat(level))) +
  scale_color_viridis_c(option = "D", direction = -1) +
  labs(title = "Densidad de reportes de incendio",
       subtitle = "Próximidad con los departamentos de bomberos",
       caption = "Datos Abiertos Boston")

## Warning: Removed 8 rows containing non-finite values (stat_density2d).

## Warning: Removed 2 rows containing missing values (geom_point).

La mayor densidad de reportes corresponde con el barrio con mayor cantidad de departamentos de bomberos.

Veamos como se distribuyen las densidades según tipo de reporte.

ggmap(BOSTON) +
    geom_density2d(data = fire_incidents, aes(x = Long, y = Lat, color=stat(level))) +
  labs(title = "Densidad de reportes de incendio",
       caption = "Datos Abiertos Boston") +
      scale_color_viridis_c(option = "D", direction = -1) +
    facet_wrap(~OFFENSE_DESCRIPTION)

## Warning: Removed 8 rows containing non-finite values (stat_density2d).

Por último veamos como varían los reportes según las horas

ggmap(BOSTON) +
    geom_density2d(data = fire_incidents, aes(x = Long, y = Lat, color=stat(level))) +
  labs(title = "Densidad de reportes de incendio",
       caption = "Datos Abiertos Boston") +
      scale_color_viridis_c(option = "D", direction = -1) +
    facet_wrap(~HOUR, nrow = 4)

## Warning: Removed 8 rows containing non-finite values (stat_density2d).

TP5

Vamos a tomar los viajes en el sistema de bicicletas de Boston.

viajes_boston <- read.csv("201907-bluebikes-tripdata.csv",
                   encoding = "UTF-8",
                   stringsAsFactors = FALSE)
estaciones_boston <- read.csv("Hubway_Stations.csv",
                   encoding = "UTF-8",
                   stringsAsFactors = FALSE)

Mejoremos el dataset de estaciones

estaciones_boston <- estaciones_boston %>%
  mutate(lon = X.U.FEFF.X, 
         lat = Y) %>%
  select(id, name, lat, lon)

Agrupamos por estacion

conteo_boston <- viajes_boston %>% 
  rename(lat_or = start.station.latitude,
         long_or = start.station.longitude,
         lat_dest = end.station.latitude,
         long_dest = end.station.longitude) %>%
  group_by(start.station.id, end.station.id, lat_or, long_or, lat_dest,long_dest) %>% 
  summarise(total = n())

Veamos cuales son los viajes mas populares

ggplot() + 
  geom_tile(data = conteo_boston, 
            aes(x = start.station.id, y = end.station.id, fill = total)) +
  scale_fill_distiller(palette = "Spectral")

Este gráfico es dificil de analizar. Tomemos solo los 50 viajes más realizados

top <- conteo_boston %>% 
  ungroup() %>% 
  filter(start.station.id != end.station.id) %>% 
  top_n(50)

## Selecting by total

ggplot() + 
  geom_tile(data = top, 
            aes(x = as.factor(start.station.id),
                y = as.factor(end.station.id),
                fill = total)) +
  scale_fill_distiller(palette = "Spectral")

Veamos en un mapa las estaciones en donde se inician por lo menos 20 viajes en julio 2019.

require(ggmap)
ggmap (BOSTON) +
  geom_jitter(data = filter(conteo_boston, total >20), aes(y = lat_or, x = long_or, color = total)) +
  scale_color_distiller(palette = "Spectral")

## Warning: Removed 23 rows containing missing values (geom_point).

Veamos los 10 recorridos más realizados

require(osrm)

## Loading required package: osrm

## Warning: package 'osrm' was built under R version 3.6.1

## Data: (c) OpenStreetMap contributors, ODbL 1.0 - http://www.openstreetmap.org/copyright

## Routing: OSRM - http://project-osrm.org/

top10_boston <- conteo_boston %>% 
  ungroup() %>% 
  top_n(10)

## Selecting by total

obtener_recorrido <- function(o_nombre, o_x, o_y, d_nombre, d_x, d_y) {
  
  ruta <- osrmRoute(src = c(o_nombre, o_x, o_y),
                    dst = c(d_nombre, d_x, d_y))
  
  cbind(start.station.id = o_nombre, end.station.id = d_nombre, ruta)
  
}


argumentos <- list(top10_boston$start.station.id, top10_boston$long_or, top10_boston$lat_or,
                   top10_boston$end.station.id, top10_boston$long_dest, top10_boston$lat_dest)

recorridos <- pmap(argumentos, obtener_recorrido) %>% 
  reduce(rbind)

top10_boston <- top10_boston %>% 
  mutate(ID = 1:10)

recorridos_df <- left_join(recorridos, top10_boston)

## Joining, by = c("start.station.id", "end.station.id")

ggmap(BOSTON) +
    geom_path(data = recorridos_df, aes(x = lon, y = lat, color = ID)) +
    theme_nothing()

Los recorridos están muy concentrados en la zona de Cambridge. Hagamos un zoom en el mapa para poder verlos mejor

bbox_zoom <- make_bbox(recorridos_df$lon, recorridos_df$lat)

BOSTON_zoom <- get_stamenmap(bbox = bbox_zoom, 
                      maptype = "terrain-lines", 
               source="stamen", zoom = 12)

Incorporemos los nombres de las estaciones

ggmap(BOSTON_zoom) +
    geom_path(data = recorridos_df, aes(x = lon, y = lat, color = factor(ID)), size = 2) +
  geom_point(data=filter(estaciones_boston, id %in% c_estaciones), aes(x= lon, y= lat), color = "black", size = 2, alpha = 0.4) +
  geom_text(data = filter(estaciones_boston, id %in% c_estaciones), aes(x= lon, y= lat, label = name))+
    theme_nothing()

Ahora queremos ver el mapa en un leaflet

obtener_recorrido_geo <- function(o_nombre, o_x, o_y, d_nombre, d_x, d_y) {
  
  ruta <- osrmRoute(src = c(o_nombre, o_x, o_y),
                    dst = c(d_nombre, d_x, d_y), 
                    returnclass = "sf")
  
  cbind(start.station.id = o_nombre, end.station.id = d_nombre, ruta)
  
}


argumentos_geo <- list(top10_boston$start.station.id, top10_boston$long_or, top10_boston$lat_or,
                   top10_boston$end.station.id, top10_boston$long_dest, top10_boston$lat_dest)

recorridos_geo <- pmap(argumentos_geo, obtener_recorrido_geo) %>% 
  reduce(rbind)

recorridos_sf <- left_join(recorridos_geo, top10_boston) %>%
  select(ID, duration, distance, total)

## Joining, by = c("start.station.id", "end.station.id")

paleta1<- colorNumeric(
  palette = "Spectral",
  domain = recorridos_sf$distance)

leaflet(recorridos_sf) %>%
  addTiles() %>%
  addPolylines(color = ~paleta1(distance)) %>%
  addLegend(title = "Distancia", pal = paleta1, values = ~distance)

Parte II

Calculamos los centroides de los barrios

barrios_centroide <- st_centroid(barrios) %>%
    mutate(id = row_number()) %>%
  select(Neighborhood_ID, Name, id)

## Warning in st_centroid.sf(barrios): st_centroid assumes attributes are
## constant over geometries of x

## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for
## longitude/latitude data

Vemos en un mapa los centroides y los departamentos de bomberos

ggplot() +
  geom_sf(data= barrios) + 
  geom_sf(data= barrios_centroide, color = "black") + 
  geom_sf(data=fire_departments, size = .9, color = "red") + 
  theme_void()

Vamos a convertir los datos en coordenadas (barrios_text y fire_dep_point del tp anterior)

barrios_df <- barrios_text %>%
  st_set_geometry(NULL) %>%
  select(Name, X, Y) %>%
  rename(bar_X= X,
         bar_Y = Y)

fire_dep_df <- fire_dep_point %>%
  st_set_geometry(NULL) %>%
  select(PD, X, Y) %>%
  filter(!is.na(X)) %>%
  filter(X != 0.00000)

combinacion <- expand.grid(barrios_df$Name, fire_dep_df$PD) %>%
  rename(Name = Var1,
         PD = Var2)
combinacion <- left_join(combinacion, barrios_df)

## Joining, by = "Name"

combinacion <- left_join(combinacion, fire_dep_df)

## Joining, by = "PD"

ggplot() +
  geom_sf(data= barrios) +
  geom_sf_text(data = barrios, aes(label = Name), size = 2)

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may
## not give correct results for longitude/latitude data

Vamos a elegir los barrios “downtown” y “dorchester”

combinacion2 <- combinacion %>%
  filter(Name == c("Downtown", "Dorchester"))

Ahora vamos a obtener el recorrido desde cada centroide a cada departamento de bomberos

obtener_recorrido_firedep <- function(o_nombre, o_x, o_y, d_nombre, d_x, d_y) {
  
  ruta <- osrmRoute(src = c(o_nombre, o_x, o_y),
                    dst = c(d_nombre, d_x, d_y), 
                    returnclass = "sf")
  
  cbind(Name = o_nombre, PD = d_nombre, ruta)
  
}

centroide_a_firedep <- list(combinacion2$Name, combinacion2$bar_X, combinacion2$bar_Y,
                   combinacion2$PD, combinacion2$X, combinacion2$Y)

recorridos_a_firedep <- pmap(centroide_a_firedep, obtener_recorrido_firedep) %>% 
  reduce(rbind)

Y los vamos a graficar

factpal<- colorNumeric(
  palette = "Spectral",
  domain = recorridos_a_firedep$distance)

recorridos_a_firedep <-mutate(recorridos_a_firedep, x = paste(Name, PD))

leaflet(recorridos_a_firedep) %>%
  addTiles() %>%
  addPolylines(color = ~factpal(distance), popup = ~x) %>%
  addLegend(title = "Distancia", pal = factpal, values = ~distance)

Como podemos ver, el recorrido más largo se da desde “Downtown” al fire department de West Roxbury pero si hacemos el zoom suficiente vemos que el barrio más centrico tiene los recorridos más cortos desde su centroide a los distintos fire departments. Esto está relacionado con el hecho de que en ese barrio hay una concentración mayor de fire departments que en otros barrios.