library(flexdashboard)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(hablar)
##
## Attaching package: 'hablar'
## The following object is masked from 'package:dplyr':
##
## na_if
library(RColorBrewer)
library(readxl)
library(jtools)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Load the Mapbox access token from a local text file and publish it as the
# MAPBOX_TOKEN environment variable for the rest of the session.
my.token <- readr::read_lines("Matt1.txt")
Sys.setenv(MAPBOX_TOKEN = my.token)
library(mapview)
## GDAL version >= 3.1.0 | setting mapviewOptions(fgb = TRUE)
# mapview enabled FlatGeobuf rendering when it was attached; switch it off.
mapviewOptions(fgb = FALSE)

# Reference dates for the reporting window.
# NOTE(review): `prev.week` is 14 days before today, not 7 -- confirm which
# window is intended.
current.date <- Sys.Date()
prev.week <- current.date - 14

# Query prefix for the Chicago 311 service-request endpoint; a date is meant to
# be appended after "created_date > ".
# NOTE(review): not used in the visible code -- the data below comes from a
# local JSON file instead.
baseurl <- "http://data.cityofchicago.org/resource/v6vf-nfxy.json?$where=sr_type!='311 INFORMATION ONLY CALL' AND created_date > "

# fromJSON() reads a file path directly, so there is no need to pull the file
# in line by line with readLines() first.
df <- fromJSON("/Users/matthewpalagyi/RFolder/Chicago.json")
# Convert both coordinate columns to double, then keep only records that have
# a usable latitude AND longitude. Both columns are needed downstream (the
# map markers and the median map centre), so checking latitude alone is not
# enough.
df <- df %>%
  hablar::convert(dbl(latitude, longitude)) %>%
  filter(!is.na(latitude), !is.na(longitude))
# Interactive Mapbox map with one marker per call, coloured by status.
# The hover label shows the request type and its status.
#
# No `mode` is passed to plot_mapbox(): "scattermapbox" is a trace type, not a
# marker mode, and add_markers() already sets the mode for the marker trace.
plot_mapbox(df, lat = ~latitude, lon = ~longitude) %>%
  add_markers(
    text = ~paste(sr_type, "\n", status),
    color = ~status,
    colors = c("purple", "green", "red"),
    size = 3000,
    hoverinfo = "text"
  ) %>%
  layout(
    # Centre the initial view on the median call location. These are plain
    # numbers, so no formula is needed here.
    mapbox = list(
      zoom = 10,
      center = list(
        lat = median(df$latitude),
        lon = median(df$longitude)
      )
    ),
    title = "311 Calls in Chicago for the Past Week",
    legend = list(orientation = 'h', font = list(size = 8))
  )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
This exercise asks you to practice some of what we have learned about creating interactive graphics with plotly.
I didn’t need to change much here. Because df already holds data for the entire week, I simply removed the ‘today’ filter from the original code chunk. The new donut chart is also shown in the dashboard.
Using a method similar to the bar chart of calls by community area, I created an interactive bar chart that shows the percentage of 311 calls completed for each call type. This was actually pretty interesting to look at. However, to provide better context, I think the graph should also show the number of calls for each type. In addition, a number of call types had zero completed calls; these bars have no height, which makes them difficult to interact with on the graph.
# Tally the calls for every request type.
df.counts <- count(df, sr_type)

# Order the tallies from largest to smallest and keep the first ten rows.
df.counts.top.ten <- slice_head(arrange(df.counts, desc(n)), n = 10)

# Donut chart of the ten most frequent request types. Axes are hidden and the
# legend is turned off.
plot_ly(df.counts.top.ten, labels = ~sr_type, values = ~n) %>%
  add_pie(hole = 0.6) %>%
  layout(
    title = "Ten Most Common Calls by Type Between 10/14/20 and 10/21/20",
    showlegend = FALSE,
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
# Cross-tabulate calls by request type and status: one row per request type,
# one column per status. count() returns an ungrouped result, and
# values_fill = 0 records "no calls with this status" as 0 rather than NA, so
# no blanket NA replacement is needed afterwards.
df.sr.type <- df %>%
  count(sr_type, status) %>%
  pivot_wider(
    names_from = status,
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  )

# Percentage of each request type's calls that are completed. The total is a
# plain vectorised sum of the three status columns, so it is computed per row
# without relying on any leftover grouping.
df.pct.completed <- df.sr.type %>%
  mutate(
    total = Canceled + Completed + Open,
    pct.completed = (Completed / total) * 100
  )

# Bar chart of the completion percentage per request type. The hover text is
# rounded to one decimal place so it stays readable.
plot_ly(
  data = df.pct.completed,
  x = ~sr_type,
  y = ~pct.completed,
  type = 'bar',
  name = 'Percentage of 311 Calls Completed by Type',
  text = ~paste(round(pct.completed, 1), "percent of calls completed"),
  color = I("tan")
) %>%
  layout(
    title = "Percentage of 311 Calls Completed by Type",
    xaxis = list(title = "", showticklabels = TRUE),
    yaxis = list(title = "")
  )
# Count the number of calls by community area. count() does the grouping and
# tallying in one step and returns an ungrouped table, so no stray grouping is
# carried into the join or the plot below.
df.community.area <- count(df, community_area)

# Download the Community Area boundaries so that
# we can use them for mapping...
ca.polys <- st_read("https://data.cityofchicago.org/api/geospatial/cauq-8yn6?method=export&format=GeoJSON", quiet = TRUE)

# Use an inner join from dplyr to link the 311
# call tallies to the polygons. The tally table is on the left, so the result
# is a regular table keyed by community area.
ca.polys.tb <- inner_join(
  df.community.area,
  ca.polys,
  by = c("community_area" = "area_numbe")
)

# Create the bar chart! The hover text refers to the `community` column
# through the formula, so it always lines up with the rows being plotted.
plot_ly(
  data = ca.polys.tb,
  x = ~community_area,
  y = ~n,
  type = 'bar',
  name = 'Calls By CCA',
  text = ~paste("Total of ", n, "calls this week in", stringr::str_to_title(community)),
  color = I("orange")
) %>%
  layout(
    title = "311 Calls By Community Area",
    xaxis = list(title = "", showticklabels = FALSE),
    yaxis = list(title = "")
  )
# Attach the call tallies to the community-area polygons. The polygons are on
# the left of the join so the result keeps its spatial (sf) class.
ca.polys.sf <- inner_join(ca.polys, df.community.area, by = c("area_numbe" = "community_area"))

# Pothole complaints that have a longitude, with the number of days between
# creation and closure. Columns are referenced directly inside mutate()
# instead of through `potholes$`, so the calculation always uses the rows that
# are actually in the pipeline.
potholes <- df %>%
  filter(sr_type == "Pothole in Street Complaint", !is.na(longitude)) %>%
  mutate(DaystoComplete = as.Date(closed_date) - as.Date(created_date))

# Keep only closed complaints (they are the ones with a completion time) and
# convert to an sf point layer. EPSG:4326 is WGS84 longitude/latitude, replacing
# the legacy PROJ.4 string.
potholes.sf <- potholes %>%
  filter(!is.na(closed_date)) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# Layer 1: community areas shaded by number of calls.
# Layer 2: closed pothole complaints, with the days to completion in the popup.
# NOTE(review): the pothole layer has no `zcol`, so the RdYlGn palette is not
# tied to DaystoComplete -- confirm whether colouring by it was intended.
mapview(
  ca.polys.sf,
  layer.name = "311 Calls",
  zcol = "n",
  map.types = c("CartoDB.Positron", "OpenStreetMap", "Esri.WorldImagery"),
  color = "white",
  legend = TRUE,
  popup = paste("There were", ca.polys.sf$n, "calls from", stringr::str_to_title(ca.polys.sf$community), "last week.")
) +
  mapview(
    potholes.sf,
    layer.name = "Reported Potholes",
    cex = 4,
    col.regions = brewer.pal(5, "RdYlGn"),
    alpha = 0.3,
    popup = paste("pothole in zipcode:", potholes.sf$zip_code, "took", potholes.sf$DaystoComplete, "days to complete")
  )
This part of the Lab Assignment asks you to manipulate the data from the above code chunk, generate a different interactive map with mapview, and send it to RPubs.
`as.Date(potholes.sf$closed_date) - as.Date(potholes.sf$created_date)`

O’Hare had by far the most 311 calls with 3621, but it did not have any potholes. Most community areas had fewer than 500 calls for the week. There doesn’t appear to be any correlation between location (latitude/longitude) and the number of calls. The potholes look to be fairly scattered across the city, except in the northern region. The northern region had fewer potholes, and most were completed in zero days. The downtown area had a few, but these were also mostly completed in zero days. Potholes in the southern portion of the city generally took the longest to complete, especially those in and around the West Englewood Community Area.
Reflection: Very useful assignment. It was great to see how easily interactive visuals can be made with R. There were a few kinks that took hours to understand and fix, but that’s just the nature of getting to know R. I also enjoyed revisiting the dplyr functions to create the new graphs. Overall, this was a pretty engaging assignment, and I look forward to writing up some stronger interpretations next week as we learn about linear regression methods.