In this note, we use a real data set containing crime cases since 2015 in Philadelphia neighborhoods to illustrate various visualizations of visual analysis. We will use two different shapefiles that to draw the boundaries of neighborhoods and census tracts to plot various information available in the crime data set.
We will create both choropleth and reference maps to display different types of information.
The visual analysis is based on the following three data sets
## covid_vaccines_by_census_tract.geojson
phillyNeighbor <- st_read("https://pengdsci.github.io/STA553VIZ/w08/Neighborhoods_Philadelphia.geojson")
philly <- st_read("https://pengdsci.github.io/STA553VIZ/w08/PhillyNeighborhood-blocks.geojson") # block level data
phillyCrime <- read.csv("https://pengdsci.github.io/STA553VIZ/w08/PhillyCrimeSince2015.csv")
###
phillyCrime$name = toupper(gsub("-", "_", phillyCrime$neighborhood))
# extracting year from variable 'date'
slash.loc = unlist(gregexpr('/', phillyCrime$date))
slash2.loc = slash.loc[2*(1:dim(phillyCrime)[1])]
phillyCrime$year = substr(phillyCrime$date, slash2.loc+1, slash2.loc+4)
Since the crime data set has neighborhood name which will be used as a key to join the shapefile of the neighborhood. There is no variable in the crime data that can be used as a key to merge with the census tract shapefile. We aggregate relevant information at neighborhood level to make a choropleth map and make a scatterplot map to display information of individual crime cases.
SubCrime = phillyCrime[,c("dc_key","fatal","name","year")]
aggregateCrime = aggregate(SubCrime$fatal, by=list(SubCrime$name,SubCrime$year), FUN=length)
## Longitude and latitude of zip code center
ZipLon = aggregate(phillyCrime$lng, by=list(phillyCrime$zip_code), FUN=mean)
ZipLat = aggregate(phillyCrime$lat, by=list(phillyCrime$zip_code), FUN=mean)
ZipLonLat = merge(ZipLon, ZipLat, by = "Group.1")
names(ZipLonLat) = c("zip", "lng", "lat")
## Zip code crime
ZipCrimeTable = table(phillyCrime$fatal, phillyCrime$zip_code)
ZipCrime = data.frame(zip=as.numeric(names(table(phillyCrime$zip_code))),
fatal = table(phillyCrime$fatal, phillyCrime$zip_code)[1,],
nonfatal = table(phillyCrime$fatal, phillyCrime$zip_code)[2,],
total.crime = table(phillyCrime$zip_code)
)
ZipCrime = ZipCrime[, c("zip", "fatal", "nonfatal", "total.crime.Freq")]
colnames(ZipCrime) = c("zip", "fatal", "nonfatal", "total.crime")
ZipCrime = merge(ZipLonLat, ZipCrime, by = "zip")
##
pal <- colorFactor(c("red", "gold"), domain = c("Fatal", "Nonfatal"))
##
pnt = st_as_sf(data.frame(x = -75.1652, y = 39.9526),
coords = c("x", "y"),
crs = 4326)
media = st_as_sf(data.frame(x = -75.3877, y = 39.9168),
coords = c("x", "y"),
crs = 4326)
##
ageDistloc = st_as_sf(data.frame(x = -75.3677, y = 39.9168),
coords = c("x", "y"),
crs = 4326)
##
ziploc = st_as_sf(data.frame(x = -75.3477, y = 39.9168),
coords = c("x", "y"),
crs = 4326)
##
fig <- plot_ly(ZipCrime, x = ~lng, y = ~lat, color = ~zip, size = ~total.crime, #colors = colors,
type = 'scatter',
mode = 'markers',
#sizes = c(min(log(ZipCrime$total.crime)), max(log(ZipCrime$total.crime))),
marker = list(symbol = 'circle',
sizemode = 'diameter',
line = list(width = 2, color = '#FFFFFF')),
text = ~paste('Zip Code:', zip,
'<br>Total Crime:', total.crime,
'<br>Fatal Crime:', fatal,
'<br>Nonfatal Crime:', nonfatal)) %>% hide_colorbar()
##
img = "https://pengdsci.github.io/STA553VIZ/w08/PhillyCityHall.jpg"
trend = "https://pengdsci.github.io/STA553VIZ/w08/CrimeTrend.jpg"
ageDist = "https://pengdsci.github.io/STA553VIZ/w08/ageDist.jpg"
trendIcon = "https://pengdsci.github.io/STA553VIZ/w08/trend-icon.jpg"
######################################
## Data analysis
## 1. age distribution by age
fatal.cols <- c("#F76D5E", "#72D8FF")
# Basic density plot in ggplot2
fatalAge = ggplot(phillyCrime, aes(x = age, fill =fatal )) +
geom_density(alpha = 0.7) +
scale_fill_manual(values = fatal.cols)
##################################################
## 2. Frequency distribution: fatal vs nonfatal
#fatal.Dist = pander(table(phillyCrime$fatal, phillyCrime$race))
##################################################
## popup interactive graphs with plotly
fl = tempfile(fileext = ".html")
saveWidget(fig, file = fl)
##
leaflet() %>%
setView(lng=-75.2427, lat=40.0107, zoom = 11) %>%
addProviderTiles(providers$CartoDB.DarkMatter) %>%
#addTiles(providers$CartoDB.PositronNoLabels) %>%
addMiniMap() %>%
addPolygons(data = phillyNeighbor,
color = 'skyblue',
weight = 1) %>%
addCircleMarkers(data = phillyCrime,
radius = ~ifelse(fatal == "Fatal", 5, 3),
color = ~pal(fatal),
stroke = FALSE,
fillOpacity = 0.5,
popup = ~popupTable(phillyCrime)) %>%
addCircleMarkers(data = pnt,
color = "blue",
weight = 2,
label = "City Hall",
stroke = FALSE,
fillOpacity = 0.95,
group = "pnt") %>%
addPopupImages(img,
width = 100,
height = 120,
tooltip = FALSE,
group = "pnt") %>%
addCircleMarkers(data = media,
color = "red",
weight = 2,
label = "Trend",
stroke = FALSE,
fillOpacity = 0.95,
group = "media") %>%
addPopupImages(trend,
width = 500,
height = 400,
tooltip = FALSE,
group = "media") %>%
addCircleMarkers(data = ageDistloc,
color = "skyblue",
weight = 2,
label = "Age Distribution",
stroke = FALSE,
fillOpacity = 0.95,
group = "ageDistloc") %>%
addPopupImages(ageDist,
width = 500,
height = 400,
tooltip = FALSE,
group = "ageDistloc") %>%
addCircleMarkers(data = ziploc,
color = "white",
weight = 2,
label = "ZIP Location",
stroke = FALSE,
fillOpacity = 0.95,
group = "ziploc") %>%
#addPopupGraphs(list(fig),
# width = 500,
# height = 500,
# tooltip = FALSE,
# group = "ziploc") %>%
# addMarkers(
# ~Long, ~Lat, group="3" ) %>%
leafpop:::addPopupIframes(
source = fl,
group = "ziploc"
)