Advanced Map Assignment: Philly Crimes

Philly Crimes Interactive Map

The first part of this assignment is to create a reference map using the Philly Crimes dataset. This is longitudinal data that contains crime cases since 2015. It has varibles such as date, sex, fatality, and the exact location of the crime in x and y coordinates. This dataset provedes many insights and can help people in the area understand patterns and areas to avoid.

library(lubridate) 



phillyCrime <- read.csv('PhillyCrimeSince2015.csv')
#dates <- as.POSIXct(data_frame$date, format = "%m/%d/%Y %H:%M")
#format(data_frame, format="%Y")

#crime <-  data.frame( date = format(dates, format = "%m/%d/%Y %H:%M") ,
 #                         Year= format(dates, format = "%Y"), data_frame)


phillyCrime$name = toupper(gsub("-", "_", phillyCrime$neighborhood)) 
# extracting year from variable 'date'
slash.loc = unlist(gregexpr('/', phillyCrime$date))
slash2.loc = slash.loc[2*(1:dim(phillyCrime)[1])]
phillyCrime$year = substr(phillyCrime$date, slash2.loc+1, slash2.loc+4)



#############################3
SubCrime = phillyCrime[,c("dc_key","fatal","name","year")]
aggregateCrime = aggregate(SubCrime$fatal, by=list(SubCrime$name,SubCrime$year), FUN=length)
## Longitude and latitude of zip code center
ZipLon = aggregate(phillyCrime$lng, by=list(phillyCrime$zip_code), FUN=mean)
ZipLat = aggregate(phillyCrime$lat, by=list(phillyCrime$zip_code), FUN=mean)
ZipLonLat = merge(ZipLon, ZipLat, by = "Group.1")
names(ZipLonLat) = c("zip", "lng", "lat")
##  Zip code crime
ZipCrimeTable = table(phillyCrime$fatal,  phillyCrime$zip_code)
ZipCrime = data.frame(zip=as.numeric(names(table(phillyCrime$zip_code))), 
                      fatal = table(phillyCrime$fatal, phillyCrime$zip_code)[1,],
                      nonfatal = table(phillyCrime$fatal, phillyCrime$zip_code)[2,],
                      total.crime = table(phillyCrime$zip_code) 
                      )
ZipCrime = ZipCrime[, c("zip", "fatal", "nonfatal", "total.crime.Freq")]
colnames(ZipCrime) = c("zip", "fatal", "nonfatal", "total.crime")
ZipCrime = merge(ZipLonLat, ZipCrime, by = "zip")
SubCrime = phillyCrime[,c("dc_key","fatal","name","year")]
aggregateCrime = aggregate(SubCrime$fatal, by=list(SubCrime$name,SubCrime$year), FUN=length)
## Longitude and latitude of zip code center
ZipLon = aggregate(phillyCrime$lng, by=list(phillyCrime$zip_code), FUN=mean)
ZipLat = aggregate(phillyCrime$lat, by=list(phillyCrime$zip_code), FUN=mean)
ZipLonLat = merge(ZipLon, ZipLat, by = "Group.1")
names(ZipLonLat) = c("zip", "lng", "lat")
##  Zip code crime
ZipCrimeTable = table(phillyCrime$fatal,  phillyCrime$zip_code)
ZipCrime = data.frame(zip=as.numeric(names(table(phillyCrime$zip_code))), 
                      fatal = table(phillyCrime$fatal, phillyCrime$zip_code)[1,],
                      nonfatal = table(phillyCrime$fatal, phillyCrime$zip_code)[2,],
                      total.crime = table(phillyCrime$zip_code) 
                      )
ZipCrime = ZipCrime[, c("zip", "fatal", "nonfatal", "total.crime.Freq")]
colnames(ZipCrime) = c("zip", "fatal", "nonfatal", "total.crime")
ZipCrime = merge(ZipLonLat, ZipCrime, by = "zip")

Below is the code for the interactive reference map and some data analysis:

phillyNeighbor  <- st_read("https://pengdsci.github.io/STA553VIZ/w08/Neighborhoods_Philadelphia.geojson")
Reading layer `Neighborhoods_Philadelphia' from data source 
  `https://pengdsci.github.io/STA553VIZ/w08/Neighborhoods_Philadelphia.geojson' 
  using driver `GeoJSON'
Simple feature collection with 158 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -75.28027 ymin: 39.867 xmax: -74.95576 ymax: 40.13799
Geodetic CRS:  WGS 84
## Define a color palette
pal <- colorFactor(c("red", "gold"), domain = c("Fatal", "Nonfatal"))

## Define objects with geo-coordinate system to plot specific information
pnt = st_as_sf(data.frame(x = -75.1652, y = 39.9526),
                coords = c("x", "y"),
                crs = 4326)
media = st_as_sf(data.frame(x = -75.3877, y = 39.9168),
                coords = c("x", "y"),
                crs = 4326)
ageDistloc = st_as_sf(data.frame(x = -75.3677, y = 39.9168),
                coords = c("x", "y"),
                crs = 4326)
ziploc = st_as_sf(data.frame(x = -75.3477, y = 39.9168),
                coords = c("x", "y"),
                crs = 4326)
aniloc = st_as_sf(data.frame(x = -75.3277, y = 39.9168),
                coords = c("x", "y"),
                crs = 4326)
ageDistloc01 = st_as_sf(data.frame(x = -75.3077, y = 39.9168),
                coords = c("x", "y"),
                crs = 4326)
#### Caution: plot_ly DID NOT show in the popup of leaflet. 
fig <- plot_ly(ZipCrime, x = ~lng, y = ~lat, color = ~zip, size = ~total.crime, #colors = colors,
               type = 'scatter', 
               mode = 'markers', 
               #sizes = c(min(log(ZipCrime$total.crime)), max(log(ZipCrime$total.crime))),
               marker = list(symbol = 'circle', 
                           sizemode = 'diameter',
                               line = list(width = 2, color = '#FFFFFF')),
               text = ~paste('Zip Code:', zip, 
                      '<br>Total Crime:', total.crime, 
                      '<br>Fatal Crime:', fatal,
                      '<br>Nonfatal Crime:', nonfatal)) %>% hide_colorbar()
## Images to be plot on the map
img = "https://pengdsci.github.io/STA553VIZ/w08/PhillyCityHall.jpg"
trend = "https://pengdsci.github.io/STA553VIZ/w08/CrimeTrend.jpg"
ageDist = "https://pengdsci.github.io/STA553VIZ/w08/ageDist.jpg"
trendIcon = "https://pengdsci.github.io/STA553VIZ/w08/trend-icon.jpg"
crimeByYear= "https://pengdsci.github.io/STA553VIZ/w08/PhillyCrimeByYear.gif"
## HTML style for the map title
tag.map.title <- tags$style(HTML("
               .leaflet-control.map-title {
                   transform: translate(50%,50%);
                   position: fixed !important;
                   left: 5%;
                   text-align: center;
                   padding-left: 10px;
                   padding-right: 10px;
                   background: transparent;
                   font-weight: bold;
                   font-size: 18px;}
                 "))

rr <- tags$div(
   HTML('<img border="0" alt="ImageTitle" src="https://pengdsci.github.io/STA553VIZ/w08/leafTitle.png" width="200" height="45">')
 ) 

######################################
######################################
## Data analysis
## 1. age distribution by age
fatal.cols <- c("#F76D5E", "#72D8FF")
# Basic density plot in ggplot2
fatalAge = ggplot(phillyCrime, aes(x = age, fill =fatal )) +
           geom_density(alpha = 0.7)  +
           scale_fill_manual(values = fatal.cols)
##
tble = as.matrix(table(phillyCrime$race, phillyCrime$fatal))
dat.tbl = data.frame(race=rownames(tble), fatal = as.vector(tble[, 1]), non.fatal = as.vector(tble[, 2]))
popups = paste(str_pad("Race", 20, "right"),  str_pad("Fatal", 20, "right"), str_pad("Nonfatal", 20, "right"), 
"<br> Asian"  ,strrep(' ', 10),     25,strrep(' ', 6), strrep(' ', 20),  "86",
"<br> Black (Non-Hispanic)", strrep(' ', 4),  "2628",strrep(' ', 5),   "9924",
"<br> Hispanic (Black or White)", strrep(' ', 2),   "393",strrep(' ', 5),  "1482",
"<br> Other/Unknown", strrep(' ', 9), "2",strrep(' ', 5),     "128",
"<br> White (Non-Hispanic)", strrep(' ', 5),  "169", strrep(' ', 5), "683") 
#poptble <- tble %>% htmlTable
##################################################
## Crime distribution by zip code - bubble plot
pal0 <- colorNumeric(palette = viridis(256, option = "B"), domain = range(ZipCrime$total.crime))
zipfig <- leaflet() %>%
  setView(lng=-75.1527, lat=39.9707, zoom = 11) %>%
  addProviderTiles(providers$Esri.WorldGrayCanvas) %>% 
  ## plot the neighborhood boundary of Philly
  addPolygons(data = phillyNeighbor,
              color = 'skyblue',
              weight = 1)  %>%
  ##
  addCircleMarkers(data = ZipCrime,
                   radius = ~((total.crime)^(1/3)),
                   color = ~pal0(total.crime),
                   stroke = FALSE, 
                   fillOpacity = 0.5,
                   popup = ~paste('Zip Code:', zip, 
                      '<br>Total Crime:', total.crime, 
                      '<br>Fatal Crime:', fatal,
                      '<br>Nonfatal Crime:', nonfatal)) 
##################################################
## popup interactive graphs with plotly
fl = tempfile(fileext = ".html")
saveWidget(zipfig, file = fl)
###
leaflet() %>%
  setView(lng=-75.15092, lat=40.00995, zoom = 11) %>%
  #addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  ##
  addProviderTiles(providers$CartoDB.DarkMatter, group="Dark") %>%
  addProviderTiles(providers$CartoDB.DarkMatterNoLabels, group="DarkLabel") %>%  
  addProviderTiles(providers$Esri.NatGeoWorldMap, group="Esri") %>%
  #addTiles(providers$CartoDB.PositronNoLabels) %>%
  addControl(rr, position = "topleft", className="map-title") %>%
  ## mini reference map
  addMiniMap() %>%
  ## neighborhood boundary
  addPolygons(data = phillyNeighbor,
              color = 'skyblue',
              weight = 1)  %>%
  ## plot information on the map
  addCircleMarkers(data = phillyCrime,
                   radius = ~ifelse(fatal == "Fatal", 5, 3),
                   color = ~pal(fatal),
                   stroke = FALSE, 
                   fillOpacity = 0.5,
                   popup = ~popupTable(phillyCrime),
                   clusterOptions = markerClusterOptions(maxClusterRadius = 40)) %>%
  # Adding the image of city hall
  addCircleMarkers(data = pnt, 
                   color = "blue",
                   weight = 2,
                   label = "City Hall",
                   stroke = FALSE, 
                   fillOpacity = 0.95,
                   group = "pnt") %>%
  addPopupImages(img, 
                  width = 100,
                  height = 120,
                  tooltip = FALSE,
                  group = "pnt")  %>%
  # Trend of crimes over the years
  addCircleMarkers(data = media, 
                   color = "red",
                   weight = 2,
                   label = "Trend",
                   stroke = FALSE, 
                   fillOpacity = 0.95,
                   group = "media") %>%
  addPopupImages(trend, 
                  width = 500,
                  height = 350,
                  tooltip = FALSE,
                  group = "media") %>%
 # age distribution within the two types of crimes
 addCircleMarkers(data = ageDistloc, 
                   color = "skyblue",
                   weight = 2,
                   label = "Age Distribution",
                   stroke = FALSE, 
                   fillOpacity = 0.95,
                   group = "ageDistloc") %>%
  addPopupImages(ageDist, 
                  width = 500,
                  height = 320,
                  tooltip = FALSE,
                  group = "ageDistloc") %>%
  # Crime counts by zip code: bubble plot
  addCircleMarkers(data = ziploc, 
                   color = "white",
                   weight = 2,
                   label = "ZIP Location",
                   stroke = FALSE, 
                   fillOpacity = 0.95,
                   group = "ziploc") %>%
  leafpop:::addPopupIframes(
                  source = fl,
                   width = 500,
                  height = 400,
                   group = "ziploc" ) %>%
  # Animated (cumulative) crime counts by years
  addCircleMarkers(data = aniloc, 
                   color = "gold",
                   weight = 2,
                   label = "Crimes Accumulation by year",
                   stroke = FALSE, 
                   fillOpacity = 0.95,
                   group = "aniloc") %>%
  addPopupImages(crimeByYear, 
                  width = 500,
                  height = 400,
                  tooltip = FALSE,
                  group = "aniloc") %>% 
  # Crime counts by zip code: bubble plot
  addCircleMarkers(data = ageDistloc01, 
                   color = "purple",
                   weight = 2,
                   label = "Contingency Table: Race vs Crime Type",
                   stroke = FALSE, 
                   fillOpacity = 0.95,
                   popup = ~popups) %>%
  addLayersControl(baseGroups = c('Dark', 'DarkLabel', 'Esri'),
                   overlayGroups = c("Crime Data"),
                   options = layersControlOptions(collapsed = TRUE)) %>%
  ##
  browsable()

The animated plot (gif) is based on the following code:

which_state <- "pennsylvania"
county_info <- map_data("county", region=which_state)
philly_info <- county_info[county_info$subregion == "philadelphia",]
phillyCrime$YEAR = as.integer(as.numeric(phillyCrime$year))
pal <- colorFactor(c("red", "gold"), domain = c("Fatal", "Nonfatal"))
##
base_map <- ggplot(data = philly_info, mapping = aes(x = long, y = lat, group = group)) +
 geom_polygon(color = "red", fill = "white") +
  coord_quickmap() +
  theme_void() 

map_with_data <- base_map +
  geom_point(data = phillyCrime, aes(x = lng, y = lat, group=YEAR))

map_with_data <- base_map +
  geom_point(data = phillyCrime, aes(x = lng, y = lat, color = fatal, fill = fatal, group=year), size = 2, alpha = 0.5)  +
  scale_shape_manual(values = c(21, 24)) +
  scale_color_manual(values = c("#FC4E07", "#E7B800") )

map_with_animation <- map_with_data +
  transition_time(YEAR) +
  ggtitle('Year: {frame_time}') +
   shadow_mark()

anim_save("PhillyCrimeByYear.gif", map_with_animation)
#animate(map_with_animation)

Analysis

When looking at the trend of the data set, there is a clear job beginning in 2019 - 2023. This is especially evident for nonfatal crimes. This time frame was during the COVID pandemic, which forced people to spend more time together at their homes, which could contribute to the spike in crime. The animated graph shows visually how much crime was committed during certain years, this showed the big spike beginning in 2019 and 2020. When looking at the age distribution for fatal and nonfatal crimes, both are very similar with the median age being about 25 years old. The popup zip code reference map indicates specific zip codes where the crimes took place. The yellow circle indicated areas with the most crime, followed by red. Areas marked by a grey circle, are those with the least amount of crime. Finally the last popup displays the which race committed fatal and nonfatal crimes.The largest group was Black, followed by Hispanics. This could be due to lower incomes household and financial stress.

Part 2 Presidental Election Map

The second part of this assignment takes tow data sets to create a map of the presidential election during 2020. The data set contains the year, political party, state, county, and votes. This map will help give a visual as to how states and areas of the United States tend to vote.

The next block of code will be data management to filter and merge the two data sets. The final merged dataset has 9942 observations based on 11 variables.

Election <- read.csv('countypresidential_election_2000-2020.csv')
fipsgeo <- read.csv('fips2geocode.csv')

Election2020 <- Election %>%
  filter(year=='2020')%>%
  filter(party == 'DEMOCRAT' | party == 'REPUBLICAN')%>%
  dplyr::select("state_po", "county_name","county_fips", "party", "candidatevotes")

colnames(Election2020)[colnames(Election2020) == 'county_fips'] <- 'fips'
Election2020fips <- merge(Election2020, fipsgeo, by = "fips")

Now that the data is aggregated, the following code will create the map.

 pal = colorFactor(palette=c("blue", "red"), domain = Election2020fips$party)

fig2 <- label.msg <- paste(paste("County:", Election2020fips$county), "\n")

leaflet(Election2020fips) %>% 
  addTiles()  %>% 
  setView( lat=32,lng=-80 , zoom=3)%>%
  addCircleMarkers(~lon, 
            ~lat,
            color = ~pal(Election2020fips$party),label = ~label.msg)%>%
 addLegend(position = "bottomright", 
            colors = c("blue", "red"),
            labels= c("Democrat", "Repubilcan"),
            title= "Political Parties",
            opacity = 0.4) 

When looking at the map, there are certain distinctions when it comes to political parties.The southern coast is predominantly republican, while the east coast and California are democrat. Zooming in on the map, you can see in rural counties the political party is republican and the cities are democrat. This is why the electoral college was created, so that the parties are represented across the US as a whole and not just in cities.