The goal of this work is to get acquainted with the Leaflet framework, which allows creating and customizing interactive maps using the ‘Leaflet’ JavaScript library and the ‘htmlwidgets’ package. The maps can be used directly from the R console, from ‘RStudio’, in Shiny applications and R Markdown documents.
More information can be retrieved from: https://cran.r-project.org/web/packages/leaflet/index.html
In this work we will find the city with the highest crime rate in every state of the data set and show these cities on a map of the USA, which will contain markers and circle elements whose radii depend on the value of the crime rate.
For the work we will use data from two sources:
library(dplyr)
# Source workbooks: the FBI UCR 2018 preliminary report (table 4) and a
# latitude/longitude listing of US cities.
offenceReportUrl <- "https://ucr.fbi.gov/crime-in-the-u.s/2018/preliminary-report/tables/table-4/table-4.xls"
geoPositionsUrl <- "https://www.downloadexcelfiles.com/sites/default/files/docs/lat-long-cities-usa-1507j.xlsx"

# Fetch each workbook into the working directory unless a local copy with the
# same base name already exists.
for (url in c(offenceReportUrl, geoPositionsUrl)) {
  fileName <- URLdecode(url) %>% basename()
  if (!file.exists(fileName)) {
    # download.file() returns 0 on success. With method = "curl" a failed
    # transfer is only reported through a warning and a non-zero status, so
    # the status has to be checked before claiming success.
    downloadStatus <- download.file(url, fileName, method = "curl")
    if (downloadStatus != 0) {
      # Remove whatever was written so that the next run does not mistake a
      # partial file for a finished download.
      unlink(fileName)
      stop("Download of [", fileName, "] failed with status ", downloadStatus,
           call. = FALSE)
    }
    cat("\n=====>The file [", fileName, "] was downloaded successfully")
  } else {
    cat("\n=====>The file [", fileName, "] already exists!")
  }
}
#install.packages("readxl")
library("readxl")
# Local workbook names are the last path component of each decoded URL.
offenceReportFileName <- basename(URLdecode(offenceReportUrl))
geoPositionsFileName <- basename(URLdecode(geoPositionsUrl))

# Load both workbooks; the first four rows of the offence report are skipped.
offenceReportDataFrame <- read_excel(offenceReportFileName, skip = 4)
geoPositionsDataFrame <- read_excel(geoPositionsFileName)

# Columns 3 to 5 of the offence report get short, syntactic names.
colnames(offenceReportDataFrame)[3:5] <- c("Year", "Population", "Violent_Crime")
#install.packages("dplyr")
library(dplyr)
# Keep only the 2017 rows and the columns used by the rest of the analysis.
offenceReportDataFrame <- offenceReportDataFrame %>%
  filter(Year == 2017) %>%
  select(c(State, City, Year, Population, Violent_Crime))

# Fill missing State values downwards: every NA takes the most recent non-NA
# value above it (presumably the sheet names each state only on its first
# row -- confirm against the workbook). Rows before the first named state
# receive the placeholder "foobar".
# The fill is vectorised, so it also works when the filter above returns zero
# rows; an index loop over 1:length(stateList) fails in that case.
stateList <- offenceReportDataFrame$State
hasStateName <- !is.na(stateList)
# Position of the latest non-NA entry at or before each row (0 if none yet).
lastNamedRow <- cummax(seq_along(stateList) * hasStateName)
# Shift by one so that index 0 selects the placeholder.
stateList <- c("foobar", stateList)[lastNamedRow + 1]

offenceReportDataFrame$State <- stateList
tidyOffenceReportDataFrame <- offenceReportDataFrame
# Calculate the crime rate (violent crimes per resident) for every city.
tidyOffenceReportDataFrame$Crime_Rate <-
  tidyOffenceReportDataFrame$Violent_Crime / tidyOffenceReportDataFrame$Population

# Highest crime rate observed in each state.
maxRateByState <- aggregate(Crime_Rate ~ State, tidyOffenceReportDataFrame, max)

# Keep the city (or tied cities) that reaches its own state's maximum.
# The join is keyed on State as well as Crime_Rate: keyed on the rate alone, a
# city whose rate happens to equal another state's maximum would be pulled in
# and reported under that other state.
tidyOffenceReportDataFrame <- merge(
  maxRateByState,
  tidyOffenceReportDataFrame,
  by = c("State", "Crime_Rate")
)

# Select the output columns by name so the result does not depend on the
# column order produced by merge().
tidyOffenceReportDataFrame <- tidyOffenceReportDataFrame[
  , c("State", "City", "Year", "Crime_Rate", "Population")
]

# Sort by crime rate, highest first.
tidyOffenceReportDataFrame <- tidyOffenceReportDataFrame[
  order(-tidyOffenceReportDataFrame$Crime_Rate),
]
# Some city names in the report carry a trailing reference number or a longer
# official name; map those exact spellings to the plain city name.
cityNameFixes <- c(
  "NEW ORLEANS7" = "NEW ORLEANS",
  "HONOLULU5" = "HONOLULU",
  "HOUSTON5" = "HOUSTON",
  "NASHVILLE METROPOLITAN" = "NASHVILLE"
)

cleanedCityNames <- as.character(tidyOffenceReportDataFrame$City)
for (reportedName in names(cityNameFixes)) {
  # which() skips NA comparisons, so missing city names are left untouched.
  matchingRows <- which(cleanedCityNames == reportedName)
  cleanedCityNames[matchingRows] <- unname(cityNameFixes[reportedName])
}
tidyOffenceReportDataFrame$City <- cleanedCityNames

crimeRatesDataFrame <- tidyOffenceReportDataFrame
# Upper-case the join keys of the geo table before joining on them.
for (keyColumn in c("State", "City")) {
  geoPositionsDataFrame[[keyColumn]] <- toupper(geoPositionsDataFrame[[keyColumn]])
}

# Join crime rates with geo positions on (State, City); cities without a
# match in the geo table are dropped by merge()'s default inner join.
crimeRatesDataFrame <- merge(
  x = crimeRatesDataFrame,
  y = geoPositionsDataFrame,
  by = c("State", "City")
)

# Split the Location text on single spaces into two pieces per row and store
# them as the Latitude and Longitude columns.
locationPieces <- strsplit(
  as.character(crimeRatesDataFrame$Location),
  " ",
  fixed = TRUE
)
splitGeo <- setNames(
  data.frame(do.call(rbind, locationPieces)),
  c("Latitude", "Longitude")
)
# extract geo coordinates
#install.packages("stringr")
library("stringr")
# Pull the first run of digits (with optional decimal part) out of each piece.
coordinatePattern <- "\\d+\\.*\\d*"
latitudeDigits <- str_extract(splitGeo$Latitude, coordinatePattern)
longitudeDigits <- str_extract(splitGeo$Longitude, coordinatePattern)

# Latitudes are kept positive and longitudes are negated.
# NOTE(review): this presumably relies on every city lying in the northern
# and western hemispheres -- confirm against the geo workbook.
splitGeo$Latitude <- as.numeric(latitudeDigits)
splitGeo$Longitude <- 0 - as.numeric(longitudeDigits)

# Append the numeric coordinates to the merged crime-rate table.
crimeRatesDataFrame <- cbind(crimeRatesDataFrame, splitGeo)
geoCrimeRatesDataFrame <- crimeRatesDataFrame
#install.packages('leaflet')
library(leaflet)
# Popup HTML for each city: name, state, population and the crime rate scaled
# to incidents per 100k residents.
popupText <- paste(
  geoCrimeRatesDataFrame$City,
  "<br>State: ", formatC(geoCrimeRatesDataFrame$State),
  "<br>Population, people: ",
  formatC(geoCrimeRatesDataFrame$Population, format = "d", big.mark = ","),
  "<br>Crime rate, per 100k people: ",
  formatC(geoCrimeRatesDataFrame$Crime_Rate * 100000, format = "d", big.mark = ",")
)

# Base tiles, one marker per city, and one circle per city whose radius is
# proportional to the crime rate.
map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lat = geoCrimeRatesDataFrame$Latitude,
    lng = geoCrimeRatesDataFrame$Longitude,
    popup = popupText
  ) %>%
  addCircles(
    lat = geoCrimeRatesDataFrame$Latitude,
    lng = geoCrimeRatesDataFrame$Longitude,
    weight = 1,
    radius = geoCrimeRatesDataFrame$Crime_Rate * 2 * 10^7
  )

# Print the widget so it renders.
map