# Load the use-of-force records from the working directory.
data <- read.csv("PDI_Use_of_Force.csv", header = TRUE)

# Subject race against officer gender. geom_count() overlays a blue point per
# (x, y) combination, sized by the number of records; the size legend is
# titled "Density".
# Columns are referenced by bare name inside aes() so ggplot2 resolves them
# from `data` (data masking) instead of capturing external vectors via `$`.
ggplot(data = data, aes(x = factor(OFFICER_GENDER), y = SUBJECT_RACE)) +
  geom_boxplot() +
  geom_count(color = "blue") +
  ggtitle("Boxplot with Overlayed Density Plot") +
  guides(size = guide_legend("Density"))

# Incident description against district, drawn with the same layers.
ggplot(
  data = data,
  aes(y = factor(INCIDENT_DESCRIPTION), x = factor(DISTRICT))
) +
  geom_boxplot() +
  geom_count(color = "blue") +
  ggtitle("Boxplot with Overlayed Density Plot") +
  guides(size = guide_legend("Density"))

# Load the crime incident records from the working directory.
data2 <- read.csv("Crime_Incidents.csv", header = TRUE)

# Normalise the upper-case spellings of the two named districts so each
# district appears under a single label.
data2$DISTRICT[data2$DISTRICT == "CENTRAL BUSINESS"] <- "Central Business"
data2$DISTRICT[data2$DISTRICT == "OTHER"] <- "Other"

# One ZIP code per district label.
# NOTE(review): presumably a representative ZIP chosen for each police
# district -- confirm the mapping against the data source.
district_zip <- c(
  "Central Business" = 45202,
  "1" = 45214,
  "2" = 45208,
  "3" = 45238,
  "4" = 45229,
  "5" = 45223,
  "Other" = 45219
)

# Look the ZIP up by label. as.character() guards against DISTRICT being a
# factor, which would otherwise index the lookup by integer code. Districts
# that are not in the table get NA.
data2$DISTRICT_ZIP <- unname(district_zip[as.character(data2$DISTRICT)])

# Count offences per (zip, district) pair and print the resulting table.
crime_ag_zip <- aggregate(
  data.frame(count = data2$OFFENSE),
  list(zip = data2$DISTRICT_ZIP, district = data2$DISTRICT),
  FUN = length
)
crime_ag_zip
##     zip         district count
## 1 45214                1 29598
## 2 45208                2 35773
## 3 45238                3 75981
## 4 45229                4 50931
## 5 45223                5 45480
## 6 45202 Central Business  6095
## 7 45219            Other    78
# Load the packaged `zip_codes` dataset, then narrow it in two steps:
# first to rows whose city is Cincinnati, then to ZIPs that appear in the
# aggregated crime table.
data(zip_codes)

is_cincinnati <- zip_codes$city == "Cincinnati"
zips <- zip_codes[is_cincinnati, ]

has_crime_count <- zips$zip %in% crime_ag_zip$zip
zips <- zips[has_crime_count, ]
# One-off download of the 2017 ZCTA shapefile (left disabled; the shapefile
# is expected to already be unpacked in the working directory).
#url <- "http://www2.census.gov/geo/tiger/TIGER2017/ZCTA510/tl_2017_us_zcta510.zip"
downloaddir <- getwd()
#destname<-"tl_2017_us_zcta510.zip"
#download.file(url, destname)
#unzip(destname, exdir=downloaddir, junkpaths=TRUE)

# Layer names of the shapefiles present in the directory. The pattern is
# anchored and the dot escaped so that only real "*.shp" files match (not
# sidecars such as "*.shp.xml"), and only the trailing extension is removed.
filename <- list.files(downloaddir, pattern = "\\.shp$", full.names = FALSE)
filename <- sub("\\.shp$", "", filename)

# Read the ZCTA layer as a spatial polygons object.
dat <- readOGR(downloaddir, "tl_2017_us_zcta510")
## OGR data source with driver: ESRI Shapefile 
## Source: "E:/OneDrive/STAT 470/Police Data Case Study", layer: "tl_2017_us_zcta510"
## with 33144 features
## It has 9 fields
## Integer64 fields read as strings:  ALAND10 AWATER10
# Keep only the polygons whose GEOID10 matches a ZIP in the aggregated
# crime table.
in_crime_table <- dat$GEOID10 %in% crime_ag_zip$zip
subdat <- dat[in_crime_table, ]

#subdat<-spTransform(subdat, CRS("+init=epsg:4326"))

# Trim the attribute table to the three columns used from here on and
# rebuild the spatial data frame around the reduced table.
kept_columns <- c("GEOID10", "ALAND10", "AWATER10")
subdat_data <- subdat@data[, kept_columns]

subdat <- SpatialPolygonsDataFrame(subdat, data = subdat_data)
# Choropleth of offence counts per ZIP polygon.

# Colour scale: four equal-width bins from 0 to 100,000 offences.
bins <- c(0, 25000, 50000, 75000, 100000)
pal <- colorBin("YlOrRd", domain = 1:100000, bins = bins)

# leaflet applies vector-valued aesthetics (fill colour, label) to polygons in
# the order they are stored in `subdat`, which is not the row order of
# `crime_ag_zip`. Look up each polygon's row in the aggregate table by ZIP so
# that every polygon receives its own district's count and label.
poly_row <- match(as.character(subdat$GEOID10), as.character(crime_ag_zip$zip))
poly_count <- crime_ag_zip$count[poly_row]
poly_district <- as.character(crime_ag_zip$district[poly_row])

# Hover labels: district name in bold, offence count underneath.
labels <- sprintf(
  "<strong>%s</strong><br/>%g offences",
  poly_district, poly_count
) %>% lapply(htmltools::HTML)

# Alternative labels showing only the polygon's ZIP code.
labels1 <- sprintf(
  "<strong>%s</strong>",
  subdat$GEOID10
) %>% lapply(htmltools::HTML)

# Outline style applied to the polygon under the cursor.
highlights <- highlightOptions(
  weight = 5,
  color = "#666",
  dashArray = "",
  fillOpacity = 0.7,
  bringToFront = TRUE
)

# Appearance of the hover label.
labeloptions <- labelOptions(
  style = list("font-weight" = "normal", padding = "3px 8px"),
  textsize = "15px",
  direction = "auto"
)

# Assemble the map: base tiles, shaded polygons, and a legend.
cin <- leaflet(subdat) %>%
  setView(lat = 39.1031, lng = -84.5120, zoom = 12) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    data = subdat,
    fillColor = pal(poly_count),
    weight = 2,
    opacity = 1,
    color = "white",
    dashArray = "3",
    fillOpacity = 0.5,
    # Full argument name; `highlight =` only worked via partial matching.
    highlightOptions = highlights,
    label = labels,
    labelOptions = labeloptions
  ) %>%
  addLegend(
    pal = pal, values = ~1:100000, opacity = 0.7,
    title = "No. of Offences",
    position = "bottomright"
  )

cin
# Offence count per district. Columns are referenced by bare name inside
# aes() so ggplot2 resolves them from `crime_ag_zip` rather than via `$`.
# NOTE(review): crime_ag_zip appears to hold one aggregated row per district,
# so each box collapses to a single line -- a bar chart may be what was
# intended; confirm.
ggplot(data = crime_ag_zip, aes(x = district, y = count)) +
  geom_boxplot()