data <- read.csv("PDI_Use_of_Force.csv", header = TRUE)
ggplot(data=data, aes(x=factor(data$OFFICER_GENDER), y=data$SUBJECT_RACE)) + geom_boxplot() + geom_count(color="blue") + ggtitle("Boxplot with Overlayed Density Plot") + guides(size = guide_legend("Density"))
ggplot(data=data, aes(y=factor(data$INCIDENT_DESCRIPTION), x=factor(data$DISTRICT))) + geom_boxplot() + geom_count(color="blue") + ggtitle("Boxplot with Overlayed Density Plot") + guides(size = guide_legend("Density"))
data2 <- read.csv("Crime_Incidents.csv", header = TRUE)
data2$DISTRICT[data2$DISTRICT == "CENTRAL BUSINESS"] = "Central Business"
data2$DISTRICT[data2$DISTRICT == "OTHER"] = "Other"
data2$DISTRICT_ZIP[data2$DISTRICT == "Central Business"] = 45202
data2$DISTRICT_ZIP[data2$DISTRICT == "1"] = 45214
data2$DISTRICT_ZIP[data2$DISTRICT == "2"] = 45208
data2$DISTRICT_ZIP[data2$DISTRICT == "3"] = 45238
data2$DISTRICT_ZIP[data2$DISTRICT == "4"] = 45229
data2$DISTRICT_ZIP[data2$DISTRICT == "5"] = 45223
data2$DISTRICT_ZIP[data2$DISTRICT == "Other"] = 45219
crime_ag_zip <- aggregate(data.frame(count=data2$OFFENSE), list(zip=data2$DISTRICT_ZIP, district=data2$DISTRICT), FUN = length)
crime_ag_zip
## zip district count
## 1 45214 1 29598
## 2 45208 2 35773
## 3 45238 3 75981
## 4 45229 4 50931
## 5 45223 5 45480
## 6 45202 Central Business 6095
## 7 45219 Other 78
data(zip_codes)
zips<-zip_codes[zip_codes$city=="Cincinnati",]
zips<-zips[zips$zip %in% crime_ag_zip$zip,]
#url <- "http://www2.census.gov/geo/tiger/TIGER2017/ZCTA510/tl_2017_us_zcta510.zip"
downloaddir<-getwd()
#destname<-"tl_2017_us_zcta510.zip"
#download.file(url, destname)
#unzip(destname, exdir=downloaddir, junkpaths=TRUE)
filename<-list.files(downloaddir, pattern=".shp", full.names=FALSE)
filename<-gsub(".shp", "", filename)
dat<-readOGR(downloaddir, "tl_2017_us_zcta510")
## OGR data source with driver: ESRI Shapefile
## Source: "E:/OneDrive/STAT 470/Police Data Case Study", layer: "tl_2017_us_zcta510"
## with 33144 features
## It has 9 fields
## Integer64 fields read as strings: ALAND10 AWATER10
subdat<-dat[dat$GEOID10 %in% crime_ag_zip$zip,]
#subdat<-spTransform(subdat, CRS("+init=epsg:4326"))
subdat_data<-subdat@data[,c("GEOID10", "ALAND10", "AWATER10")]
subdat<-SpatialPolygonsDataFrame(subdat, data=subdat_data)
bins <- c(0, 25000, 50000, 75000, 100000)
pal <- colorBin("YlOrRd", domain = 1:100000, bins = bins)
labels <- sprintf(
"<strong>%s</strong><br/>%g offences",
crime_ag_zip$district, crime_ag_zip$count
) %>% lapply(htmltools::HTML)
labels1 <- sprintf(
"<strong>%s</strong>",
subdat$GEOID10
) %>% lapply(htmltools::HTML)
highlights <- highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE)
labeloptions <- labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")
cin <- leaflet(subdat) %>% setView(lat = 39.1031, lng = -84.5120, zoom = 12) %>% addProviderTiles(providers$CartoDB.Positron) %>% addPolygons(
data=subdat,
fillColor = ~pal(crime_ag_zip$count),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.5,
highlight = highlights,
label = labels,
labelOptions = labeloptions) %>%
addLegend(pal = pal, values = ~1:100000, opacity = 0.7, title = 'No. of Offences',
position = "bottomright")
cin
ggplot(data=crime_ag_zip, aes(x=crime_ag_zip$district, y=crime_ag_zip$count)) + geom_boxplot()
data123 <- read.csv("E:/OneDrive/STAT 470/Police Data Case Study/Police_Calls_For_Service__with_Time_Dispatched___Transmit_Time_.csv", header = TRUE)
ggplot(data=data123, aes(x=data123$CREATE_TIME_TIMEONLY, y=factor(data123$INCIDENT_TYPE_ID))) + geom_count(color="blue") + ggtitle("Boxplot with Overlayed Density Plot") + guides(size = guide_legend("Density"))
#chisq.test(x=factor(data123$INCIDENT_TYPE_ID), y=data123$CREATE_TIME_TIMEONLY)
ggplot(data=data123, aes(x=data123$CREATE_TIME_TIMEONLY)) + geom_histogram(binwidth = 1, bins = 100) + labs(x="TIME OF DAY", y="Number of Offences", title="Histogram of Offences per Hour")
calls <- aggregate(data.frame(calls = data123$CREATE_TIME_INCIDENT), list(time = data123$CREATE_TIME_TIMEONLY), FUN = length)
calls$calls <- calls$calls/365
fit <- nls(calls ~ C + A*sin(2*pi*time*F), data=calls, start=list(A = 60, F = .08, C=60), trace=T)
## 190778.9 : 60.00 0.08 60.00
## 44101.26 : -12.57128238 0.08038137 120.15355782
## 44025.7 : -12.51370057 0.07886289 120.06720967
## 43729.19 : -12.86148809 0.07598434 120.47555874
## 42153.3 : -14.53329989 0.07008223 121.69285030
## 35143.25 : -20.77751387 0.06050761 124.69671861
## 17744.14 : -31.65156208 0.04740038 125.04874461
## 9748.549 : -57.43044538 0.04555843 120.84071964
## 9616.175 : -57.51449876 0.04614959 120.73075657
## 9614.363 : -57.60950038 0.04621784 120.79771319
## 9614.336 : -57.61304854 0.04622625 120.80322630
## 9614.336 : -57.61337979 0.04622731 120.80387633
## 9614.336 : -57.61341962 0.04622744 120.80395755
fitt <- fitted.values(fit)
summary(fit)
##
## Formula: calls ~ C + A * sin(2 * pi * time * F)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## A -57.613420 6.469929 -8.905 1.42e-08 ***
## F 0.046227 0.001172 39.426 < 2e-16 ***
## C 120.803958 4.427618 27.284 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.4 on 21 degrees of freedom
##
## Number of iterations to convergence: 12
## Achieved convergence tolerance: 3.112e-06
plot(x=calls$time, y=calls$calls, xlab="Hour of the Day", ylab="Calls Made", main="Plot of Average Calls per Hour with Model Overlay", sub="Model: Calls = 120.804 - 57.613*sin(2*pi*Hour*0.046)")
lines(x=calls$time,predict(fit), col="RED")
data1234 <- data123
plot(data1234$CREATE_TIME_TIMEONLY, data1234$ALARM_LEVEL)
chisq.test(y=factor(data1234$ALARM_LEVEL), x=data1234$CREATE_TIME_TIMEONLY)
## Warning in chisq.test(y = factor(data1234$ALARM_LEVEL), x =
## data1234$CREATE_TIME_TIMEONLY): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: data1234$CREATE_TIME_TIMEONLY and factor(data1234$ALARM_LEVEL)
## X-squared = 17441, df = 805, p-value < 2.2e-16
data <- read.csv("PDI_Use_of_Force.csv", header = TRUE)
ggplot(data=data, aes(x=factor(data$OFFICER_GENDER), y=data$SUBJECT_RACE)) + geom_boxplot() + geom_count(color="blue") + ggtitle("Boxplot with Overlayed Density Plot") + guides(size = guide_legend("Density"))
ggplot(data=data, aes(y=factor(data$INCIDENT_DESCRIPTION), x=factor(data$DISTRICT))) + geom_boxplot() + geom_count(color="blue") + ggtitle("Boxplot with Overlayed Density Plot") + guides(size = guide_legend("Density"))
data2 <- read.csv("Crime_Incidents.csv", header = TRUE)
data2$DISTRICT[data2$DISTRICT == "CENTRAL BUSINESS"] = "Central Business"
data2$DISTRICT[data2$DISTRICT == "OTHER"] = "Other"
data2$DISTRICT_ZIP[data2$DISTRICT == "Central Business"] = 45202
data2$DISTRICT_ZIP[data2$DISTRICT == "1"] = 45214
data2$DISTRICT_ZIP[data2$DISTRICT == "2"] = 45208
data2$DISTRICT_ZIP[data2$DISTRICT == "3"] = 45238
data2$DISTRICT_ZIP[data2$DISTRICT == "4"] = 45229
data2$DISTRICT_ZIP[data2$DISTRICT == "5"] = 45223
data2$DISTRICT_ZIP[data2$DISTRICT == "Other"] = 45219
crime_ag_zip <- aggregate(data.frame(count=data2$OFFENSE), list(zip=data2$DISTRICT_ZIP, district=data2$DISTRICT), FUN = length)
crime_ag_zip
data(zip_codes)
zips<-zip_codes[zip_codes$city=="Cincinnati",]
zips<-zips[zips$zip %in% crime_ag_zip$zip,]
#url <- "http://www2.census.gov/geo/tiger/TIGER2017/ZCTA510/tl_2017_us_zcta510.zip"
downloaddir<-getwd()
#destname<-"tl_2017_us_zcta510.zip"
#download.file(url, destname)
#unzip(destname, exdir=downloaddir, junkpaths=TRUE)
filename<-list.files(downloaddir, pattern=".shp", full.names=FALSE)
filename<-gsub(".shp", "", filename)
dat<-readOGR(downloaddir, "tl_2017_us_zcta510")
subdat<-dat[dat$GEOID10 %in% crime_ag_zip$zip,]
#subdat<-spTransform(subdat, CRS("+init=epsg:4326"))
subdat_data<-subdat@data[,c("GEOID10", "ALAND10", "AWATER10")]
subdat<-SpatialPolygonsDataFrame(subdat, data=subdat_data)
bins <- c(0, 25000, 50000, 75000, 100000)
pal <- colorBin("YlOrRd", domain = 1:100000, bins = bins)
labels <- sprintf(
"<strong>%s</strong><br/>%g offences",
crime_ag_zip$district, crime_ag_zip$count
) %>% lapply(htmltools::HTML)
labels1 <- sprintf(
"<strong>%s</strong>",
subdat$GEOID10
) %>% lapply(htmltools::HTML)
highlights <- highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE)
labeloptions <- labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")
cin <- leaflet(subdat) %>% setView(lat = 39.1031, lng = -84.5120, zoom = 12) %>% addProviderTiles(providers$CartoDB.Positron) %>% addPolygons(
data=subdat,
fillColor = ~pal(crime_ag_zip$count),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.5,
highlight = highlights,
label = labels,
labelOptions = labeloptions) %>%
addLegend(pal = pal, values = ~1:100000, opacity = 0.7, title = 'No. of Offences',
position = "bottomright")
cin
ggplot(data=crime_ag_zip, aes(x=crime_ag_zip$district, y=crime_ag_zip$count)) + geom_boxplot()
data123 <- read.csv("E:/OneDrive/STAT 470/Police Data Case Study/Police_Calls_For_Service__with_Time_Dispatched___Transmit_Time_.csv", header = TRUE)
ggplot(data=data123, aes(x=data123$CREATE_TIME_TIMEONLY, y=factor(data123$INCIDENT_TYPE_ID))) + geom_count(color="blue") + ggtitle("Boxplot with Overlayed Density Plot") + guides(size = guide_legend("Density"))
#chisq.test(x=factor(data123$INCIDENT_TYPE_ID), y=data123$CREATE_TIME_TIMEONLY)
ggplot(data=data123, aes(x=data123$CREATE_TIME_TIMEONLY)) + geom_histogram(binwidth = 1, bins = 100) + labs(x="TIME OF DAY", y="Number of Offences", title="Histogram of Offences per Hour")
calls <- aggregate(data.frame(calls = data123$CREATE_TIME_INCIDENT), list(time = data123$CREATE_TIME_TIMEONLY), FUN = length)
calls$calls <- calls$calls/365
fit <- nls(calls ~ C + A*sin(2*pi*time*F), data=calls, start=list(A = 60, F = .08, C=60), trace=T)
fitt <- fitted.values(fit)
summary(fit)
plot(x=calls$time, y=calls$calls, xlab="Hour of the Day", ylab="Calls Made", main="Plot of Average Calls per Hour with Model Overlay", sub="Model: Calls = 120.804 - 57.613*sin(2*pi*Hour*0.046)")
lines(x=calls$time,predict(fit), col="RED")
data1234 <- data123
plot(data1234$CREATE_TIME_TIMEONLY, data1234$ALARM_LEVEL)
chisq.test(y=factor(data1234$ALARM_LEVEL), x=data1234$CREATE_TIME_TIMEONLY)