library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Read the 2018.UCR.PA.xlsx workbook into a tibble (read_excel() reads the
# first sheet unless told otherwise).
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists; consider a project-relative path.
X2018_UCR_PA <- read_excel("C:/Users/mhe29/OneDrive - Drexel University/CJS 310 R Files/2018.UCR.PA.xlsx")
# View() opens a data viewer, which is only meaningful in an interactive
# session; guarding it keeps the script runnable via Rscript or when knitting.
if (interactive()) {
  View(X2018_UCR_PA)
}
library(tidyverse)
# Build the cleaned table: replace the multi-line spreadsheet headers (they
# embed "\r\n" line breaks) with short dotted names, then add crime.rate, the
# combined violent + property crime count per 100,000 of Population.
X2018_UCR_PA_cleaned <- X2018_UCR_PA %>%
  rename(
    violent.crime = 'Violent\r\ncrime',
    murder.manslaughter = 'Murder and\r\nnonnegligent\r\nmanslaughter',
    aggravated.assault = 'Aggravated\r\nassault',
    property.crime = 'Property\r\ncrime',
    larceny.theft = 'Larceny-\r\ntheft',
    motor.theft = 'Motor\r\nvehicle\r\ntheft'
  ) %>%
  mutate(
    crime.rate = (violent.crime + property.crime) / Population * 100000
  )
# Print per-column summary statistics for the cleaned table.
summary(X2018_UCR_PA_cleaned)
## City Population violent.crime murder.manslaughter
## Length:989 Min. : 132 Min. : 0.00 Min. : 0.0000
## Class :character 1st Qu.: 2066 1st Qu.: 1.00 1st Qu.: 0.0000
## Mode :character Median : 4320 Median : 5.00 Median : 0.0000
## Mean : 10054 Mean : 34.16 Mean : 0.6977
## 3rd Qu.: 9088 3rd Qu.: 15.00 3rd Qu.: 0.0000
## Max. :1586916 Max. :14420.00 Max. :351.0000
## Rape Robbery aggravated.assault property.crime
## Min. : 0.000 Min. : 0.000 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 9.0
## Median : 0.000 Median : 0.000 Median : 4.00 Median : 40.0
## Mean : 2.971 Mean : 9.449 Mean : 21.05 Mean : 164.6
## 3rd Qu.: 1.000 3rd Qu.: 2.000 3rd Qu.: 11.00 3rd Qu.: 105.0
## Max. :1095.000 Max. :5262.000 Max. :7712.00 Max. :49145.0
## Burglary larceny.theft motor.theft Arson
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.000
## 1st Qu.: 1.00 1st Qu.: 7.0 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 5.00 Median : 32.0 Median : 1.00 Median : 0.000
## Mean : 21.42 Mean : 131.3 Mean : 11.84 Mean : 1.147
## 3rd Qu.: 12.00 3rd Qu.: 89.0 3rd Qu.: 4.00 3rd Qu.: 0.000
## Max. :6497.00 Max. :36968.0 Max. :5680.00 Max. :430.000
## crime.rate
## Min. : 0.0
## 1st Qu.: 456.3
## Median : 904.8
## Mean : 1261.1
## 3rd Qu.: 1701.4
## Max. :17757.0
# Keep only the city name and its computed crime rate.
selected.ucr <- select(X2018_UCR_PA_cleaned, City, crime.rate)
view(selected.ucr)
# Order the cities from the highest crime rate down to the lowest.
arranged.data <- arrange(selected.ucr, desc(crime.rate))
view(arranged.data)
# Cut points separating the population size classes; the last class is
# open-ended (Inf).
breaks <- c(0, 10000, 50000, 100000, 500000, Inf)
# One label per interval between consecutive cut points, smallest first.
labels <- c("Small", "Medium", "Large", "Very Large","Metropolitan")
# Add 'population.category': each city's Population binned into the classes
# above. include.lowest = TRUE makes the first interval closed on the left, so
# a value equal to the lowest break still falls in "Small".
X2018_UCR_PA_cleaned <- mutate(
  X2018_UCR_PA_cleaned,
  population.category = cut(
    Population,
    breaks = breaks,
    labels = labels,
    include.lowest = TRUE
  )
)
# Count how many cities fall in each category.
summary(X2018_UCR_PA_cleaned[["population.category"]])
## Small Medium Large Very Large Metropolitan
## 763 209 14 2 1
# Summarise crime by population category.
#   avg.crime.rate: mean of the per-city crime rates (per 100,000 residents).
#                   The argument must be spelled na.rm; a misspelled name is
#                   silently swallowed by mean()'s `...` and NAs are kept.
#   total.crimes:   total number of reported violent + property crimes. This
#                   sums the raw counts, because adding up per-100,000 rates
#                   across cities does not yield a crime count.
crime.table <- X2018_UCR_PA_cleaned %>%
  group_by(population.category) %>%
  summarize(
    avg.crime.rate = mean(crime.rate, na.rm = TRUE),
    total.crimes = sum(violent.crime + property.crime, na.rm = TRUE)
  )
# Print the summary table.
crime.table
# Histogram of city crime rates, with bars shaded by the number of cities in
# each bin (after_stat(count)).
# bins is set explicitly to 30 (the value stat_bin() would otherwise fall back
# to) so the plot is unchanged but the bin choice is deliberate and the
# "Pick better value" message is no longer emitted.
# NOTE(review): despite its name, crime.rate.table holds a ggplot object.
crime.rate.table <- X2018_UCR_PA_cleaned %>%
  ggplot(aes(x = crime.rate, fill = after_stat(count))) +
  geom_histogram(bins = 30) +
  labs(
    x = "crime rates",
    y = "frequency",
    title = "Distribution of Crime Rates"
  ) +
  theme_minimal()
# Render the plot.
crime.rate.table
