library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Read the 2018.UCR.PA.xlsx workbook into a tibble, then open it in the
# interactive data viewer for a quick visual check.
X2018_UCR_PA <- read_excel(
  path = "C:/Users/mhe29/OneDrive - Drexel University/CJS 310 R Files/2018.UCR.PA.xlsx"
)
View(X2018_UCR_PA)
library(tidyverse)
# Clean the raw table:
#   * the spreadsheet headers contain embedded "\r\n" line breaks, so give
#     those columns short dotted names in a single rename() call;
#   * add crime.rate = (violent + property crimes) / Population * 100000.
X2018_UCR_PA_cleaned <- X2018_UCR_PA %>%
  rename(
    violent.crime       = "Violent\r\ncrime",
    murder.manslaughter = "Murder and\r\nnonnegligent\r\nmanslaughter",
    aggravated.assault  = "Aggravated\r\nassault",
    property.crime      = "Property\r\ncrime",
    larceny.theft       = "Larceny-\r\ntheft",
    motor.theft         = "Motor\r\nvehicle\r\ntheft"
  ) %>%
  mutate(
    crime.rate = ((violent.crime + property.crime) / Population) * 100000
  )

# Column-by-column summary statistics of the cleaned table.
summary(X2018_UCR_PA_cleaned)
##      City             Population      violent.crime      murder.manslaughter
##  Length:989         Min.   :    132   Min.   :    0.00   Min.   :  0.0000   
##  Class :character   1st Qu.:   2066   1st Qu.:    1.00   1st Qu.:  0.0000   
##  Mode  :character   Median :   4320   Median :    5.00   Median :  0.0000   
##                     Mean   :  10054   Mean   :   34.16   Mean   :  0.6977   
##                     3rd Qu.:   9088   3rd Qu.:   15.00   3rd Qu.:  0.0000   
##                     Max.   :1586916   Max.   :14420.00   Max.   :351.0000   
##       Rape             Robbery         aggravated.assault property.crime   
##  Min.   :   0.000   Min.   :   0.000   Min.   :   0.00    Min.   :    0.0  
##  1st Qu.:   0.000   1st Qu.:   0.000   1st Qu.:   1.00    1st Qu.:    9.0  
##  Median :   0.000   Median :   0.000   Median :   4.00    Median :   40.0  
##  Mean   :   2.971   Mean   :   9.449   Mean   :  21.05    Mean   :  164.6  
##  3rd Qu.:   1.000   3rd Qu.:   2.000   3rd Qu.:  11.00    3rd Qu.:  105.0  
##  Max.   :1095.000   Max.   :5262.000   Max.   :7712.00    Max.   :49145.0  
##     Burglary       larceny.theft      motor.theft          Arson        
##  Min.   :   0.00   Min.   :    0.0   Min.   :   0.00   Min.   :  0.000  
##  1st Qu.:   1.00   1st Qu.:    7.0   1st Qu.:   0.00   1st Qu.:  0.000  
##  Median :   5.00   Median :   32.0   Median :   1.00   Median :  0.000  
##  Mean   :  21.42   Mean   :  131.3   Mean   :  11.84   Mean   :  1.147  
##  3rd Qu.:  12.00   3rd Qu.:   89.0   3rd Qu.:   4.00   3rd Qu.:  0.000  
##  Max.   :6497.00   Max.   :36968.0   Max.   :5680.00   Max.   :430.000  
##    crime.rate     
##  Min.   :    0.0  
##  1st Qu.:  456.3  
##  Median :  904.8  
##  Mean   : 1261.1  
##  3rd Qu.: 1701.4  
##  Max.   :17757.0
# Keep only the city name and its crime rate, and inspect the result.
selected.ucr <- select(X2018_UCR_PA_cleaned, City, crime.rate)
view(selected.ucr)

# Order the cities from the highest crime rate to the lowest, and inspect.
arranged.data <- arrange(selected.ucr, desc(crime.rate))
view(arranged.data)
# Population bins: cut points and the label for each interval between them.
# With cut()'s default right-closed intervals and include.lowest = TRUE the
# bins are [0, 10000], (10000, 50000], (50000, 100000], (100000, 500000],
# and (500000, Inf].
breaks <- c(0, 10000, 50000, 100000, 500000, Inf)
labels <- c("Small", "Medium", "Large", "Very Large", "Metropolitan")

# Add the factor column 'population.category' by binning Population.
X2018_UCR_PA_cleaned <- mutate(
  X2018_UCR_PA_cleaned,
  population.category = cut(
    Population,
    breaks = breaks,
    labels = labels,
    include.lowest = TRUE
  )
)

# Number of rows falling in each population category.
summary(X2018_UCR_PA_cleaned$population.category)
##        Small       Medium        Large   Very Large Metropolitan 
##          763          209           14            2            1
# Summarise crime by population category.
#   avg.crime.rate: mean of the row-level crime.rate values in the category.
#   total.crimes:   total violent + property crime counts in the category.
crime.table <- X2018_UCR_PA_cleaned %>%
  group_by(population.category) %>%
  summarize(
    # The argument is `na.rm`; a misspelling such as `na.rn` is swallowed by
    # mean()'s `...` and silently ignored, so NAs would not be dropped.
    avg.crime.rate = mean(crime.rate, na.rm = TRUE),
    # Add up the offence counts themselves: summing per-100,000 rates across
    # rows does not yield a number of crimes.
    total.crimes = sum(violent.crime + property.crime, na.rm = TRUE)
  )
crime.table
# Histogram of crime.rate, with each bar filled according to its count.
# The bin count is set explicitly to 30 (the value stat_bin() otherwise falls
# back to) so the plot is unchanged but ggplot2 no longer emits its
# "Pick better value `binwidth`" message when the plot is drawn.
crime.rate.table <- X2018_UCR_PA_cleaned %>%
  ggplot(aes(x = crime.rate, fill = after_stat(count))) +
  geom_histogram(bins = 30) +
  labs(
    x = "crime rates",
    y = "frequency",
    title = "Distribution of Crime Rates"
  ) +
  theme_minimal()

# Printing the ggplot object renders the figure.
crime.rate.table