R Markdown

# Switch to the assignment folder only when it exists on this machine.
# The absolute path belongs to one user's computer; without the guard,
# setwd() stops the whole script anywhere else. When the folder is missing,
# the current working directory is kept and later relative paths resolve
# against it.
project_dir <- "/Users/ingridellis/Desktop/CJS 310/Week 3"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the workbook "2018.UCR.PA.xlsx" from the working directory and
# preview its first six rows.
X2018_UCR_PA <- read_excel(path = "2018.UCR.PA.xlsx")
head(X2018_UCR_PA, n = 6)
## # A tibble: 6 × 12
##   City        Population `Violent\r\ncrime` Murder and\r\nnonneg…¹  Rape Robbery
##   <chr>            <dbl>              <dbl>                  <dbl> <dbl>   <dbl>
## 1 Abington T…      55631                 44                      1     6      12
## 2 Adamstown         1857                  3                      0     0       0
## 3 Adams Town…      14105                  3                      0     0       0
## 4 Adams Town…       5581                  0                      0     0       0
## 5 Akron             4015                  7                      0     1       0
## 6 Albion            1466                  0                      0     0       0
## # ℹ abbreviated name: ¹​`Murder and\r\nnonnegligent\r\nmanslaughter`
## # ℹ 6 more variables: `Aggravated\r\nassault` <dbl>, `Property\r\ncrime` <dbl>,
## #   Burglary <dbl>, `Larceny-\r\ntheft` <dbl>,
## #   `Motor\r\nvehicle\r\ntheft` <dbl>, Arson <dbl>
# Keep the imported table under a shorter name; later steps build `df`
# from this copy. Then preview the first six rows.
df_orig <- X2018_UCR_PA
head(df_orig, n = 6)
## # A tibble: 6 × 12
##   City        Population `Violent\r\ncrime` Murder and\r\nnonneg…¹  Rape Robbery
##   <chr>            <dbl>              <dbl>                  <dbl> <dbl>   <dbl>
## 1 Abington T…      55631                 44                      1     6      12
## 2 Adamstown         1857                  3                      0     0       0
## 3 Adams Town…      14105                  3                      0     0       0
## 4 Adams Town…       5581                  0                      0     0       0
## 5 Akron             4015                  7                      0     1       0
## 6 Albion            1466                  0                      0     0       0
## # ℹ abbreviated name: ¹​`Murder and\r\nnonnegligent\r\nmanslaughter`
## # ℹ 6 more variables: `Aggravated\r\nassault` <dbl>, `Property\r\ncrime` <dbl>,
## #   Burglary <dbl>, `Larceny-\r\ntheft` <dbl>,
## #   `Motor\r\nvehicle\r\ntheft` <dbl>, Arson <dbl>
# Show the contents of the current working directory.
list.files(path = ".")
## [1] "2018.UCR.PA.xlsx"                        
## [2] "502b78b2-7b33-43e4-bcde-63ffd1943dc6.png"
## [3] "Week 3 Assignment.md"                    
## [4] "Week 3 Assignment.Rmd"                   
## [5] "Week-3-Assignment.Rmd"                   
## [6] "Week3Classwork.R"
# Per-column summary of the data as imported (original column names).
df_orig %>%
  summary()
##      City             Population       Violent\r\ncrime 
##  Length:989         Min.   :    132   Min.   :    0.00  
##  Class :character   1st Qu.:   2066   1st Qu.:    1.00  
##  Mode  :character   Median :   4320   Median :    5.00  
##                     Mean   :  10054   Mean   :   34.16  
##                     3rd Qu.:   9088   3rd Qu.:   15.00  
##                     Max.   :1586916   Max.   :14420.00  
##  Murder and\r\nnonnegligent\r\nmanslaughter      Rape         
##  Min.   :  0.0000                           Min.   :   0.000  
##  1st Qu.:  0.0000                           1st Qu.:   0.000  
##  Median :  0.0000                           Median :   0.000  
##  Mean   :  0.6977                           Mean   :   2.971  
##  3rd Qu.:  0.0000                           3rd Qu.:   1.000  
##  Max.   :351.0000                           Max.   :1095.000  
##     Robbery         Aggravated\r\nassault Property\r\ncrime    Burglary      
##  Min.   :   0.000   Min.   :   0.00       Min.   :    0.0   Min.   :   0.00  
##  1st Qu.:   0.000   1st Qu.:   1.00       1st Qu.:    9.0   1st Qu.:   1.00  
##  Median :   0.000   Median :   4.00       Median :   40.0   Median :   5.00  
##  Mean   :   9.449   Mean   :  21.05       Mean   :  164.6   Mean   :  21.42  
##  3rd Qu.:   2.000   3rd Qu.:  11.00       3rd Qu.:  105.0   3rd Qu.:  12.00  
##  Max.   :5262.000   Max.   :7712.00       Max.   :49145.0   Max.   :6497.00  
##  Larceny-\r\ntheft Motor\r\nvehicle\r\ntheft     Arson        
##  Min.   :    0.0   Min.   :   0.00           Min.   :  0.000  
##  1st Qu.:    7.0   1st Qu.:   0.00           1st Qu.:  0.000  
##  Median :   32.0   Median :   1.00           Median :  0.000  
##  Mean   :  131.3   Mean   :  11.84           Mean   :  1.147  
##  3rd Qu.:   89.0   3rd Qu.:   4.00           3rd Qu.:  0.000  
##  Max.   :36968.0   Max.   :5680.00           Max.   :430.000
### Renaming the variables
# Six imported headers contain embedded "\r\n" line breaks. Give them short
# dotted names in a single rename() call; all other columns keep their names
# and every column keeps its position.
df <- rename(
  df_orig,
  violent.crime = "Violent\r\ncrime",
  murder.manslaughter = "Murder and\r\nnonnegligent\r\nmanslaughter",
  aggravated.assault = "Aggravated\r\nassault",
  property.crime = "Property\r\ncrime",
  larceny.theft = "Larceny-\r\ntheft",
  motor.theft = "Motor\r\nvehicle\r\ntheft"
)
df %>%
  summary()
##      City             Population      violent.crime      murder.manslaughter
##  Length:989         Min.   :    132   Min.   :    0.00   Min.   :  0.0000   
##  Class :character   1st Qu.:   2066   1st Qu.:    1.00   1st Qu.:  0.0000   
##  Mode  :character   Median :   4320   Median :    5.00   Median :  0.0000   
##                     Mean   :  10054   Mean   :   34.16   Mean   :  0.6977   
##                     3rd Qu.:   9088   3rd Qu.:   15.00   3rd Qu.:  0.0000   
##                     Max.   :1586916   Max.   :14420.00   Max.   :351.0000   
##       Rape             Robbery         aggravated.assault property.crime   
##  Min.   :   0.000   Min.   :   0.000   Min.   :   0.00    Min.   :    0.0  
##  1st Qu.:   0.000   1st Qu.:   0.000   1st Qu.:   1.00    1st Qu.:    9.0  
##  Median :   0.000   Median :   0.000   Median :   4.00    Median :   40.0  
##  Mean   :   2.971   Mean   :   9.449   Mean   :  21.05    Mean   :  164.6  
##  3rd Qu.:   1.000   3rd Qu.:   2.000   3rd Qu.:  11.00    3rd Qu.:  105.0  
##  Max.   :1095.000   Max.   :5262.000   Max.   :7712.00    Max.   :49145.0  
##     Burglary       larceny.theft      motor.theft          Arson        
##  Min.   :   0.00   Min.   :    0.0   Min.   :   0.00   Min.   :  0.000  
##  1st Qu.:   1.00   1st Qu.:    7.0   1st Qu.:   0.00   1st Qu.:  0.000  
##  Median :   5.00   Median :   32.0   Median :   1.00   Median :  0.000  
##  Mean   :  21.42   Mean   :  131.3   Mean   :  11.84   Mean   :  1.147  
##  3rd Qu.:  12.00   3rd Qu.:   89.0   3rd Qu.:   4.00   3rd Qu.:  0.000  
##  Max.   :6497.00   Max.   :36968.0   Max.   :5680.00   Max.   :430.000
### Adding a crime rate variable: (violent.crime + property.crime) divided by
### the population of each city, multiplied by 100,000
df <- mutate(
  df,
  crime.rate = (violent.crime + property.crime) / Population * 100000
)
df %>%
  summary()
##      City             Population      violent.crime      murder.manslaughter
##  Length:989         Min.   :    132   Min.   :    0.00   Min.   :  0.0000   
##  Class :character   1st Qu.:   2066   1st Qu.:    1.00   1st Qu.:  0.0000   
##  Mode  :character   Median :   4320   Median :    5.00   Median :  0.0000   
##                     Mean   :  10054   Mean   :   34.16   Mean   :  0.6977   
##                     3rd Qu.:   9088   3rd Qu.:   15.00   3rd Qu.:  0.0000   
##                     Max.   :1586916   Max.   :14420.00   Max.   :351.0000   
##       Rape             Robbery         aggravated.assault property.crime   
##  Min.   :   0.000   Min.   :   0.000   Min.   :   0.00    Min.   :    0.0  
##  1st Qu.:   0.000   1st Qu.:   0.000   1st Qu.:   1.00    1st Qu.:    9.0  
##  Median :   0.000   Median :   0.000   Median :   4.00    Median :   40.0  
##  Mean   :   2.971   Mean   :   9.449   Mean   :  21.05    Mean   :  164.6  
##  3rd Qu.:   1.000   3rd Qu.:   2.000   3rd Qu.:  11.00    3rd Qu.:  105.0  
##  Max.   :1095.000   Max.   :5262.000   Max.   :7712.00    Max.   :49145.0  
##     Burglary       larceny.theft      motor.theft          Arson        
##  Min.   :   0.00   Min.   :    0.0   Min.   :   0.00   Min.   :  0.000  
##  1st Qu.:   1.00   1st Qu.:    7.0   1st Qu.:   0.00   1st Qu.:  0.000  
##  Median :   5.00   Median :   32.0   Median :   1.00   Median :  0.000  
##  Mean   :  21.42   Mean   :  131.3   Mean   :  11.84   Mean   :  1.147  
##  3rd Qu.:  12.00   3rd Qu.:   89.0   3rd Qu.:   4.00   3rd Qu.:  0.000  
##  Max.   :6497.00   Max.   :36968.0   Max.   :5680.00   Max.   :430.000  
##    crime.rate     
##  Min.   :    0.0  
##  1st Qu.:  456.3  
##  Median :  904.8  
##  Mean   : 1261.1  
##  3rd Qu.: 1701.4  
##  Max.   :17757.0
### Crime rate for each city
# Keep only the city name and its computed rate, then preview six rows.
selected.ucr <- select(df, City, crime.rate)
head(selected.ucr, n = 6)
## # A tibble: 6 × 2
##   City                                 crime.rate
##   <chr>                                     <dbl>
## 1 Abington Township, Montgomery County      1785.
## 2 Adamstown                                  915.
## 3 Adams Township, Butler County              347.
## 4 Adams Township, Cambria County             197.
## 5 Akron                                     1021.
## 6 Albion                                     887.
### sorted so the cities with the highest crime rate are at the top
# arrange() only reorders rows (descending crime.rate); no rows are removed.
ordered.data <- arrange(selected.ucr, desc(crime.rate))
head(ordered.data, n = 6)
## # A tibble: 6 × 2
##   City                                  crime.rate
##   <chr>                                      <dbl>
## 1 Wilkes-Barre Township                     17757.
## 2 Frazer Township                           14425.
## 3 Eddystone                                 10904.
## 4 Homestead                                  9962.
## 5 Southwest Regional, Washington County      6818.
## 6 Muncy Township                             6679.
### categorizing town by population

# Cut points for the five population bands (upper edge of each band).
breaks <- c(0, 1e4, 5e4, 1e5, 5e5, Inf)

# One label per band, in the same order as the intervals between `breaks`.
labels <- c("Small", "Medium", "Large", "Very Large", "Metropolitan")

# Add the factor column `population.category` by binning Population with
# cut(); include.lowest = TRUE keeps a value equal to the first break (0)
# inside the first band. Then count the cities in each band.
df <- mutate(
  df,
  population.category = cut(
    Population,
    breaks = breaks,
    labels = labels,
    include.lowest = TRUE
  )
)
summary(df[["population.category"]])
##        Small       Medium        Large   Very Large Metropolitan 
##          763          209           14            2            1
### average crime rate and total number of crimes for each population category
# avg.crime.rate: mean of the per-city crime.rate values in each category.
# total.crimes: sum of the offense counts (violent.crime + property.crime),
#   i.e. the same numerator that crime.rate is built from. Summing crime.rate
#   itself would add up per-100,000 rates, which is not a count of crimes.
# NOTE: output rendered before this fix shows summed rates in total.crimes
#   and must be regenerated.
crime.table <- df %>%
  group_by(population.category) %>%
  summarize(
    avg.crime.rate = mean(crime.rate, na.rm = TRUE),
    total.crimes = sum(violent.crime + property.crime, na.rm = TRUE)
  )
crime.table
## # A tibble: 5 × 3
##   population.category avg.crime.rate total.crimes
##   <fct>                        <dbl>        <dbl>
## 1 Small                        1184.      903674.
## 2 Medium                       1466.      306468.
## 3 Large                        1917.       26833.
## 4 Very Large                   3125.        6250.
## 5 Metropolitan                 4006.        4006.
### create a graph to see the distribution of crime rates across Pennsylvania cities
# Histogram of crime.rate, with bars shaded by their own count.
# - after_stat(count) replaces the `..count..` notation, which ggplot2
#   deprecated in 3.4.0.
# - bins = 30 states the bin count explicitly; it is the value stat_bin()
#   was already falling back to, so the binning is unchanged and the
#   "Pick better value" message is no longer emitted.
crime.rate.table <- df %>%
  ggplot(aes(x = crime.rate, fill = after_stat(count))) +
  geom_histogram(bins = 30) +
  labs(x = "Crime rates", y = "Frequency", title = "Distribution of Crime Rates") +
  theme_minimal()
crime.rate.table
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.